Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-05 01:50:27 +08:00
feat: Add API indexer and enhance embedding management
- Add new API indexer script for document processing
- Update embedding manager with improved functionality
- Remove old cache files and update dependencies
- Modify workflow execute documentation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
.claude/python_script/api_indexer.py (normal file, 141 lines added)
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
API Documentation Indexer

Parses Markdown documentation to create a searchable index of classes and methods.
"""

import os
import re
import json
import logging
from pathlib import Path
from typing import Dict, Any

from core.file_indexer import FileIndexer


class ApiIndexer:
    def __init__(self, config: Dict, root_path: str = "."):
        self.config = config
        self.root_path = Path(root_path).resolve()
        self.file_indexer = FileIndexer(config, root_path)
        self.api_index_file = self.file_indexer.cache_dir / "api_index.json"
        self.logger = logging.getLogger(__name__)

    def build_index(self):
        """Builds the API index from Markdown files."""
        self.logger.info("Building API index...")
        file_index = self.file_indexer.load_index()
        if not file_index:
            self.logger.info("File index not found, building it first.")
            self.file_indexer.build_index()
            file_index = self.file_indexer.load_index()

        api_index = {}
        for file_info in file_index.values():
            if file_info.extension == ".md":
                self.logger.debug(f"Parsing {file_info.path}")
                try:
                    with open(file_info.path, "r", encoding="utf-8") as f:
                        content = f.read()
                    self._parse_markdown(content, file_info.relative_path, api_index)
                except Exception as e:
                    self.logger.error(f"Error parsing {file_info.path}: {e}")

        self._save_index(api_index)
        self.logger.info(f"API index built with {len(api_index)} classes.")

    def _parse_markdown(self, content: str, file_path: str, api_index: Dict):
        """Parses a single Markdown file for class and method info."""
        class_name_match = re.search(r"^#\s+([A-Za-z0-9_]+)", content)
        if not class_name_match:
            return

        class_name = class_name_match.group(1)
        api_index[class_name] = {
            "file_path": file_path,
            "description": "",
            "methods": {}
        }

        # Simple description extraction
        desc_match = re.search(r"\*\*Description:\*\*\s*(.+)", content)
        if desc_match:
            api_index[class_name]["description"] = desc_match.group(1).strip()

        # Method extraction
        method_sections = re.split(r"###\s+", content)[1:]
        for i, section in enumerate(method_sections):
            method_signature_match = re.search(r"`(.+?)`", section)
            if not method_signature_match:
                continue

            signature = method_signature_match.group(1)
            method_name_match = re.search(r"([A-Za-z0-9_]+)\(", signature)
            if not method_name_match:
                continue

            method_name = method_name_match.group(1)

            method_description = ""
            method_desc_match = re.search(r"\*\*Description:\*\*\s*(.+)", section)
            if method_desc_match:
                method_description = method_desc_match.group(1).strip()

            # A simple way to get a line number approximation
            line_number = content.count("\n", 0, content.find(f"### `{signature}`")) + 1

            api_index[class_name]["methods"][method_name] = {
                "signature": signature,
                "description": method_description,
                "line_number": line_number
            }

    def _save_index(self, api_index: Dict):
        """Saves the API index to a file."""
        try:
            with open(self.api_index_file, "w", encoding="utf-8") as f:
                json.dump(api_index, f, indent=2)
        except IOError as e:
            self.logger.error(f"Could not save API index: {e}")

    def search(self, class_name: str, method_name: str = None) -> Any:
        """Searches the API index for a class or method."""
        if not self.api_index_file.exists():
            self.build_index()

        with open(self.api_index_file, "r", encoding="utf-8") as f:
            api_index = json.load(f)

        if class_name not in api_index:
            return None

        if method_name:
            return api_index[class_name]["methods"].get(method_name)
        else:
            return api_index[class_name]


if __name__ == "__main__":
    from core.config import get_config
    import argparse

    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(description="API Documentation Indexer.")
    parser.add_argument("--build", action="store_true", help="Build the API index.")
    parser.add_argument("--search_class", help="Search for a class.")
    parser.add_argument("--search_method", help="Search for a method within a class (requires --search_class).")

    args = parser.parse_args()

    config = get_config()
    api_indexer = ApiIndexer(config.to_dict())

    if args.build:
        api_indexer.build_index()

    if args.search_class:
        result = api_indexer.search(args.search_class, args.search_method)
        if result:
            print(json.dumps(result, indent=2))
        else:
            print("Not found.")
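For orientation: the regexes above assume documentation pages shaped as a top-level "# ClassName" heading, an optional "**Description:** ..." line, and one "### `method(...)`" heading per method. Under that assumption, a page documenting a hypothetical FileIndexer.build_index() would produce an api_index.json entry roughly like the following sketch (the file path, description text, and line number are illustrative, not taken from the repository):

{
  "FileIndexer": {
    "file_path": "docs/file_indexer.md",
    "description": "Builds and caches a file index for the project.",
    "methods": {
      "build_index": {
        "signature": "build_index()",
        "description": "Scans the project and writes the index to the cache directory.",
        "line_number": 12
      }
    }
  }
}

The CLI at the bottom of the script would then answer a query such as python api_indexer.py --search_class FileIndexer --search_method build_index with the inner "build_index" object shown above.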
.claude/python_script/cache/embedding_index.json (vendored, 156 lines removed)
@@ -1,156 +0,0 @@
{
  "analyzer.py": {
    "file_path": "analyzer.py",
    "content_hash": "9a7665c34d5ac84634342f8b1425bb13",
    "embedding_hash": "fb5b5a58ec8e070620747c7313b0b2b6",
    "created_time": 1758175163.6748724,
    "vector_size": 384
  },
  "config.yaml": {
    "file_path": "config.yaml",
    "content_hash": "fc0526eea28cf37d15425035d2dd17d9",
    "embedding_hash": "4866d8bd2b14c16c448c34c0251d199e",
    "created_time": 1758175163.6748896,
    "vector_size": 384
  },
  "install.sh": {
    "file_path": "install.sh",
    "content_hash": "6649df913eadef34fa2f253aed541dfd",
    "embedding_hash": "54af072da7c1139108c79b64bd1ee291",
    "created_time": 1758175163.6748998,
    "vector_size": 384
  },
  "requirements.txt": {
    "file_path": "requirements.txt",
    "content_hash": "e981a0aa103bdec4a99b75831967766d",
    "embedding_hash": "37bc877ea041ad606234262423cf578a",
    "created_time": 1758175163.6749053,
    "vector_size": 384
  },
  "setup.py": {
    "file_path": "setup.py",
    "content_hash": "7b93af473bfe37284c6cf493458bc421",
    "embedding_hash": "bdda9a6e8d3bd34465436b119a17e263",
    "created_time": 1758175163.6749127,
    "vector_size": 384
  },
  "__init__.py": {
    "file_path": "__init__.py",
    "content_hash": "c981c4ffc664bbd3c253d0dc82f48ac6",
    "embedding_hash": "3ab1a0c5d0d4bd832108b7a6ade0ad9c",
    "created_time": 1758175163.6749194,
    "vector_size": 384
  },
  "cache\\file_index.json": {
    "file_path": "cache\\file_index.json",
    "content_hash": "6534fef14d12e39aff1dc0dcf5b91d1d",
    "embedding_hash": "d76efa530f0d21e52f9d5b3a9ccc358c",
    "created_time": 1758175163.6749268,
    "vector_size": 384
  },
  "core\\config.py": {
    "file_path": "core\\config.py",
    "content_hash": "ee72a95cea7397db8dd25b10a4436eaa",
    "embedding_hash": "65d1fca1cf59bcd36409c3b11f50aab1",
    "created_time": 1758175163.6749349,
    "vector_size": 384
  },
  "core\\context_analyzer.py": {
    "file_path": "core\\context_analyzer.py",
    "content_hash": "2e9ac2050e463c9d3f94bad23e65d4e5",
    "embedding_hash": "dfb51c8eaafd96ac544b3d9c8dcd3f51",
    "created_time": 1758175163.674943,
    "vector_size": 384
  },
  "core\\embedding_manager.py": {
    "file_path": "core\\embedding_manager.py",
    "content_hash": "cafa24b0431c6463266dde8b37fc3ab7",
    "embedding_hash": "531c3206f0caf9789873719cdd644e99",
    "created_time": 1758175163.6749508,
    "vector_size": 384
  },
  "core\\file_indexer.py": {
    "file_path": "core\\file_indexer.py",
    "content_hash": "0626c89c060d6022261ca094aed47093",
    "embedding_hash": "93d5fc6e84334d3bd9be0f07f9823b20",
    "created_time": 1758175163.6749592,
    "vector_size": 384
  },
  "core\\gitignore_parser.py": {
    "file_path": "core\\gitignore_parser.py",
    "content_hash": "5f1d87fb03bc3b19833406be0fa5125f",
    "embedding_hash": "784be673b6b428cce60ab5390bfc7f08",
    "created_time": 1758175163.6749675,
    "vector_size": 384
  },
  "core\\path_matcher.py": {
    "file_path": "core\\path_matcher.py",
    "content_hash": "89132273951a091610c1579ccc44f3a7",
    "embedding_hash": "e01ca0180c2834a514ad6d8e62315ce0",
    "created_time": 1758175163.6749754,
    "vector_size": 384
  },
  "core\\__init__.py": {
    "file_path": "core\\__init__.py",
    "content_hash": "3a323be141f1ce6b9d9047aa444029b0",
    "embedding_hash": "3fc5a5427067e59b054428083a5899ca",
    "created_time": 1758175163.6749818,
    "vector_size": 384
  },
  "tools\\module_analyzer.py": {
    "file_path": "tools\\module_analyzer.py",
    "content_hash": "926289c2fd8d681ed20c445d2ac34fa1",
    "embedding_hash": "3378fcde062914859b765d8dfce1207f",
    "created_time": 1758175163.67499,
    "vector_size": 384
  },
  "tools\\tech_stack.py": {
    "file_path": "tools\\tech_stack.py",
    "content_hash": "eef6eabcbc8ba0ece0dfacb9314f3585",
    "embedding_hash": "bc3aa5334ef17328490bc5a8162d776a",
    "created_time": 1758175163.674997,
    "vector_size": 384
  },
  "tools\\workflow_updater.py": {
    "file_path": "tools\\workflow_updater.py",
    "content_hash": "40d7d884e0db24eb45aa27739fef8210",
    "embedding_hash": "00488f4acdb7fe1b5126da4da3bb9869",
    "created_time": 1758175163.6750047,
    "vector_size": 384
  },
  "tools\\__init__.py": {
    "file_path": "tools\\__init__.py",
    "content_hash": "41bf583571f4355e4af90842d0674b1f",
    "embedding_hash": "fccd7745f9e1e242df3bace7cee9759c",
    "created_time": 1758175163.6750097,
    "vector_size": 384
  },
  "utils\\cache.py": {
    "file_path": "utils\\cache.py",
    "content_hash": "dc7c08bcd9af9ae465020997e4b9127e",
    "embedding_hash": "68394bc0f57a0f66b83a57249b39957d",
    "created_time": 1758175163.6750169,
    "vector_size": 384
  },
  "utils\\colors.py": {
    "file_path": "utils\\colors.py",
    "content_hash": "8ce555a2dcf4057ee7adfb3286d47da2",
    "embedding_hash": "1b18e22acb095e83ed291b6c5dc7a2ce",
    "created_time": 1758175163.6750243,
    "vector_size": 384
  },
  "utils\\io_helpers.py": {
    "file_path": "utils\\io_helpers.py",
    "content_hash": "fb276a0e46b28f80d5684368a8b15e57",
    "embedding_hash": "f6ff8333b1afc5b98d4644f334c18cda",
    "created_time": 1758175163.6750326,
    "vector_size": 384
  },
  "utils\\__init__.py": {
    "file_path": "utils\\__init__.py",
    "content_hash": "f305ede9cbdec2f2e0189a4b89558b7e",
    "embedding_hash": "7d3f10fe4210d40eafd3c065b8e0c8b7",
    "created_time": 1758175163.6750393,
    "vector_size": 384
  }
}
.claude/python_script/cache/embeddings.pkl (vendored, binary file not shown)
@@ -66,11 +66,12 @@ file_extensions:
 
 # Embedding/RAG configuration
 embedding:
   enabled: true  # Set to true to enable RAG features
-  model: "all-MiniLM-L6-v2"  # Lightweight sentence transformer
+  model: "codesage/codesage-large-v2"  # CodeSage V2 for code embeddings
   cache_dir: "cache"
-  similarity_threshold: 0.3
-  max_context_length: 512
-  batch_size: 32
+  similarity_threshold: 0.6  # Higher threshold for better code similarity
+  max_context_length: 2048  # Increased for CodeSage V2 capabilities
+  batch_size: 8  # Reduced for larger model
+  trust_remote_code: true  # Required for CodeSage V2
 
 # Context analysis settings
 context_analysis:
@@ -75,6 +75,7 @@ class EmbeddingManager:
         self.similarity_threshold = config.get('embedding', {}).get('similarity_threshold', 0.6)
         self.max_context_length = config.get('embedding', {}).get('max_context_length', 512)
         self.batch_size = config.get('embedding', {}).get('batch_size', 32)
+        self.trust_remote_code = config.get('embedding', {}).get('trust_remote_code', False)
 
         # Setup cache directories
         self.cache_dir.mkdir(parents=True, exist_ok=True)
@@ -95,7 +96,11 @@ class EmbeddingManager:
         if self._model is None:
             try:
                 self.logger.info(f"Loading embedding model: {self.model_name}")
-                self._model = SentenceTransformer(self.model_name)
+                # Initialize with trust_remote_code for CodeSage V2
+                if self.trust_remote_code:
+                    self._model = SentenceTransformer(self.model_name, trust_remote_code=True)
+                else:
+                    self._model = SentenceTransformer(self.model_name)
                 self.logger.info(f"Model loaded successfully")
             except Exception as e:
                 self.logger.error(f"Failed to load embedding model: {e}")
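The same loading path can be exercised outside the manager. A minimal sketch, assuming the model name from the config change above, network access to download the model, and a sentence-transformers release that accepts trust_remote_code (3.0+ per the requirements update below):

from sentence_transformers import SentenceTransformer

# CodeSage V2 ships custom modeling code, so trust_remote_code=True is required,
# mirroring the branch added in the diff above; the model name comes from config.yaml.
model = SentenceTransformer("codesage/codesage-large-v2", trust_remote_code=True)

# Encode a small code snippet with the reduced batch size from the new config.
vectors = model.encode(["def add(a, b):\n    return a + b"], batch_size=8)
print(vectors.shape)  # (1, embedding_dimension)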
@@ -203,7 +208,7 @@ class EmbeddingManager:
         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
             content = f.read()
 
-        # Truncate content if too long
+        # Truncate content if too long (CodeSage V2 supports longer contexts)
         if len(content) > self.max_context_length * 4:  # Approximate token limit
             content = content[:self.max_context_length * 4]
 
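As a quick sanity check on the numbers: the truncation guard keeps at most max_context_length * 4 characters as a rough proxy for the token limit, so the config change above raises the per-file cutoff from 512 * 4 = 2048 characters to 2048 * 4 = 8192 characters before embedding.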
@@ -2,14 +2,18 @@
 numpy>=1.21.0
 scikit-learn>=1.0.0
 
-# Sentence Transformers for advanced embeddings
-sentence-transformers>=2.2.0
+# Sentence Transformers for advanced embeddings (CodeSage V2 compatible)
+sentence-transformers>=3.0.0
+transformers>=4.40.0
 
-# Optional: For better performance and additional models
-torch>=1.9.0
+# PyTorch for model execution (required for CodeSage V2)
+torch>=2.0.0
 
 # Development and testing
 pytest>=6.0.0
 
 # Data handling
 pandas>=1.3.0
+
+# Additional dependencies for CodeSage V2
+accelerate>=0.26.0