Refactor code structure for improved readability and maintainability

catlog22
2025-12-12 11:19:58 +08:00
parent 77de8d857b
commit b74a90b416
169 changed files with 29206 additions and 369 deletions

reference/codanna Submodule

Submodule reference/codanna added at 80ed5e3d5f

View File

@@ -0,0 +1,47 @@
# Git
.git
.gitignore
# Python cache files
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
venv/
env/
ENV/
# IDE files
.idea/
.vscode/
*.swp
*.swo
# OS specific files
.DS_Store
Thumbs.db
# Code Index MCP specific files
.code_indexer/
# Docker files
Dockerfile
.dockerignore

View File

@@ -0,0 +1,26 @@
# Set default behavior to automatically normalize line endings
* text=auto
# Force specific file types to use LF line endings
*.py text eol=lf
*.js text eol=lf
*.ts text eol=lf
*.json text eol=lf
*.md text eol=lf
*.yml text eol=lf
*.yaml text eol=lf
*.toml text eol=lf
*.txt text eol=lf
# Force specific file types to use CRLF line endings
*.bat text eol=crlf
*.cmd text eol=crlf
# Binary files should be left untouched
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.zip binary
*.tar.gz binary

View File

@@ -0,0 +1,96 @@
name: Release

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag (vX.Y.Z) to re-run publish flow'
        required: true
        type: string

concurrency:
  group: release-${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', github.event.inputs.tag) || github.ref }}
  cancel-in-progress: false

jobs:
  verify-and-build:
    runs-on: ubuntu-latest
    env:
      RELEASE_REF: ${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', github.event.inputs.tag) || github.ref }}
      RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ env.RELEASE_REF }}
      - name: Ensure tag points to default branch
        run: |
          git fetch origin
          TARGET_BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')
          if [ -z "$TARGET_BRANCH" ]; then
            TARGET_BRANCH=master
          fi
          if ! git merge-base --is-ancestor "$(git rev-parse HEAD)" "origin/${TARGET_BRANCH}"; then
            echo "::error::Release tag must point to a commit reachable from ${TARGET_BRANCH}"
            exit 1
          fi
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install uv
        run: python -m pip install --upgrade pip uv
      - name: Cache uv environments
        uses: actions/cache@v4
        with:
          path: |
            .venv
            .uv-cache
          key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            uv-${{ runner.os }}-
      - name: Install dependencies
        run: uv sync --frozen
      - name: Install build tooling
        run: uv pip install build twine
      - name: Build distributions
        run: uv run python -m build
      - name: Twine check
        run: uv run twine check dist/*
      - name: Upload dist artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-${{ env.RELEASE_TAG }}
          path: dist/*
          retention-days: 7

  publish:
    needs: verify-and-build
    runs-on: ubuntu-latest
    environment:
      name: production
    env:
      RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist-${{ env.RELEASE_TAG }}
          path: dist
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          password: ${{ secrets.PYPI_API_TOKEN }}

View File

@@ -0,0 +1,51 @@
# Python cache files
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
venv/
env/
ENV/
# IDE files
.idea/
.vscode/
*.swp
*.swo
# OS specific files
.DS_Store
Thumbs.db
# Code Index MCP specific files
.code_indexer/
# Claude Code generated files
CLAUDE.local.md
.claude/
.claude_chat/
claude_*
COMMIT_MESSAGE.txt
RELEASE_NOTE.txt
.llm-context/
AGENTS.md

View File

@@ -0,0 +1,24 @@
[MAIN]
# Ignore auto-generated protobuf files
ignore-paths=src/code_index_mcp/scip/proto/scip_pb2.py

[MESSAGES CONTROL]
# Disable specific warnings for protobuf generated code
disable=
    # Generated code warnings
    protected-access,
    bad-indentation,
    line-too-long,
    # Other common warnings we might want to disable globally
    unused-import,
    logging-fstring-interpolation

[FORMAT]
# Maximum number of characters on a single line
max-line-length=100

[DESIGN]
# Maximum number of arguments for function / method
max-args=7
# Maximum number of locals for function / method body
max-locals=20

View File

@@ -0,0 +1 @@
3.11

View File

@@ -0,0 +1,28 @@
{
  "$schema": "https://modelcontextprotocol.io/schemas/mcp.json",
  "mcpServers": {
    "code-index": {
      "command": "uv",
      "args": [
        "run",
        "code-index-mcp"
      ],
      "transport": {
        "type": "stdio"
      },
      "metadata": {
        "name": "Code Index MCP",
        "description": "Local code-aware MCP server with project indexing, search, and file tools.",
        "homepage": "https://github.com/johnhuang316/code-index-mcp",
        "capabilities": [
          "code-search",
          "symbol-indexing",
          "file-system"
        ]
      }
    }
  },
  "llmfeed_extension": {
    "path": ".well-known/mcp.llmfeed.json"
  }
}

View File

@@ -0,0 +1,32 @@
{
  "$schema": "https://modelcontextprotocol.io/schemas/mcp-llmfeed.json",
  "feed_type": "mcp_server_list",
  "servers": [
    {
      "id": "code-index",
      "name": "Code Index MCP",
      "description": "Exposes project-aware indexing, search, and file utilities for LLM agents via MCP transports.",
      "version": "2.9.4",
      "transport": "stdio",
      "command": "uv",
      "args": [
        "run",
        "code-index-mcp"
      ],
      "links": {
        "documentation": "https://github.com/johnhuang316/code-index-mcp#readme",
        "source": "https://github.com/johnhuang316/code-index-mcp"
      },
      "capabilities": [
        "code-search",
        "symbol-indexing",
        "file-system"
      ],
      "tags": [
        "fastmcp",
        "code-intelligence",
        "watcher"
      ]
    }
  ]
}

View File

@@ -0,0 +1,24 @@
# Use lightweight Python image
FROM python:3.11-slim
# Install git (for code analysis)
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Copy dependency list and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy code
COPY . .
# Set Python path
ENV PYTHONPATH="${PYTHONPATH}:/app:/app/src"
# No default project directory mount point needed, user will explicitly set project path
# Run MCP tool
# MCP server uses stdio mode by default
ENTRYPOINT ["python", "-m", "code_index_mcp.server"]

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2015 johnhuang316
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,412 @@
# Code Index MCP
<div align="center">
[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io)
[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE)
**Intelligent code indexing and analysis for Large Language Models**
Transform how AI understands your codebase with advanced search, analysis, and navigation capabilities.
</div>
<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
<img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>
## Overview
Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) server that bridges the gap between AI models and complex codebases. It provides intelligent indexing, advanced search capabilities, and detailed code analysis to help AI assistants understand and navigate your projects effectively.
**Perfect for:** Code review, refactoring, documentation generation, debugging assistance, and architectural analysis.
## Quick Start
### 🚀 **Recommended Setup (Most Users)**
The easiest way to get started with any MCP-compatible application:
**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv)
1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`):
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uvx",
      "args": ["code-index-mcp"]
    }
  }
}
```
> Optional: append `--project-path /absolute/path/to/repo` to the `args` array so the server
> initializes with that repository automatically (equivalent to calling `set_project_path`
> after startup).
2. **Restart your application**; `uvx` automatically handles installation and execution
3. **Start using** (give these prompts to your AI assistant):
```
Set the project path to /Users/dev/my-react-app
Find all TypeScript files in this project
Search for "authentication" functions
Analyze the main App.tsx file
```
*If you launch with `--project-path`, you can skip the first command above - the server already
knows the project location.*
### Codex CLI Configuration
If you are using OpenAI's Codex CLI, add the server to `~/.codex/config.toml`.
On Windows the file lives at `C:\Users\<you>\.codex\config.toml`:
```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> You can append `--project-path C:/absolute/path/to/repo` to the `args` list to set the project
> automatically on startup (same effect as running the `set_project_path` tool).
On Windows, `uvx` needs the standard profile directories to be present.
Keep the environment override with the same server entry so the MCP starts reliably:
```toml
[mcp_servers.code-index.env]
HOME = "C:\\Users\\<you>"
APPDATA = "C:\\Users\\<you>\\AppData\\Roaming"
LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local"
SystemRoot = "C:\\Windows"
```
Linux and macOS already expose the required XDG paths and `HOME`, so you can usually omit the `env`
table there.
Add overrides only if you run the CLI inside a restricted container.
### FastMCP & Discovery Manifests
- Run `fastmcp run fastmcp.json` to launch the server via [FastMCP](https://fastmcp.wiki/) with
the correct source entrypoint and dependency metadata. Pass `--project-path` (or call the
`set_project_path` tool after startup) so the index boots against the right repository.
- Serve or copy `.well-known/mcp.json` to share a standards-compliant MCP manifest. Clients that
support the `.well-known` convention (e.g., Claude Desktop, Codex CLI) can import this file
directly instead of crafting configs manually.
- Publish `.well-known/mcp.llmfeed.json` when you want to expose the richer LLM Feed metadata.
It references the same `code-index` server definition plus documentation/source links, which
helps registries present descriptions, tags, and capabilities automatically.
When sharing the manifests, remind consumers to supply `--project-path` (or to call
`set_project_path`) so the server indexes the intended repository.
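As a quick sanity check of the manifest wiring, here is a minimal sketch (assuming the repository layout above) that reads `.well-known/mcp.json` and rebuilds the launch command a client would run; the keys mirror the example manifest in this repository:
```python
# Minimal sketch: read the .well-known manifest and assemble the launch
# command for the "code-index" server. Adjust the path if you serve the
# manifest from somewhere else.
import json
from pathlib import Path

manifest = json.loads(Path(".well-known/mcp.json").read_text())
server = manifest["mcpServers"]["code-index"]
print([server["command"], *server["args"]])  # e.g. ['uv', 'run', 'code-index-mcp']
```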
## Typical Use Cases
**Code Review**: "Find all places using the old API"
**Refactoring Help**: "Where is this function called?"
**Learning Projects**: "Show me the main components of this React project"
**Debugging**: "Search for all error handling related code"
## Key Features
### 🔍 **Intelligent Search & Analysis**
- **Dual-Strategy Architecture**: Specialized tree-sitter parsing for 7 core languages, fallback strategy for 50+ file types
- **Direct Tree-sitter Integration**: No regex fallbacks for specialized languages - fail fast with clear errors
- **Advanced Search**: Auto-detects and uses the best available tool (ugrep, ripgrep, ag, or grep)
- **Universal File Support**: Comprehensive coverage from advanced AST parsing to basic file indexing
- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics after running `build_deep_index`
### 🗂️ **Multi-Language Support**
- **7 Languages with Tree-sitter AST Parsing**: Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig
- **50+ File Types with Fallback Strategy**: C/C++, Rust, Ruby, PHP, and all other programming languages
- **Document & Config Files**: Markdown, JSON, YAML, XML with appropriate handling
- **Web Frontend**: Vue, React, Svelte, HTML, CSS, SCSS
- **Java Web & Build**: JSP/Tag files (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`), Grails/GSP (`.gsp`), Gradle & Groovy builds (`.gradle`, `.groovy`), `.properties`, and Protocol Buffers (`.proto`)
- **Database**: SQL variants, NoSQL, stored procedures, migrations
- **Configuration**: JSON, YAML, XML, Markdown
- **[View complete list](#supported-file-types)**
### ⚡ **Real-time Monitoring & Auto-refresh**
- **File Watcher**: Automatic index updates when files change
- **Cross-platform**: Native OS file system monitoring
- **Smart Processing**: Batches rapid changes to prevent excessive rebuilds
- **Shallow Index Refresh**: Watches file changes and keeps the file list current; run a deep rebuild when you need symbol metadata
### ⚡ **Performance & Efficiency**
- **Tree-sitter AST Parsing**: Native syntax parsing for accurate symbol extraction
- **Persistent Caching**: Stores indexes for lightning-fast subsequent access
- **Smart Filtering**: Intelligent exclusion of build directories and temporary files
- **Memory Efficient**: Optimized for large codebases
- **Direct Dependencies**: No fallback mechanisms - fail fast with clear error messages
## Supported File Types
<details>
<summary><strong>📁 Programming Languages (Click to expand)</strong></summary>
**Languages with Specialized Tree-sitter Strategies:**
- **Python** (`.py`, `.pyw`) - Full AST analysis with class/method extraction and call tracking
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ class and function parsing with tree-sitter
- **TypeScript** (`.ts`, `.tsx`) - Complete type-aware symbol extraction with interfaces
- **Java** (`.java`) - Full class hierarchy, method signatures, and call relationships
- **Go** (`.go`) - Struct methods, receiver types, and function analysis
- **Objective-C** (`.m`, `.mm`) - Class/instance method distinction with +/- notation
- **Zig** (`.zig`, `.zon`) - Function and struct parsing with tree-sitter AST
**All Other Programming Languages:**
All other programming languages use the **FallbackParsingStrategy** which provides basic file indexing and metadata extraction. This includes:
- **System & Low-Level:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
- **Object-Oriented:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
- **Scripting & Dynamic:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
- **And 40+ more file types** - All handled through the fallback strategy for basic indexing
</details>
<details>
<summary><strong>🌐 Web & Frontend (Click to expand)</strong></summary>
**Frameworks & Libraries:**
- Vue (`.vue`)
- Svelte (`.svelte`)
- Astro (`.astro`)
**Styling:**
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
- HTML (`.html`)
**Templates:**
- Handlebars (`.hbs`, `.handlebars`)
- EJS (`.ejs`)
- Pug (`.pug`)
- FreeMarker (`.ftl`)
- Mustache (`.mustache`)
- Liquid (`.liquid`)
- ERB (`.erb`)
</details>
<details>
<summary><strong>🗄️ Database & SQL (Click to expand)</strong></summary>
**SQL Variants:**
- Standard SQL (`.sql`, `.ddl`, `.dml`)
- Database-specific (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
**Database Objects:**
- Procedures & Functions (`.proc`, `.procedure`, `.func`, `.function`)
- Views & Triggers (`.view`, `.trigger`, `.index`)
**Migration & Tools:**
- Migration files (`.migration`, `.seed`, `.fixture`, `.schema`)
- Tool-specific (`.liquibase`, `.flyway`)
**NoSQL & Modern:**
- Graph & Query (`.cql`, `.cypher`, `.sparql`, `.gql`)
</details>
<details>
<summary><strong>📄 Documentation & Config (Click to expand)</strong></summary>
- Markdown (`.md`, `.mdx`)
- Configuration (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)
</details>
### 🛠️ **Development Setup**
For contributing or local development:
1. **Clone and install:**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
```
2. **Configure for local development:**
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uv",
      "args": ["run", "code-index-mcp"]
    }
  }
}
```
3. **Debug with MCP Inspector:**
```bash
npx @modelcontextprotocol/inspector uv run code-index-mcp
```
<details>
<summary><strong>Alternative: Manual pip Installation</strong></summary>
If you prefer traditional pip management:
```bash
pip install code-index-mcp
```
Then configure:
```json
{
  "mcpServers": {
    "code-index": {
      "command": "code-index-mcp",
      "args": []
    }
  }
}
```
</details>
## Available Tools
### 🏗️ **Project Management**
| Tool | Description |
|------|-------------|
| **`set_project_path`** | Initialize indexing for a project directory |
| **`refresh_index`** | Rebuild the shallow file index after file changes |
| **`build_deep_index`** | Generate the full symbol index used by deep analysis |
| **`get_settings_info`** | View current project configuration and status |
*Run `build_deep_index` when you need symbol-level data; the default shallow index powers quick file discovery.*
### 🔍 **Search & Discovery**
| Tool | Description |
|------|-------------|
| **`search_code_advanced`** | Smart search with regex, fuzzy matching, file filtering, and paginated results (10 per page by default) |
| **`find_files`** | Locate files using glob patterns (e.g., `**/*.py`) |
| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity (requires deep index) |
### 🔄 **Monitoring & Auto-refresh**
| Tool | Description |
|------|-------------|
| **`get_file_watcher_status`** | Check file watcher status and configuration |
| **`configure_file_watcher`** | Enable/disable auto-refresh and configure settings |
### 🛠️ **System & Maintenance**
| Tool | Description |
|------|-------------|
| **`create_temp_directory`** | Set up storage directory for index data |
| **`check_temp_directory`** | Verify index storage location and permissions |
| **`clear_settings`** | Reset all cached data and configurations |
| **`refresh_search_tools`** | Re-detect available search tools (ugrep, ripgrep, etc.) |
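As a rough illustration of how these tools compose, the following hedged sketch drives the server over stdio with the official `mcp` Python client; tool and argument names follow the tables above, and the project path is a placeholder:
```python
# Hedged sketch: call the server's tools from Python over stdio using the
# official mcp client API. Tool names and argument keys are documented above.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    params = StdioServerParameters(command="uvx", args=["code-index-mcp"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Point the index at a repository, then search it.
            await session.call_tool("set_project_path", {"path": "/Users/dev/my-react-app"})
            result = await session.call_tool(
                "search_code_advanced",
                {"pattern": "API_ENDPOINT", "file_pattern": "*.py", "max_results": 10},
            )
            print(result)


asyncio.run(main())
```
The same pattern works for any tool listed above; most MCP clients handle this plumbing for you.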
## Usage Examples
### 🎯 **Quick Start Workflow**
**1. Initialize Your Project**
```
Set the project path to /Users/dev/my-react-app
```
*Automatically indexes your codebase and creates searchable cache*
**2. Explore Project Structure**
```
Find all TypeScript component files in src/components
```
*Uses: `find_files` with pattern `src/components/**/*.tsx`*
**3. Analyze Key Files**
```
Give me a summary of src/api/userService.ts
```
*Uses: `get_file_summary` to show functions, imports, and complexity*
*Tip: run `build_deep_index` first if you get a `needs_deep_index` response.*
### 🔍 **Advanced Search Examples**
<details>
<summary><strong>Code Pattern Search</strong></summary>
```
Search for all function calls matching "get.*Data" using regex
```
*Finds: `getData()`, `getUserData()`, `getFormData()`, etc.*
</details>
<details>
<summary><strong>Fuzzy Function Search</strong></summary>
```
Find authentication-related functions with fuzzy search for 'authUser'
```
*Matches: `authenticateUser`, `authUserToken`, `userAuthCheck`, etc.*
</details>
<details>
<summary><strong>Language-Specific Search</strong></summary>
```
Search for "API_ENDPOINT" only in Python files
```
*Uses: `search_code_advanced` with `file_pattern: "*.py"` (defaults to 10 matches; use `max_results` to expand or `start_index` to page)*
</details>
<details>
<summary><strong>Auto-refresh Configuration</strong></summary>
```
Configure automatic index updates when files change
```
*Uses: `configure_file_watcher` to enable/disable monitoring and set debounce timing*
</details>
<details>
<summary><strong>Project Maintenance</strong></summary>
```
I added new components, please refresh the project index
```
*Uses: `refresh_index` to update the searchable cache*
</details>
## Troubleshooting
### 🔄 **Auto-refresh Not Working**
If automatic index updates aren't working when files change, try:
- `pip install watchdog` (may resolve environment isolation issues; see the quick check below)
- Use manual refresh: Call the `refresh_index` tool after making file changes
- Check file watcher status: Use `get_file_watcher_status` to verify monitoring is active
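A quick, hedged way to confirm whether `watchdog` is importable in the environment the server actually runs in:
```python
# Quick check: is the optional watchdog dependency importable here?
import importlib.util

print("watchdog installed:", importlib.util.find_spec("watchdog") is not None)
```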
## Development & Contributing
### 🔧 **Building from Source**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```
### 🐛 **Debugging**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```
### 🤝 **Contributing**
Contributions are welcome! Please feel free to submit a Pull Request.
---
### 📜 **License**
[MIT License](LICENSE)
### 🌐 **Translations**
- [繁體中文](README_zh.md)
- [日本語](README_ja.md)

View File

@@ -0,0 +1,419 @@
# Code Index MCP
<div align="center">
[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io)
[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE)
**大規模言語モデルのためのインテリジェントコードインデックス作成と解析**
高度な検索、解析、ナビゲーション機能で、AIのコードベース理解を根本的に変革します。
</div>
<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
<img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>
## 概要
Code Index MCPは、AIモデルと複雑なコードベースの橋渡しをする[Model Context Protocol](https://modelcontextprotocol.io)サーバーです。インテリジェントなインデックス作成、高度な検索機能、詳細なコード解析を提供し、AIアシスタントがプロジェクトを効果的に理解しナビゲートできるようにします。
**最適な用途:**コードレビュー、リファクタリング、ドキュメント生成、デバッグ支援、アーキテクチャ解析。
## クイックスタート
### 🚀 **推奨セットアップ(ほとんどのユーザー)**
任意のMCP対応アプリケーションで開始する最も簡単な方法:
**前提条件:** Python 3.10+ および [uv](https://github.com/astral-sh/uv)
1. **MCP設定に追加** (例:`claude_desktop_config.json` または `~/.claude.json`)
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uvx",
      "args": ["code-index-mcp"]
    }
  }
}
```
> 起動時にプロジェクトを自動設定したい場合は、`args` 配列の末尾に
> `--project-path /absolute/path/to/repo` を追加してください。これで起動直後に
> `set_project_path` を呼び出した場合と同じ状態になります。
2. **アプリケーションを再起動** `uvx`がインストールと実行を自動処理
3. **使用開始**(AIアシスタントにこれらのプロンプトを与える):
```
プロジェクトパスを/Users/dev/my-react-appに設定
このプロジェクトのすべてのTypeScriptファイルを検索
「authentication」関連関数を検索
メインのApp.tsxファイルを解析
```
*起動時に `--project-path` を付けた場合は、最初のコマンドは不要です。サーバーが既にパスを認識しています。*
### Codex CLI 設定
OpenAI の Codex CLI を使用している場合は、`~/.codex/config.toml` に次のサーバー設定を追加します。
Windows では `C:\Users\<you>\.codex\config.toml` に保存されています。
```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> 起動時にプロジェクトを設定したい場合は、`args` リストに `--project-path C:/absolute/path/to/repo` を追加してください。
> これは起動後に `set_project_path` ツールを呼び出すのと同じ効果です。
Windows の `uvx` は標準ユーザープロファイルディレクトリが必要です。
MCP を安定して起動するために、同じサーバー設定に次の環境変数を残してください。
```toml
[mcp_servers.code-index.env]
HOME = "C:\\Users\\<you>"
APPDATA = "C:\\Users\\<you>\\AppData\\Roaming"
LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local"
SystemRoot = "C:\\Windows"
```
Linux と macOS では OS が `HOME` や XDG 系のパスを標準で公開しているため、通常は `env` セクションは不要です。
制限されたコンテナで実行する場合やキャッシュ/設定の保存先を手動で変更したいときだけ上書きしてください。
環境変数の一覧は [`uv` の環境変数リファレンス](https://docs.astral.sh/uv/reference/environment/)(`HOME`、`XDG_CACHE_HOME`、`XDG_CONFIG_HOME`、`APPDATA` など)を参照してください。
## 一般的な使用ケース
**コードレビュー**:「旧いAPIを使用しているすべての箇所を検索」
**リファクタリング支援**:「この関数はどこで呼ばれている?」
**プロジェクト学習**:「このReactプロジェクトの主要コンポーネントを表示」
**デバッグ支援**:「エラーハンドリング関連のコードをすべて検索」
## 主な機能
### 🔍 **インテリジェント検索・解析**
- **二重戦略アーキテクチャ**:7つのコア言語に特化したTree-sitter解析、50+ファイルタイプにフォールバック戦略
- **直接Tree-sitter統合**:特化言語で正規表現フォールバックなし - 明確なエラーメッセージで高速フェイル
- **高度な検索**:最適なツール(ugrep、ripgrep、ag、grep)を自動検出・使用
- **汎用ファイルサポート**:高度なAST解析から基本ファイルインデックスまでの包括的カバレッジ
- **ファイル解析**:`build_deep_index` 実行後に構造、インポート、クラス、メソッド、複雑度メトリクスを深く把握
### 🗂️ **多言語サポート**
- **7言語でTree-sitter AST解析**:Python、JavaScript、TypeScript、Java、Go、Objective-C、Zig
- **50+ファイルタイプでフォールバック戦略**:C/C++、Rust、Ruby、PHPおよびすべての他のプログラミング言語
- **文書・設定ファイル**:Markdown、JSON、YAML、XML(適切な処理)
- **Webフロントエンド**:Vue、React、Svelte、HTML、CSS、SCSS
- **Java Webとビルド**:JSP/タグファイル(`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`)、Grails/GSP(`.gsp`)、Gradle/Groovyスクリプト(`.gradle`, `.groovy`)、`.properties`、Protocol Buffers(`.proto`)
- **データベース**:SQLバリアント、NoSQL、ストアドプロシージャ、マイグレーション
- **設定ファイル**:JSON、YAML、XML、Markdown
- **[完全なリストを表示](#サポートされているファイルタイプ)**
### ⚡ **リアルタイム監視・自動更新**
- **ファイルウォッチャー**:ファイル変更時の自動インデックス更新
- **クロスプラットフォーム**ネイティブOSファイルシステム監視
- **スマート処理**:急速な変更をバッチ処理して過度な再構築を防止
- **浅いインデックス更新**:ファイル変更を監視して最新のファイル一覧を維持し、シンボルが必要な場合は `build_deep_index` を実行
### ⚡ **パフォーマンス・効率性**
- **Tree-sitter AST解析**:正確なシンボル抽出のためのネイティブ構文解析
- **永続キャッシュ**:超高速な後続アクセスのためのインデックス保存
- **スマートフィルタリング**:ビルドディレクトリと一時ファイルのインテリジェント除外
- **メモリ効率**:大規模コードベース向けに最適化
- **直接依存関係**:フォールバック機構なし - 明確なエラーメッセージで高速フェイル
## サポートされているファイルタイプ
<details>
<summary><strong>📁 プログラミング言語(クリックで展開)</strong></summary>
**特化Tree-sitter戦略言語**
- **Python** (`.py`, `.pyw`) - クラス/メソッド抽出と呼び出し追跡を含む完全AST解析
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - Tree-sitterを使用したES6+クラスと関数解析
- **TypeScript** (`.ts`, `.tsx`) - インターフェースを含む完全な型認識シンボル抽出
- **Java** (`.java`) - 完全なクラス階層、メソッドシグネチャ、呼び出し関係
- **Go** (`.go`) - 構造体メソッド、レシーバータイプ、関数解析
- **Objective-C** (`.m`, `.mm`) - +/-記法を使用したクラス/インスタンスメソッド区別
- **Zig** (`.zig`, `.zon`) - Tree-sitter ASTを使用した関数と構造体解析
**すべての他のプログラミング言語:**
すべての他のプログラミング言語は**フォールバック解析戦略**を使用し、基本ファイルインデックスとメタデータ抽出を提供します。これには以下が含まれます:
- **システム・低レベル言語:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)、Rust (`.rs`)
- **オブジェクト指向言語:** C# (`.cs`)、Kotlin (`.kt`)、Scala (`.scala`)、Swift (`.swift`)
- **スクリプト・動的言語:** Ruby (`.rb`)、PHP (`.php`)、Shell (`.sh`, `.bash`)
- **および40+ファイルタイプ** - すべてフォールバック戦略による基本インデックス処理
</details>
<details>
<summary><strong>🌐 Web・フロントエンド(クリックで展開)</strong></summary>
**フレームワーク・ライブラリ:**
- Vue (`.vue`)
- Svelte (`.svelte`)
- Astro (`.astro`)
**スタイリング:**
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
- HTML (`.html`)
**テンプレート:**
- Handlebars (`.hbs`, `.handlebars`)
- EJS (`.ejs`)
- Pug (`.pug`)
- FreeMarker (`.ftl`)
- Mustache (`.mustache`)
- Liquid (`.liquid`)
- ERB (`.erb`)
</details>
<details>
<summary><strong>🗄️ データベース・SQL(クリックで展開)</strong></summary>
**SQL バリアント:**
- 標準SQL (`.sql`, `.ddl`, `.dml`)
- データベース固有 (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
**データベースオブジェクト:**
- プロシージャ・関数 (`.proc`, `.procedure`, `.func`, `.function`)
- ビュー・トリガー (`.view`, `.trigger`, `.index`)
**マイグレーション・ツール:**
- マイグレーションファイル (`.migration`, `.seed`, `.fixture`, `.schema`)
- ツール固有 (`.liquibase`, `.flyway`)
**NoSQL・モダンDB**
- グラフ・クエリ (`.cql`, `.cypher`, `.sparql`, `.gql`)
</details>
<details>
<summary><strong>📄 ドキュメント・設定(クリックで展開)</strong></summary>
- Markdown (`.md`, `.mdx`)
- 設定 (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)
</details>
### 🛠️ **開発セットアップ**
貢献やローカル開発用:
1. **クローンとインストール:**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
```
2. **ローカル開発用設定:**
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uv",
      "args": ["run", "code-index-mcp"]
    }
  }
}
```
3. **MCP Inspectorでデバッグ**
```bash
npx @modelcontextprotocol/inspector uv run code-index-mcp
```
<details>
<summary><strong>代替案:手動pipインストール</strong></summary>
従来のpip管理を好む場合:
```bash
pip install code-index-mcp
```
そして設定:
```json
{
  "mcpServers": {
    "code-index": {
      "command": "code-index-mcp",
      "args": []
    }
  }
}
```
</details>
## 利用可能なツール
### 🏗️ **プロジェクト管理**
| ツール | 説明 |
|--------|------|
| **`set_project_path`** | プロジェクトディレクトリのインデックス作成を初期化 |
| **`refresh_index`** | ファイル変更後に浅いファイルインデックスを再構築 |
| **`build_deep_index`** | 深い解析で使う完全なシンボルインデックスを生成 |
| **`get_settings_info`** | 現在のプロジェクト設定と状態を表示 |
*シンボルレベルのデータが必要な場合は `build_deep_index` を実行してください。デフォルトの浅いインデックスは高速なファイル探索を担います。*
### 🔍 **検索・発見**
| ツール | 説明 |
|--------|------|
| **`search_code_advanced`** | 正規表現、ファジーマッチング、ファイルフィルタリング対応のスマート検索。デフォルトで 1 ページあたり 10 件を返し、`max_results` と `start_index` で調整可能 |
| **`find_files`** | globパターンを使用したファイル検索(例:`**/*.py`) |
| **`get_file_summary`** | ファイル構造、関数、インポート、複雑度の解析(深いインデックスが必要) |
### 🔄 **監視・自動更新**
| ツール | 説明 |
|--------|------|
| **`get_file_watcher_status`** | ファイルウォッチャーの状態と設定を確認 |
| **`configure_file_watcher`** | 自動更新の有効化/無効化と設定の構成 |
### 🛠️ **システム・メンテナンス**
| ツール | 説明 |
|--------|------|
| **`create_temp_directory`** | インデックスデータの保存ディレクトリをセットアップ |
| **`check_temp_directory`** | インデックス保存場所と権限を確認 |
| **`clear_settings`** | すべてのキャッシュデータと設定をリセット |
| **`refresh_search_tools`** | 利用可能な検索ツール(ugrep、ripgrep等)を再検出 |
## 使用例
### 🎯 **クイックスタートワークフロー**
**1. プロジェクトの初期化**
```
プロジェクトパスを /Users/dev/my-react-app に設定してください
```
*コードベースを自動インデックス作成し、検索可能なキャッシュを構築*
**2. プロジェクト構造の探索**
```
src/components で全てのTypeScriptコンポーネントファイルを見つけてください
```
*使用ツール:`find_files`、パターン `src/components/**/*.tsx`*
**3. キーファイルの解析**
```
src/api/userService.ts の要約を教えてください
```
*使用ツール:`get_file_summary` で関数、インポート、複雑度を表示*
*ヒント:`needs_deep_index` が返った場合は `build_deep_index` を先に実行してください。*
### 🔍 **高度な検索例**
<details>
<summary><strong>コードパターン検索</strong></summary>
```
正規表現を使って "get.*Data" にマッチする全ての関数呼び出しを検索してください
```
*発見:`getData()`、`getUserData()`、`getFormData()` など*
</details>
<details>
<summary><strong>ファジー関数検索</strong></summary>
```
'authUser' でファジー検索して認証関連の関数を見つけてください
```
*マッチ:`authenticateUser`、`authUserToken`、`userAuthCheck` など*
</details>
<details>
<summary><strong>言語固有検索</strong></summary>
```
Pythonファイルのみで "API_ENDPOINT" を検索してください
```
*使用ツール:`search_code_advanced`、`file_pattern: "*.py"`(デフォルトは 10 件。`max_results` で件数を増やし、`start_index` でページ送り)*
</details>
<details>
<summary><strong>自動更新設定</strong></summary>
```
ファイル変更時の自動インデックス更新を設定してください
```
*使用ツール:`configure_file_watcher` で監視の有効化/無効化とデバウンス時間を設定*
</details>
<details>
<summary><strong>プロジェクトメンテナンス</strong></summary>
```
新しいコンポーネントを追加したので、プロジェクトインデックスを更新してください
```
*使用ツール:`refresh_index` で検索可能なキャッシュを更新*
</details>
## トラブルシューティング
### 🔄 **自動リフレッシュが動作しない**
ファイル変更時に自動インデックス更新が動作しない場合、以下を試してください:
- `pip install watchdog`(環境分離の問題を解決する可能性があります)
- 手動リフレッシュを使用:ファイル変更後に `refresh_index` ツールを呼び出す
- ファイルウォッチャーステータスを確認:`get_file_watcher_status` を使用して監視がアクティブかどうかを確認
## 開発・貢献
### 🔧 **ソースからのビルド**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```
### 🐛 **デバッグ**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```
### 🤝 **貢献**
貢献を歓迎します!お気軽にプルリクエストを提出してください。
---
### 📜 **ライセンス**
[MIT License](LICENSE)
### 🌐 **翻訳**
- [English](README.md)
- [繁體中文](README_zh.md)

View File

@@ -0,0 +1,319 @@
# 코드 인덱스 MCP
<div align="center">
[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io)
[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE)
**대규모 언어 모델을 위한 지능형 코드 인덱싱과 분석**
고급 검색, 정밀 분석, 유연한 탐색 기능으로 AI가 코드베이스를 이해하고 활용하는 방식을 혁신하세요.
</div>
<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
<img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>
## 개요
Code Index MCP는 [Model Context Protocol](https://modelcontextprotocol.io) 기반 MCP 서버로, AI 어시스턴트와 복잡한 코드베이스 사이를 연결합니다. 빠른 인덱싱, 강력한 검색, 정밀한 코드 분석을 제공하여 AI가 프로젝트 구조를 정확히 파악하고 효과적으로 지원하도록 돕습니다.
**이럴 때 안성맞춤:** 코드 리뷰, 리팩터링, 문서화, 디버깅 지원, 아키텍처 분석
## 빠른 시작
### 🚀 **권장 설정 (대부분의 사용자)**
어떤 MCP 호환 애플리케이션에서도 몇 단계만으로 시작할 수 있습니다.
**사전 준비:** Python 3.10+ 및 [uv](https://github.com/astral-sh/uv)
1. **MCP 설정에 서버 추가** (예: `claude_desktop_config.json` 또는 `~/.claude.json`)
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uvx",
      "args": ["code-index-mcp"]
    }
  }
}
```
> 시작할 때 프로젝트를 자동으로 지정하려면 `args` 배열 끝에
> `--project-path /absolute/path/to/repo` 를 추가하세요. 이렇게 하면 시작 직후 `set_project_path` 를 호출한 것과 동일한 상태가 됩니다.
2. **애플리케이션 재시작** `uvx`가 설치와 실행을 자동으로 처리합니다.
3. **사용 시작** (AI 어시스턴트에게 아래 프롬프트를 전달)
```
프로젝트 경로를 /Users/dev/my-react-app 으로 설정해줘
이 프로젝트에서 모든 TypeScript 파일을 찾아줘
"authentication" 관련 함수를 검색해줘
src/App.tsx 파일을 분석해줘
```
*실행 시 `--project-path` 옵션을 사용했다면 첫 번째 명령은 건너뛰어도 됩니다. 서버가 이미 경로를 알고 있습니다.*
### Codex CLI 설정
OpenAI의 Codex CLI를 사용하는 경우 `~/.codex/config.toml`에 다음 MCP 서버 설정을 추가하세요.
Windows에서는 `C:\Users\<you>\.codex\config.toml`에 위치합니다.
```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> 실행 시 프로젝트를 자동으로 지정하려면 `args` 리스트에 `--project-path C:/absolute/path/to/repo` 를 추가하세요.
> 이는 이후에 `set_project_path` 도구를 호출하는 것과 같은 효과입니다.
Windows의 `uvx`는 기본 사용자 프로필 디렉터리가 필요합니다.
MCP가 안정적으로 시작되도록 같은 서버 설정에 아래 환경 변수 덮어쓰기를 유지하세요.
```toml
[mcp_servers.code-index.env]
HOME = "C:\\Users\\<you>"
APPDATA = "C:\\Users\\<you>\\AppData\\Roaming"
LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local"
SystemRoot = "C:\\Windows"
```
Linux와 macOS는 운영체제가 `HOME`과 XDG 경로를 기본으로 제공하므로 대부분 별도의 `env` 섹션이 필요하지 않습니다.
제한된 컨테이너에서 실행하거나 캐시/설정 위치를 수동으로 바꾸고 싶을 때만 덮어쓰면 됩니다.
환경 변수 전체 목록은 [`uv` 환경 변수 문서](https://docs.astral.sh/uv/reference/environment/) (`HOME`, `XDG_CACHE_HOME`, `XDG_CONFIG_HOME`, `APPDATA` 등)를 참고하세요.
## 대표 사용 사례
**코드 리뷰:** "예전 API를 사용하는 부분을 모두 찾아줘"
**리팩터링 지원:** "이 함수는 어디에서 호출되나요?"
**프로젝트 학습:** "이 React 프로젝트의 핵심 컴포넌트를 보여줘"
**디버깅:** "에러 처리 로직이 있는 파일을 찾아줘"
## 주요 기능
### 🧠 **지능형 검색과 분석**
- **듀얼 전략 아키텍처:** 7개 핵심 언어는 전용 tree-sitter 파서를 사용하고, 그 외 50+ 파일 형식은 폴백 전략으로 처리
- **직접 Tree-sitter 통합:** 특화 언어에 정규식 폴백 없음 - 문제 시 즉시 실패하고 명확한 오류 메시지 제공
- **고급 검색:** ugrep, ripgrep, ag, grep 중 최적의 도구를 자동 선택해 활용
- **범용 파일 지원:** 정교한 AST 분석부터 기본 파일 인덱싱까지 폭넓게 커버
- **파일 분석:** `build_deep_index` 실행 후 구조, 임포트, 클래스, 메서드, 복잡도 지표를 심층적으로 파악
### 🗂️ **다중 언어 지원**
- **Tree-sitter AST 분석(7종):** Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig
- **폴백 전략(50+ 형식):** C/C++, Rust, Ruby, PHP 등 대부분의 프로그래밍 언어 지원
- **문서 및 설정 파일:** Markdown, JSON, YAML, XML 등 상황에 맞는 처리
- **웹 프론트엔드:** Vue, React, Svelte, HTML, CSS, SCSS
- **Java 웹 & 빌드:** JSP/태그 (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`), Grails/GSP (`.gsp`), Gradle/Groovy 스크립트 (`.gradle`, `.groovy`), `.properties`, Protocol Buffers (`.proto`)
- **데이터 계층:** SQL, NoSQL, 스토어드 프로시저, 마이그레이션 스크립트
- **구성 파일:** JSON, YAML, XML, Markdown
- **[지원 파일 전체 목록 보기](#지원-파일-형식)**
### 🔄 **실시간 모니터링 & 자동 새로고침**
- **파일 워처:** 파일 변경 시 자동으로 얕은 인덱스(파일 목록) 갱신
- **크로스 플랫폼:** 운영체제 기본 파일시스템 이벤트 활용
- **스마트 처리:** 빠른 변경을 묶어 과도한 재빌드를 방지
- **얕은 인덱스 갱신:** 파일 목록을 최신 상태로 유지하며, 심볼 데이터가 필요하면 `build_deep_index`를 실행
### ⚡ **성능 & 효율성**
- **Tree-sitter AST 파싱:** 정확한 심볼 추출을 위한 네이티브 구문 분석
- **지속 캐싱:** 인덱스를 저장해 이후 응답 속도를 극대화
- **스마트 필터링:** 빌드 디렉터리·임시 파일을 자동 제외
- **메모리 효율:** 대규모 코드베이스를 염두에 둔 설계
- **직접 의존성:** 불필요한 폴백 없이 명확한 오류 메시지 제공
## 지원 파일 형식
<details>
<summary><strong>💻 프로그래밍 언어 (클릭하여 확장)</strong></summary>
**전용 Tree-sitter 전략 언어:**
- **Python** (`.py`, `.pyw`) 클래스/메서드 추출 및 호출 추적이 포함된 완전 AST 분석
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) ES6+ 클래스와 함수를 tree-sitter로 파싱
- **TypeScript** (`.ts`, `.tsx`) 인터페이스를 포함한 타입 인지 심볼 추출
- **Java** (`.java`) 클래스 계층, 메서드 시그니처, 호출 관계 분석
- **Go** (`.go`) 구조체 메서드, 리시버 타입, 함수 분석
- **Objective-C** (`.m`, `.mm`) 클래스/인스턴스 메서드를 +/- 표기로 구분
- **Zig** (`.zig`, `.zon`) 함수와 구조체를 tree-sitter AST로 분석
**기타 모든 프로그래밍 언어:**
나머지 언어는 **폴백 파싱 전략**으로 기본 메타데이터와 파일 인덱싱을 제공합니다. 예:
- **시스템/저수준:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
- **객체지향:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
- **스크립트:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
- **그 외 40+ 형식** 폴백 전략으로 빠른 탐색 가능
</details>
<details>
<summary><strong>🌐 웹 프론트엔드 & UI</strong></summary>
- 프레임워크: Vue (`.vue`), Svelte (`.svelte`), Astro (`.astro`)
- 스타일링: CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`), HTML (`.html`)
- 템플릿: Handlebars (`.hbs`, `.handlebars`), EJS (`.ejs`), Pug (`.pug`), FreeMarker (`.ftl`), Mustache (`.mustache`), Liquid (`.liquid`), ERB (`.erb`)
</details>
<details>
<summary><strong>🗄️ 데이터 계층 & SQL</strong></summary>
- **SQL 변형:** 표준 SQL (`.sql`, `.ddl`, `.dml`), 데이터베이스별 방언 (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
- **DB 객체:** 프로시저/함수 (`.proc`, `.procedure`, `.func`, `.function`), 뷰/트리거/인덱스 (`.view`, `.trigger`, `.index`)
- **마이그레이션 도구:** 마이그레이션 파일 (`.migration`, `.seed`, `.fixture`, `.schema`), 도구 구성 (`.liquibase`, `.flyway`)
- **NoSQL & 그래프:** 질의 언어 (`.cql`, `.cypher`, `.sparql`, `.gql`)
</details>
<details>
<summary><strong>📄 문서 & 설정 파일</strong></summary>
- Markdown (`.md`, `.mdx`)
- 구성 파일 (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)
</details>
## 사용 가능한 도구
### 🏗️ **프로젝트 관리**
| 도구 | 설명 |
|------|------|
| **`set_project_path`** | 프로젝트 디렉터리의 인덱스를 초기화 |
| **`refresh_index`** | 파일 변경 후 얕은 파일 인덱스를 재생성 |
| **`build_deep_index`** | 심층 분석에 사용하는 전체 심볼 인덱스를 생성 |
| **`get_settings_info`** | 현재 프로젝트 설정과 상태를 확인 |
*심볼 레벨 데이터가 필요하면 `build_deep_index`를 실행하세요. 기본 얕은 인덱스는 빠른 파일 탐색을 담당합니다.*
### 🔍 **검색 & 탐색**
| 도구 | 설명 |
|------|------|
| **`search_code_advanced`** | 정규식, 퍼지 매칭, 파일 필터링을 지원하는 스마트 검색 (기본적으로 페이지당 10개 결과 반환, `max_results`·`start_index`로 조정 가능) |
| **`find_files`** | 글롭 패턴으로 파일 찾기 (예: `**/*.py`) |
| **`get_file_summary`** | 파일 구조, 함수, 임포트, 복잡도를 분석 (심층 인덱스 필요) |
### 🔄 **모니터링 & 자동 새로고침**
| 도구 | 설명 |
|------|------|
| **`get_file_watcher_status`** | 파일 워처 상태와 구성을 확인 |
| **`configure_file_watcher`** | 자동 새로고침 설정 (활성/비활성, 지연 시간, 추가 제외 패턴) |
### 🛠️ **시스템 & 유지 관리**
| 도구 | 설명 |
|------|------|
| **`create_temp_directory`** | 인덱스 저장용 임시 디렉터리를 생성 |
| **`check_temp_directory`** | 인덱스 저장 위치와 권한을 확인 |
| **`clear_settings`** | 모든 설정과 캐시 데이터를 초기화 |
| **`refresh_search_tools`** | 사용 가능한 검색 도구를 재검색 (ugrep, ripgrep 등) |
## 사용 예시
### 🧭 **빠른 시작 워크플로**
**1. 프로젝트 초기화**
```
프로젝트 경로를 /Users/dev/my-react-app 으로 설정해줘
```
*프로젝트를 설정하고 얕은 인덱스를 생성합니다.*
**2. 프로젝트 구조 탐색**
```
src/components 안의 TypeScript 컴포넌트 파일을 모두 찾아줘
```
*사용 도구: `find_files` (`src/components/**/*.tsx`)*
**3. 핵심 파일 분석**
```
src/api/userService.ts 요약을 알려줘
```
*사용 도구: `get_file_summary` (함수, 임포트, 복잡도 표시)*
*팁: `needs_deep_index` 응답이 나오면 먼저 `build_deep_index`를 실행하세요.*
### 🔍 **고급 검색 예시**
<details>
<summary><strong>코드 패턴 검색</strong></summary>
```
"get.*Data"에 해당하는 함수 호출을 정규식으로 찾아줘
```
*예: `getData()`, `getUserData()`, `getFormData()`*
</details>
<details>
<summary><strong>퍼지 함수 검색</strong></summary>
```
'authUser'와 유사한 인증 관련 함수를 찾아줘
```
*예: `authenticateUser`, `authUserToken`, `userAuthCheck`*
</details>
<details>
<summary><strong>언어별 검색</strong></summary>
```
Python 파일에서만 "API_ENDPOINT" 를 찾아줘
```
*`search_code_advanced` + `file_pattern="*.py"` (기본 10개 결과, `max_results`로 확장하고 `start_index`로 페이지 이동)*
</details>
<details>
<summary><strong>자동 새로고침 설정</strong></summary>
```
파일 변경 시 자동으로 인덱스를 새로고침하도록 설정해줘
```
*`configure_file_watcher`로 활성화 및 지연 시간 설정*
</details>
<details>
<summary><strong>프로젝트 유지 관리</strong></summary>
```
새 컴포넌트를 추가했어. 프로젝트 인덱스를 다시 빌드해줘
```
*`refresh_index`로 빠르게 얕은 인덱스를 업데이트*
</details>
## 문제 해결
### 🔄 **자동 새로고침이 동작하지 않을 때**
- 환경 문제로 `watchdog`가 빠졌다면 설치: `pip install watchdog`
- 수동 새로고침: 변경 후 `refresh_index` 도구 실행
- 워처 상태 확인: `get_file_watcher_status` 도구로 활성 여부 점검
## 개발 & 기여
### 🛠️ **소스에서 실행하기**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```
### 🧪 **디버깅 도구**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```
### 🤝 **기여 안내**
Pull Request를 언제든 환영합니다. 변경 사항과 테스트 방법을 함께 공유해주세요.
---
### 📄 **라이선스**
[MIT License](LICENSE)
### 🌍 **번역본**
- [English](README.md)
- [繁體中文](README_zh.md)
- [日本語](README_ja.md)

View File

@@ -0,0 +1,416 @@
# 程式碼索引 MCP
<div align="center">
[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io)
[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE)
**為大型語言模型提供智慧程式碼索引與分析**
以先進的搜尋、分析和導航功能,徹底改變 AI 對程式碼庫的理解方式。
</div>
<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
<img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>
## 概述
程式碼索引 MCP 是一個 [模型上下文協定](https://modelcontextprotocol.io) 伺服器,架起 AI 模型與複雜程式碼庫之間的橋樑。它提供智慧索引、先進搜尋功能和詳細程式碼分析,幫助 AI 助理有效地理解和導航您的專案。
**適用於:**程式碼審查、重構、文件生成、除錯協助和架構分析。
## 快速開始
### 🚀 **推薦設定(大多數使用者)**
與任何 MCP 相容應用程式開始的最簡單方式:
**前置需求:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv)
1. **新增到您的 MCP 設定** (例如 `claude_desktop_config.json` 或 `~/.claude.json`)
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uvx",
      "args": ["code-index-mcp"]
    }
  }
}
```
> 若想在啟動時自動設定專案路徑,可在 `args` 陣列末尾加入
> `--project-path /絕對/路徑`,效果等同於啟動後呼叫 `set_project_path`。
2. **重新啟動應用程式** `uvx` 會自動處理安裝和執行
3. **開始使用**(向您的 AI 助理提供這些提示):
```
設定專案路徑為 /Users/dev/my-react-app
在這個專案中找到所有 TypeScript 檔案
搜尋「authentication」相關函數
分析主要的 App.tsx 檔案
```
*如果啟動時已提供 `--project-path`,可以略過第一個指令,伺服器會自動記住路徑。*
### Codex CLI 設定
如果你使用 OpenAI 的 Codex CLI,請在 `~/.codex/config.toml` 中加入下列伺服器設定。
Windows 的設定檔位於 `C:\Users\<you>\.codex\config.toml`
```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> 如需要啟動時自動設定專案,請把 `--project-path C:/絕對/路徑` 加到 `args`
> 清單中,與手動呼叫 `set_project_path` 的結果相同。
在 Windows 中,`uvx` 需要找到標準的使用者目錄。保留下列環境變數覆寫設定,才能讓 MCP 穩定啟動:
```toml
[mcp_servers.code-index.env]
HOME = "C:\\Users\\<you>"
APPDATA = "C:\\Users\\<you>\\AppData\\Roaming"
LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local"
SystemRoot = "C:\\Windows"
```
Linux 與 macOS 預設會提供 `HOME` 與 XDG 路徑,因此通常不需要額外的 `env` 區塊;只有在受限的容器環境或想手動調整快取/設定位置時才需要覆寫。
完整的環境變數清單請參考 [`uv` 環境變數說明](https://docs.astral.sh/uv/reference/environment/)(包含 `HOME`、`XDG_CACHE_HOME`、`XDG_CONFIG_HOME`、`APPDATA` 等)。
## 典型使用場景
**程式碼審查**:「找出所有使用舊 API 的地方」
**重構協助**:「這個函數在哪裡被呼叫?」
**學習專案**:「顯示這個 React 專案的主要元件」
**除錯協助**:「搜尋所有錯誤處理相關的程式碼」
## 主要特性
### 🔍 **智慧搜尋與分析**
- **雙策略架構**:7 種核心語言使用專業化 Tree-sitter 解析,50+ 種檔案類型使用備用策略
- **直接 Tree-sitter 整合**:專業化語言無正則表達式備用 - 快速失敗並提供清晰錯誤訊息
- **進階搜尋**:自動偵測並使用最佳工具(ugrep、ripgrep、ag 或 grep)
- **通用檔案支援**:從進階 AST 解析到基本檔案索引的全面覆蓋
- **檔案分析**:執行 `build_deep_index` 後深入了解結構、匯入、類別、方法和複雜度指標
### 🗂️ **多語言支援**
- **7 種語言使用 Tree-sitter AST 解析**:Python、JavaScript、TypeScript、Java、Go、Objective-C、Zig
- **50+ 種檔案類型使用備用策略**:C/C++、Rust、Ruby、PHP 和所有其他程式語言
- **文件與配置檔案**:Markdown、JSON、YAML、XML(適當處理)
- **網頁前端**:Vue、React、Svelte、HTML、CSS、SCSS
- **Java Web 與建置**:JSP/Tag (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`)、Grails/GSP (`.gsp`)、Gradle/Groovy 腳本 (`.gradle`, `.groovy`)、`.properties`、Protocol Buffers (`.proto`)
- **資料庫**:SQL 變體、NoSQL、存儲過程、遷移腳本
- **配置檔案**:JSON、YAML、XML、Markdown
- **[查看完整列表](#支援的檔案類型)**
### ⚡ **即時監控與自動刷新**
- **檔案監控器**:檔案變更時自動更新索引
- **跨平台**:原生作業系統檔案系統監控
- **智慧處理**:批次處理快速變更以防止過度重建
- **淺層索引更新**:監控檔案變更並維持檔案清單最新;需要符號資料時請執行 `build_deep_index`
### ⚡ **效能與效率**
- **Tree-sitter AST 解析**:原生語法解析以實現準確的符號提取
- **持久快取**:儲存索引以實現超快速的後續存取
- **智慧篩選**:智能排除建構目錄和暫存檔案
- **記憶體高效**:針對大型程式碼庫優化
- **直接依賴**:無備用機制 - 快速失敗並提供清晰錯誤訊息
## 支援的檔案類型
<details>
<summary><strong>📁 程式語言(點擊展開)</strong></summary>
**專業化 Tree-sitter 策略語言:**
- **Python** (`.py`, `.pyw`) - 完整 AST 分析,包含類別/方法提取和呼叫追蹤
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ 類別和函數解析使用 Tree-sitter
- **TypeScript** (`.ts`, `.tsx`) - 完整類型感知符號提取,包含介面
- **Java** (`.java`) - 完整類別階層、方法簽名和呼叫關係
- **Go** (`.go`) - 結構方法、接收者類型和函數分析
- **Objective-C** (`.m`, `.mm`) - 類別/實例方法區分,使用 +/- 標記法
- **Zig** (`.zig`, `.zon`) - 函數和結構解析使用 Tree-sitter AST
**所有其他程式語言:**
所有其他程式語言使用 **備用解析策略**,提供基本檔案索引和元資料提取。包括:
- **系統與低階語言:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)、Rust (`.rs`)
- **物件導向語言:** C# (`.cs`)、Kotlin (`.kt`)、Scala (`.scala`)、Swift (`.swift`)
- **腳本與動態語言:** Ruby (`.rb`)、PHP (`.php`)、Shell (`.sh`, `.bash`)
- **以及 40+ 種檔案類型** - 全部通過備用策略處理進行基本索引
</details>
<details>
<summary><strong>🌐 網頁與前端(點擊展開)</strong></summary>
**框架與函式庫:**
- Vue (`.vue`)
- Svelte (`.svelte`)
- Astro (`.astro`)
**樣式:**
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
- HTML (`.html`)
**模板:**
- Handlebars (`.hbs`, `.handlebars`)
- EJS (`.ejs`)
- Pug (`.pug`)
- FreeMarker (`.ftl`)
- Mustache (`.mustache`)
- Liquid (`.liquid`)
- ERB (`.erb`)
</details>
<details>
<summary><strong>🗄️ 資料庫與 SQL點擊展開</strong></summary>
**SQL 變體:**
- 標準 SQL (`.sql`, `.ddl`, `.dml`)
- 資料庫特定 (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
**資料庫物件:**
- 程序與函式 (`.proc`, `.procedure`, `.func`, `.function`)
- 檢視與觸發器 (`.view`, `.trigger`, `.index`)
**遷移與工具:**
- 遷移檔案 (`.migration`, `.seed`, `.fixture`, `.schema`)
- 工具特定 (`.liquibase`, `.flyway`)
**NoSQL 與現代資料庫:**
- 圖形與查詢 (`.cql`, `.cypher`, `.sparql`, `.gql`)
</details>
<details>
<summary><strong>📄 文件與配置(點擊展開)</strong></summary>
- Markdown (`.md`, `.mdx`)
- 配置 (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)
</details>
### 🛠️ **開發設定**
適用於貢獻或本地開發:
1. **克隆並安裝:**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
```
2. **配置本地開發:**
```json
{
  "mcpServers": {
    "code-index": {
      "command": "uv",
      "args": ["run", "code-index-mcp"]
    }
  }
}
```
3. **使用 MCP Inspector 除錯:**
```bash
npx @modelcontextprotocol/inspector uv run code-index-mcp
```
<details>
<summary><strong>替代方案:手動 pip 安裝</strong></summary>
如果您偏好傳統的 pip 管理:
```bash
pip install code-index-mcp
```
然後配置:
```json
{
  "mcpServers": {
    "code-index": {
      "command": "code-index-mcp",
      "args": []
    }
  }
}
```
</details>
## 可用工具
### 🏗️ **專案管理**
| 工具 | 描述 |
|------|------|
| **`set_project_path`** | 為專案目錄初始化索引 |
| **`refresh_index`** | 在檔案變更後重建淺層檔案索引 |
| **`build_deep_index`** | 產生供深度分析使用的完整符號索引 |
| **`get_settings_info`** | 檢視目前專案配置和狀態 |
*需要符號層級資料時,請執行 `build_deep_index`;預設的淺層索引提供快速檔案探索。*
### 🔍 **搜尋與探索**
| 工具 | 描述 |
|------|------|
| **`search_code_advanced`** | 智慧搜尋,支援正規表達式、模糊匹配和檔案篩選,預設每頁回傳 10 筆結果,可透過 `max_results` 與 `start_index` 調整 |
| **`find_files`** | 使用萬用字元模式尋找檔案(例如 `**/*.py`) |
| **`get_file_summary`** | 分析檔案結構、函式、匯入和複雜度(需要深度索引) |
### 🔄 **監控與自動刷新**
| 工具 | 描述 |
|------|------|
| **`get_file_watcher_status`** | 檢查檔案監控器狀態和配置 |
| **`configure_file_watcher`** | 啟用/停用自動刷新並配置設定 |
### 🛠️ **系統與維護**
| 工具 | 描述 |
|------|------|
| **`create_temp_directory`** | 設定索引資料的儲存目錄 |
| **`check_temp_directory`** | 驗證索引儲存位置和權限 |
| **`clear_settings`** | 重設所有快取資料和配置 |
| **`refresh_search_tools`** | 重新偵測可用的搜尋工具(ugrep、ripgrep 等) |
## 使用範例
### 🎯 **快速開始工作流程**
**1. 初始化您的專案**
```
將專案路徑設定為 /Users/dev/my-react-app
```
*自動索引您的程式碼庫並建立可搜尋的快取*
**2. 探索專案結構**
```
在 src/components 中尋找所有 TypeScript 元件檔案
```
*使用:`find_files`,模式為 `src/components/**/*.tsx`*
**3. 分析關鍵檔案**
```
給我 src/api/userService.ts 的摘要
```
*使用:`get_file_summary` 顯示函式、匯入和複雜度*
*提示:若收到 `needs_deep_index` 回應,請先執行 `build_deep_index`。*
### 🔍 **進階搜尋範例**
<details>
<summary><strong>程式碼模式搜尋</strong></summary>
```
使用正規表達式搜尋所有符合 "get.*Data" 的函式呼叫
```
*找到:`getData()`、`getUserData()`、`getFormData()` 等*
</details>
<details>
<summary><strong>模糊函式搜尋</strong></summary>
```
使用 'authUser' 的模糊搜尋尋找驗證相關函式
```
*匹配:`authenticateUser`、`authUserToken`、`userAuthCheck` 等*
</details>
<details>
<summary><strong>特定語言搜尋</strong></summary>
```
只在 Python 檔案中搜尋 "API_ENDPOINT"
```
*使用:`search_code_advanced` 搭配 `file_pattern: "*.py"`(預設回傳 10 筆;使用 `max_results` 擴充或 `start_index` 換頁)*
</details>
<details>
<summary><strong>自動刷新配置</strong></summary>
```
配置檔案變更時的自動索引更新
```
*使用:`configure_file_watcher` 啟用/停用監控並設定防抖時間*
</details>
<details>
<summary><strong>專案維護</strong></summary>
```
我新增了新元件,請重新整理專案索引
```
*使用:`refresh_index` 更新可搜尋的快取*
</details>
## 故障排除
### 🔄 **自動刷新無法運作**
如果檔案變更時自動索引更新無法運作,請嘗試:
- `pip install watchdog`(可能解決環境隔離問題)
- 使用手動刷新:在檔案變更後呼叫 `refresh_index` 工具
- 檢查檔案監視器狀態:使用 `get_file_watcher_status` 驗證監控是否處於活動狀態
## 開發與貢獻
### 🔧 **從原始碼建構**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```
### 🐛 **除錯**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```
### 🤝 **貢獻**
歡迎貢獻!請隨時提交拉取請求。
---
### 📜 **授權條款**
[MIT 授權條款](LICENSE)
### 🌐 **翻譯**
- [English](README.md)
- [日本語](README_ja.md)

View File

@@ -0,0 +1,83 @@
# MCP Restart Playbook (November 10, 2025)
This runbook is for the first LLM/agent session *after* the MCP server restarts (for example, after bumping dependencies or recycling the FastMCP process). Follow every step in order so we quickly regain context, validate the upgraded toolchain, and communicate status to the rest of the team.
---
## 1. Current Snapshot
- **Branch**: `mcp-upgrade-notes`
- **Python**: 3.13.2 (uv-managed)
- **Key dependency**: `mcp>=1.21.0,<2.0.0` (synced across `pyproject.toml`, `requirements.txt`, and `uv.lock`)
- **Latest validation**: `uv run pytest` — 16 tests passed on **November 10, 2025 @ 02:05 UTC**
- **Reference doc**: `docs/mcp-upgrade-notes.md` (rationale, API deltas, validation checklist)
If any of these details drift (new branch, newer SDK, etc.), update this file before handing off.
---
## 2. Post-Restart MCP Calls (must run all tools)
Run through every exposed MCP primitive to guarantee parity after restart. Use the table below as a checklist and record each response summary; a scripted replay sketch follows the notes below.
| # | Tool | Minimum Input | Expected outcome |
|---|------|---------------|------------------|
| 1 | `set_project_path` | `path="C:\Users\p10362321\project\code-index-mcp"` | Indexed ~149 files; watcher initialized. |
| 2 | `build_deep_index` | - | Project re-indexed. Found ~149 files / ~1,070 symbols. |
| 3 | `search_code_advanced` | `pattern="FastMCP", file_pattern="src/**/*.py", max_results=20` | Hits in `server.py` plus pagination metadata. |
| 4 | `find_files` | `pattern="tests/**/*.py"` | Returns 10 test modules. |
| 5 | `get_file_summary` | `file_path="src/code_index_mcp/server.py"` | ~390 lines, 20+ functions reported. |
| 6 | `refresh_index` | - | Shallow index re-built with ~149 files. |
| 7 | `get_settings_info` | - | Shows temp/settings dirs, writable=true. |
| 8 | `create_temp_directory` | - | Confirms directory exists/created. |
| 9 | `check_temp_directory` | - | Lists `index.db`, `index.msgpack`, `index.shallow.json`. |
|10 | `clear_settings` | - | Project settings, index, and cache have been cleared (rerun #1 + #2). |
|11 | `refresh_search_tools` | - | Available: ['ripgrep', 'basic']; preferred: ripgrep. |
|12 | `get_file_watcher_status` | - | status: active, debounce_seconds=6. |
|13 | `configure_file_watcher` | `enabled=True, debounce_seconds=6` | Confirmation message (restart may be required). |
Notes:
- After running `clear_settings`, immediately repeat `set_project_path` + `build_deep_index` to restore context before proceeding.
- If any tool fails, stop the playbook, capture output, and escalate before continuing.
Log each response summary in the session notes so the next engineer knows everything is green.
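If you prefer to script the replay, here is a hedged sketch over the stdio transport; the argument names are copied from the table above, the project path is environment-specific, and you can trim or extend the call list as needed:
```python
# Hedged sketch: replay the first few checklist calls over stdio and print a
# one-line summary per tool. Arguments mirror the table above.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

CALLS = [
    ("set_project_path", {"path": r"C:\Users\p10362321\project\code-index-mcp"}),
    ("build_deep_index", {}),
    ("refresh_search_tools", {}),
    ("get_file_watcher_status", {}),
]


async def main() -> None:
    params = StdioServerParameters(command="uv", args=["run", "code-index-mcp"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            for tool, args in CALLS:
                result = await session.call_tool(tool, args)
                print(tool, "->", "error" if result.isError else "ok")


asyncio.run(main())
```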
---
## 3. CLI / End-to-End Smoke
Run these in the repo root once the MCP tools succeed:
```powershell
uv run code-index-mcp --project-path C:\Users\p10362321\project\code-index-mcp
uv run pytest
```
- Treat any warning or stderr output as a blocker.
- Capture timestamps + durations; attach to release prep if we are close to tagging.
---
## 4. Communicate Status
When handing the session back to the team, summarize:
- **SDK state**: Confirm we are still on MCP 1.21.0 (with context injection + capability helpers).
- **Tool cache**: Mention that clients should re-cache tool lists after restart (FastMCP now enforces metadata changes).
- **Known issues**: Note any skipped steps, flaky tests, or manual interventions.
- **Next action**: “Ready for release prep” or “Need follow-up on X” — whichever applies after the smoke tests.
---
## 5. Troubleshooting Quick Reference
- **`set_project_path` fails** → Ensure the repo path is accessible (sandbox permissions) and no other agent locked `index.db`. Run `clear_settings()` then retry.
- **Search returns zero results** → Run `refresh_search_tools()`; if ripgrep missing, fall back to `basic` and flag the infra team.
- **Watcher inactive** → Call `configure_file_watcher(enabled=True)` and `refresh_index()`. Document if it remains inactive.
- **CLI smoke exits non-zero** → Capture full stdout/stderr, file an issue linked to `docs/mcp-upgrade-notes.md`, and pause release work.
Keep this section updated with any new gotchas discovered during restarts.
---
## 6. Hand-off Checklist
- [ ] Steps 1–4 executed and logged in the current session.
- [ ] Any deviations documented (include timestamps + command output).
- [ ] This playbook reviewed/updated if procedures changed.
If all boxes are checked, the MCP server is considered healthy and ready for normal development or release activities.

View File

@@ -0,0 +1,28 @@
# MCP Upgrade Notes (November 2025)
## Why this upgrade matters
- `mcp` 1.21.0 was published to PyPI on 2025-11-06, so we are at least 17 point releases behind the current SDK and missing recent transport, auth, and client-surface fixes.
- The MCP governance group will cut the next specification release on 2025-11-25 (RC on 2025-11-11), so validating 1.21.0 now keeps us aligned ahead of another protocol bump.
## Dependency & packaging considerations
1. Run `uv lock --upgrade mcp` (or equivalent) so `uv.lock` stops pinning 1.4.1 and picks up the 1.21.0 wheels plus their refreshed transitive set (Starlette 0.49.1, AnyIO/HTTPX upgrades, etc.).
2. Re-run `uv run pytest` and our smoke commands (`uv run code-index-mcp --project-path <repo>`) because AnyIO cancellation semantics and Starlette ASGI changes can surface subtle regressions in watcher services.
3. Publish the lockfile and version bumps together; our release checklist requires `pyproject.toml`, the package `__init__`, and `uv.lock` to stay in sync.
## API & runtime changes to verify
- SEP-985 landed in 1.21.0, adding OAuth-protected resource metadata fallback: confirm our SettingsService handles `WWW-Authenticate` responses and that CLI flags surface any required bearer tokens.
- `ClientSession.get_server_capabilities()` is new; if clients or integration tests introspect capabilities manually, migrate to this helper.
- Starlette 0.49.1 ships tighter ASGI scope validation; double-check our SSE transport and progress notifications.
## Recommended practices for 1.21.x
1. **Depend on Context injection, not globals.** Annotate `ctx: Context` parameters so FastMCP injects the request context automatically instead of calling `mcp.get_context()` directly; this keeps us compatible with async-only handlers and future dependency-injection changes. A short sketch follows this list.
2. **Cache expensive tool listings in clients.** Newer agents (OpenAI Agents SDK, Claude Desktop) call `list_tools()` on every run; set `cache_tools_list=True` only when our tool roster is static and call `invalidate_tools_cache()` after deployments.
3. **Respect capability negotiation each session.** Protocol version 2025-06-18 remains current, and version negotiation happens during `initialize`; ensure our server exposes accurate `capabilities` metadata and gracefully errors when clients offer only future versions.
4. **Stay ahead of November spec changes.** The upcoming 2025-11-25 spec focuses on additional security hardening. Schedule time to exercise the RC (available 2025-11-11) so we can absorb any required surface changes early.
5. **Document OAuth and transport choices.** With SEP-985 and other auth SEPs in flight, record which flows (`device`, `jwt-bearer`, etc.) each deployment expects, and prefer the Streamable HTTP transport when exposing remote servers to benefit from the latest security guidance.
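To make practice 1 concrete, here is a minimal hedged sketch of context injection with FastMCP; the server and tool names are illustrative only, not taken from this repo:
```python
# Minimal sketch of practice 1: annotate a Context parameter and let FastMCP
# inject the request context, instead of reaching for mcp.get_context().
from mcp.server.fastmcp import Context, FastMCP

mcp = FastMCP("demo-server")


@mcp.tool()
async def index_status(project: str, ctx: Context) -> str:
    await ctx.info(f"checking index for {project}")  # logs via injected context
    return f"{project}: ok"
```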
## Validation checklist before merging
- [ ] Lockfile regenerated (`uv lock --upgrade mcp`) and `uv run python -m code_index_mcp.server --help` still succeeds.
- [ ] `uv run code-index-mcp --project-path <repo>` exercises `set_project_path`, `build_deep_index`, and `search_code_advanced` end-to-end.
- [ ] Smoke Claude Desktop / Codex CLI against the upgraded server; confirm resources + tools enumerate and that tool caching behaves as expected.
- [ ] Update release notes + AGENTS.md summary once 1.21.x is verified in staging.

View File

@@ -0,0 +1,43 @@
{
  "$schema": "https://fastmcp.wiki/en/schemas/fastmcp.json",
  "name": "Code Index MCP",
  "description": "Indexes a local repository and exposes search, indexing, and file utilities via the Model Context Protocol.",
  "license": "MIT",
  "keywords": [
    "mcp",
    "code-index",
    "search",
    "fastmcp"
  ],
  "links": [
    {
      "rel": "source",
      "href": "https://github.com/johnhuang316/code-index-mcp"
    },
    {
      "rel": "documentation",
      "href": "https://github.com/johnhuang316/code-index-mcp#readme"
    }
  ],
  "source": {
    "path": "src/code_index_mcp/server.py",
    "entrypoint": "mcp"
  },
  "environment": {
    "python": ">=3.10",
    "dependencies": [
      "mcp>=1.21.0,<2.0.0",
      "watchdog>=3.0.0",
      "tree-sitter>=0.20.0",
      "tree-sitter-javascript>=0.20.0",
      "tree-sitter-typescript>=0.20.0",
      "tree-sitter-java>=0.20.0",
      "tree-sitter-zig>=0.20.0",
      "pathspec>=0.12.1",
      "msgpack>=1.0.0"
    ]
  },
  "deployment": {
    "transport": "stdio"
  }
}

View File

@@ -0,0 +1,35 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "code-index-mcp"
version = "2.9.4"
description = "Code indexing and analysis tools for LLMs using MCP"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}
authors = [
{name = "johnhuang316"}
]
dependencies = [
"mcp>=1.21.0,<2.0.0",
"watchdog>=3.0.0",
"tree-sitter>=0.20.0",
"tree-sitter-javascript>=0.20.0",
"tree-sitter-typescript>=0.20.0",
"tree-sitter-java>=0.20.0",
"tree-sitter-zig>=0.20.0",
"pathspec>=0.12.1",
"msgpack>=1.0.0",
]
[project.urls]
Homepage = "https://github.com/johnhuang316/code-index-mcp"
"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues"
[project.scripts]
code-index-mcp = "code_index_mcp.server:main"
[tool.setuptools]
package-dir = {"" = "src"}

View File

@@ -0,0 +1,10 @@
mcp>=1.21.0,<2.0.0
watchdog>=3.0.0
protobuf>=4.21.0
tree-sitter>=0.20.0
tree-sitter-javascript>=0.20.0
tree-sitter-typescript>=0.20.0
tree-sitter-java>=0.20.0
tree-sitter-zig>=0.20.0
pathspec>=0.12.1
libclang>=16.0.0

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env python
"""
Development convenience script to run the Code Index MCP server.
"""
import sys
import os
# Add src directory to path
src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src')
sys.path.insert(0, src_path)
try:
from code_index_mcp.server import main
except ImportError as exc:
# Report import problems on stderr; stdout stays clean for MCP stdio.
print(f"Failed to import code_index_mcp: {exc}", file=sys.stderr)
raise SystemExit(1) from exc
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,6 @@
"""Code Index MCP package.
A Model Context Protocol server for code indexing, searching, and analysis.
"""
__version__ = "2.9.4"

View File

@@ -0,0 +1,6 @@
"""Main entry point for the code-index-mcp package."""
from code_index_mcp.server import main
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,128 @@
"""
Shared constants for the Code Index MCP server.
"""
# Directory and file names
SETTINGS_DIR = "code_indexer"
CONFIG_FILE = "config.json"
INDEX_FILE = "index.json" # JSON index file (deep index)
INDEX_FILE_SHALLOW = "index.shallow.json" # Minimal shallow index (file list)
INDEX_FILE_DB = "index.db" # SQLite deep index file
# Supported file extensions for code analysis
# This is the authoritative list used by both old and new indexing systems
SUPPORTED_EXTENSIONS = [
# Core programming languages
'.py', '.pyw', # Python
'.js', '.jsx', '.ts', '.tsx', # JavaScript/TypeScript
'.mjs', '.cjs', # Modern JavaScript
'.java', # Java
'.c', '.cpp', '.h', '.hpp', # C/C++
'.cxx', '.cc', '.hxx', '.hh', # C++ variants
'.cs', # C#
'.go', # Go
'.m', '.mm', # Objective-C
'.rb', # Ruby
'.php', # PHP
'.swift', # Swift
'.kt', '.kts', # Kotlin
'.rs', # Rust
'.scala', # Scala
'.sh', '.bash', '.zsh', # Shell scripts
'.ps1', # PowerShell
'.bat', '.cmd', # Windows batch
'.r', '.R', # R
'.pl', '.pm', # Perl
'.lua', # Lua
'.dart', # Dart
'.hs', # Haskell
'.ml', '.mli', # OCaml
'.fs', '.fsx', # F#
'.clj', '.cljs', # Clojure
'.vim', # Vim script
'.zig', '.zon', # Zig
# Web and markup
'.html', '.htm', # HTML
'.css', '.scss', '.sass', # Stylesheets
'.less', '.stylus', '.styl', # Style languages
'.md', '.mdx', # Markdown
'.json', '.jsonc', # JSON
'.xml', # XML
'.yml', '.yaml', # YAML
# Frontend frameworks
'.vue', # Vue.js
'.svelte', # Svelte
'.astro', # Astro
# Java web & build artifacts
'.jsp', '.jspx', '.jspf', # JSP pages
'.tag', '.tagx', # JSP tag files
'.gsp', # Grails templates
'.properties', # Java .properties configs
'.gradle', '.groovy', # Gradle/Groovy build scripts
'.proto', # Protocol Buffers
# Template engines
'.hbs', '.handlebars', # Handlebars
'.ejs', # EJS
'.pug', # Pug
'.ftl', # FreeMarker
'.mustache', '.liquid', '.erb', # Additional template engines
# Database and SQL
'.sql', '.ddl', '.dml', # SQL
'.mysql', '.postgresql', '.psql', # Database-specific SQL
'.sqlite', '.mssql', '.oracle', # More databases
'.ora', '.db2', # Oracle and DB2
'.proc', '.procedure', # Stored procedures
'.func', '.function', # Functions
'.view', '.trigger', '.index', # Database objects
'.migration', '.seed', '.fixture', # Migration files
'.schema', # Schema files
'.cql', '.cypher', '.sparql', # NoSQL query languages
'.gql', # GraphQL
'.liquibase', '.flyway', # Migration tools
]
# Centralized filtering configuration
FILTER_CONFIG = {
"exclude_directories": {
# Version control
'.git', '.svn', '.hg', '.bzr',
# Package managers & dependencies
'node_modules', '__pycache__', '.venv', 'venv',
'vendor', 'bower_components',
# Build outputs
'dist', 'build', 'target', 'out', 'bin', 'obj',
# IDE & editors
'.idea', '.vscode', '.vs', '.sublime-workspace',
# Testing & coverage
'.pytest_cache', '.coverage', '.tox', '.nyc_output',
'coverage', 'htmlcov',
# OS artifacts
'.DS_Store', 'Thumbs.db', 'desktop.ini'
},
"exclude_files": {
# Temporary files
'*.tmp', '*.temp', '*.swp', '*.swo',
# Backup files
'*.bak', '*~', '*.orig',
# Log files
'*.log',
# Lock files
'package-lock.json', 'yarn.lock', 'Pipfile.lock'
},
"supported_extensions": SUPPORTED_EXTENSIONS
}
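# Illustrative sketch (not part of this module): how a filter helper might
# consult FILTER_CONFIG. `should_index` is a hypothetical name.
#
#     import fnmatch
#     from pathlib import Path
#
#     def should_index(path: Path) -> bool:
#         if any(part in FILTER_CONFIG["exclude_directories"] for part in path.parts):
#             return False
#         if any(fnmatch.fnmatch(path.name, pat) for pat in FILTER_CONFIG["exclude_files"]):
#             return False
#         return path.suffix.lower() in FILTER_CONFIG["supported_extensions"]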

View File

@@ -0,0 +1,36 @@
"""
Code indexing utilities for the MCP server.
Deep indexing now relies exclusively on the SQLite backend.
"""
from .qualified_names import generate_qualified_name, normalize_file_path
from .json_index_builder import JSONIndexBuilder, IndexMetadata
from .sqlite_index_builder import SQLiteIndexBuilder
from .sqlite_index_manager import SQLiteIndexManager
from .shallow_index_manager import ShallowIndexManager, get_shallow_index_manager
from .deep_index_manager import DeepIndexManager
from .models import SymbolInfo, FileInfo
_sqlite_index_manager = SQLiteIndexManager()
def get_index_manager() -> SQLiteIndexManager:
"""Return the singleton SQLite index manager."""
return _sqlite_index_manager
__all__ = [
"generate_qualified_name",
"normalize_file_path",
"JSONIndexBuilder",
"IndexMetadata",
"SQLiteIndexBuilder",
"SQLiteIndexManager",
"get_index_manager",
"ShallowIndexManager",
"get_shallow_index_manager",
"DeepIndexManager",
"SymbolInfo",
"FileInfo",
]
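# Illustrative usage of the singleton (the path is hypothetical):
#
#     mgr = get_index_manager()
#     if mgr.set_project_path("/path/to/repo"):
#         mgr.build_index()
#         print(mgr.get_index_stats())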

View File

@@ -0,0 +1,44 @@
"""
Deep Index Manager - Wrapper around the SQLite index manager.
This class provides a clear semantic separation from the shallow manager while
delegating operations to the SQLite-backed implementation.
"""
from __future__ import annotations
from typing import Optional, Dict, Any, List
from .sqlite_index_manager import SQLiteIndexManager
class DeepIndexManager:
"""Thin wrapper over SQLiteIndexManager to expose deep-index API."""
def __init__(self) -> None:
self._mgr = SQLiteIndexManager()
# Expose a subset of API to keep callers simple
def set_project_path(self, project_path: str) -> bool:
return self._mgr.set_project_path(project_path)
def build_index(self, force_rebuild: bool = False) -> bool:
return self._mgr.build_index(force_rebuild=force_rebuild)
def load_index(self) -> bool:
return self._mgr.load_index()
def refresh_index(self) -> bool:
return self._mgr.refresh_index()
def find_files(self, pattern: str = "*") -> List[str]:
return self._mgr.find_files(pattern)
def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]:
return self._mgr.get_file_summary(file_path)
def get_index_stats(self) -> Dict[str, Any]:
return self._mgr.get_index_stats()
def cleanup(self) -> None:
self._mgr.cleanup()

View File

@@ -0,0 +1,125 @@
"""
Index provider interface definitions.
Defines standard interfaces for all index access, ensuring consistency across different implementations.
"""
from typing import List, Optional, Dict, Any, Protocol
from dataclasses import dataclass
from .models import SymbolInfo, FileInfo
@dataclass
class IndexMetadata:
"""Standard index metadata structure."""
version: str
format_type: str
created_at: float
last_updated: float
file_count: int
project_root: str
tool_version: str
class IIndexProvider(Protocol):
"""
Standard index provider interface.
All index implementations must follow this interface to ensure consistent access patterns.
"""
def get_file_list(self) -> List[FileInfo]:
"""
Get list of all indexed files.
Returns:
List of file information objects
"""
...
def get_file_info(self, file_path: str) -> Optional[FileInfo]:
"""
Get information for a specific file.
Args:
file_path: Relative file path
Returns:
File information, or None if file is not in index
"""
...
def query_symbols(self, file_path: str) -> List[SymbolInfo]:
"""
Query symbol information in a file.
Args:
file_path: Relative file path
Returns:
List of symbol information objects
"""
...
def search_files(self, pattern: str) -> List[str]:
"""
Search files by pattern.
Args:
pattern: Glob pattern or regular expression
Returns:
List of matching file paths
"""
...
def get_metadata(self) -> IndexMetadata:
"""
Get index metadata.
Returns:
Index metadata information
"""
...
def is_available(self) -> bool:
"""
Check if index is available.
Returns:
True if index is available and functional
"""
...
class IIndexManager(Protocol):
"""
Index manager interface.
Defines standard interface for index lifecycle management.
"""
def initialize(self) -> bool:
"""Initialize the index manager."""
...
def get_provider(self) -> Optional[IIndexProvider]:
"""Get the current active index provider."""
...
def refresh_index(self, force: bool = False) -> bool:
"""Refresh the index."""
...
def save_index(self) -> bool:
"""Save index state."""
...
def clear_index(self) -> None:
"""Clear index state."""
...
def get_index_status(self) -> Dict[str, Any]:
"""Get index status information."""
...
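# Illustrative sketch: because IIndexProvider is a typing.Protocol, any class
# with matching methods satisfies it structurally -- no inheritance needed.
# `InMemoryProvider` is a hypothetical example, not shipped code.
#
#     class InMemoryProvider:
#         def __init__(self, files: Dict[str, FileInfo]) -> None:
#             self._files = files
#
#         def get_file_list(self) -> List[FileInfo]:
#             return list(self._files.values())
#
#         def is_available(self) -> bool:
#             return True
#
#         # ...remaining IIndexProvider methods elided...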

View File

@@ -0,0 +1,472 @@
"""
JSON Index Builder - Clean implementation using Strategy pattern.
This replaces the monolithic parser implementation with a clean,
maintainable Strategy pattern architecture.
"""
import logging
import os
import time
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from .strategies import StrategyFactory
from .models import SymbolInfo, FileInfo
logger = logging.getLogger(__name__)
@dataclass
class IndexMetadata:
"""Metadata for the JSON index."""
project_path: str
indexed_files: int
index_version: str
timestamp: str
languages: List[str]
total_symbols: int = 0
specialized_parsers: int = 0
fallback_files: int = 0
class JSONIndexBuilder:
"""
Main index builder using Strategy pattern for language parsing.
This class orchestrates the index building process by:
1. Discovering files in the project
2. Using StrategyFactory to get appropriate parsers
3. Extracting symbols and metadata
4. Assembling the final JSON index
"""
def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = None):
from ..utils import FileFilter
# Input validation
if not isinstance(project_path, str):
raise ValueError(f"Project path must be a string, got {type(project_path)}")
project_path = project_path.strip()
if not project_path:
raise ValueError("Project path cannot be empty")
if not os.path.isdir(project_path):
raise ValueError(f"Project path does not exist: {project_path}")
self.project_path = project_path
self.in_memory_index: Optional[Dict[str, Any]] = None
self.strategy_factory = StrategyFactory()
self.file_filter = FileFilter(additional_excludes)
logger.info(f"Initialized JSON index builder for {project_path}")
strategy_info = self.strategy_factory.get_strategy_info()
logger.info(f"Available parsing strategies: {len(strategy_info)} types")
# Log specialized vs fallback coverage
specialized = len(self.strategy_factory.get_specialized_extensions())
fallback = len(self.strategy_factory.get_fallback_extensions())
logger.info(f"Specialized parsers: {specialized} extensions, Fallback coverage: {fallback} extensions")
def _process_file(self, file_path: str, specialized_extensions: set) -> Optional[Tuple[Dict, Dict, str, bool]]:
"""
Process a single file - designed for parallel execution.
Args:
file_path: Path to the file to process
specialized_extensions: Set of extensions with specialized parsers
Returns:
Tuple of (symbols, file_info, language, is_specialized) or None on error
"""
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
ext = Path(file_path).suffix.lower()
rel_path = os.path.relpath(file_path, self.project_path).replace('\\', '/')
# Get appropriate strategy
strategy = self.strategy_factory.get_strategy(ext)
# Track strategy usage
is_specialized = ext in specialized_extensions
# Parse file using strategy
symbols, file_info = strategy.parse_file(rel_path, content)
logger.debug(f"Parsed {rel_path}: {len(symbols)} symbols ({file_info.language})")
return (symbols, {rel_path: file_info}, file_info.language, is_specialized)
except Exception as e:
logger.warning(f"Error processing {file_path}: {e}")
return None
def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) -> Dict[str, Any]:
"""
Build the complete index using Strategy pattern with parallel processing.
Args:
parallel: Whether to use parallel processing (default: True)
max_workers: Maximum number of worker processes/threads (default: CPU count)
Returns:
Complete JSON index with metadata, symbols, and file information
"""
logger.info(f"Building JSON index using Strategy pattern (parallel={parallel})...")
start_time = time.time()
all_symbols = {}
all_files = {}
languages = set()
specialized_count = 0
fallback_count = 0
pending_calls: List[Tuple[str, str]] = []
# Get specialized extensions for tracking
specialized_extensions = set(self.strategy_factory.get_specialized_extensions())
# Get list of files to process
files_to_process = self._get_supported_files()
total_files = len(files_to_process)
if total_files == 0:
logger.warning("No files to process")
return self._create_empty_index()
logger.info(f"Processing {total_files} files...")
def process_result(result):
nonlocal specialized_count, fallback_count
if not result:
return
symbols, file_info_dict, language, is_specialized = result
for symbol_id, symbol_info in symbols.items():
all_symbols[symbol_id] = symbol_info
for rel_path, file_info in file_info_dict.items():
all_files[rel_path] = file_info
file_pending = getattr(file_info, "pending_calls", [])
if file_pending:
pending_calls.extend(file_pending)
languages.add(language)
if is_specialized:
specialized_count += 1
else:
fallback_count += 1
if parallel and total_files > 1:
# Use ThreadPoolExecutor for I/O-bound file reading
# ProcessPoolExecutor has issues with strategy sharing
if max_workers is None:
max_workers = min(os.cpu_count() or 4, total_files)
logger.info(f"Using parallel processing with {max_workers} workers")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all tasks
future_to_file = {
executor.submit(self._process_file, file_path, specialized_extensions): file_path
for file_path in files_to_process
}
# Process completed tasks
processed = 0
for future in as_completed(future_to_file):
file_path = future_to_file[future]
result = future.result()
process_result(result)
processed += 1
if processed % 100 == 0:
logger.debug(f"Processed {processed}/{total_files} files")
else:
# Sequential processing
logger.info("Using sequential processing")
for file_path in files_to_process:
result = self._process_file(file_path, specialized_extensions)
process_result(result)
self._resolve_pending_calls(all_symbols, pending_calls)
# Build index metadata
metadata = IndexMetadata(
project_path=self.project_path,
indexed_files=len(all_files),
index_version="2.0.0-strategy",
timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
languages=sorted(list(languages)),
total_symbols=len(all_symbols),
specialized_parsers=specialized_count,
fallback_files=fallback_count
)
# Assemble final index
index = {
"metadata": asdict(metadata),
"symbols": {k: asdict(v) for k, v in all_symbols.items()},
"files": {k: asdict(v) for k, v in all_files.items()}
}
# Cache in memory
self.in_memory_index = index
elapsed = time.time() - start_time
logger.info(f"Built index with {len(all_symbols)} symbols from {len(all_files)} files in {elapsed:.2f}s")
logger.info(f"Languages detected: {sorted(languages)}")
logger.info(f"Strategy usage: {specialized_count} specialized, {fallback_count} fallback")
return index
def _resolve_pending_calls(
self,
all_symbols: Dict[str, SymbolInfo],
pending_calls: List[Tuple[str, str]]
) -> None:
"""Resolve cross-file call relationships using global symbol index."""
if not pending_calls:
return
short_index: Dict[str, List[str]] = defaultdict(list)
for symbol_id in all_symbols:
short_name = symbol_id.split("::")[-1]
short_index[short_name].append(symbol_id)
for caller, called in pending_calls:
target_ids: List[str] = []
if called in all_symbols:
target_ids = [called]
else:
target_ids = short_index.get(called, [])
if not target_ids:
matches: List[str] = []
suffix = f".{called}"
for short_name, ids in short_index.items():
if short_name.endswith(suffix):
matches.extend(ids)
target_ids = matches
if len(target_ids) != 1:
continue
symbol_info = all_symbols[target_ids[0]]
if caller not in symbol_info.called_by:
symbol_info.called_by.append(caller)
def _create_empty_index(self) -> Dict[str, Any]:
"""Create an empty index structure."""
metadata = IndexMetadata(
project_path=self.project_path,
indexed_files=0,
index_version="2.0.0-strategy",
timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
languages=[],
total_symbols=0,
specialized_parsers=0,
fallback_files=0
)
return {
"metadata": asdict(metadata),
"symbols": {},
"files": {}
}
def get_index(self) -> Optional[Dict[str, Any]]:
"""Get the current in-memory index."""
return self.in_memory_index
def clear_index(self):
"""Clear the in-memory index."""
self.in_memory_index = None
logger.debug("Cleared in-memory index")
def _get_supported_files(self) -> List[str]:
"""
Get all supported files in the project using centralized filtering.
Returns:
List of file paths that can be parsed
"""
supported_files = []
base_path = Path(self.project_path)
try:
for root, dirs, files in os.walk(self.project_path):
# Filter directories in-place using centralized logic
dirs[:] = [d for d in dirs if not self.file_filter.should_exclude_directory(d)]
# Filter files using centralized logic
for file in files:
file_path = Path(root) / file
if self.file_filter.should_process_path(file_path, base_path):
supported_files.append(str(file_path))
except Exception as e:
logger.error(f"Error scanning directory {self.project_path}: {e}")
logger.debug(f"Found {len(supported_files)} supported files")
return supported_files
def build_shallow_file_list(self) -> List[str]:
"""
Build a minimal shallow index consisting of relative file paths only.
This method does not read file contents. It enumerates supported files
using centralized filtering and returns normalized relative paths with
forward slashes for cross-platform consistency.
Returns:
List of relative file paths (using '/').
"""
try:
absolute_files = self._get_supported_files()
result: List[str] = []
for abs_path in absolute_files:
rel_path = os.path.relpath(abs_path, self.project_path).replace('\\', '/')
# Normalize leading './'
if rel_path.startswith('./'):
rel_path = rel_path[2:]
result.append(rel_path)
return result
except Exception as e:
logger.error(f"Failed to build shallow file list: {e}")
return []
def save_index(self, index: Dict[str, Any], index_path: str) -> bool:
"""
Save index to disk.
Args:
index: Index data to save
index_path: Path where to save the index
Returns:
True if successful, False otherwise
"""
try:
import json
with open(index_path, 'w', encoding='utf-8') as f:
json.dump(index, f, indent=2, ensure_ascii=False)
logger.info(f"Saved index to {index_path}")
return True
except Exception as e:
logger.error(f"Failed to save index to {index_path}: {e}")
return False
def load_index(self, index_path: str) -> Optional[Dict[str, Any]]:
"""
Load index from disk.
Args:
index_path: Path to the index file
Returns:
Index data if successful, None otherwise
"""
try:
if not os.path.exists(index_path):
logger.debug(f"Index file not found: {index_path}")
return None
import json
with open(index_path, 'r', encoding='utf-8') as f:
index = json.load(f)
# Cache in memory
self.in_memory_index = index
logger.info(f"Loaded index from {index_path}")
return index
except Exception as e:
logger.error(f"Failed to load index from {index_path}: {e}")
return None
def get_parsing_statistics(self) -> Dict[str, Any]:
"""
Get detailed statistics about parsing capabilities.
Returns:
Dictionary with parsing statistics and strategy information
"""
strategy_info = self.strategy_factory.get_strategy_info()
return {
"total_strategies": len(strategy_info),
"specialized_languages": [lang for lang in strategy_info.keys() if not lang.startswith('fallback_')],
"fallback_languages": [lang.replace('fallback_', '') for lang in strategy_info.keys() if lang.startswith('fallback_')],
"total_extensions": len(self.strategy_factory.get_all_supported_extensions()),
"specialized_extensions": len(self.strategy_factory.get_specialized_extensions()),
"fallback_extensions": len(self.strategy_factory.get_fallback_extensions()),
"strategy_details": strategy_info
}
def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]:
"""
Get symbols for a specific file.
Args:
file_path: Relative path to the file
Returns:
List of symbols in the file
"""
if not self.in_memory_index:
logger.warning("Index not loaded")
return []
try:
# Normalize file path
file_path = file_path.replace('\\', '/')
if file_path.startswith('./'):
file_path = file_path[2:]
# Get file info
file_info = self.in_memory_index["files"].get(file_path)
if not file_info:
logger.warning(f"File not found in index: {file_path}")
return []
# Work directly with global symbols for this file
global_symbols = self.in_memory_index.get("symbols", {})
result = []
# Find all symbols for this file directly from global symbols
for symbol_id, symbol_data in global_symbols.items():
symbol_file = symbol_data.get("file", "").replace("\\", "/")
# Check if this symbol belongs to our file
if symbol_file == file_path:
symbol_type = symbol_data.get("type", "unknown")
symbol_name = symbol_id.split("::")[-1] # Extract symbol name from ID
# Create symbol info
symbol_info = {
"name": symbol_name,
"called_by": symbol_data.get("called_by", []),
"line": symbol_data.get("line"),
"signature": symbol_data.get("signature")
}
# Include functions, methods, and classes
if symbol_type in ("function", "method", "class"):
result.append(symbol_info)
# Sort by line number for consistent ordering (tolerate missing lines)
result.sort(key=lambda x: x.get("line") or 0)
return result
except Exception as e:
logger.error(f"Error getting file symbols for {file_path}: {e}")
return []
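# Illustrative end-to-end usage (paths are hypothetical):
#
#     builder = JSONIndexBuilder("/path/to/repo")
#     index = builder.build_index(parallel=True)
#     builder.save_index(index, "/tmp/index.json")
#     symbols = builder.get_file_symbols("src/app.py")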

View File

@@ -0,0 +1,8 @@
"""
Model classes for the indexing system.
"""
from .symbol_info import SymbolInfo
from .file_info import FileInfo
__all__ = ['SymbolInfo', 'FileInfo']

View File

@@ -0,0 +1,24 @@
"""
FileInfo model for representing file metadata.
"""
from dataclasses import dataclass
from typing import Dict, List, Optional, Any
@dataclass
class FileInfo:
"""Information about a source code file."""
language: str # programming language
line_count: int # total lines in file
symbols: Dict[str, List[str]] # symbol categories (functions, classes, etc.)
imports: List[str] # imported modules/packages
exports: Optional[List[str]] = None # exported symbols (for JS/TS modules)
package: Optional[str] = None # package name (for Java, Go, etc.)
docstring: Optional[str] = None # file-level documentation
def __post_init__(self):
"""Initialize mutable defaults."""
if self.exports is None:
self.exports = []

View File

@@ -0,0 +1,23 @@
"""
SymbolInfo model for representing code symbols.
"""
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class SymbolInfo:
"""Information about a code symbol (function, class, method, etc.)."""
type: str # function, class, method, interface, etc.
file: str # file path where symbol is defined
line: int # line number where symbol starts
signature: Optional[str] = None # function/method signature
docstring: Optional[str] = None # documentation string
called_by: Optional[List[str]] = None # list of symbols that call this symbol
def __post_init__(self):
"""Initialize mutable defaults."""
if self.called_by is None:
self.called_by = []

View File

@@ -0,0 +1,49 @@
"""
Qualified name generation utilities.
"""
import os
from typing import Optional
def normalize_file_path(file_path: str) -> str:
"""
Normalize a file path to use forward slashes and relative paths.
Args:
file_path: The file path to normalize
Returns:
Normalized file path
"""
# Convert to forward slashes and make relative
normalized = file_path.replace('\\', '/')
# Remove leading slash if present
if normalized.startswith('/'):
normalized = normalized[1:]
return normalized
def generate_qualified_name(file_path: str, symbol_name: str, namespace: Optional[str] = None) -> str:
"""
Generate a qualified name for a symbol.
Args:
file_path: Path to the file containing the symbol
symbol_name: Name of the symbol
namespace: Optional namespace/module context
Returns:
Qualified name for the symbol
"""
normalized_path = normalize_file_path(file_path)
# Remove file extension for module-like name
base_name = os.path.splitext(normalized_path)[0]
module_path = base_name.replace('/', '.')
if namespace:
return f"{module_path}.{namespace}.{symbol_name}"
else:
return f"{module_path}.{symbol_name}"

View File

@@ -0,0 +1,194 @@
"""
Shallow Index Manager - Manages a minimal file-list-only index.
This manager builds and loads a shallow index consisting of relative file
paths only. It is optimized for fast initialization and filename-based
search/browsing. Content parsing and symbol extraction are not performed.
"""
from __future__ import annotations
import hashlib
import json
import logging
import os
import tempfile
import threading
from typing import List, Optional
import re
from .json_index_builder import JSONIndexBuilder
from ..constants import SETTINGS_DIR, INDEX_FILE_SHALLOW
logger = logging.getLogger(__name__)
class ShallowIndexManager:
"""Manage shallow (file-list) index lifecycle and storage."""
def __init__(self) -> None:
self.project_path: Optional[str] = None
self.index_builder: Optional[JSONIndexBuilder] = None
self.temp_dir: Optional[str] = None
self.index_path: Optional[str] = None
self._file_list: Optional[List[str]] = None
self._lock = threading.RLock()
def set_project_path(self, project_path: str) -> bool:
with self._lock:
try:
if not isinstance(project_path, str) or not project_path.strip():
logger.error("Invalid project path for shallow index")
return False
project_path = project_path.strip()
if not os.path.isdir(project_path):
logger.error(f"Project path does not exist: {project_path}")
return False
self.project_path = project_path
self.index_builder = JSONIndexBuilder(project_path)
project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12]
self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash)
os.makedirs(self.temp_dir, exist_ok=True)
self.index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW)
return True
except Exception as e: # noqa: BLE001 - centralized logging
logger.error(f"Failed to set project path (shallow): {e}")
return False
def build_index(self) -> bool:
"""Build and persist the shallow file list index."""
with self._lock:
if not self.index_builder or not self.index_path:
logger.error("ShallowIndexManager not initialized")
return False
try:
file_list = self.index_builder.build_shallow_file_list()
with open(self.index_path, 'w', encoding='utf-8') as f:
json.dump(file_list, f, ensure_ascii=False)
self._file_list = file_list
logger.info(f"Built shallow index with {len(file_list)} files")
return True
except Exception as e: # noqa: BLE001
logger.error(f"Failed to build shallow index: {e}")
return False
def load_index(self) -> bool:
"""Load shallow index from disk to memory."""
with self._lock:
try:
if not self.index_path or not os.path.exists(self.index_path):
return False
with open(self.index_path, 'r', encoding='utf-8') as f:
data = json.load(f)
if isinstance(data, list):
# Normalize slashes/prefix
normalized: List[str] = []
for p in data:
if isinstance(p, str):
q = p.replace('\\\\', '/').replace('\\', '/')
if q.startswith('./'):
q = q[2:]
normalized.append(q)
self._file_list = normalized
return True
return False
except Exception as e: # noqa: BLE001
logger.error(f"Failed to load shallow index: {e}")
return False
def get_file_list(self) -> List[str]:
with self._lock:
return list(self._file_list or [])
def find_files(self, pattern: str = "*") -> List[str]:
with self._lock:
if not isinstance(pattern, str):
return []
norm = (pattern.strip() or "*").replace('\\\\','/').replace('\\','/')
files = self._file_list or []
# Fast path: wildcard all
if norm == "*":
return list(files)
# 1) Exact, case-sensitive
exact_regex = self._compile_glob_regex(norm)
exact_hits = [f for f in files if exact_regex.match(f) is not None]
if exact_hits or '/' in norm:
return exact_hits
# 2) Recursive **/ fallback (case-sensitive)
recursive_pattern = f"**/{norm}"
rec_regex = self._compile_glob_regex(recursive_pattern)
rec_hits = [f for f in files if rec_regex.match(f) is not None]
if rec_hits:
return self._dedupe_preserve_order(exact_hits + rec_hits)
# 3) Case-insensitive (root only)
ci_regex = self._compile_glob_regex(norm, ignore_case=True)
ci_hits = [f for f in files if ci_regex.match(f) is not None]
if ci_hits:
return self._dedupe_preserve_order(exact_hits + rec_hits + ci_hits)
# 4) Case-insensitive recursive
rec_ci_regex = self._compile_glob_regex(recursive_pattern, ignore_case=True)
rec_ci_hits = [f for f in files if rec_ci_regex.match(f) is not None]
if rec_ci_hits:
return self._dedupe_preserve_order(
exact_hits + rec_hits + ci_hits + rec_ci_hits
)
return []
@staticmethod
def _compile_glob_regex(pattern: str, ignore_case: bool = False) -> re.Pattern:
i = 0
out = []
special = ".^$+{}[]|()"
while i < len(pattern):
c = pattern[i]
if c == '*':
if i + 1 < len(pattern) and pattern[i + 1] == '*':
out.append('.*')
i += 2
continue
else:
out.append('[^/]*')
elif c == '?':
out.append('[^/]')
elif c in special:
out.append('\\' + c)
else:
out.append(c)
i += 1
flags = re.IGNORECASE if ignore_case else 0
return re.compile('^' + ''.join(out) + '$', flags=flags)
@staticmethod
def _dedupe_preserve_order(items: List[str]) -> List[str]:
seen = set()
result = []
for item in items:
if item not in seen:
seen.add(item)
result.append(item)
return result
def cleanup(self) -> None:
with self._lock:
self.project_path = None
self.index_builder = None
self.temp_dir = None
self.index_path = None
self._file_list = None
# Global singleton
_shallow_manager = ShallowIndexManager()
def get_shallow_index_manager() -> ShallowIndexManager:
return _shallow_manager
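# Illustrative lookup behavior (repo layout is hypothetical): a bare pattern is
# matched against root-level paths first, then retried as "**/<pattern>":
#
#     mgr = get_shallow_index_manager()
#     mgr.set_project_path("/path/to/repo")
#     mgr.build_index()
#     mgr.find_files("server.py")  # -> ["src/code_index_mcp/server.py"]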

View File

@@ -0,0 +1,327 @@
"""
SQLite-backed index builder leveraging existing strategy pipeline.
"""
from __future__ import annotations
import json
import logging
import os
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Iterable, List, Optional, Tuple
from .json_index_builder import JSONIndexBuilder
from .sqlite_store import SQLiteIndexStore
from .models import FileInfo, SymbolInfo
logger = logging.getLogger(__name__)
class SQLiteIndexBuilder(JSONIndexBuilder):
"""
Build the deep index directly into SQLite storage.
Inherits scanning/strategy utilities from JSONIndexBuilder but writes rows
to the provided SQLiteIndexStore instead of assembling large dictionaries.
"""
def __init__(
self,
project_path: str,
store: SQLiteIndexStore,
additional_excludes: Optional[List[str]] = None,
):
super().__init__(project_path, additional_excludes)
self.store = store
def build_index(
self,
parallel: bool = True,
max_workers: Optional[int] = None,
) -> Dict[str, int]:
"""
Build the SQLite index and return lightweight statistics.
Args:
parallel: Whether to parse files in parallel.
max_workers: Optional override for worker count.
Returns:
Dictionary with totals for files, symbols, and languages.
"""
logger.info("Building SQLite index (parallel=%s)...", parallel)
start_time = time.time()
files_to_process = self._get_supported_files()
total_files = len(files_to_process)
if total_files == 0:
logger.warning("No files to process")
with self.store.connect(for_build=True) as conn:
self._reset_database(conn)
self._persist_metadata(conn, 0, 0, [], 0, 0, {})
return {
"files": 0,
"symbols": 0,
"languages": 0,
}
specialized_extensions = set(self.strategy_factory.get_specialized_extensions())
results_iter: Iterable[Tuple[Dict[str, SymbolInfo], Dict[str, FileInfo], str, bool]]
executor = None
if parallel and total_files > 1:
if max_workers is None:
max_workers = min(os.cpu_count() or 4, total_files)
logger.info("Using ThreadPoolExecutor with %s workers", max_workers)
executor = ThreadPoolExecutor(max_workers=max_workers)
future_to_file = {
executor.submit(self._process_file, file_path, specialized_extensions): file_path
for file_path in files_to_process
}
def _iter_results():
for future in as_completed(future_to_file):
result = future.result()
if result:
yield result
results_iter = _iter_results()
else:
logger.info("Using sequential processing")
def _iter_results_sequential():
for file_path in files_to_process:
result = self._process_file(file_path, specialized_extensions)
if result:
yield result
results_iter = _iter_results_sequential()
languages = set()
specialized_count = 0
fallback_count = 0
pending_calls: List[Tuple[str, str]] = []
total_symbols = 0
symbol_types: Dict[str, int] = {}
processed_files = 0
self.store.initialize_schema()
with self.store.connect(for_build=True) as conn:
conn.execute("PRAGMA foreign_keys=ON")
self._reset_database(conn)
for symbols, file_info_dict, language, is_specialized in results_iter:
file_path, file_info = next(iter(file_info_dict.items()))
file_id = self._insert_file(conn, file_path, file_info)
file_pending = getattr(file_info, "pending_calls", [])
if file_pending:
pending_calls.extend(file_pending)
symbol_rows = self._prepare_symbol_rows(symbols, file_id)
if symbol_rows:
conn.executemany(
"""
INSERT INTO symbols(
symbol_id,
file_id,
type,
line,
signature,
docstring,
called_by,
short_name
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
symbol_rows,
)
languages.add(language)
processed_files += 1
total_symbols += len(symbol_rows)
if is_specialized:
specialized_count += 1
else:
fallback_count += 1
for _, _, symbol_type, _, _, _, _, _ in symbol_rows:
key = symbol_type or "unknown"
symbol_types[key] = symbol_types.get(key, 0) + 1
self._persist_metadata(
conn,
processed_files,
total_symbols,
sorted(languages),
specialized_count,
fallback_count,
symbol_types,
)
self._resolve_pending_calls_sqlite(conn, pending_calls)
try:
conn.execute("PRAGMA optimize")
except Exception: # pragma: no cover - best effort
pass
if executor:
executor.shutdown(wait=True)
elapsed = time.time() - start_time
logger.info(
"SQLite index built: files=%s symbols=%s languages=%s elapsed=%.2fs",
processed_files,
total_symbols,
len(languages),
elapsed,
)
return {
"files": processed_files,
"symbols": total_symbols,
"languages": len(languages),
}
# Internal helpers -------------------------------------------------
def _reset_database(self, conn):
conn.execute("DELETE FROM symbols")
conn.execute("DELETE FROM files")
conn.execute(
"DELETE FROM metadata WHERE key NOT IN ('schema_version')"
)
def _insert_file(self, conn, path: str, file_info: FileInfo) -> int:
params = (
path,
file_info.language,
file_info.line_count,
json.dumps(file_info.imports or []),
json.dumps(file_info.exports or []),
file_info.package,
file_info.docstring,
)
cur = conn.execute(
"""
INSERT INTO files(
path,
language,
line_count,
imports,
exports,
package,
docstring
) VALUES (?, ?, ?, ?, ?, ?, ?)
""",
params,
)
return cur.lastrowid
def _prepare_symbol_rows(
self,
symbols: Dict[str, SymbolInfo],
file_id: int,
) -> List[Tuple[str, int, Optional[str], Optional[int], Optional[str], Optional[str], str, str]]:
rows: List[Tuple[str, int, Optional[str], Optional[int], Optional[str], Optional[str], str, str]] = []
for symbol_id, symbol_info in symbols.items():
called_by = json.dumps(symbol_info.called_by or [])
short_name = symbol_id.split("::")[-1]
rows.append(
(
symbol_id,
file_id,
symbol_info.type,
symbol_info.line,
symbol_info.signature,
symbol_info.docstring,
called_by,
short_name,
)
)
return rows
def _persist_metadata(
self,
conn,
file_count: int,
symbol_count: int,
languages: List[str],
specialized_count: int,
fallback_count: int,
symbol_types: Dict[str, int],
) -> None:
metadata = {
"project_path": self.project_path,
"indexed_files": file_count,
"index_version": "3.0.0-sqlite",
"timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
"languages": languages,
"total_symbols": symbol_count,
"specialized_parsers": specialized_count,
"fallback_files": fallback_count,
"symbol_types": symbol_types,
}
self.store.set_metadata(conn, "project_path", self.project_path)
self.store.set_metadata(conn, "index_metadata", metadata)
def _resolve_pending_calls_sqlite(
self,
conn,
pending_calls: List[Tuple[str, str]]
) -> None:
"""Resolve cross-file call relationships directly in SQLite storage."""
if not pending_calls:
return
rows = list(
conn.execute(
"SELECT symbol_id, short_name, called_by FROM symbols"
)
)
symbol_map = {row["symbol_id"]: row for row in rows}
short_index: Dict[str, List[str]] = defaultdict(list)
for row in rows:
short_name = row["short_name"]
if short_name:
short_index[short_name].append(row["symbol_id"])
updates: Dict[str, set] = defaultdict(set)
for caller, called in pending_calls:
target_ids: List[str] = []
if called in symbol_map:
target_ids = [called]
else:
if called in short_index:
target_ids = short_index[called]
if not target_ids:
suffix = f".{called}"
matches: List[str] = []
for short_name, ids in short_index.items():
if short_name and short_name.endswith(suffix):
matches.extend(ids)
target_ids = matches
if len(target_ids) != 1:
continue
updates[target_ids[0]].add(caller)
for symbol_id, callers in updates.items():
row = symbol_map.get(symbol_id)
if not row:
continue
existing = []
if row["called_by"]:
try:
existing = json.loads(row["called_by"])
except json.JSONDecodeError:
existing = []
merged = list(dict.fromkeys(existing + list(callers)))
conn.execute(
"UPDATE symbols SET called_by=? WHERE symbol_id=?",
(json.dumps(merged), symbol_id),
)
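# Illustrative usage (paths and counts are hypothetical); build_index()
# initializes the schema itself before writing rows:
#
#     store = SQLiteIndexStore("/tmp/code_indexer/index.db")
#     builder = SQLiteIndexBuilder("/path/to/repo", store)
#     stats = builder.build_index()
#     # -> {"files": 120, "symbols": 945, "languages": 3}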

View File

@@ -0,0 +1,354 @@
"""
SQLite-backed index manager coordinating builder and store.
"""
from __future__ import annotations
import json
import logging
import os
import re
import tempfile
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional
from .sqlite_index_builder import SQLiteIndexBuilder
from .sqlite_store import SQLiteIndexStore, SQLiteSchemaMismatchError
from ..constants import INDEX_FILE_DB, INDEX_FILE, INDEX_FILE_SHALLOW, SETTINGS_DIR
logger = logging.getLogger(__name__)
class SQLiteIndexManager:
"""Manage lifecycle of SQLite-backed deep index."""
def __init__(self) -> None:
self.project_path: Optional[str] = None
self.index_builder: Optional[SQLiteIndexBuilder] = None
self.store: Optional[SQLiteIndexStore] = None
self.temp_dir: Optional[str] = None
self.index_path: Optional[str] = None
self.shallow_index_path: Optional[str] = None
self._shallow_file_list: Optional[List[str]] = None
self._is_loaded = False
self._lock = threading.RLock()
logger.info("Initialized SQLite Index Manager")
def set_project_path(self, project_path: str) -> bool:
"""Configure project path and underlying storage location."""
with self._lock:
if not project_path or not isinstance(project_path, str):
logger.error("Invalid project path: %s", project_path)
return False
project_path = project_path.strip()
if not project_path or not os.path.isdir(project_path):
logger.error("Project path does not exist: %s", project_path)
return False
self.project_path = project_path
project_hash = _hash_project_path(project_path)
self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash)
os.makedirs(self.temp_dir, exist_ok=True)
self.index_path = os.path.join(self.temp_dir, INDEX_FILE_DB)
legacy_path = os.path.join(self.temp_dir, INDEX_FILE)
if os.path.exists(legacy_path):
try:
os.remove(legacy_path)
logger.info("Removed legacy JSON index at %s", legacy_path)
except OSError as exc: # pragma: no cover - best effort
logger.warning("Failed to remove legacy index %s: %s", legacy_path, exc)
self.shallow_index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW)
self.store = SQLiteIndexStore(self.index_path)
self.index_builder = SQLiteIndexBuilder(project_path, self.store)
self._is_loaded = False
logger.info("SQLite index storage: %s", self.index_path)
return True
def build_index(self, force_rebuild: bool = False) -> bool:
"""Build or rebuild the SQLite index."""
with self._lock:
if not self.index_builder:
logger.error("Index builder not initialized")
return False
try:
stats = self.index_builder.build_index()
logger.info(
"SQLite index build complete: %s files, %s symbols",
stats.get("files"),
stats.get("symbols"),
)
self._is_loaded = True
return True
except SQLiteSchemaMismatchError:
logger.warning("Schema mismatch detected; recreating database")
self.store.clear() # type: ignore[union-attr]
stats = self.index_builder.build_index()
logger.info(
"SQLite index rebuild after schema reset: %s files, %s symbols",
stats.get("files"),
stats.get("symbols"),
)
self._is_loaded = True
return True
except Exception as exc: # pragma: no cover - defensive
logger.error("Failed to build SQLite index: %s", exc)
self._is_loaded = False
return False
def load_index(self) -> bool:
"""Validate that an index database exists and schema is current."""
with self._lock:
if not self.store:
logger.error("Index store not initialized")
return False
try:
self.store.initialize_schema()
with self.store.connect() as conn:
metadata = self.store.get_metadata(conn, "index_metadata")
except SQLiteSchemaMismatchError:
logger.info("Schema mismatch on load; forcing rebuild on next build_index()")
self._is_loaded = False
return False
except Exception as exc: # pragma: no cover
logger.error("Failed to load SQLite index: %s", exc)
self._is_loaded = False
return False
self._is_loaded = metadata is not None
return self._is_loaded
def refresh_index(self) -> bool:
"""Force rebuild of the SQLite index."""
with self._lock:
logger.info("Refreshing SQLite deep index...")
if self.build_index(force_rebuild=True):
return self.load_index()
return False
def build_shallow_index(self) -> bool:
"""Build the shallow index file list using existing builder helper."""
with self._lock:
if not self.index_builder or not self.project_path or not self.shallow_index_path:
logger.error("Index builder not initialized for shallow index")
return False
try:
file_list = self.index_builder.build_shallow_file_list()
with open(self.shallow_index_path, "w", encoding="utf-8") as handle:
json.dump(file_list, handle, ensure_ascii=False)
self._shallow_file_list = file_list
return True
except Exception as exc: # pragma: no cover
logger.error("Failed to build shallow index: %s", exc)
return False
def load_shallow_index(self) -> bool:
"""Load shallow index from disk."""
with self._lock:
if not self.shallow_index_path or not os.path.exists(self.shallow_index_path):
return False
try:
with open(self.shallow_index_path, "r", encoding="utf-8") as handle:
data = json.load(handle)
if isinstance(data, list):
self._shallow_file_list = [_normalize_path(p) for p in data if isinstance(p, str)]
return True
except Exception as exc: # pragma: no cover
logger.error("Failed to load shallow index: %s", exc)
return False
def find_files(self, pattern: str = "*") -> List[str]:
"""Find files from the shallow index using glob semantics."""
with self._lock:
if not isinstance(pattern, str):
logger.error("Pattern must be a string, got %s", type(pattern))
return []
pattern = pattern.strip() or "*"
norm_pattern = pattern.replace("\\\\", "/").replace("\\", "/")
regex = _compile_glob_regex(norm_pattern)
if self._shallow_file_list is None:
if not self.load_shallow_index():
if self.build_shallow_index():
self.load_shallow_index()
files = list(self._shallow_file_list or [])
if norm_pattern == "*":
return files
return [f for f in files if regex.match(f)]
def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]:
"""Return summary information for a file from SQLite storage."""
with self._lock:
if not isinstance(file_path, str):
logger.error("File path must be a string, got %s", type(file_path))
return None
if not self.store or not self._is_loaded:
if not self.load_index():
return None
normalized = _normalize_path(file_path)
with self.store.connect() as conn:
row = conn.execute(
"""
SELECT id, language, line_count, imports, exports, docstring
FROM files WHERE path = ?
""",
(normalized,),
).fetchone()
if not row:
logger.warning("File not found in index: %s", normalized)
return None
symbol_rows = conn.execute(
"""
SELECT type, line, signature, docstring, called_by, short_name
FROM symbols
WHERE file_id = ?
ORDER BY line ASC
""",
(row["id"],),
).fetchall()
imports = _safe_json_loads(row["imports"])
exports = _safe_json_loads(row["exports"])
categorized = _categorize_symbols(symbol_rows)
return {
"file_path": normalized,
"language": row["language"],
"line_count": row["line_count"],
"symbol_count": len(symbol_rows),
"functions": categorized["functions"],
"classes": categorized["classes"],
"methods": categorized["methods"],
"imports": imports,
"exports": exports,
"docstring": row["docstring"],
}
def get_index_stats(self) -> Dict[str, Any]:
"""Return basic statistics for the current index."""
with self._lock:
if not self.store:
return {"status": "not_loaded"}
try:
with self.store.connect() as conn:
metadata = self.store.get_metadata(conn, "index_metadata")
except SQLiteSchemaMismatchError:
return {"status": "not_loaded"}
if not metadata:
return {"status": "not_loaded"}
return {
"status": "loaded" if self._is_loaded else "not_loaded",
"indexed_files": metadata.get("indexed_files", 0),
"total_symbols": metadata.get("total_symbols", 0),
"symbol_types": metadata.get("symbol_types", {}),
"languages": metadata.get("languages", []),
"project_path": metadata.get("project_path"),
"timestamp": metadata.get("timestamp"),
}
def cleanup(self) -> None:
"""Reset internal state."""
with self._lock:
self.project_path = None
self.index_builder = None
self.store = None
self.temp_dir = None
self.index_path = None
self.shallow_index_path = None
self._shallow_file_list = None
self._is_loaded = False
def _hash_project_path(project_path: str) -> str:
import hashlib
return hashlib.md5(project_path.encode()).hexdigest()[:12]
def _compile_glob_regex(pattern: str):
i = 0
out = []
special = ".^$+{}[]|()"
while i < len(pattern):
c = pattern[i]
if c == "*":
if i + 1 < len(pattern) and pattern[i + 1] == "*":
out.append(".*")
i += 2
continue
out.append("[^/]*")
elif c == "?":
out.append("[^/]")
elif c in special:
out.append("\\" + c)
else:
out.append(c)
i += 1
return re.compile("^" + "".join(out) + "$")
def _normalize_path(path: str) -> str:
result = path.replace("\\\\", "/").replace("\\", "/")
if result.startswith("./"):
result = result[2:]
return result
def _safe_json_loads(value: Any) -> List[Any]:
if not value:
return []
if isinstance(value, list):
return value
try:
parsed = json.loads(value)
return parsed if isinstance(parsed, list) else []
except json.JSONDecodeError:
return []
def _categorize_symbols(symbol_rows) -> Dict[str, List[Dict[str, Any]]]:
functions: List[Dict[str, Any]] = []
classes: List[Dict[str, Any]] = []
methods: List[Dict[str, Any]] = []
for row in symbol_rows:
symbol_type = row["type"]
called_by = _safe_json_loads(row["called_by"])
info = {
"name": row["short_name"],
"called_by": called_by,
"line": row["line"],
"signature": row["signature"],
"docstring": row["docstring"],
}
signature = row["signature"] or ""
if signature.startswith("def ") and "::" in signature:
methods.append(info)
elif signature.startswith("def "):
functions.append(info)
elif signature.startswith("class ") or symbol_type == "class":
classes.append(info)
else:
if symbol_type == "method":
methods.append(info)
elif symbol_type == "class":
classes.append(info)
else:
functions.append(info)
functions.sort(key=lambda item: item.get("line") or 0)
classes.sort(key=lambda item: item.get("line") or 0)
methods.sort(key=lambda item: item.get("line") or 0)
return {
"functions": functions,
"classes": classes,
"methods": methods,
}

View File

@@ -0,0 +1,173 @@
"""
SQLite storage layer for deep code index data.
This module centralizes SQLite setup, schema management, and connection
pragmas so higher-level builders/managers can focus on data orchestration.
"""
from __future__ import annotations
import json
import os
import sqlite3
import threading
from contextlib import contextmanager
from typing import Any, Dict, Generator, Optional
SCHEMA_VERSION = 1
class SQLiteSchemaMismatchError(RuntimeError):
"""Raised when the on-disk schema cannot be used safely."""
class SQLiteIndexStore:
"""Utility wrapper around an on-disk SQLite database for the deep index."""
def __init__(self, db_path: str) -> None:
if not db_path or not isinstance(db_path, str):
raise ValueError("db_path must be a non-empty string")
self.db_path = db_path
self._lock = threading.RLock()
def initialize_schema(self) -> None:
"""Create database schema if needed and validate schema version."""
with self._lock:
os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
with self.connect(for_build=True) as conn:
self._create_tables(conn)
self._ensure_schema_version(conn)
# Ensure metadata contains the canonical project path placeholder
if self.get_metadata(conn, "project_path") is None:
self.set_metadata(conn, "project_path", "")
@contextmanager
def connect(self, *, for_build: bool = False) -> Generator[sqlite3.Connection, None, None]:
"""
Context manager yielding a configured SQLite connection.
Args:
for_build: Apply write-optimized pragmas (journal mode, cache size).
"""
with self._lock:
conn = sqlite3.connect(self.db_path, check_same_thread=False)
conn.row_factory = sqlite3.Row
self._apply_pragmas(conn, for_build)
try:
yield conn
conn.commit()
except Exception:
conn.rollback()
raise
finally:
conn.close()
def clear(self) -> None:
"""Remove existing database file."""
with self._lock:
if os.path.exists(self.db_path):
os.remove(self.db_path)
# Metadata helpers -------------------------------------------------
def set_metadata(self, conn: sqlite3.Connection, key: str, value: Any) -> None:
"""Persist a metadata key/value pair (value stored as JSON string)."""
conn.execute(
"""
INSERT INTO metadata(key, value)
VALUES(?, ?)
ON CONFLICT(key) DO UPDATE SET value=excluded.value
""",
(key, json.dumps(value)),
)
def get_metadata(self, conn: sqlite3.Connection, key: str) -> Optional[Any]:
"""Retrieve a metadata value (deserialized from JSON)."""
row = conn.execute("SELECT value FROM metadata WHERE key=?", (key,)).fetchone()
if not row:
return None
try:
return json.loads(row["value"])
except json.JSONDecodeError:
return row["value"]
# Internal helpers -------------------------------------------------
def _create_tables(self, conn: sqlite3.Connection) -> None:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT UNIQUE NOT NULL,
language TEXT,
line_count INTEGER,
imports TEXT,
exports TEXT,
package TEXT,
docstring TEXT
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS symbols (
id INTEGER PRIMARY KEY,
symbol_id TEXT UNIQUE NOT NULL,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
type TEXT,
line INTEGER,
signature TEXT,
docstring TEXT,
called_by TEXT,
short_name TEXT
)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_symbols_short_name ON symbols(short_name)
"""
)
def _ensure_schema_version(self, conn: sqlite3.Connection) -> None:
stored = self.get_metadata(conn, "schema_version")
if stored is None:
self.set_metadata(conn, "schema_version", SCHEMA_VERSION)
return
if int(stored) != SCHEMA_VERSION:
raise SQLiteSchemaMismatchError(
f"Unexpected schema version {stored} (expected {SCHEMA_VERSION})"
)
def _apply_pragmas(self, conn: sqlite3.Connection, for_build: bool) -> None:
pragmas: Dict[str, Any] = {
"journal_mode": "WAL" if for_build else "WAL",
"synchronous": "NORMAL" if for_build else "FULL",
"cache_size": -262144, # negative => size in KB, ~256MB
}
for pragma, value in pragmas.items():
try:
conn.execute(f"PRAGMA {pragma}={value}")
except sqlite3.DatabaseError:
# PRAGMA not supported or rejected; continue best-effort.
continue
if for_build:
try:
conn.execute("PRAGMA temp_store=MEMORY")
except sqlite3.DatabaseError:
pass
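# Illustrative usage (the path is hypothetical); rows come back as
# sqlite3.Row, so columns are addressable by name:
#
#     store = SQLiteIndexStore("/tmp/code_indexer/index.db")
#     store.initialize_schema()
#     with store.connect() as conn:
#         row = conn.execute("SELECT COUNT(*) AS n FROM files").fetchone()
#         print(row["n"])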

View File

@@ -0,0 +1,8 @@
"""
Parsing strategies for different programming languages.
"""
from .base_strategy import ParsingStrategy
from .strategy_factory import StrategyFactory
__all__ = ['ParsingStrategy', 'StrategyFactory']

View File

@@ -0,0 +1,91 @@
"""
Abstract base class for language parsing strategies.
"""
import os
from abc import ABC, abstractmethod
from typing import Dict, List, Tuple, Optional
from ..models import SymbolInfo, FileInfo
class ParsingStrategy(ABC):
"""Abstract base class for language parsing strategies."""
@abstractmethod
def get_language_name(self) -> str:
"""Return the language name this strategy handles."""
@abstractmethod
def get_supported_extensions(self) -> List[str]:
"""Return list of file extensions this strategy supports."""
@abstractmethod
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""
Parse file content and extract symbols.
Args:
file_path: Path to the file being parsed
content: File content as string
Returns:
Tuple of (symbols_dict, file_info)
- symbols_dict: Maps symbol_id -> SymbolInfo
- file_info: FileInfo with metadata about the file
"""
def _create_symbol_id(self, file_path: str, symbol_name: str) -> str:
"""
Create a unique symbol ID.
Args:
file_path: Path to the file containing the symbol
symbol_name: Name of the symbol
Returns:
Unique symbol identifier in format "relative_path::symbol_name"
"""
relative_path = self._get_relative_path(file_path)
return f"{relative_path}::{symbol_name}"
def _get_relative_path(self, file_path: str) -> str:
"""Normalize path for symbol identifiers relative to project root."""
if not file_path:
return ""
normalized = os.path.normpath(file_path)
if normalized == ".":
return ""
normalized = normalized.replace("\\", "/")
if normalized.startswith("./"):
normalized = normalized[2:]
if not os.path.isabs(file_path):
normalized = normalized.lstrip("/")
return normalized or os.path.basename(file_path)
def _extract_line_number(self, content: str, symbol_position: int) -> int:
"""
Extract line number from character position in content.
Args:
content: File content
symbol_position: Character position in content
Returns:
Line number (1-based)
"""
return content[:symbol_position].count('\n') + 1
def _get_file_name(self, file_path: str) -> str:
"""Get just the filename from a full path."""
return os.path.basename(file_path)
def _safe_extract_text(self, content: str, start: int, end: int) -> str:
"""Safely extract text from content, handling bounds."""
try:
return content[start:end].strip()
except (IndexError, TypeError):
return ""

View File

@@ -0,0 +1,46 @@
"""
Fallback parsing strategy for unsupported languages and file types.
"""
import os
from typing import Dict, List, Tuple
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
class FallbackParsingStrategy(ParsingStrategy):
"""Fallback parser for unsupported languages and file types."""
def __init__(self, language_name: str = "unknown"):
self.language_name = language_name
def get_language_name(self) -> str:
return self.language_name
def get_supported_extensions(self) -> List[str]:
return [] # Fallback supports any extension
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Basic parsing: extract file information without symbol parsing."""
symbols = {}
# For document files, we can at least index their existence
file_info = FileInfo(
language=self.language_name,
line_count=len(content.splitlines()),
symbols={"functions": [], "classes": []},
imports=[]
)
# For document files (e.g. .md, .txt, .json), we can add a symbol representing the file itself
if self.language_name in ['markdown', 'text', 'json', 'yaml', 'xml', 'config', 'css', 'html']:
filename = os.path.basename(file_path)
symbol_id = self._create_symbol_id(file_path, f"file:{filename}")
symbols[symbol_id] = SymbolInfo(
type="file",
file=file_path,
line=1,
signature=f"{self.language_name} file: {filename}"
)
return symbols, file_info
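A quick usage sketch (run inside the package so the relative imports resolve; the path and content are illustrative):

strategy = FallbackParsingStrategy("markdown")
symbols, info = strategy.parse_file("docs/README.md", "# Title\n\nSome text.\n")
print(list(symbols))                   # ['docs/README.md::file:README.md']
print(info.language, info.line_count)  # markdown 3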

View File

@@ -0,0 +1,359 @@
"""
Go parsing strategy using regex patterns.
"""
import re
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
class GoParsingStrategy(ParsingStrategy):
"""Go-specific parsing strategy using regex patterns."""
def get_language_name(self) -> str:
return "go"
def get_supported_extensions(self) -> List[str]:
return ['.go']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Go file using regex patterns."""
symbols = {}
functions = []
lines = content.splitlines()
classes = [] # Go doesn't have classes, but we'll track structs/interfaces
imports = self._extract_go_imports(lines)
package = None
for i, line in enumerate(lines):
line = line.strip()
# Package declaration
if line.startswith('package '):
package = line.split('package ')[1].strip()
# Function declarations
elif line.startswith('func '):
func_match = re.match(r'func\s+(\w+)\s*\(', line)
if func_match:
func_name = func_match.group(1)
docstring = self._extract_go_comment(lines, i)
symbol_id = self._create_symbol_id(file_path, func_name)
symbols[symbol_id] = SymbolInfo(
type="function",
file=file_path,
line=i + 1,
signature=line,
docstring=docstring
)
functions.append(func_name)
# Method declarations (func (receiver) methodName)
method_match = re.match(r'func\s+\([^)]+\)\s+(\w+)\s*\(', line)
if method_match:
method_name = method_match.group(1)
docstring = self._extract_go_comment(lines, i)
symbol_id = self._create_symbol_id(file_path, method_name)
symbols[symbol_id] = SymbolInfo(
type="method",
file=file_path,
line=i + 1,
signature=line,
docstring=docstring
)
functions.append(method_name)
# Struct declarations
elif re.match(r'type\s+\w+\s+struct\s*\{', line):
struct_match = re.match(r'type\s+(\w+)\s+struct', line)
if struct_match:
struct_name = struct_match.group(1)
docstring = self._extract_go_comment(lines, i)
symbol_id = self._create_symbol_id(file_path, struct_name)
symbols[symbol_id] = SymbolInfo(
type="struct",
file=file_path,
line=i + 1,
docstring=docstring
)
classes.append(struct_name)
# Interface declarations
elif re.match(r'type\s+\w+\s+interface\s*\{', line):
interface_match = re.match(r'type\s+(\w+)\s+interface', line)
if interface_match:
interface_name = interface_match.group(1)
docstring = self._extract_go_comment(lines, i)
symbol_id = self._create_symbol_id(file_path, interface_name)
symbols[symbol_id] = SymbolInfo(
type="interface",
file=file_path,
line=i + 1,
docstring=docstring
)
classes.append(interface_name)
# Phase 2: Add call relationship analysis
self._analyze_go_calls(content, symbols, file_path)
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(lines),
symbols={"functions": functions, "classes": classes},
imports=imports,
package=package
)
return symbols, file_info
def _analyze_go_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str):
"""Analyze Go function calls for relationships."""
lines = content.splitlines()
current_function = None
is_function_declaration_line = False
for i, line in enumerate(lines):
            line = line.strip()
# Track current function context
if line.startswith('func '):
func_name = self._extract_go_function_name(line)
if func_name:
current_function = self._create_symbol_id(file_path, func_name)
is_function_declaration_line = True
else:
is_function_declaration_line = False
# Find function calls: functionName() or obj.methodName()
# Skip the function declaration line itself to avoid false self-calls
if current_function and not is_function_declaration_line and ('(' in line and ')' in line):
called_functions = self._extract_go_called_functions(line)
for called_func in called_functions:
# Find the called function in symbols and add relationship
for symbol_id, symbol_info in symbols.items():
if called_func in symbol_id.split("::")[-1]:
if current_function not in symbol_info.called_by:
symbol_info.called_by.append(current_function)
def _extract_go_function_name(self, line: str) -> Optional[str]:
"""Extract function name from Go function declaration."""
try:
# func functionName(...) or func (receiver) methodName(...)
match = re.match(r'func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(', line)
if match:
return match.group(1)
        except (TypeError, re.error):
            pass
return None
def _extract_go_imports(self, lines: List[str]) -> List[str]:
"""Extract Go import paths, handling multi-line blocks and comments."""
imports: List[str] = []
in_block_comment = False
paren_depth = 0
for raw_line in lines:
clean_line, in_block_comment = self._strip_go_comments(raw_line, in_block_comment)
stripped = clean_line.strip()
if not stripped:
continue
if paren_depth == 0:
if not stripped.startswith('import '):
continue
remainder = stripped[len('import '):].strip()
if not remainder:
continue
imports.extend(self._extract_string_literals(remainder))
paren_depth = (
self._count_unquoted_characters(remainder, '(')
- self._count_unquoted_characters(remainder, ')')
)
if paren_depth <= 0:
paren_depth = 0
continue
imports.extend(self._extract_string_literals(clean_line))
paren_depth += self._count_unquoted_characters(clean_line, '(')
paren_depth -= self._count_unquoted_characters(clean_line, ')')
if paren_depth <= 0:
paren_depth = 0
return imports
def _strip_go_comments(self, line: str, in_block_comment: bool) -> Tuple[str, bool]:
"""Remove Go comments from a line while tracking block comment state."""
result: List[str] = []
i = 0
length = len(line)
while i < length:
if in_block_comment:
if line.startswith('*/', i):
in_block_comment = False
i += 2
else:
i += 1
continue
if line.startswith('//', i):
break
if line.startswith('/*', i):
in_block_comment = True
i += 2
continue
result.append(line[i])
i += 1
return ''.join(result), in_block_comment
def _extract_string_literals(self, line: str) -> List[str]:
"""Return string literal values found in a line (supports " and `)."""
literals: List[str] = []
i = 0
length = len(line)
while i < length:
char = line[i]
if char not in ('"', '`'):
i += 1
continue
delimiter = char
i += 1
buffer: List[str] = []
while i < length:
current = line[i]
if delimiter == '"':
if current == '\\':
if i + 1 < length:
buffer.append(line[i + 1])
i += 2
continue
elif current == '"':
literals.append(''.join(buffer))
i += 1
break
else: # Raw string delimited by backticks
if current == '`':
literals.append(''.join(buffer))
i += 1
break
buffer.append(current)
i += 1
else:
break
return literals
def _count_unquoted_characters(self, line: str, target: str) -> int:
"""Count occurrences of a character outside string literals."""
count = 0
i = 0
length = len(line)
delimiter: Optional[str] = None
while i < length:
char = line[i]
if delimiter is None:
if char in ('"', '`'):
delimiter = char
elif char == target:
count += 1
else:
if delimiter == '"':
if char == '\\':
i += 2
continue
if char == '"':
delimiter = None
elif delimiter == '`' and char == '`':
delimiter = None
i += 1
return count
def _extract_go_comment(self, lines: List[str], line_index: int) -> Optional[str]:
"""Extract Go comment (docstring) from lines preceding the given line.
Go documentation comments are regular comments that appear immediately before
the declaration, with no blank line in between.
"""
comment_lines = []
# Look backwards from the line before the declaration
i = line_index - 1
while i >= 0:
stripped = lines[i].strip()
# Stop at empty line
if not stripped:
break
# Single-line comment
if stripped.startswith('//'):
comment_text = stripped[2:].strip()
comment_lines.insert(0, comment_text)
i -= 1
# Multi-line comment block
elif stripped.startswith('/*') or stripped.endswith('*/'):
# Handle single-line /* comment */
if stripped.startswith('/*') and stripped.endswith('*/'):
comment_text = stripped[2:-2].strip()
comment_lines.insert(0, comment_text)
i -= 1
# Handle multi-line comment block
elif stripped.endswith('*/'):
# Found end of multi-line comment, collect until start
temp_lines = []
temp_lines.insert(0, stripped[:-2].strip())
i -= 1
while i >= 0:
temp_stripped = lines[i].strip()
if temp_stripped.startswith('/*'):
temp_lines.insert(0, temp_stripped[2:].strip())
comment_lines = temp_lines + comment_lines
i -= 1
break
else:
temp_lines.insert(0, temp_stripped)
i -= 1
break
else:
break
else:
# Not a comment, stop looking
break
if comment_lines:
# Join with newlines and clean up
docstring = '\n'.join(comment_lines)
return docstring if docstring else None
return None
def _extract_go_called_functions(self, line: str) -> List[str]:
"""Extract function names that are being called in this line."""
called_functions = []
# Find patterns like: functionName( or obj.methodName(
patterns = [
r'(\w+)\s*\(', # functionName(
r'\.(\w+)\s*\(', # .methodName(
]
for pattern in patterns:
matches = re.findall(pattern, line)
called_functions.extend(matches)
return called_functions
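A usage sketch of what the regex-based Go strategy extracts, including the preceding comment captured as a docstring and the called_by wiring (the sample source is illustrative):

strategy = GoParsingStrategy()
source = (
    "package main\n\n"
    "// Greet prints a greeting.\n"
    "func Greet() {}\n\n"
    "func main() {\n"
    "    Greet()\n"
    "}\n"
)
symbols, info = strategy.parse_file("main.go", source)
print(info.package)                          # main
print(symbols["main.go::Greet"].docstring)   # Greet prints a greeting.
print(symbols["main.go::Greet"].called_by)   # ['main.go::main']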

View File

@@ -0,0 +1,209 @@
"""
Java parsing strategy using tree-sitter - Optimized single-pass version.
"""
import logging
import tree_sitter
from tree_sitter_java import language
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
logger = logging.getLogger(__name__)
class JavaParsingStrategy(ParsingStrategy):
"""Java-specific parsing strategy - Single Pass Optimized."""
def __init__(self):
self.java_language = tree_sitter.Language(language())
def get_language_name(self) -> str:
return "java"
def get_supported_extensions(self) -> List[str]:
return ['.java']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Java file using tree-sitter with single-pass optimization."""
symbols = {}
functions = []
classes = []
imports = []
package = None
# Symbol lookup index for O(1) access
symbol_lookup = {} # name -> symbol_id mapping
parser = tree_sitter.Parser(self.java_language)
try:
tree = parser.parse(content.encode('utf8'))
# Extract package info first
for node in tree.root_node.children:
if node.type == 'package_declaration':
package = self._extract_java_package(node, content)
break
# Single-pass traversal that handles everything
context = TraversalContext(
content=content,
file_path=file_path,
symbols=symbols,
functions=functions,
classes=classes,
imports=imports,
symbol_lookup=symbol_lookup
)
self._traverse_node_single_pass(tree.root_node, context)
except Exception as e:
logger.warning(f"Error parsing Java file {file_path}: {e}")
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(content.splitlines()),
symbols={"functions": functions, "classes": classes},
imports=imports,
package=package
)
return symbols, file_info
def _traverse_node_single_pass(self, node, context: 'TraversalContext',
current_class: Optional[str] = None,
current_method: Optional[str] = None):
"""Single-pass traversal that extracts symbols and analyzes calls."""
# Handle class declarations
if node.type == 'class_declaration':
name = self._get_java_class_name(node, context.content)
if name:
symbol_id = self._create_symbol_id(context.file_path, name)
symbol_info = SymbolInfo(
type="class",
file=context.file_path,
line=node.start_point[0] + 1
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[name] = symbol_id
context.classes.append(name)
# Traverse class body with updated context
for child in node.children:
self._traverse_node_single_pass(child, context, current_class=name, current_method=current_method)
return
# Handle method declarations
elif node.type == 'method_declaration':
name = self._get_java_method_name(node, context.content)
if name:
# Build full method name with class context
if current_class:
full_name = f"{current_class}.{name}"
else:
full_name = name
symbol_id = self._create_symbol_id(context.file_path, full_name)
symbol_info = SymbolInfo(
type="method",
file=context.file_path,
line=node.start_point[0] + 1,
signature=self._get_java_method_signature(node, context.content)
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[full_name] = symbol_id
context.symbol_lookup[name] = symbol_id # Also index by method name alone
context.functions.append(full_name)
# Traverse method body with updated context
for child in node.children:
self._traverse_node_single_pass(child, context, current_class=current_class,
current_method=symbol_id)
return
# Handle method invocations (calls)
elif node.type == 'method_invocation':
if current_method:
called_method = self._get_called_method_name(node, context.content)
if called_method:
# Use O(1) lookup instead of O(n) iteration
if called_method in context.symbol_lookup:
symbol_id = context.symbol_lookup[called_method]
symbol_info = context.symbols[symbol_id]
if current_method not in symbol_info.called_by:
symbol_info.called_by.append(current_method)
else:
# Try to find method with class prefix
for name, sid in context.symbol_lookup.items():
if name.endswith(f".{called_method}"):
symbol_info = context.symbols[sid]
if current_method not in symbol_info.called_by:
symbol_info.called_by.append(current_method)
break
# Handle import declarations
elif node.type == 'import_declaration':
import_text = context.content[node.start_byte:node.end_byte]
# Extract the import path (remove 'import' keyword and semicolon)
import_path = import_text.replace('import', '').replace(';', '').strip()
if import_path:
context.imports.append(import_path)
# Continue traversing children for other node types
for child in node.children:
self._traverse_node_single_pass(child, context, current_class=current_class,
current_method=current_method)
def _get_java_class_name(self, node, content: str) -> Optional[str]:
for child in node.children:
if child.type == 'identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_java_method_name(self, node, content: str) -> Optional[str]:
for child in node.children:
if child.type == 'identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_java_method_signature(self, node, content: str) -> str:
return content[node.start_byte:node.end_byte].split('\n')[0].strip()
def _extract_java_package(self, node, content: str) -> Optional[str]:
for child in node.children:
if child.type == 'scoped_identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_called_method_name(self, node, content: str) -> Optional[str]:
"""Extract called method name from method invocation node."""
# Handle obj.method() pattern - look for the method name after the dot
for child in node.children:
if child.type == 'field_access':
# For field_access nodes, get the field (method) name
for subchild in child.children:
if subchild.type == 'identifier' and subchild.start_byte > child.start_byte:
# Get the rightmost identifier (the method name)
return content[subchild.start_byte:subchild.end_byte]
elif child.type == 'identifier':
# Direct method call without object reference
return content[child.start_byte:child.end_byte]
return None
class TraversalContext:
"""Context object to pass state during single-pass traversal."""
def __init__(self, content: str, file_path: str, symbols: Dict,
functions: List, classes: List, imports: List, symbol_lookup: Dict):
self.content = content
self.file_path = file_path
self.symbols = symbols
self.functions = functions
self.classes = classes
self.imports = imports
self.symbol_lookup = symbol_lookup
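A usage sketch, assuming the tree-sitter-java bindings are installed. Because symbols and calls are collected in the same pass, only calls to methods already registered (defined earlier in the file) get linked; hello() is therefore declared before greet() here:

strategy = JavaParsingStrategy()
source = (
    "class Greeter {\n"
    "    String hello() { return \"hi\"; }\n"
    "    String greet() { return hello(); }\n"
    "}\n"
)
symbols, info = strategy.parse_file("Greeter.java", source)
print(info.symbols["classes"])    # ['Greeter']
print(info.symbols["functions"])  # ['Greeter.hello', 'Greeter.greet']
print(symbols["Greeter.java::Greeter.hello"].called_by)  # ['Greeter.java::Greeter.greet']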

View File

@@ -0,0 +1,628 @@
"""
JavaScript parsing strategy using tree-sitter.
"""
import logging
from typing import Dict, List, Tuple, Optional, Set
import tree_sitter
from tree_sitter_javascript import language
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
logger = logging.getLogger(__name__)
class JavaScriptParsingStrategy(ParsingStrategy):
"""JavaScript-specific parsing strategy using tree-sitter."""
def __init__(self):
self.js_language = tree_sitter.Language(language())
def get_language_name(self) -> str:
return "javascript"
def get_supported_extensions(self) -> List[str]:
return ['.js', '.jsx', '.mjs', '.cjs']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse JavaScript file using tree-sitter."""
symbols: Dict[str, SymbolInfo] = {}
functions: List[str] = []
classes: List[str] = []
imports: List[str] = []
exports: List[str] = []
symbol_lookup: Dict[str, str] = {}
pending_calls: List[Tuple[str, str]] = []
pending_call_set: Set[Tuple[str, str]] = set()
variable_scopes: List[Dict[str, str]] = [{}]
parser = tree_sitter.Parser(self.js_language)
tree = parser.parse(content.encode('utf8'))
self._traverse_js_node(
tree.root_node,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
)
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(content.splitlines()),
symbols={"functions": functions, "classes": classes},
imports=imports,
exports=exports
)
if pending_calls:
file_info.pending_calls = pending_calls
return symbols, file_info
def _traverse_js_node(
self,
node,
content: str,
file_path: str,
symbols: Dict[str, SymbolInfo],
functions: List[str],
classes: List[str],
imports: List[str],
exports: List[str],
symbol_lookup: Dict[str, str],
pending_calls: List[Tuple[str, str]],
pending_call_set: Set[Tuple[str, str]],
variable_scopes: List[Dict[str, str]],
current_function: Optional[str] = None,
current_class: Optional[str] = None,
):
"""Traverse JavaScript AST node and collect symbols and relationships."""
node_type = node.type
if node_type == 'function_declaration':
name = self._get_function_name(node, content)
if name:
symbol_id = self._create_symbol_id(file_path, name)
signature = self._get_js_function_signature(node, content)
symbols[symbol_id] = SymbolInfo(
type="function",
file=file_path,
line=node.start_point[0] + 1,
signature=signature
)
symbol_lookup[name] = symbol_id
functions.append(name)
function_id = f"{file_path}::{name}"
variable_scopes.append({})
for child in node.children:
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=function_id,
current_class=current_class,
)
variable_scopes.pop()
return
if node_type == 'class_declaration':
name = self._get_class_name(node, content)
if name:
symbol_id = self._create_symbol_id(file_path, name)
symbols[symbol_id] = SymbolInfo(
type="class",
file=file_path,
line=node.start_point[0] + 1
)
symbol_lookup[name] = symbol_id
classes.append(name)
for child in node.children:
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=current_function,
current_class=name,
)
return
if node_type == 'method_definition':
method_name = self._get_method_name(node, content)
class_name = current_class or self._find_parent_class(node, content)
if method_name and class_name:
full_name = f"{class_name}.{method_name}"
symbol_id = self._create_symbol_id(file_path, full_name)
signature = self._get_js_function_signature(node, content)
symbols[symbol_id] = SymbolInfo(
type="method",
file=file_path,
line=node.start_point[0] + 1,
signature=signature
)
symbol_lookup[full_name] = symbol_id
symbol_lookup[method_name] = symbol_id
functions.append(full_name)
function_id = f"{file_path}::{full_name}"
variable_scopes.append({})
for child in node.children:
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=function_id,
current_class=class_name,
)
variable_scopes.pop()
return
if node_type in ['lexical_declaration', 'variable_declaration']:
for child in node.children:
if child.type != 'variable_declarator':
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=current_function,
current_class=current_class,
)
continue
name_node = child.child_by_field_name('name')
value_node = child.child_by_field_name('value')
if not name_node:
continue
name = self._get_node_text(name_node, content)
if value_node and value_node.type in ['arrow_function', 'function_expression', 'function']:
symbol_id = self._create_symbol_id(file_path, name)
signature = content[child.start_byte:child.end_byte].split('\n')[0].strip()
symbols[symbol_id] = SymbolInfo(
type="function",
file=file_path,
line=child.start_point[0] + 1,
signature=signature
)
symbol_lookup[name] = symbol_id
functions.append(name)
function_id = f"{file_path}::{name}"
variable_scopes.append({})
self._traverse_js_node(
value_node,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=function_id,
current_class=current_class,
)
variable_scopes.pop()
else:
inferred = self._infer_expression_type(value_node, content)
if inferred:
self._set_variable_type(variable_scopes, name, inferred)
if value_node:
self._traverse_js_node(
value_node,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=current_function,
current_class=current_class,
)
return
if node_type == 'arrow_function':
variable_scopes.append({})
for child in node.children:
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=current_function,
current_class=current_class,
)
variable_scopes.pop()
return
if node_type == 'call_expression':
caller = current_function or f"{file_path}:{node.start_point[0] + 1}"
called = self._resolve_called_function(
node,
content,
variable_scopes,
current_class
)
if caller and called:
self._register_call(
symbols,
symbol_lookup,
pending_calls,
pending_call_set,
caller,
called
)
if caller:
self._collect_callback_arguments(
node,
content,
symbols,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_class,
caller
)
if node_type in ['import_statement', 'require_call']:
import_text = self._get_node_text(node, content)
imports.append(import_text)
elif node_type in ['export_statement', 'export_clause', 'export_default_declaration']:
exports.append(self._get_node_text(node, content))
for child in node.children:
self._traverse_js_node(
child,
content,
file_path,
symbols,
functions,
classes,
imports,
exports,
symbol_lookup,
pending_calls,
pending_call_set,
variable_scopes,
current_function=current_function,
current_class=current_class,
)
def _collect_callback_arguments(
self,
call_node,
content: str,
symbols: Dict[str, SymbolInfo],
symbol_lookup: Dict[str, str],
pending_calls: List[Tuple[str, str]],
pending_call_set: Set[Tuple[str, str]],
variable_scopes: List[Dict[str, str]],
current_class: Optional[str],
caller: str
) -> None:
"""Capture identifier callbacks passed as call expression arguments."""
arguments_node = call_node.child_by_field_name('arguments')
if not arguments_node:
return
for argument in arguments_node.children:
if not getattr(argument, "is_named", False):
continue
callback_name = self._resolve_argument_reference(
argument,
content,
variable_scopes,
current_class
)
if not callback_name:
continue
self._register_call(
symbols,
symbol_lookup,
pending_calls,
pending_call_set,
caller,
callback_name
)
def _resolve_argument_reference(
self,
node,
content: str,
variable_scopes: List[Dict[str, str]],
current_class: Optional[str]
) -> Optional[str]:
"""Resolve a potential callback reference used as an argument."""
node_type = node.type
if node_type == 'identifier':
return self._get_node_text(node, content)
if node_type == 'member_expression':
property_node = node.child_by_field_name('property')
if property_node is None:
for child in node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
property_name = self._get_node_text(property_node, content)
qualifier_node = node.child_by_field_name('object')
qualifier = None
if qualifier_node is not None:
qualifier = self._resolve_member_qualifier(
qualifier_node,
content,
variable_scopes,
current_class
)
if not qualifier:
for child in node.children:
if child is property_node:
continue
qualifier = self._resolve_member_qualifier(
child,
content,
variable_scopes,
current_class
)
if qualifier:
break
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
if node_type in ['call_expression', 'arrow_function', 'function', 'function_expression']:
return None
return None
def _get_function_name(self, node, content: str) -> Optional[str]:
"""Extract function name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return self._get_node_text(child, content)
return None
def _get_class_name(self, node, content: str) -> Optional[str]:
"""Extract class name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return self._get_node_text(child, content)
return None
def _get_method_name(self, node, content: str) -> Optional[str]:
"""Extract method name from tree-sitter node."""
for child in node.children:
if child.type == 'property_identifier':
return self._get_node_text(child, content)
return None
def _find_parent_class(self, node, content: str) -> Optional[str]:
"""Find the parent class of a method."""
parent = node.parent
while parent:
if parent.type == 'class_declaration':
return self._get_class_name(parent, content)
parent = parent.parent
return None
def _get_js_function_signature(self, node, content: str) -> str:
"""Extract JavaScript function signature."""
return content[node.start_byte:node.end_byte].split('\n')[0].strip()
def _get_node_text(self, node, content: str) -> str:
return content[node.start_byte:node.end_byte]
def _set_variable_type(self, variable_scopes: List[Dict[str, str]], name: str, value: str) -> None:
if not variable_scopes:
return
variable_scopes[-1][name] = value
def _lookup_variable_type(self, variable_scopes: List[Dict[str, str]], name: str) -> Optional[str]:
for scope in reversed(variable_scopes):
if name in scope:
return scope[name]
return None
def _infer_expression_type(self, node, content: str) -> Optional[str]:
"""Infer the class/type from a simple expression like `new ClassName()`."""
if node is None:
return None
if node.type == 'new_expression':
constructor_node = node.child_by_field_name('constructor')
if constructor_node is None:
# Fallback: first identifier or member expression child
for child in node.children:
if child.type in ['identifier', 'member_expression']:
constructor_node = child
break
if constructor_node:
if constructor_node.type == 'identifier':
return self._get_node_text(constructor_node, content)
if constructor_node.type == 'member_expression':
property_node = constructor_node.child_by_field_name('property')
if property_node:
return self._get_node_text(property_node, content)
for child in reversed(constructor_node.children):
if child.type in ['identifier', 'property_identifier']:
return self._get_node_text(child, content)
return None
def _resolve_called_function(
self,
node,
content: str,
variable_scopes: List[Dict[str, str]],
current_class: Optional[str]
) -> Optional[str]:
function_node = node.child_by_field_name('function')
if function_node is None and node.children:
function_node = node.children[0]
if function_node is None:
return None
if function_node.type == 'identifier':
return self._get_node_text(function_node, content)
if function_node.type == 'member_expression':
property_node = function_node.child_by_field_name('property')
if property_node is None:
for child in function_node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
property_name = self._get_node_text(property_node, content)
object_node = function_node.child_by_field_name('object')
qualifier = None
if object_node is not None:
qualifier = self._resolve_member_qualifier(
object_node,
content,
variable_scopes,
current_class
)
else:
for child in function_node.children:
if child is property_node:
continue
qualifier = self._resolve_member_qualifier(
child,
content,
variable_scopes,
current_class
)
if qualifier:
break
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
return None
def _resolve_member_qualifier(
self,
node,
content: str,
variable_scopes: List[Dict[str, str]],
current_class: Optional[str]
) -> Optional[str]:
node_type = node.type
if node_type == 'this':
return current_class
if node_type == 'identifier':
name = self._get_node_text(node, content)
var_type = self._lookup_variable_type(variable_scopes, name)
return var_type or name
if node_type == 'member_expression':
property_node = node.child_by_field_name('property')
if property_node is None:
for child in node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
qualifier = self._resolve_member_qualifier(
node.child_by_field_name('object'),
content,
variable_scopes,
current_class
)
property_name = self._get_node_text(property_node, content)
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
return None
def _register_call(
self,
symbols: Dict[str, SymbolInfo],
symbol_lookup: Dict[str, str],
pending_calls: List[Tuple[str, str]],
pending_call_set: Set[Tuple[str, str]],
caller: str,
called: str
) -> None:
if called in symbol_lookup:
symbol_info = symbols[symbol_lookup[called]]
if caller not in symbol_info.called_by:
symbol_info.called_by.append(caller)
return
key = (caller, called)
if key not in pending_call_set:
pending_call_set.add(key)
pending_calls.append(key)
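A usage sketch, assuming tree-sitter-javascript is installed; callees that cannot be resolved within the file are recorded in FileInfo.pending_calls:

strategy = JavaScriptParsingStrategy()
source = (
    "function helper() {}\n"
    "function run() { helper(); missing(); }\n"
)
symbols, info = strategy.parse_file("app.js", source)
print(info.symbols["functions"])            # ['helper', 'run']
print(symbols["app.js::helper"].called_by)  # ['app.js::run']
print(info.pending_calls)                   # [('app.js::run', 'missing')]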

View File

@@ -0,0 +1,154 @@
"""
Objective-C parsing strategy using regex patterns.
"""
import re
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
class ObjectiveCParsingStrategy(ParsingStrategy):
"""Objective-C parsing strategy using regex patterns."""
def get_language_name(self) -> str:
return "objective-c"
def get_supported_extensions(self) -> List[str]:
return ['.m', '.mm']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Objective-C file using regex patterns."""
symbols = {}
functions = []
classes = []
imports = []
lines = content.splitlines()
current_class = None
for i, line in enumerate(lines):
line = line.strip()
# Import statements
if line.startswith('#import ') or line.startswith('#include '):
import_match = re.search(r'#(?:import|include)\s+[<"]([^>"]+)[>"]', line)
if import_match:
imports.append(import_match.group(1))
# Interface declarations
elif line.startswith('@interface '):
interface_match = re.match(r'@interface\s+(\w+)', line)
if interface_match:
class_name = interface_match.group(1)
current_class = class_name
symbol_id = self._create_symbol_id(file_path, class_name)
symbols[symbol_id] = SymbolInfo(
type="class",
file=file_path,
line=i + 1
)
classes.append(class_name)
# Implementation declarations
elif line.startswith('@implementation '):
impl_match = re.match(r'@implementation\s+(\w+)', line)
if impl_match:
current_class = impl_match.group(1)
# Method declarations
elif line.startswith(('- (', '+ (')):
method_match = re.search(r'[+-]\s*\([^)]+\)\s*(\w+)', line)
if method_match:
method_name = method_match.group(1)
full_name = f"{current_class}.{method_name}" if current_class else method_name
symbol_id = self._create_symbol_id(file_path, full_name)
symbols[symbol_id] = SymbolInfo(
type="method",
file=file_path,
line=i + 1,
signature=line
)
functions.append(full_name)
# C function declarations
elif re.match(r'\w+.*\s+\w+\s*\([^)]*\)\s*\{?', line) and not line.startswith(('if', 'for', 'while')):
func_match = re.search(r'\s(\w+)\s*\([^)]*\)', line)
if func_match:
func_name = func_match.group(1)
symbol_id = self._create_symbol_id(file_path, func_name)
symbols[symbol_id] = SymbolInfo(
type="function",
file=file_path,
line=i + 1,
signature=line
)
functions.append(func_name)
# End of class
elif line == '@end':
current_class = None
# Phase 2: Add call relationship analysis
self._analyze_objc_calls(content, symbols, file_path)
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(lines),
symbols={"functions": functions, "classes": classes},
imports=imports
)
return symbols, file_info
def _analyze_objc_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str):
"""Analyze Objective-C method calls for relationships."""
lines = content.splitlines()
current_function = None
for i, line in enumerate(lines):
            line = line.strip()
# Track current method context
if line.startswith('- (') or line.startswith('+ ('):
func_name = self._extract_objc_method_name(line)
if func_name:
current_function = self._create_symbol_id(file_path, func_name)
# Find method calls: [obj methodName] or functionName()
            if current_function and (('[' in line and ']' in line) or ('(' in line and ')' in line)):
called_functions = self._extract_objc_called_functions(line)
for called_func in called_functions:
# Find the called function in symbols and add relationship
for symbol_id, symbol_info in symbols.items():
if called_func in symbol_id.split("::")[-1]:
if current_function not in symbol_info.called_by:
symbol_info.called_by.append(current_function)
def _extract_objc_method_name(self, line: str) -> Optional[str]:
"""Extract method name from Objective-C method declaration."""
try:
# - (returnType)methodName:(params) or + (returnType)methodName
match = re.search(r'[+-]\s*\([^)]*\)\s*(\w+)', line)
if match:
return match.group(1)
        except (TypeError, re.error):
            pass
return None
def _extract_objc_called_functions(self, line: str) -> List[str]:
"""Extract method names that are being called in this line."""
called_functions = []
# Find patterns like: [obj methodName] or functionName(
patterns = [
r'\[\s*\w+\s+(\w+)\s*[\]:]', # [obj methodName]
r'(\w+)\s*\(', # functionName(
]
for pattern in patterns:
matches = re.findall(pattern, line)
called_functions.extend(matches)
return called_functions
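A usage sketch of the regex-based Objective-C strategy. Note that _analyze_objc_calls keys callers by bare method name, so the caller id below carries no class prefix:

strategy = ObjectiveCParsingStrategy()
source = (
    "#import <Foundation/Foundation.h>\n"
    "@interface Greeter\n"
    "@end\n"
    "@implementation Greeter\n"
    "- (void)hello {}\n"
    "- (void)greet { [self hello]; }\n"
    "@end\n"
)
symbols, info = strategy.parse_file("Greeter.m", source)
print(info.imports)               # ['Foundation/Foundation.h']
print(info.symbols["functions"])  # ['Greeter.hello', 'Greeter.greet']
print(symbols["Greeter.m::Greeter.hello"].called_by)  # ['Greeter.m::greet']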

View File

@@ -0,0 +1,367 @@
"""
Python parsing strategy using AST - Optimized single-pass version.
"""
import ast
import logging
from typing import Dict, List, Tuple, Optional, Set
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
logger = logging.getLogger(__name__)
class PythonParsingStrategy(ParsingStrategy):
"""Python-specific parsing strategy using Python's built-in AST - Single Pass Optimized."""
def get_language_name(self) -> str:
return "python"
def get_supported_extensions(self) -> List[str]:
return ['.py', '.pyw']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Python file using AST with single-pass optimization."""
symbols = {}
functions = []
classes = []
imports = []
        # Create the visitor before parsing so resolve_deferred_calls()
        # below is safe even when ast.parse() raises.
        visitor = SinglePassVisitor(symbols, functions, classes, imports, file_path)
        try:
            tree = ast.parse(content)
            # Single-pass visit that handles everything at once
            visitor.visit(tree)
except SyntaxError as e:
logger.warning(f"Syntax error in Python file {file_path}: {e}")
except Exception as e:
logger.warning(f"Error parsing Python file {file_path}: {e}")
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(content.splitlines()),
symbols={"functions": functions, "classes": classes},
imports=imports
)
pending_calls = visitor.resolve_deferred_calls()
if pending_calls:
file_info.pending_calls = pending_calls
return symbols, file_info
class SinglePassVisitor(ast.NodeVisitor):
"""Single-pass AST visitor that extracts symbols and analyzes calls in one traversal."""
def __init__(self, symbols: Dict[str, SymbolInfo], functions: List[str],
classes: List[str], imports: List[str], file_path: str):
self.symbols = symbols
self.functions = functions
self.classes = classes
self.imports = imports
self.file_path = file_path
# Context tracking for call analysis
self.current_function_stack = []
self.current_class = None
self.variable_type_stack: List[Dict[str, str]] = [{}]
# Symbol lookup index for O(1) access
self.symbol_lookup = {} # name -> symbol_id mapping for fast lookups
# Track processed nodes to avoid duplicates
self.processed_nodes: Set[int] = set()
# Deferred call relationships for forward references
self.deferred_calls: List[Tuple[str, str]] = []
def visit_ClassDef(self, node: ast.ClassDef):
"""Visit class definition - extract symbol and analyze in single pass."""
class_name = node.name
symbol_id = self._create_symbol_id(self.file_path, class_name)
# Extract docstring
docstring = ast.get_docstring(node)
# Create symbol info
symbol_info = SymbolInfo(
type="class",
file=self.file_path,
line=node.lineno,
docstring=docstring
)
# Store in symbols and lookup index
self.symbols[symbol_id] = symbol_info
self.symbol_lookup[class_name] = symbol_id
self.classes.append(class_name)
# Track class context for method processing
old_class = self.current_class
self.current_class = class_name
method_nodes = []
# First pass: register methods so forward references resolve
for child in node.body:
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
self._register_method(child, class_name)
method_nodes.append(child)
else:
self.visit(child)
# Second pass: visit method bodies for call analysis
for method_node in method_nodes:
self._visit_registered_method(method_node, class_name)
# Restore previous class context
self.current_class = old_class
def visit_FunctionDef(self, node: ast.FunctionDef):
"""Visit function definition - extract symbol and track context."""
self._process_function(node)
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
"""Visit async function definition - extract symbol and track context."""
self._process_function(node)
def _process_function(self, node):
"""Process both sync and async function definitions."""
# Skip if this is a method (already handled by ClassDef)
if self.current_class:
return
# Skip if already processed
node_id = id(node)
if node_id in self.processed_nodes:
return
self.processed_nodes.add(node_id)
func_name = node.name
symbol_id = self._create_symbol_id(self.file_path, func_name)
# Extract function signature and docstring
signature = self._extract_function_signature(node)
docstring = ast.get_docstring(node)
# Create symbol info
symbol_info = SymbolInfo(
type="function",
file=self.file_path,
line=node.lineno,
signature=signature,
docstring=docstring
)
# Store in symbols and lookup index
self.symbols[symbol_id] = symbol_info
self.symbol_lookup[func_name] = symbol_id
self.functions.append(func_name)
# Track function context for call analysis
function_id = f"{self.file_path}::{func_name}"
self.variable_type_stack.append({})
self.current_function_stack.append(function_id)
# Visit function body to analyze calls
self.generic_visit(node)
# Pop function from stack
self.current_function_stack.pop()
self.variable_type_stack.pop()
def visit_Assign(self, node: ast.Assign):
"""Track simple variable assignments to class instances."""
class_name = self._infer_class_name(node.value)
if class_name:
current_scope = self._current_var_types()
for target in node.targets:
if isinstance(target, ast.Name):
current_scope[target.id] = class_name
self.generic_visit(node)
def visit_AnnAssign(self, node: ast.AnnAssign):
"""Track annotated assignments that instantiate classes."""
class_name = self._infer_class_name(node.value)
if class_name and isinstance(node.target, ast.Name):
self._current_var_types()[node.target.id] = class_name
self.generic_visit(node)
def _current_var_types(self) -> Dict[str, str]:
return self.variable_type_stack[-1]
def _infer_class_name(self, value: Optional[ast.AST]) -> Optional[str]:
if isinstance(value, ast.Call):
func = value.func
if isinstance(func, ast.Name):
return func.id
if isinstance(func, ast.Attribute):
return func.attr
return None
def _register_method(self, node: ast.FunctionDef, class_name: str):
"""Register a method symbol without visiting its body."""
method_name = f"{class_name}.{node.name}"
method_symbol_id = self._create_symbol_id(self.file_path, method_name)
method_signature = self._extract_function_signature(node)
method_docstring = ast.get_docstring(node)
symbol_info = SymbolInfo(
type="method",
file=self.file_path,
line=node.lineno,
signature=method_signature,
docstring=method_docstring
)
self.symbols[method_symbol_id] = symbol_info
self.symbol_lookup[method_name] = method_symbol_id
self.symbol_lookup[node.name] = method_symbol_id # Also index by short method name
self.functions.append(method_name)
def _visit_registered_method(self, node: ast.FunctionDef, class_name: str):
"""Visit a previously registered method body for call analysis."""
method_name = f"{class_name}.{node.name}"
function_id = f"{self.file_path}::{method_name}"
self.variable_type_stack.append({})
self.current_function_stack.append(function_id)
for child in node.body:
self.visit(child)
self.current_function_stack.pop()
self.variable_type_stack.pop()
def visit_Import(self, node: ast.Import):
"""Handle import statements."""
for alias in node.names:
self.imports.append(alias.name)
self.generic_visit(node)
def visit_ImportFrom(self, node: ast.ImportFrom):
"""Handle from...import statements."""
if node.module:
for alias in node.names:
self.imports.append(f"{node.module}.{alias.name}")
self.generic_visit(node)
def visit_Call(self, node: ast.Call):
"""Visit function call and record relationship using O(1) lookup."""
if not self.current_function_stack:
self.generic_visit(node)
return
try:
# Get the function name being called
called_function = None
if isinstance(node.func, ast.Name):
# Direct function call: function_name()
called_function = self._qualify_name(node.func.id)
elif isinstance(node.func, ast.Attribute):
# Method call: obj.method() or module.function()
if not self._is_super_call(node.func):
qualifier = self._infer_attribute_qualifier(node.func.value)
if qualifier:
called_function = f"{qualifier}.{node.func.attr}"
else:
called_function = node.func.attr
if called_function:
caller_function = self.current_function_stack[-1]
if not self._register_call_relationship(caller_function, called_function):
self.deferred_calls.append((caller_function, called_function))
except Exception:
# Silently handle parsing errors for complex call patterns
pass
# Continue visiting child nodes
self.generic_visit(node)
def _register_call_relationship(self, caller_function: str, called_function: str) -> bool:
"""Attempt to resolve a call relationship immediately."""
try:
if called_function in self.symbol_lookup:
symbol_id = self.symbol_lookup[called_function]
symbol_info = self.symbols[symbol_id]
if symbol_info.type in ["function", "method"]:
if caller_function not in symbol_info.called_by:
symbol_info.called_by.append(caller_function)
return True
for name, symbol_id in self.symbol_lookup.items():
if name.endswith(f".{called_function}"):
symbol_info = self.symbols[symbol_id]
if symbol_info.type in ["function", "method"]:
if caller_function not in symbol_info.called_by:
symbol_info.called_by.append(caller_function)
return True
except Exception:
return False
return False
def _qualify_name(self, name: str) -> str:
"""Map bare identifiers to fully qualified symbol names."""
if name in self.symbol_lookup:
return name
if name and name[0].isupper():
return f"{name}.__init__"
return name
def _infer_attribute_qualifier(self, value: ast.AST) -> Optional[str]:
"""Infer class name for attribute-based calls."""
if isinstance(value, ast.Name):
return self._current_var_types().get(value.id)
if isinstance(value, ast.Call):
return self._infer_class_name(value)
if isinstance(value, ast.Attribute):
if isinstance(value.value, ast.Name):
inferred = self._current_var_types().get(value.value.id)
if inferred:
return inferred
return value.attr
return None
def resolve_deferred_calls(self) -> List[Tuple[str, str]]:
"""Resolve stored call relationships once all symbols are known."""
if not self.deferred_calls:
return []
current = list(self.deferred_calls)
unresolved: List[Tuple[str, str]] = []
self.deferred_calls.clear()
for caller, called in current:
if not self._register_call_relationship(caller, called):
unresolved.append((caller, called))
self.deferred_calls = unresolved
return unresolved
@staticmethod
def _is_super_call(attr_node: ast.Attribute) -> bool:
"""Detect super().method(...) patterns."""
value = attr_node.value
if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
return value.func.id == "super"
return False
def _create_symbol_id(self, file_path: str, symbol_name: str) -> str:
"""Create a unique symbol ID."""
return f"{file_path}::{symbol_name}"
def _extract_function_signature(self, node: ast.FunctionDef) -> str:
"""Extract function signature from AST node."""
# Build basic signature
args = []
# Regular arguments
for arg in node.args.args:
args.append(arg.arg)
# Varargs (*args)
if node.args.vararg:
args.append(f"*{node.args.vararg.arg}")
# Keyword arguments (**kwargs)
if node.args.kwarg:
args.append(f"**{node.args.kwarg.arg}")
signature = f"def {node.name}({', '.join(args)}):"
return signature
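A usage sketch of the deferred-call mechanism: run() references helper() before it is defined, and resolve_deferred_calls() links the relationship once the whole module has been visited:

strategy = PythonParsingStrategy()
source = (
    "def run():\n"
    "    helper()\n"
    "\n"
    "def helper():\n"
    "    pass\n"
)
symbols, info = strategy.parse_file("app.py", source)
print(symbols["app.py::helper"].called_by)  # ['app.py::run']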

View File

@@ -0,0 +1,201 @@
"""
Strategy factory for creating appropriate parsing strategies.
"""
import threading
from typing import Dict, List
from .base_strategy import ParsingStrategy
from .python_strategy import PythonParsingStrategy
from .javascript_strategy import JavaScriptParsingStrategy
from .typescript_strategy import TypeScriptParsingStrategy
from .java_strategy import JavaParsingStrategy
from .go_strategy import GoParsingStrategy
from .objective_c_strategy import ObjectiveCParsingStrategy
from .zig_strategy import ZigParsingStrategy
from .fallback_strategy import FallbackParsingStrategy
class StrategyFactory:
"""Factory for creating appropriate parsing strategies."""
def __init__(self):
# Initialize all strategies with thread safety
self._strategies: Dict[str, ParsingStrategy] = {}
self._initialized = False
self._lock = threading.RLock()
self._initialize_strategies()
# File type mappings for fallback parser
self._file_type_mappings = {
# Web and markup
'.html': 'html', '.htm': 'html',
'.css': 'css', '.scss': 'css', '.sass': 'css',
'.less': 'css', '.stylus': 'css', '.styl': 'css',
'.md': 'markdown', '.mdx': 'markdown',
'.json': 'json', '.jsonc': 'json',
'.xml': 'xml',
'.yml': 'yaml', '.yaml': 'yaml',
# Frontend frameworks
'.vue': 'vue',
'.svelte': 'svelte',
'.astro': 'astro',
# Template engines
'.hbs': 'handlebars', '.handlebars': 'handlebars',
'.ejs': 'ejs',
'.pug': 'pug',
# Database and SQL
'.sql': 'sql', '.ddl': 'sql', '.dml': 'sql',
'.mysql': 'sql', '.postgresql': 'sql', '.psql': 'sql',
'.sqlite': 'sql', '.mssql': 'sql', '.oracle': 'sql',
'.ora': 'sql', '.db2': 'sql',
'.proc': 'sql', '.procedure': 'sql',
'.func': 'sql', '.function': 'sql',
'.view': 'sql', '.trigger': 'sql', '.index': 'sql',
'.migration': 'sql', '.seed': 'sql', '.fixture': 'sql',
'.schema': 'sql',
'.cql': 'sql', '.cypher': 'sql', '.sparql': 'sql',
'.gql': 'graphql',
'.liquibase': 'sql', '.flyway': 'sql',
# Config and text files
'.txt': 'text',
'.ini': 'config', '.cfg': 'config', '.conf': 'config',
'.toml': 'config',
'.properties': 'config',
'.env': 'config',
'.gitignore': 'config',
'.dockerignore': 'config',
'.editorconfig': 'config',
# Other programming languages (will use fallback)
'.c': 'c', '.cpp': 'cpp', '.h': 'h', '.hpp': 'hpp',
'.cxx': 'cpp', '.cc': 'cpp', '.hxx': 'hpp', '.hh': 'hpp',
'.cs': 'csharp',
'.rb': 'ruby',
'.php': 'php',
'.swift': 'swift',
'.kt': 'kotlin', '.kts': 'kotlin',
'.rs': 'rust',
'.scala': 'scala',
'.sh': 'shell', '.bash': 'shell', '.zsh': 'shell',
'.ps1': 'powershell',
'.bat': 'batch', '.cmd': 'batch',
'.r': 'r', '.R': 'r',
'.pl': 'perl', '.pm': 'perl',
'.lua': 'lua',
'.dart': 'dart',
'.hs': 'haskell',
'.ml': 'ocaml', '.mli': 'ocaml',
'.fs': 'fsharp', '.fsx': 'fsharp',
'.clj': 'clojure', '.cljs': 'clojure',
'.vim': 'vim',
}
def _initialize_strategies(self):
"""Initialize all parsing strategies with thread safety."""
with self._lock:
if self._initialized:
return
try:
# Python
python_strategy = PythonParsingStrategy()
for ext in python_strategy.get_supported_extensions():
self._strategies[ext] = python_strategy
# JavaScript
js_strategy = JavaScriptParsingStrategy()
for ext in js_strategy.get_supported_extensions():
self._strategies[ext] = js_strategy
# TypeScript
ts_strategy = TypeScriptParsingStrategy()
for ext in ts_strategy.get_supported_extensions():
self._strategies[ext] = ts_strategy
# Java
java_strategy = JavaParsingStrategy()
for ext in java_strategy.get_supported_extensions():
self._strategies[ext] = java_strategy
# Go
go_strategy = GoParsingStrategy()
for ext in go_strategy.get_supported_extensions():
self._strategies[ext] = go_strategy
# Objective-C
objc_strategy = ObjectiveCParsingStrategy()
for ext in objc_strategy.get_supported_extensions():
self._strategies[ext] = objc_strategy
# Zig
zig_strategy = ZigParsingStrategy()
for ext in zig_strategy.get_supported_extensions():
self._strategies[ext] = zig_strategy
self._initialized = True
except Exception as e:
# Reset state on failure to allow retry
self._strategies.clear()
self._initialized = False
                raise
def get_strategy(self, file_extension: str) -> ParsingStrategy:
"""
Get appropriate strategy for file extension.
Args:
file_extension: File extension (e.g., '.py', '.js')
Returns:
Appropriate parsing strategy
"""
with self._lock:
# Ensure initialization is complete
if not self._initialized:
self._initialize_strategies()
# Check for specialized strategies first
if file_extension in self._strategies:
return self._strategies[file_extension]
# Use fallback strategy with appropriate language name
language_name = self._file_type_mappings.get(file_extension, 'unknown')
return FallbackParsingStrategy(language_name)
def get_all_supported_extensions(self) -> List[str]:
"""Get all supported extensions across strategies."""
specialized = list(self._strategies.keys())
fallback = list(self._file_type_mappings.keys())
return specialized + fallback
def get_specialized_extensions(self) -> List[str]:
"""Get extensions that have specialized parsers."""
return list(self._strategies.keys())
def get_fallback_extensions(self) -> List[str]:
"""Get extensions that use fallback parsing."""
return list(self._file_type_mappings.keys())
def get_strategy_info(self) -> Dict[str, List[str]]:
"""Get information about available strategies."""
info = {}
# Group extensions by strategy type
for ext, strategy in self._strategies.items():
strategy_name = strategy.get_language_name()
if strategy_name not in info:
info[strategy_name] = []
info[strategy_name].append(ext)
# Add fallback info
fallback_languages = set(self._file_type_mappings.values())
for lang in fallback_languages:
extensions = [ext for ext, mapped_lang in self._file_type_mappings.items() if mapped_lang == lang]
info[f"fallback_{lang}"] = extensions
return info
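A usage sketch; note that constructing the factory eagerly instantiates every specialized strategy, so the tree-sitter language wheels must be importable:

factory = StrategyFactory()
print(factory.get_strategy('.py').get_language_name())   # python
print(factory.get_strategy('.md').get_language_name())   # markdown (fallback)
print(factory.get_strategy('.xyz').get_language_name())  # unknown (fallback)
print(sorted(factory.get_specialized_extensions()))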

View File

@@ -0,0 +1,487 @@
"""
TypeScript parsing strategy using tree-sitter - Optimized single-pass version.
"""
import logging
import tree_sitter
from tree_sitter_typescript import language_typescript
from typing import Dict, List, Tuple, Optional, Set
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
logger = logging.getLogger(__name__)
class TypeScriptParsingStrategy(ParsingStrategy):
"""TypeScript-specific parsing strategy using tree-sitter - Single Pass Optimized."""
def __init__(self):
self.ts_language = tree_sitter.Language(language_typescript())
def get_language_name(self) -> str:
return "typescript"
def get_supported_extensions(self) -> List[str]:
return ['.ts', '.tsx']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse TypeScript file using tree-sitter with single-pass optimization."""
symbols = {}
functions = []
classes = []
imports = []
exports = []
# Symbol lookup index for O(1) access
symbol_lookup = {} # name -> symbol_id mapping
pending_calls: List[Tuple[str, str]] = []
pending_call_set: Set[Tuple[str, str]] = set()
variable_scopes: List[Dict[str, str]] = [{}]
parser = tree_sitter.Parser(self.ts_language)
tree = parser.parse(content.encode('utf8'))
# Single-pass traversal that handles everything
context = TraversalContext(
content=content,
file_path=file_path,
symbols=symbols,
functions=functions,
classes=classes,
imports=imports,
exports=exports,
symbol_lookup=symbol_lookup,
pending_calls=pending_calls,
pending_call_set=pending_call_set,
variable_scopes=variable_scopes,
)
self._traverse_node_single_pass(tree.root_node, context)
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(content.splitlines()),
symbols={"functions": functions, "classes": classes},
imports=imports,
exports=exports
)
if context.pending_calls:
file_info.pending_calls = context.pending_calls
return symbols, file_info
def _traverse_node_single_pass(self, node, context: 'TraversalContext',
current_function: Optional[str] = None,
current_class: Optional[str] = None):
"""Single-pass traversal that extracts symbols and analyzes calls."""
node_type = node.type
# Handle function declarations
if node_type == 'function_declaration':
name = self._get_function_name(node, context.content)
if name:
symbol_id = self._create_symbol_id(context.file_path, name)
signature = self._get_ts_function_signature(node, context.content)
symbol_info = SymbolInfo(
type="function",
file=context.file_path,
line=node.start_point[0] + 1,
signature=signature
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[name] = symbol_id
context.functions.append(name)
# Traverse function body with updated context
func_context = f"{context.file_path}::{name}"
for child in node.children:
self._traverse_node_single_pass(child, context, current_function=func_context,
current_class=current_class)
return
# Handle class declarations
elif node_type == 'class_declaration':
name = self._get_class_name(node, context.content)
if name:
symbol_id = self._create_symbol_id(context.file_path, name)
symbol_info = SymbolInfo(
type="class",
file=context.file_path,
line=node.start_point[0] + 1
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[name] = symbol_id
context.classes.append(name)
# Traverse class body with updated context
for child in node.children:
self._traverse_node_single_pass(child, context, current_function=current_function,
current_class=name)
return
# Handle interface declarations
elif node_type == 'interface_declaration':
name = self._get_interface_name(node, context.content)
if name:
symbol_id = self._create_symbol_id(context.file_path, name)
symbol_info = SymbolInfo(
type="interface",
file=context.file_path,
line=node.start_point[0] + 1
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[name] = symbol_id
context.classes.append(name) # Group interfaces with classes
# Traverse interface body with updated context
for child in node.children:
self._traverse_node_single_pass(child, context, current_function=current_function,
current_class=name)
return
# Handle method definitions
elif node_type == 'method_definition':
method_name = self._get_method_name(node, context.content)
if method_name and current_class:
full_name = f"{current_class}.{method_name}"
symbol_id = self._create_symbol_id(context.file_path, full_name)
signature = self._get_ts_function_signature(node, context.content)
symbol_info = SymbolInfo(
type="method",
file=context.file_path,
line=node.start_point[0] + 1,
signature=signature
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[full_name] = symbol_id
context.symbol_lookup[method_name] = symbol_id # Also index by method name alone
context.functions.append(full_name)
# Traverse method body with updated context
method_context = f"{context.file_path}::{full_name}"
for child in node.children:
self._traverse_node_single_pass(child, context, current_function=method_context,
current_class=current_class)
return
# Handle variable declarations that define callable exports
elif node_type in ['lexical_declaration', 'variable_statement']:
handled = False
for child in node.children:
if child.type != 'variable_declarator':
continue
name_node = child.child_by_field_name('name')
value_node = child.child_by_field_name('value')
if not name_node or not value_node:
continue
if current_function is not None:
continue
value_type = value_node.type
if value_type not in [
'arrow_function',
'function',
'function_expression',
'call_expression',
'new_expression',
'identifier',
'member_expression',
]:
continue
name = context.content[name_node.start_byte:name_node.end_byte]
symbol_id = self._create_symbol_id(context.file_path, name)
signature = context.content[child.start_byte:child.end_byte].split('\n')[0].strip()
symbol_info = SymbolInfo(
type="function",
file=context.file_path,
line=child.start_point[0] + 1,
signature=signature
)
context.symbols[symbol_id] = symbol_info
context.symbol_lookup[name] = symbol_id
context.functions.append(name)
handled = True
if value_type in ['arrow_function', 'function', 'function_expression']:
func_context = f"{context.file_path}::{name}"
context.variable_scopes.append({})
self._traverse_node_single_pass(
value_node,
context,
current_function=func_context,
current_class=current_class
)
context.variable_scopes.pop()
if handled:
return
# Handle function calls
elif node_type == 'call_expression':
caller = current_function or f"{context.file_path}:{node.start_point[0] + 1}"
called_function = self._resolve_called_function(node, context, current_class)
if caller and called_function:
self._register_call(context, caller, called_function)
if caller:
self._collect_callback_arguments(node, context, caller, current_class, current_function)
# Handle import declarations
        elif node_type == 'import_statement':
            import_text = context.content[node.start_byte:node.end_byte]
            context.imports.append(import_text)
        # Handle export declarations
        elif node_type in ['export_statement', 'export_default_declaration']:
export_text = context.content[node.start_byte:node.end_byte]
context.exports.append(export_text)
# Continue traversing children for other node types
for child in node.children:
self._traverse_node_single_pass(child, context, current_function=current_function,
current_class=current_class)
    def _register_call(self, context: 'TraversalContext', caller: str, called: str) -> None:
        """Record a caller->callee edge, deferring unresolved callees to pending_calls."""
if called in context.symbol_lookup:
symbol_id = context.symbol_lookup[called]
symbol_info = context.symbols[symbol_id]
if caller not in symbol_info.called_by:
symbol_info.called_by.append(caller)
return
key = (caller, called)
if key not in context.pending_call_set:
context.pending_call_set.add(key)
context.pending_calls.append(key)
def _collect_callback_arguments(
self,
node,
context: 'TraversalContext',
caller: str,
current_class: Optional[str],
current_function: Optional[str]
    ) -> None:
        """Register callback references passed as call arguments (e.g. map(fn))."""
arguments_node = node.child_by_field_name('arguments')
if not arguments_node:
return
for argument in arguments_node.children:
if not getattr(argument, "is_named", False):
continue
callback_name = self._resolve_argument_reference(argument, context, current_class)
if callback_name:
call_site = caller
if current_function is None:
call_site = f"{context.file_path}:{argument.start_point[0] + 1}"
self._register_call(context, call_site, callback_name)
def _resolve_argument_reference(
self,
node,
context: 'TraversalContext',
current_class: Optional[str]
    ) -> Optional[str]:
        """Resolve an argument node to a callable name, or None if it is not a reference."""
node_type = node.type
if node_type == 'identifier':
return context.content[node.start_byte:node.end_byte]
if node_type == 'member_expression':
property_node = node.child_by_field_name('property')
if property_node is None:
for child in node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
property_name = context.content[property_node.start_byte:property_node.end_byte]
qualifier_node = node.child_by_field_name('object')
qualifier = self._resolve_member_qualifier(
qualifier_node,
context,
current_class
)
if not qualifier:
for child in node.children:
if child is property_node:
continue
qualifier = self._resolve_member_qualifier(
child,
context,
current_class
)
if qualifier:
break
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
return None
def _resolve_called_function(
self,
node,
context: 'TraversalContext',
current_class: Optional[str]
    ) -> Optional[str]:
        """Resolve the callee name of a call_expression (plain identifier or member access)."""
function_node = node.child_by_field_name('function')
if function_node is None and node.children:
function_node = node.children[0]
if function_node is None:
return None
if function_node.type == 'identifier':
return context.content[function_node.start_byte:function_node.end_byte]
if function_node.type == 'member_expression':
property_node = function_node.child_by_field_name('property')
if property_node is None:
for child in function_node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
property_name = context.content[property_node.start_byte:property_node.end_byte]
qualifier_node = function_node.child_by_field_name('object')
qualifier = self._resolve_member_qualifier(
qualifier_node,
context,
current_class
)
if not qualifier:
for child in function_node.children:
if child is property_node:
continue
qualifier = self._resolve_member_qualifier(
child,
context,
current_class
)
if qualifier:
break
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
return None
def _resolve_member_qualifier(
self,
node,
context: 'TraversalContext',
current_class: Optional[str]
    ) -> Optional[str]:
        """Resolve the object part of a member expression; 'this' maps to the current class."""
if node is None:
return None
node_type = node.type
if node_type == 'this':
return current_class
if node_type == 'identifier':
return context.content[node.start_byte:node.end_byte]
if node_type == 'member_expression':
property_node = node.child_by_field_name('property')
if property_node is None:
for child in node.children:
if child.type in ['property_identifier', 'identifier']:
property_node = child
break
if property_node is None:
return None
qualifier = self._resolve_member_qualifier(
node.child_by_field_name('object'),
context,
current_class
)
if not qualifier:
for child in node.children:
if child is property_node:
continue
qualifier = self._resolve_member_qualifier(
child,
context,
current_class
)
if qualifier:
break
property_name = context.content[property_node.start_byte:property_node.end_byte]
if qualifier:
return f"{qualifier}.{property_name}"
return property_name
return None
def _get_function_name(self, node, content: str) -> Optional[str]:
"""Extract function name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_class_name(self, node, content: str) -> Optional[str]:
"""Extract class name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_interface_name(self, node, content: str) -> Optional[str]:
"""Extract interface name from tree-sitter node."""
for child in node.children:
if child.type == 'type_identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_method_name(self, node, content: str) -> Optional[str]:
"""Extract method name from tree-sitter node."""
for child in node.children:
if child.type == 'property_identifier':
return content[child.start_byte:child.end_byte]
return None
def _get_ts_function_signature(self, node, content: str) -> str:
"""Extract TypeScript function signature."""
return content[node.start_byte:node.end_byte].split('\n')[0].strip()
class TraversalContext:
"""Context object to pass state during single-pass traversal."""
def __init__(
self,
content: str,
file_path: str,
symbols: Dict,
functions: List,
classes: List,
imports: List,
exports: List,
symbol_lookup: Dict,
pending_calls: List[Tuple[str, str]],
pending_call_set: Set[Tuple[str, str]],
variable_scopes: List[Dict[str, str]],
):
self.content = content
self.file_path = file_path
self.symbols = symbols
self.functions = functions
self.classes = classes
self.imports = imports
self.exports = exports
self.symbol_lookup = symbol_lookup
self.pending_calls = pending_calls
self.pending_call_set = pending_call_set
self.variable_scopes = variable_scopes
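# Illustrative sketch (hypothetical values) of how the context threads state
# through one pass; `strategy` stands in for the surrounding parsing strategy:
#
#   context = TraversalContext(
#       content=source_text, file_path="app.ts",
#       symbols={}, functions=[], classes=[], imports=[], exports=[],
#       symbol_lookup={}, pending_calls=[], pending_call_set=set(),
#       variable_scopes=[],
#   )
#   strategy._traverse_node_single_pass(tree.root_node, context)
#   # After the pass, context.functions lists the declared functions, and any
#   # call to a symbol not defined in this file sits in context.pending_calls.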

View File

@@ -0,0 +1,99 @@
"""
Zig parsing strategy using tree-sitter.
"""
import logging
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo
import tree_sitter
from tree_sitter_zig import language

logger = logging.getLogger(__name__)
class ZigParsingStrategy(ParsingStrategy):
"""Zig parsing strategy using tree-sitter."""
def __init__(self):
self.zig_language = tree_sitter.Language(language())
def get_language_name(self) -> str:
return "zig"
def get_supported_extensions(self) -> List[str]:
return ['.zig', '.zon']
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Zig file using tree-sitter."""
return self._tree_sitter_parse(file_path, content)
def _tree_sitter_parse(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
"""Parse Zig file using tree-sitter."""
symbols = {}
functions = []
classes = []
imports = []
parser = tree_sitter.Parser(self.zig_language)
tree = parser.parse(content.encode('utf8'))
        # Extract symbols by walking the syntax tree
self._traverse_zig_node(tree.root_node, content, file_path, symbols, functions, classes, imports)
file_info = FileInfo(
language=self.get_language_name(),
line_count=len(content.splitlines()),
symbols={"functions": functions, "classes": classes},
imports=imports
)
return symbols, file_info
def _traverse_zig_node(self, node, content: str, file_path: str, symbols: Dict, functions: List, classes: List, imports: List):
"""Traverse Zig AST node and extract symbols."""
if node.type == 'function_declaration':
func_name = self._extract_zig_function_name_from_node(node, content)
if func_name:
line_number = self._extract_line_number(content, node.start_byte)
symbol_id = self._create_symbol_id(file_path, func_name)
symbols[symbol_id] = SymbolInfo(
type="function",
file=file_path,
line=line_number,
signature=self._safe_extract_text(content, node.start_byte, node.end_byte)
)
functions.append(func_name)
elif node.type in ['struct_declaration', 'union_declaration', 'enum_declaration']:
type_name = self._extract_zig_type_name_from_node(node, content)
if type_name:
line_number = self._extract_line_number(content, node.start_byte)
symbol_id = self._create_symbol_id(file_path, type_name)
symbols[symbol_id] = SymbolInfo(
type=node.type.replace('_declaration', ''),
file=file_path,
line=line_number
)
classes.append(type_name)
# Recurse through children
for child in node.children:
self._traverse_zig_node(child, content, file_path, symbols, functions, classes, imports)
def _extract_zig_function_name_from_node(self, node, content: str) -> Optional[str]:
"""Extract function name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return self._safe_extract_text(content, child.start_byte, child.end_byte)
return None
def _extract_zig_type_name_from_node(self, node, content: str) -> Optional[str]:
"""Extract type name from tree-sitter node."""
for child in node.children:
if child.type == 'identifier':
return self._safe_extract_text(content, child.start_byte, child.end_byte)
return None

View File

@@ -0,0 +1,514 @@
"""
Project Settings Management
This module provides functionality for managing project settings and persistent data
for the Code Index MCP server.
"""
import os
import json
import tempfile
import hashlib
from datetime import datetime
from .constants import (
SETTINGS_DIR, CONFIG_FILE, INDEX_FILE
)
from .search.base import SearchStrategy
from .search.ugrep import UgrepStrategy
from .search.ripgrep import RipgrepStrategy
from .search.ag import AgStrategy
from .search.grep import GrepStrategy
from .search.basic import BasicSearchStrategy
# Prioritized list of search strategies
SEARCH_STRATEGY_CLASSES = [
UgrepStrategy,
RipgrepStrategy,
AgStrategy,
GrepStrategy,
BasicSearchStrategy,
]
def _get_available_strategies() -> list[SearchStrategy]:
"""
Detect and return a list of available search strategy instances,
ordered by preference.
"""
available = []
for strategy_class in SEARCH_STRATEGY_CLASSES:
try:
strategy = strategy_class()
if strategy.is_available():
available.append(strategy)
except Exception:
pass
return available
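# Illustrative: on a machine where only ripgrep and grep are installed,
# _get_available_strategies() returns [RipgrepStrategy(), GrepStrategy()] in
# that priority order, so the preferred search tool resolves to ripgrep.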
class ProjectSettings:
"""Class for managing project settings and index data"""
def __init__(self, base_path, skip_load=False):
"""Initialize project settings
Args:
base_path (str): Base path of the project
skip_load (bool): Whether to skip loading files
"""
self.base_path = base_path
self.skip_load = skip_load
self.available_strategies: list[SearchStrategy] = []
self.refresh_available_strategies()
# Ensure the base path of the temporary directory exists
try:
# Get system temporary directory
system_temp = tempfile.gettempdir()
# Check if the system temporary directory exists and is writable
if not os.path.exists(system_temp):
# Try using project directory as fallback if available
if base_path and os.path.exists(base_path):
system_temp = base_path
else:
# Use user's home directory as last resort
system_temp = os.path.expanduser("~")
if not os.access(system_temp, os.W_OK):
# Try using project directory as fallback if available
if base_path and os.path.exists(base_path) and os.access(base_path, os.W_OK):
system_temp = base_path
else:
# Use user's home directory as last resort
system_temp = os.path.expanduser("~")
# Create code_indexer directory
temp_base_dir = os.path.join(system_temp, SETTINGS_DIR)
            os.makedirs(temp_base_dir, exist_ok=True)
except Exception:
# If unable to create temporary directory, use .code_indexer in project directory if available
if base_path and os.path.exists(base_path):
temp_base_dir = os.path.join(base_path, ".code_indexer")
else:
# Use home directory as last resort
temp_base_dir = os.path.join(os.path.expanduser("~"), ".code_indexer")
if not os.path.exists(temp_base_dir):
os.makedirs(temp_base_dir, exist_ok=True)
# Use system temporary directory to store index data
try:
if base_path:
# Use hash of project path as unique identifier
path_hash = hashlib.md5(base_path.encode()).hexdigest()
self.settings_path = os.path.join(temp_base_dir, path_hash)
else:
# If no base path provided, use a default directory
self.settings_path = os.path.join(temp_base_dir, "default")
self.ensure_settings_dir()
except Exception:
# If error occurs, use .code_indexer in project or home directory as fallback
if base_path and os.path.exists(base_path):
fallback_dir = os.path.join(base_path, ".code_indexer",
hashlib.md5(base_path.encode()).hexdigest())
else:
fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
"default" if not base_path else hashlib.md5(base_path.encode()).hexdigest())
self.settings_path = fallback_dir
if not os.path.exists(fallback_dir):
os.makedirs(fallback_dir, exist_ok=True)
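    # Illustrative layout (values hypothetical): for base_path "/home/me/proj",
    # settings land in <system temp>/<SETTINGS_DIR>/<md5 of "/home/me/proj">/,
    # falling back to <project>/.code_indexer/... when the temp dir is unusable.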
def ensure_settings_dir(self):
"""Ensure settings directory exists"""
try:
            # Create directory structure if missing
            os.makedirs(self.settings_path, exist_ok=True)
# Check if directory is writable
if not os.access(self.settings_path, os.W_OK):
# If directory is not writable, use .code_indexer in project or home directory as fallback
if self.base_path and os.path.exists(self.base_path) and os.access(self.base_path, os.W_OK):
fallback_dir = os.path.join(self.base_path, ".code_indexer",
os.path.basename(self.settings_path))
else:
fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
os.path.basename(self.settings_path))
self.settings_path = fallback_dir
if not os.path.exists(fallback_dir):
os.makedirs(fallback_dir, exist_ok=True)
except Exception:
# If unable to create settings directory, use .code_indexer in project or home directory
if self.base_path and os.path.exists(self.base_path):
fallback_dir = os.path.join(self.base_path, ".code_indexer",
hashlib.md5(self.base_path.encode()).hexdigest())
else:
fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
"default" if not self.base_path else hashlib.md5(self.base_path.encode()).hexdigest())
self.settings_path = fallback_dir
if not os.path.exists(fallback_dir):
os.makedirs(fallback_dir, exist_ok=True)
def get_config_path(self):
"""Get the path to the configuration file"""
try:
path = os.path.join(self.settings_path, CONFIG_FILE)
# Ensure directory exists
os.makedirs(os.path.dirname(path), exist_ok=True)
return path
except Exception:
# If error occurs, use file in project or home directory as fallback
if self.base_path and os.path.exists(self.base_path):
return os.path.join(self.base_path, CONFIG_FILE)
else:
return os.path.join(os.path.expanduser("~"), CONFIG_FILE)
def _get_timestamp(self):
"""Get current timestamp"""
return datetime.now().isoformat()
def save_config(self, config):
"""Save configuration data
Args:
config (dict): Configuration data
"""
try:
config_path = self.get_config_path()
# Add timestamp
config['last_updated'] = self._get_timestamp()
# Ensure directory exists
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w', encoding='utf-8') as f:
json.dump(config, f, indent=2, ensure_ascii=False)
return config
except Exception:
return config
def load_config(self):
"""Load configuration data
Returns:
dict: Configuration data, or empty dict if file doesn't exist
"""
# If skip_load is set, return empty dict directly
if self.skip_load:
return {}
try:
config_path = self.get_config_path()
if os.path.exists(config_path):
try:
with open(config_path, 'r', encoding='utf-8') as f:
config = json.load(f)
return config
except (json.JSONDecodeError, UnicodeDecodeError):
# If file is corrupted, return empty dict
return {}
return {}
except Exception:
return {}
def save_index(self, index_data):
"""Save code index in JSON format
Args:
index_data: Index data as dictionary or JSON string
"""
try:
index_path = self.get_index_path()
# Ensure directory exists
dir_path = os.path.dirname(index_path)
if not os.path.exists(dir_path):
os.makedirs(dir_path, exist_ok=True)
# Check if directory is writable
if not os.access(dir_path, os.W_OK):
# Use project or home directory as fallback
if self.base_path and os.path.exists(self.base_path):
index_path = os.path.join(self.base_path, INDEX_FILE)
else:
index_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)
# Convert to JSON string if it's an object with to_json method
if hasattr(index_data, 'to_json'):
json_data = index_data.to_json()
elif isinstance(index_data, str):
json_data = index_data
else:
# Assume it's a dictionary and convert to JSON
json_data = json.dumps(index_data, indent=2, default=str)
with open(index_path, 'w', encoding='utf-8') as f:
f.write(json_data)
except Exception:
# Try saving to project or home directory
try:
if self.base_path and os.path.exists(self.base_path):
fallback_path = os.path.join(self.base_path, INDEX_FILE)
else:
fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)
# Convert to JSON string if it's an object with to_json method
if hasattr(index_data, 'to_json'):
json_data = index_data.to_json()
elif isinstance(index_data, str):
json_data = index_data
else:
json_data = json.dumps(index_data, indent=2, default=str)
with open(fallback_path, 'w', encoding='utf-8') as f:
f.write(json_data)
except Exception:
pass
def load_index(self):
"""Load code index from JSON format
Returns:
dict: Index data, or None if file doesn't exist or has errors
"""
# If skip_load is set, return None directly
if self.skip_load:
return None
try:
index_path = self.get_index_path()
if os.path.exists(index_path):
try:
with open(index_path, 'r', encoding='utf-8') as f:
index_data = json.load(f)
return index_data
except (json.JSONDecodeError, UnicodeDecodeError):
# If file is corrupted, return None
return None
except Exception:
return None
else:
# Try loading from project or home directory
if self.base_path and os.path.exists(self.base_path):
fallback_path = os.path.join(self.base_path, INDEX_FILE)
else:
fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)
if os.path.exists(fallback_path):
try:
with open(fallback_path, 'r', encoding='utf-8') as f:
index_data = json.load(f)
return index_data
except Exception:
pass
return None
except Exception:
return None
def cleanup_legacy_files(self) -> None:
"""Clean up any legacy index files found."""
try:
legacy_files = [
os.path.join(self.settings_path, "file_index.pickle"),
os.path.join(self.settings_path, "content_cache.pickle"),
os.path.join(self.settings_path, INDEX_FILE) # Legacy JSON
]
for legacy_file in legacy_files:
if os.path.exists(legacy_file):
try:
os.remove(legacy_file)
except Exception:
pass
except Exception:
pass
def clear(self):
"""Clear config and index files"""
try:
if os.path.exists(self.settings_path):
# Check if directory is writable
if not os.access(self.settings_path, os.W_OK):
return
# Delete specific files only (config.json and index.json)
files_to_delete = [CONFIG_FILE, INDEX_FILE]
for filename in files_to_delete:
file_path = os.path.join(self.settings_path, filename)
try:
if os.path.isfile(file_path):
os.unlink(file_path)
except Exception:
pass
except Exception:
pass
def get_stats(self):
"""Get statistics for the settings directory
Returns:
dict: Dictionary containing file sizes and update times
"""
try:
stats = {
'settings_path': self.settings_path,
'exists': os.path.exists(self.settings_path),
'is_directory': os.path.isdir(self.settings_path) if os.path.exists(self.settings_path) else False,
'writable': os.access(self.settings_path, os.W_OK) if os.path.exists(self.settings_path) else False,
'files': {},
'temp_dir': tempfile.gettempdir(),
'base_path': self.base_path
}
if stats['exists'] and stats['is_directory']:
try:
# Get all files in the directory
all_files = os.listdir(self.settings_path)
stats['all_files'] = all_files
# Get details for specific files
for filename in [CONFIG_FILE, INDEX_FILE]:
file_path = os.path.join(self.settings_path, filename)
if os.path.exists(file_path):
try:
file_stats = os.stat(file_path)
stats['files'][filename] = {
'path': file_path,
'size_bytes': file_stats.st_size,
'last_modified': datetime.fromtimestamp(file_stats.st_mtime).isoformat(),
'readable': os.access(file_path, os.R_OK),
'writable': os.access(file_path, os.W_OK)
}
except Exception as e:
stats['files'][filename] = {
'path': file_path,
'error': str(e)
}
except Exception as e:
stats['list_error'] = str(e)
# Check fallback path
if self.base_path and os.path.exists(self.base_path):
fallback_dir = os.path.join(self.base_path, ".code_indexer")
else:
fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer")
stats['fallback_path'] = fallback_dir
stats['fallback_exists'] = os.path.exists(fallback_dir)
stats['fallback_is_directory'] = os.path.isdir(fallback_dir) if os.path.exists(fallback_dir) else False
return stats
except Exception as e:
return {
'error': str(e),
'settings_path': self.settings_path,
'temp_dir': tempfile.gettempdir(),
'base_path': self.base_path
}
def get_search_tools_config(self):
"""Get the configuration of available search tools.
Returns:
dict: A dictionary containing the list of available tool names.
"""
return {
"available_tools": [s.name for s in self.available_strategies],
"preferred_tool": self.get_preferred_search_tool().name if self.available_strategies else None
}
def get_preferred_search_tool(self) -> SearchStrategy | None:
"""Get the preferred search tool based on availability and priority.
Returns:
SearchStrategy: An instance of the preferred search strategy, or None.
"""
if not self.available_strategies:
self.refresh_available_strategies()
return self.available_strategies[0] if self.available_strategies else None
def refresh_available_strategies(self):
"""
Force a refresh of the available search tools list.
"""
self.available_strategies = _get_available_strategies()
def get_file_watcher_config(self) -> dict:
"""
Get file watcher specific configuration.
Returns:
dict: File watcher configuration with defaults
"""
config = self.load_config()
default_config = {
"enabled": True,
"debounce_seconds": 6.0,
"additional_exclude_patterns": [],
"monitored_extensions": [], # Empty = use all supported extensions
"exclude_patterns": [
".git", ".svn", ".hg",
"node_modules", "__pycache__", ".venv", "venv",
".DS_Store", "Thumbs.db",
"dist", "build", "target", ".idea", ".vscode",
".pytest_cache", ".coverage", ".tox",
"bin", "obj"
]
}
# Merge with loaded config
file_watcher_config = config.get("file_watcher", {})
for key, default_value in default_config.items():
if key not in file_watcher_config:
file_watcher_config[key] = default_value
return file_watcher_config
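    # Example (illustrative): with a saved config of
    #   {"file_watcher": {"debounce_seconds": 2.0}}
    # this returns the stored debounce while filling in the remaining defaults
    # (enabled=True, additional_exclude_patterns=[], and so on).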
def update_file_watcher_config(self, updates: dict) -> None:
"""
Update file watcher configuration.
Args:
updates: Dictionary of configuration updates
"""
config = self.load_config()
if "file_watcher" not in config:
config["file_watcher"] = self.get_file_watcher_config()
config["file_watcher"].update(updates)
self.save_config(config)

View File

@@ -0,0 +1 @@
"""Search strategies package."""

View File

@@ -0,0 +1,145 @@
"""
Search Strategy for The Silver Searcher (ag)
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
class AgStrategy(SearchStrategy):
"""Search strategy using 'The Silver Searcher' (ag) command-line tool."""
@property
def name(self) -> str:
"""The name of the search tool."""
return 'ag'
def is_available(self) -> bool:
"""Check if 'ag' command is available on the system."""
return shutil.which('ag') is not None
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a search using The Silver Searcher (ag).
Args:
pattern: The search pattern
base_path: Directory to search in
case_sensitive: Whether search is case sensitive
context_lines: Number of context lines to show
file_pattern: File pattern to filter
fuzzy: Enable word boundary matching (not true fuzzy search)
regex: Enable regex pattern matching
"""
        # ag prints line numbers and groups matches by file by default;
        # --noheading keeps its output consistent with the other tools.
cmd = ['ag', '--noheading']
if not case_sensitive:
cmd.append('--ignore-case')
# Prepare search pattern
search_pattern = pattern
if regex:
# Use regex mode - check for safety first
if not is_safe_regex_pattern(pattern):
raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
# Don't add --literal, use regex mode
elif fuzzy:
# Use word boundary pattern for partial matching
search_pattern = create_word_boundary_pattern(pattern)
else:
# Use literal string search
cmd.append('--literal')
if context_lines > 0:
cmd.extend(['--before', str(context_lines)])
cmd.extend(['--after', str(context_lines)])
if file_pattern:
# Convert glob pattern to regex pattern for ag's -G parameter
# ag's -G expects regex, not glob patterns
regex_pattern = file_pattern
if '*' in file_pattern and not file_pattern.startswith('^') and not file_pattern.endswith('$'):
# Convert common glob patterns to regex
if file_pattern.startswith('*.'):
# Pattern like "*.py" -> "\.py$"
extension = file_pattern[2:] # Remove "*."
regex_pattern = f'\\.{extension}$'
elif file_pattern.endswith('*'):
# Pattern like "test_*" -> "^test_.*"
prefix = file_pattern[:-1] # Remove "*"
regex_pattern = f'^{prefix}.*'
elif '*' in file_pattern:
# Pattern like "test_*.py" -> "^test_.*\.py$"
# First escape dots, then replace * with .*
regex_pattern = file_pattern.replace('.', '\\.')
regex_pattern = regex_pattern.replace('*', '.*')
if not regex_pattern.startswith('^'):
regex_pattern = '^' + regex_pattern
if not regex_pattern.endswith('$'):
regex_pattern = regex_pattern + '$'
cmd.extend(['-G', regex_pattern])
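            # Illustrative conversions performed by the branch above:
            #   "*.py"      -> r"\.py$"
            #   "test_*"    -> r"^test_.*"
            #   "test_*.py" -> r"^test_.*\.py$"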
processed_patterns = set()
exclude_dirs = getattr(self, 'exclude_dirs', [])
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
for directory in exclude_dirs:
normalized = directory.strip()
if not normalized or normalized in processed_patterns:
continue
cmd.extend(['--ignore', normalized])
processed_patterns.add(normalized)
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
if not normalized or normalized in processed_patterns:
continue
if normalized.startswith('!'):
normalized = normalized[1:]
cmd.extend(['--ignore', normalized])
processed_patterns.add(normalized)
# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
cmd.append('.') # Use current directory since we set cwd=base_path
try:
# ag exits with 1 if no matches are found, which is not an error.
# It exits with 0 on success (match found). Other codes are errors.
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding='utf-8',
errors='replace',
check=False, # Do not raise CalledProcessError on non-zero exit
cwd=base_path # Set working directory to project base path for proper pattern resolution
)
            # ag exits with 0 on match and 1 on no match; treat anything
            # higher as a real error, mirroring the rg/grep strategies.
if process.returncode > 1:
raise RuntimeError(f"ag failed with exit code {process.returncode}: {process.stderr}")
return parse_search_output(process.stdout, base_path)
except FileNotFoundError:
raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.")
except Exception as e:
# Re-raise other potential exceptions like permission errors
raise RuntimeError(f"An error occurred while running ag: {e}")

View File

@@ -0,0 +1,234 @@
"""
Search Strategies for Code Indexer
This module defines the abstract base class for search strategies and will contain
concrete implementations for different search tools like ugrep, ripgrep, etc.
"""
import os
import re
import shutil
import subprocess
import sys
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
from ..indexing.qualified_names import normalize_file_path
if TYPE_CHECKING: # pragma: no cover
from ..utils.file_filter import FileFilter
def parse_search_output(
output: str,
base_path: str
) -> Dict[str, List[Tuple[int, str]]]:
"""
Parse the output of command-line search tools (grep, ag, rg).
Args:
output: The raw output from the command-line tool.
base_path: The base path of the project to make file paths relative.
Returns:
A dictionary where keys are file paths and values are lists of (line_number, line_content) tuples.
"""
results = {}
# Normalize base_path to ensure consistent path separation
normalized_base_path = os.path.normpath(base_path)
for line in output.strip().split('\n'):
if not line.strip():
continue
try:
# Try to parse as a matched line first (format: path:linenum:content)
parts = line.split(':', 2)
# Check if this might be a context line (format: path-linenum-content)
# Context lines use '-' as separator in grep/ag output
if len(parts) < 3 and '-' in line:
# Try to parse as context line
# Match pattern: path-linenum-content or path-linenum-\tcontent
match = re.match(r'^(.*?)-(\d+)[-\t](.*)$', line)
if match:
file_path_abs = match.group(1)
line_number_str = match.group(2)
content = match.group(3)
else:
# If regex doesn't match, skip this line
continue
elif sys.platform == "win32" and len(parts) >= 3 and len(parts[0]) == 1 and parts[1].startswith('\\'):
# Handle Windows paths with drive letter (e.g., C:\path\file.txt)
file_path_abs = f"{parts[0]}:{parts[1]}"
line_number_str = parts[2].split(':', 1)[0]
content = parts[2].split(':', 1)[1] if ':' in parts[2] else parts[2]
elif len(parts) >= 3:
# Standard format: path:linenum:content
file_path_abs = parts[0]
line_number_str = parts[1]
content = parts[2]
else:
# Line doesn't match any expected format
continue
line_number = int(line_number_str)
# If the path is already relative (doesn't start with /), keep it as is
# Otherwise, make it relative to the base_path
if os.path.isabs(file_path_abs):
relative_path = os.path.relpath(file_path_abs, normalized_base_path)
else:
# Path is already relative, use it as is
relative_path = file_path_abs
# Normalize path separators for consistency
relative_path = normalize_file_path(relative_path)
if relative_path not in results:
results[relative_path] = []
results[relative_path].append((line_number, content))
except (ValueError, IndexError):
# Silently ignore lines that don't match the expected format
# This can happen with summary lines or other tool-specific output
pass
return results
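# Example (illustrative): a raw grep-style line such as
#   "src/app.py:10:def main():"
# parses into {"src/app.py": [(10, "def main():")]}, with absolute paths made
# relative to base_path and separators normalized.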
def create_word_boundary_pattern(pattern: str) -> str:
"""
Create word boundary patterns for partial matching.
This is NOT true fuzzy search, but allows matching words at boundaries.
Args:
pattern: Original search pattern
Returns:
Word boundary pattern for regex matching
"""
# Escape any regex special characters to make them literal
escaped = re.escape(pattern)
# Create word boundary pattern that matches:
# 1. Word at start of word boundary (e.g., "test" in "testing")
# 2. Word at end of word boundary (e.g., "test" in "mytest")
# 3. Whole word (e.g., "test" as standalone word)
if len(pattern) >= 3: # Only for patterns of reasonable length
# This pattern allows partial matches at word boundaries
boundary_pattern = f"\\b{escaped}|{escaped}\\b"
else:
# For short patterns, require full word boundaries to avoid too many matches
boundary_pattern = f"\\b{escaped}\\b"
return boundary_pattern
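# Examples (derived from the rules above):
#   create_word_boundary_pattern("test") -> r"\btest|test\b"
#   create_word_boundary_pattern("ab")   -> r"\bab\b"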
def is_safe_regex_pattern(pattern: str) -> bool:
"""
Check if a pattern appears to be a safe regex pattern.
Args:
pattern: The search pattern to check
Returns:
True if the pattern looks like a safe regex, False otherwise
"""
# Strong indicators of regex intent
strong_regex_indicators = ['|', '(', ')', '[', ']', '^', '$']
# Weaker indicators that need context
weak_regex_indicators = ['.', '*', '+', '?']
# Check for strong regex indicators
has_strong_regex = any(char in pattern for char in strong_regex_indicators)
# Check for weak indicators with context
has_weak_regex = any(char in pattern for char in weak_regex_indicators)
# If has strong indicators, likely regex
if has_strong_regex:
# Still check for dangerous patterns
dangerous_patterns = [
r'(.+)+', # Nested quantifiers
r'(.*)*', # Nested stars
r'(.{0,})+', # Potential ReDoS patterns
]
has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns)
return not has_dangerous_patterns
# If only weak indicators, need more context
if has_weak_regex:
# Patterns like ".*", ".+", "file.*py" look like regex
# But "file.txt", "test.py" look like literal filenames
regex_like_patterns = [
r'\.\*', # .*
r'\.\+', # .+
r'\.\w*\*', # .something*
r'\*\.', # *.
r'\w+\.\*\w*', # word.*word
]
return any(re.search(regex_pattern, pattern) for regex_pattern in regex_like_patterns)
return False
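# Examples (derived from the heuristics above):
#   is_safe_regex_pattern("ERROR|WARN") -> True   (strong indicator '|')
#   is_safe_regex_pattern("file.txt")   -> False  (reads as a literal name)
#   is_safe_regex_pattern("(.+)+")      -> False  (potential ReDoS)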
class SearchStrategy(ABC):
"""
Abstract base class for a search strategy.
Each strategy is responsible for searching code using a specific tool or method.
"""
def configure_excludes(self, file_filter: Optional['FileFilter']) -> None:
"""Configure shared exclusion settings for the strategy."""
self.file_filter = file_filter
if file_filter:
self.exclude_dirs = sorted(set(file_filter.exclude_dirs))
self.exclude_file_patterns = sorted(set(file_filter.exclude_files))
else:
self.exclude_dirs = []
self.exclude_file_patterns = []
@property
@abstractmethod
def name(self) -> str:
"""The name of the search tool (e.g., 'ugrep', 'ripgrep')."""
pass
@abstractmethod
def is_available(self) -> bool:
"""
Check if the search tool for this strategy is available on the system.
Returns:
True if the tool is available, False otherwise.
"""
pass
@abstractmethod
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a search using the specific strategy.
Args:
pattern: The search pattern.
base_path: The root directory to search in.
case_sensitive: Whether the search is case-sensitive.
context_lines: Number of context lines to show around each match.
file_pattern: Glob pattern to filter files (e.g., "*.py").
fuzzy: Whether to enable fuzzy/partial matching.
regex: Whether to enable regex pattern matching.
Returns:
A dictionary mapping filenames to lists of (line_number, line_content) tuples.
"""
pass
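    # A minimal sketch (hypothetical) of a concrete strategy; only `name`,
    # `is_available`, and `search` must be implemented, while
    # `configure_excludes` is inherited from this base class:
    #
    #   class NullStrategy(SearchStrategy):
    #       @property
    #       def name(self) -> str:
    #           return 'null'
    #
    #       def is_available(self) -> bool:
    #           return True
    #
    #       def search(self, pattern, base_path, **kwargs):
    #           return {}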

View File

@@ -0,0 +1,116 @@
"""
Basic, pure-Python search strategy.
"""
import fnmatch
import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern
class BasicSearchStrategy(SearchStrategy):
"""
A basic, pure-Python search strategy.
This strategy iterates through files and lines manually. It's a fallback
for when no advanced command-line search tools are available.
It does not support context lines.
"""
@property
def name(self) -> str:
"""The name of the search tool."""
return 'basic'
def is_available(self) -> bool:
"""This basic strategy is always available."""
return True
def _matches_pattern(self, filename: str, pattern: str) -> bool:
"""Check if filename matches the glob pattern."""
if not pattern:
return True
# Handle simple cases efficiently
if pattern.startswith('*') and not any(c in pattern[1:] for c in '*?[]{}'):
return filename.endswith(pattern[1:])
# Use fnmatch for more complex patterns
return fnmatch.fnmatch(filename, pattern)
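    # Illustrative: "*.py" takes the fast suffix path (filename.endswith(".py")),
    # while "test_*.py" falls through to fnmatch.fnmatch().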
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a basic, line-by-line search.
Note: This implementation does not support context_lines.
Args:
pattern: The search pattern
base_path: Directory to search in
case_sensitive: Whether search is case sensitive
context_lines: Number of context lines (not supported)
file_pattern: File pattern to filter
fuzzy: Enable word boundary matching
regex: Enable regex pattern matching
"""
results: Dict[str, List[Tuple[int, str]]] = {}
flags = 0 if case_sensitive else re.IGNORECASE
try:
if regex:
# Use regex mode - check for safety first
if not is_safe_regex_pattern(pattern):
raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
search_regex = re.compile(pattern, flags)
elif fuzzy:
# Use word boundary pattern for partial matching
search_pattern = create_word_boundary_pattern(pattern)
search_regex = re.compile(search_pattern, flags)
else:
# Use literal string search
search_regex = re.compile(re.escape(pattern), flags)
except re.error as e:
raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}")
file_filter = getattr(self, 'file_filter', None)
base = Path(base_path)
for root, dirs, files in os.walk(base_path):
if file_filter:
dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)]
for file in files:
if file_pattern and not self._matches_pattern(file, file_pattern):
continue
file_path = Path(root) / file
if file_filter and not file_filter.should_process_path(file_path, base):
continue
rel_path = os.path.relpath(file_path, base_path)
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
for line_num, line in enumerate(f, 1):
if search_regex.search(line):
content = line.rstrip('\n')
if rel_path not in results:
results[rel_path] = []
results[rel_path].append((line_num, content))
                except Exception:
                    # Skip unreadable, binary, or otherwise inaccessible files
                    continue
return results

View File

@@ -0,0 +1,131 @@
"""
Search Strategy for standard grep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
class GrepStrategy(SearchStrategy):
"""
Search strategy using the standard 'grep' command-line tool.
This is intended as a fallback for when more advanced tools like
ugrep, ripgrep, or ag are not available.
"""
@property
def name(self) -> str:
"""The name of the search tool."""
return 'grep'
def is_available(self) -> bool:
"""Check if 'grep' command is available on the system."""
return shutil.which('grep') is not None
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a search using standard grep.
Args:
pattern: The search pattern
base_path: Directory to search in
case_sensitive: Whether search is case sensitive
context_lines: Number of context lines to show
file_pattern: File pattern to filter
fuzzy: Enable word boundary matching
regex: Enable regex pattern matching
"""
# -r: recursive, -n: line number
cmd = ['grep', '-r', '-n']
# Prepare search pattern
search_pattern = pattern
if regex:
# Use regex mode - check for safety first
if not is_safe_regex_pattern(pattern):
raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
cmd.append('-E') # Extended Regular Expressions
elif fuzzy:
# Use word boundary pattern for partial matching
search_pattern = create_word_boundary_pattern(pattern)
cmd.append('-E') # Extended Regular Expressions
else:
# Auto-detect if pattern looks like a safe regex
if is_safe_regex_pattern(pattern):
# Pattern contains regex chars, use extended regex mode
cmd.append('-E')
else:
# Use literal string search
cmd.append('-F')
if not case_sensitive:
cmd.append('-i')
if context_lines > 0:
cmd.extend(['-A', str(context_lines)])
cmd.extend(['-B', str(context_lines)])
if file_pattern:
# Note: grep's --include uses glob patterns, not regex
cmd.append(f'--include={file_pattern}')
exclude_dirs = getattr(self, 'exclude_dirs', [])
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
processed_dirs = set()
for directory in exclude_dirs:
normalized = directory.strip()
if not normalized or normalized in processed_dirs:
continue
cmd.append(f'--exclude-dir={normalized}')
processed_dirs.add(normalized)
processed_files = set()
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
if not normalized or normalized in processed_files:
continue
if normalized.startswith('!'):
normalized = normalized[1:]
cmd.append(f'--exclude={normalized}')
processed_files.add(normalized)
# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
cmd.append('.') # Use current directory since we set cwd=base_path
try:
# grep exits with 1 if no matches are found, which is not an error.
# It exits with 0 on success (match found). >1 for errors.
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding='utf-8',
errors='replace',
check=False,
cwd=base_path # Set working directory to project base path for proper pattern resolution
)
if process.returncode > 1:
raise RuntimeError(f"grep failed with exit code {process.returncode}: {process.stderr}")
return parse_search_output(process.stdout, base_path)
except FileNotFoundError:
raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.")
except Exception as e:
raise RuntimeError(f"An error occurred while running grep: {e}")

View File

@@ -0,0 +1,121 @@
"""
Search Strategy for ripgrep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
class RipgrepStrategy(SearchStrategy):
"""Search strategy using the 'ripgrep' (rg) command-line tool."""
@property
def name(self) -> str:
"""The name of the search tool."""
return 'ripgrep'
def is_available(self) -> bool:
"""Check if 'rg' command is available on the system."""
return shutil.which('rg') is not None
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a search using ripgrep.
Args:
pattern: The search pattern
base_path: Directory to search in
case_sensitive: Whether search is case sensitive
context_lines: Number of context lines to show
file_pattern: File pattern to filter
fuzzy: Enable word boundary matching (not true fuzzy search)
regex: Enable regex pattern matching
"""
cmd = ['rg', '--line-number', '--no-heading', '--color=never', '--no-ignore']
if not case_sensitive:
cmd.append('--ignore-case')
# Prepare search pattern
search_pattern = pattern
if regex:
# Use regex mode - check for safety first
if not is_safe_regex_pattern(pattern):
raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
# Don't add --fixed-strings, use regex mode
elif fuzzy:
# Use word boundary pattern for partial matching
search_pattern = create_word_boundary_pattern(pattern)
else:
# Use literal string search
cmd.append('--fixed-strings')
if context_lines > 0:
cmd.extend(['--context', str(context_lines)])
if file_pattern:
cmd.extend(['--glob', file_pattern])
exclude_dirs = getattr(self, 'exclude_dirs', [])
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
processed_patterns = set()
for directory in exclude_dirs:
normalized = directory.strip()
if not normalized or normalized in processed_patterns:
continue
cmd.extend(['--glob', f'!**/{normalized}/**'])
processed_patterns.add(normalized)
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
if not normalized or normalized in processed_patterns:
continue
if normalized.startswith('!'):
glob_pattern = normalized
elif any(ch in normalized for ch in '*?[') or '/' in normalized:
glob_pattern = f'!{normalized}'
else:
glob_pattern = f'!**/{normalized}'
cmd.extend(['--glob', glob_pattern])
processed_patterns.add(normalized)
# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
cmd.append('.') # Use current directory since we set cwd=base_path
try:
# ripgrep exits with 1 if no matches are found, which is not an error.
# It exits with 2 for actual errors.
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding='utf-8',
errors='replace',
check=False, # Do not raise CalledProcessError on non-zero exit
cwd=base_path # Set working directory to project base path for proper glob resolution
)
if process.returncode > 1:
raise RuntimeError(f"ripgrep failed with exit code {process.returncode}: {process.stderr}")
return parse_search_output(process.stdout, base_path)
except FileNotFoundError:
raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.")
except Exception as e:
# Re-raise other potential exceptions like permission errors
raise RuntimeError(f"An error occurred while running ripgrep: {e}")

View File

@@ -0,0 +1,121 @@
"""
Search Strategy for ugrep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
class UgrepStrategy(SearchStrategy):
"""Search strategy using the 'ugrep' (ug) command-line tool."""
@property
def name(self) -> str:
"""The name of the search tool."""
return 'ugrep'
def is_available(self) -> bool:
"""Check if 'ug' command is available on the system."""
return shutil.which('ug') is not None
def search(
self,
pattern: str,
base_path: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: bool = False
) -> Dict[str, List[Tuple[int, str]]]:
"""
Execute a search using the 'ug' command-line tool.
Args:
pattern: The search pattern
base_path: Directory to search in
case_sensitive: Whether search is case sensitive
context_lines: Number of context lines to show
file_pattern: File pattern to filter
fuzzy: Enable true fuzzy search (ugrep native support)
regex: Enable regex pattern matching
"""
        if not self.is_available():
            raise RuntimeError("ugrep (ug) not found. Please install it and ensure it's in your PATH.")
cmd = ['ug', '-r', '--line-number', '--no-heading']
if fuzzy:
# ugrep has native fuzzy search support
cmd.append('--fuzzy')
elif regex:
# Use regex mode - check for safety first
if not is_safe_regex_pattern(pattern):
raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
# Don't add --fixed-strings, use regex mode
else:
# Use literal string search
cmd.append('--fixed-strings')
if not case_sensitive:
cmd.append('--ignore-case')
if context_lines > 0:
cmd.extend(['-A', str(context_lines), '-B', str(context_lines)])
if file_pattern:
cmd.extend(['--include', file_pattern])
processed_patterns = set()
exclude_dirs = getattr(self, 'exclude_dirs', [])
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
for directory in exclude_dirs:
normalized = directory.strip()
if not normalized or normalized in processed_patterns:
continue
cmd.extend(['--ignore', f'**/{normalized}/**'])
processed_patterns.add(normalized)
        # Use a distinct loop variable so the `pattern` argument is not
        # clobbered before it is appended to the command below.
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
if not normalized or normalized in processed_patterns:
continue
if normalized.startswith('!'):
ignore_pattern = normalized[1:]
elif any(ch in normalized for ch in '*?[') or '/' in normalized:
ignore_pattern = normalized
else:
ignore_pattern = f'**/{normalized}'
cmd.extend(['--ignore', ignore_pattern])
processed_patterns.add(normalized)
# Add '--' to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(pattern)
cmd.append('.') # Use current directory since we set cwd=base_path
try:
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding='utf-8',
errors='ignore', # Ignore decoding errors for binary-like content
check=False, # Do not raise exception on non-zero exit codes
cwd=base_path # Set working directory to project base path for proper pattern resolution
)
# ugrep exits with 1 if no matches are found, which is not an error for us.
# It exits with 2 for actual errors.
            if process.returncode > 1:
                raise RuntimeError(f"ugrep failed with exit code {process.returncode}: {process.stderr.strip()}")
return parse_search_output(process.stdout, base_path)
        except FileNotFoundError:
            raise RuntimeError("ugrep (ug) not found. Please install it and ensure it's in your PATH.")
        except Exception as e:
            raise RuntimeError(f"An error occurred while running ugrep: {e}")

View File

@@ -0,0 +1,386 @@
"""
Code Index MCP Server
This MCP server allows LLMs to index, search, and analyze code from a project directory.
It provides tools for file discovery, content retrieval, and code analysis.
This version uses a service-oriented architecture where MCP decorators delegate
to domain-specific services for business logic.
"""
# Standard library imports
import argparse
import inspect
import sys
import logging
from contextlib import asynccontextmanager
from dataclasses import dataclass
from typing import AsyncIterator, Dict, Any, List, Optional
from urllib.parse import unquote
# Third-party imports
from mcp.server.fastmcp import FastMCP, Context
# Local imports
from .project_settings import ProjectSettings
from .services import (
SearchService, FileService, SettingsService, FileWatcherService
)
from .services.settings_service import manage_temp_directory
from .services.file_discovery_service import FileDiscoveryService
from .services.project_management_service import ProjectManagementService
from .services.index_management_service import IndexManagementService
from .services.code_intelligence_service import CodeIntelligenceService
from .services.system_management_service import SystemManagementService
from .utils import handle_mcp_tool_errors
# Setup logging without writing to files
def setup_indexing_performance_logging():
"""Setup logging (stderr only); remove any file-based logging."""
root_logger = logging.getLogger()
root_logger.handlers.clear()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# stderr for errors only
stderr_handler = logging.StreamHandler(sys.stderr)
stderr_handler.setFormatter(formatter)
stderr_handler.setLevel(logging.ERROR)
root_logger.addHandler(stderr_handler)
root_logger.setLevel(logging.DEBUG)
# Initialize logging (no file handlers)
setup_indexing_performance_logging()
logger = logging.getLogger(__name__)
@dataclass
class CodeIndexerContext:
"""Context for the Code Indexer MCP server."""
base_path: str
settings: ProjectSettings
file_count: int = 0
    file_watcher_service: Optional[FileWatcherService] = None
@dataclass
class _CLIConfig:
"""Holds CLI configuration for bootstrap operations."""
project_path: str | None = None
class _BootstrapRequestContext:
"""Minimal request context to reuse business services during bootstrap."""
def __init__(self, lifespan_context: CodeIndexerContext):
self.lifespan_context = lifespan_context
self.session = None
self.meta = None
_CLI_CONFIG = _CLIConfig()
@asynccontextmanager
async def indexer_lifespan(_server: FastMCP) -> AsyncIterator[CodeIndexerContext]:
"""Manage the lifecycle of the Code Indexer MCP server."""
# Don't set a default path, user must explicitly set project path
base_path = "" # Empty string to indicate no path is set
# Initialize settings manager with skip_load=True to skip loading files
settings = ProjectSettings(base_path, skip_load=True)
# Initialize context - file watcher will be initialized later when project path is set
context = CodeIndexerContext(
base_path=base_path,
settings=settings,
file_watcher_service=None
)
try:
# Bootstrap project path when provided via CLI.
if _CLI_CONFIG.project_path:
bootstrap_ctx = Context(
request_context=_BootstrapRequestContext(context),
fastmcp=mcp
)
try:
message = ProjectManagementService(bootstrap_ctx).initialize_project(
_CLI_CONFIG.project_path
)
logger.info("Project initialized from CLI flag: %s", message)
except Exception as exc: # pylint: disable=broad-except
logger.error("Failed to initialize project from CLI flag: %s", exc)
raise RuntimeError(
f"Failed to initialize project path '{_CLI_CONFIG.project_path}'"
) from exc
# Provide context to the server
yield context
finally:
# Stop file watcher if it was started
if context.file_watcher_service:
context.file_watcher_service.stop_monitoring()
# Create the MCP server with lifespan manager
mcp = FastMCP("CodeIndexer", lifespan=indexer_lifespan, dependencies=["pathlib"])
# ----- RESOURCES -----
@mcp.resource("files://{file_path}")
def get_file_content(file_path: str) -> str:
"""Get the content of a specific file."""
decoded_path = unquote(file_path)
ctx = mcp.get_context()
return FileService(ctx).get_file_content(decoded_path)
# ----- TOOLS -----
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def set_project_path(path: str, ctx: Context) -> str:
"""Set the base project path for indexing."""
return ProjectManagementService(ctx).initialize_project(path)
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def search_code_advanced(
pattern: str,
ctx: Context,
case_sensitive: bool = True,
context_lines: int = 0,
    file_pattern: Optional[str] = None,
    fuzzy: bool = False,
    regex: Optional[bool] = None,
start_index: int = 0,
max_results: Optional[int] = 10
) -> Dict[str, Any]:
"""
Search for a code pattern in the project using an advanced, fast tool with pagination support.
This tool automatically selects the best available command-line search tool
(like ugrep, ripgrep, ag, or grep) for maximum performance.
Args:
pattern: The search pattern. Can be literal text or regex (see regex parameter).
case_sensitive: Whether the search should be case-sensitive.
context_lines: Number of lines to show before and after the match.
file_pattern: A glob pattern to filter files to search in
(e.g., "*.py", "*.js", "test_*.py").
All search tools now handle glob patterns consistently:
- ugrep: Uses glob patterns (*.py, *.{js,ts})
- ripgrep: Uses glob patterns (*.py, *.{js,ts})
- ag (Silver Searcher): Automatically converts globs to regex patterns
- grep: Basic glob pattern matching
All common glob patterns like "*.py", "test_*.js", "src/*.ts" are supported.
fuzzy: If True, enables fuzzy/partial matching behavior varies by search tool:
- ugrep: Native fuzzy search with --fuzzy flag (true edit-distance fuzzy search)
- ripgrep, ag, grep, basic: Word boundary pattern matching (not true fuzzy search)
IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary
matching which allows partial matches at word boundaries.
For exact literal matches, set fuzzy=False (default and recommended).
regex: Controls regex pattern matching behavior:
- If True, enables regex pattern matching
- If False, forces literal string search
- If None (default), automatically detects regex patterns and enables regex for patterns like "ERROR|WARN"
The pattern will always be validated for safety to prevent ReDoS attacks.
start_index: Zero-based offset into the flattened match list. Use to fetch subsequent pages.
max_results: Maximum number of matches to return (default 10). Pass None to retrieve all matches.
Returns:
A dictionary containing:
- results: List of matches with file, line, and text keys.
- pagination: Metadata with total_matches, returned, start_index, end_index, has_more,
and optionally max_results.
If an error occurs, an error message is returned instead.
"""
return SearchService(ctx).search_code(
pattern=pattern,
case_sensitive=case_sensitive,
context_lines=context_lines,
file_pattern=file_pattern,
fuzzy=fuzzy,
regex=regex,
start_index=start_index,
max_results=max_results
)
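# Illustrative client-side sketch (not part of the server): page through
# matches by advancing start_index until has_more is False. `call_search` is
# a hypothetical callable that invokes search_code_advanced and returns its
# dict response.
def _fetch_all_matches(call_search, pattern: str, page_size: int = 50):
    matches, start = [], 0
    while True:
        page = call_search(pattern=pattern, start_index=start, max_results=page_size)
        matches.extend(page["results"])
        meta = page["pagination"]
        if not meta["has_more"]:
            return matches
        start = meta["end_index"]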
@mcp.tool()
@handle_mcp_tool_errors(return_type='list')
def find_files(pattern: str, ctx: Context) -> List[str]:
"""
Find files matching a glob pattern using pre-built file index.
Use when:
- Looking for files by pattern (e.g., "*.py", "test_*.js")
- Searching by filename only (e.g., "README.md" finds all README files)
- Checking if specific files exist in the project
- Getting file lists for further analysis
Pattern matching:
- Supports both full path and filename-only matching
- Uses standard glob patterns (*, ?, [])
- Fast lookup using in-memory file index
- Uses forward slashes consistently across all platforms
Args:
pattern: Glob pattern to match files (e.g., "*.py", "test_*.js", "README.md")
Returns:
List of file paths matching the pattern
"""
return FileDiscoveryService(ctx).find_files(pattern)
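# Illustrative sketch of the dual matching described above (full path or
# filename only), assuming the index stores forward-slash relative paths;
# the real matching lives inside the file index, not in this helper.
def _example_pattern_match(rel_path: str, pattern: str) -> bool:
    import fnmatch  # stdlib glob-style matcher
    filename = rel_path.rsplit('/', 1)[-1]
    return fnmatch.fnmatch(rel_path, pattern) or fnmatch.fnmatch(filename, pattern)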
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_file_summary(file_path: str, ctx: Context) -> Dict[str, Any]:
"""
Get a summary of a specific file, including:
- Line count
- Function/class definitions (for supported languages)
- Import statements
- Basic complexity metrics
"""
return CodeIntelligenceService(ctx).analyze_file(file_path)
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def refresh_index(ctx: Context) -> str:
"""
Manually refresh the project index when files have been added/removed/moved.
Use when:
- File watcher is disabled or unavailable
- After large-scale operations (git checkout, merge, pull) that change many files
- When you want immediate index rebuild without waiting for file watcher debounce
- When find_files results seem incomplete or outdated
- For troubleshooting suspected index synchronization issues
Important notes for LLMs:
- Always available as backup when file watcher is not working
- Performs full project re-indexing for complete accuracy
- Use when you suspect the index is stale after file system changes
- **Call this after programmatic file modifications if file watcher seems unresponsive**
- Complements the automatic file watcher system
Returns:
Success message with total file count
"""
return IndexManagementService(ctx).rebuild_index()
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def build_deep_index(ctx: Context) -> str:
"""
Build the deep index (full symbol extraction) for the current project.
This performs a complete re-index and loads it into memory.
"""
return IndexManagementService(ctx).rebuild_deep_index()
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_settings_info(ctx: Context) -> Dict[str, Any]:
"""Get information about the project settings."""
return SettingsService(ctx).get_settings_info()
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def create_temp_directory() -> Dict[str, Any]:
"""Create the temporary directory used for storing index data."""
return manage_temp_directory('create')
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def check_temp_directory() -> Dict[str, Any]:
"""Check the temporary directory used for storing index data."""
return manage_temp_directory('check')
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def clear_settings(ctx: Context) -> str:
"""Clear all settings and cached data."""
return SettingsService(ctx).clear_all_settings()
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def refresh_search_tools(ctx: Context) -> str:
"""
Manually re-detect the available command-line search tools on the system.
This is useful if you have installed a new tool (like ripgrep) after starting the server.
"""
return SearchService(ctx).refresh_search_tools()
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_file_watcher_status(ctx: Context) -> Dict[str, Any]:
"""Get file watcher service status and statistics."""
return SystemManagementService(ctx).get_file_watcher_status()
@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def configure_file_watcher(
ctx: Context,
enabled: Optional[bool] = None,
debounce_seconds: Optional[float] = None,
additional_exclude_patterns: Optional[List[str]] = None
) -> str:
"""Configure file watcher service settings."""
return SystemManagementService(ctx).configure_file_watcher(enabled, debounce_seconds, additional_exclude_patterns)
# ----- PROMPTS -----
# Removed: analyze_code, code_search, set_project prompts
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
"""Parse CLI arguments for the MCP server."""
parser = argparse.ArgumentParser(description="Code Index MCP server")
parser.add_argument(
"--project-path",
dest="project_path",
help="Set the project path on startup (equivalent to calling set_project_path)."
)
parser.add_argument(
"--transport",
choices=["stdio", "sse", "streamable-http"],
default="stdio",
help="Transport protocol to use (default: stdio)."
)
parser.add_argument(
"--mount-path",
dest="mount_path",
default=None,
help="Mount path when using SSE transport."
)
return parser.parse_args(argv)
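# Example (illustrative): passing an explicit argv avoids touching sys.argv,
# which is handy in tests:
#   args = _parse_args(["--transport", "sse", "--mount-path", "/mcp"])
#   assert args.transport == "sse" and args.mount_path == "/mcp"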
def main(argv: list[str] | None = None):
"""Main function to run the MCP server."""
args = _parse_args(argv)
# Store CLI configuration for lifespan bootstrap.
_CLI_CONFIG.project_path = args.project_path
run_kwargs = {"transport": args.transport}
if args.transport == "sse" and args.mount_path:
run_signature = inspect.signature(mcp.run)
if "mount_path" in run_signature.parameters:
run_kwargs["mount_path"] = args.mount_path
else:
logger.warning(
"Ignoring --mount-path because this FastMCP version "
"does not accept the parameter."
)
try:
mcp.run(**run_kwargs)
except RuntimeError as exc:
logger.error("MCP server terminated with error: %s", exc)
raise SystemExit(1) from exc
except Exception as exc: # pylint: disable=broad-except
logger.error("Unexpected MCP server error: %s", exc)
raise
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,48 @@
"""
Service layer for the Code Index MCP server.
This package contains domain-specific services that handle the business logic
for different areas of functionality:
- SearchService: Code search operations and search tool management
- FileService: File operations, content retrieval, and analysis
- SettingsService: Settings management and directory operations
Each service follows a consistent pattern:
- Constructor accepts MCP Context parameter
- Methods correspond to MCP entry points
- Clear domain boundaries with no cross-service dependencies
- Shared utilities accessed through utils module
- Meaningful exceptions raised for error conditions
"""
# New Three-Layer Architecture Services
from .base_service import BaseService
from .project_management_service import ProjectManagementService
from .index_management_service import IndexManagementService
from .file_discovery_service import FileDiscoveryService
from .code_intelligence_service import CodeIntelligenceService
from .system_management_service import SystemManagementService
from .search_service import SearchService # Already follows clean architecture
from .settings_service import SettingsService
# Simple Services
from .file_service import FileService # Simple file reading for resources
from .file_watcher_service import FileWatcherService # Low-level service, still needed
__all__ = [
# New Architecture
'BaseService',
'ProjectManagementService',
'IndexManagementService',
'FileDiscoveryService',
'CodeIntelligenceService',
'SystemManagementService',
'SearchService',
'SettingsService',
# Simple Services
'FileService', # Simple file reading for resources
'FileWatcherService' # Keep as low-level service
]

View File

@@ -0,0 +1,140 @@
"""
Base service class providing common functionality for all services.
This module defines the base service pattern that all domain services inherit from,
ensuring consistent behavior and shared functionality across the service layer.
"""
from abc import ABC
from typing import Optional
from mcp.server.fastmcp import Context
from ..utils import ContextHelper, ValidationHelper
class BaseService(ABC):
"""
Base class for all MCP services.
This class provides common functionality that all services need:
- Context management through ContextHelper
- Common validation patterns
- Shared error checking methods
All domain services should inherit from this class to ensure
consistent behavior and access to shared utilities.
"""
def __init__(self, ctx: Context):
"""
Initialize the base service.
Args:
ctx: The MCP Context object containing request and lifespan context
"""
self.ctx = ctx
self.helper = ContextHelper(ctx)
def _validate_project_setup(self) -> Optional[str]:
"""
Validate that the project is properly set up.
This method checks if the base path is set and valid, which is
required for most operations.
Returns:
Error message if project is not set up properly, None if valid
"""
return self.helper.get_base_path_error()
def _require_project_setup(self) -> None:
"""
Ensure project is set up, raising an exception if not.
This is a convenience method for operations that absolutely
require a valid project setup.
Raises:
ValueError: If project is not properly set up
"""
error = self._validate_project_setup()
if error:
raise ValueError(error)
def _validate_file_path(self, file_path: str) -> Optional[str]:
"""
Validate a file path for security and accessibility.
Args:
file_path: The file path to validate
Returns:
Error message if validation fails, None if valid
"""
return ValidationHelper.validate_file_path(file_path, self.helper.base_path)
def _require_valid_file_path(self, file_path: str) -> None:
"""
Ensure file path is valid, raising an exception if not.
Args:
file_path: The file path to validate
Raises:
ValueError: If file path is invalid
"""
error = self._validate_file_path(file_path)
if error:
raise ValueError(error)
@property
def base_path(self) -> str:
"""
Convenient access to the base project path.
Returns:
The base project path
"""
return self.helper.base_path
@property
def settings(self):
"""
Convenient access to the project settings.
Returns:
The ProjectSettings instance
"""
return self.helper.settings
@property
def file_count(self) -> int:
"""
Convenient access to the current file count.
Returns:
The number of indexed files
"""
return self.helper.file_count
@property
def index_provider(self):
"""
Convenient access to the unified index provider.
Returns:
The current IIndexProvider instance, or None if not available
"""
if self.helper.index_manager:
return self.helper.index_manager.get_provider()
return None
@property
def index_manager(self):
"""
Convenient access to the index manager.
Returns:
The index manager instance, or None if not available
"""
return self.helper.index_manager
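# Illustrative sketch (hypothetical service, not shipped with the package):
# a minimal subclass showing the validate-then-act pattern BaseService
# encourages.
class _ExampleService(BaseService):
    def project_banner(self) -> str:
        # Raises ValueError via the shared helper when no project is set.
        self._require_project_setup()
        return f"Active project: {self.base_path}"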

View File

@@ -0,0 +1,104 @@
"""
Code Intelligence Service - Business logic for code analysis and understanding.
This service handles the business logic for analyzing code files using the new
JSON-based indexing system optimized for LLM consumption.
"""
import logging
import os
from typing import Dict, Any
from .base_service import BaseService
from ..tools.filesystem import FileSystemTool
from ..indexing import get_index_manager
logger = logging.getLogger(__name__)
class CodeIntelligenceService(BaseService):
"""
Business service for code analysis and intelligence using JSON indexing.
This service provides comprehensive code analysis using the optimized
JSON-based indexing system for fast LLM-friendly responses.
"""
def __init__(self, ctx):
super().__init__(ctx)
self._filesystem_tool = FileSystemTool()
def analyze_file(self, file_path: str) -> Dict[str, Any]:
"""
Analyze a file and return comprehensive intelligence.
This is the main business method that orchestrates the file analysis
workflow, choosing the best analysis strategy and providing rich
insights about the code.
Args:
file_path: Path to the file to analyze (relative to project root)
Returns:
Dictionary with comprehensive file analysis
Raises:
ValueError: If file path is invalid or analysis fails
"""
# Business validation
self._validate_analysis_request(file_path)
# Use the global index manager
index_manager = get_index_manager()
# Debug logging
logger.info(f"Getting file summary for: {file_path}")
logger.info(f"Index manager state - Project path: {index_manager.project_path}")
logger.info(f"Index manager state - Has builder: {index_manager.index_builder is not None}")
if index_manager.index_builder:
logger.info(f"Index manager state - Has index: {index_manager.index_builder.in_memory_index is not None}")
# Get file summary from JSON index
summary = index_manager.get_file_summary(file_path)
logger.info(f"Summary result: {summary is not None}")
# If deep index isn't available yet, return a helpful hint instead of error
if not summary:
return {
"status": "needs_deep_index",
"message": "Deep index not available. Please run build_deep_index before calling get_file_summary.",
"file_path": file_path
}
return summary
def _validate_analysis_request(self, file_path: str) -> None:
"""
Validate the file analysis request according to business rules.
Args:
file_path: File path to validate
Raises:
ValueError: If validation fails
"""
# Business rule: Project must be set up OR auto-initialization must be possible
if self.base_path:
# Standard validation if project is set up in context
self._require_valid_file_path(file_path)
full_path = os.path.join(self.base_path, file_path)
if not os.path.exists(full_path):
raise ValueError(f"File does not exist: {file_path}")
else:
# Allow proceeding if auto-initialization might work
# The index manager will handle project discovery
logger.info("Project not set in context, relying on index auto-initialization")
# Basic file path validation only
if not file_path or '..' in file_path:
raise ValueError(f"Invalid file path: {file_path}")

View File

@@ -0,0 +1,78 @@
"""
File Discovery Service - Business logic for intelligent file discovery.
This service handles the business logic for finding files using the new
JSON-based indexing system optimized for LLM consumption.
"""
from typing import Dict, Any, List, Optional
from dataclasses import dataclass
from .base_service import BaseService
from ..indexing import get_shallow_index_manager
@dataclass
class FileDiscoveryResult:
"""Business result for file discovery operations."""
files: List[str]
total_count: int
pattern_used: str
search_strategy: str
metadata: Dict[str, Any]
class FileDiscoveryService(BaseService):
"""
Business service for intelligent file discovery using JSON indexing.
This service provides fast file discovery using the optimized JSON
indexing system for efficient LLM-oriented responses.
"""
def __init__(self, ctx):
super().__init__(ctx)
self._index_manager = get_shallow_index_manager()
def find_files(self, pattern: str, max_results: Optional[int] = None) -> List[str]:
"""
Find files matching the given pattern using JSON indexing.
Args:
pattern: Glob pattern to search for (e.g., "*.py", "test_*.js")
max_results: Maximum number of results to return (None for no limit)
Returns:
List of file paths matching the pattern
Raises:
ValueError: If pattern is invalid or project not set up
"""
# Business validation
self._validate_discovery_request(pattern)
# Get files from JSON index
files = self._index_manager.find_files(pattern)
# Apply max_results limit if specified
if max_results and len(files) > max_results:
files = files[:max_results]
return files
def _validate_discovery_request(self, pattern: str) -> None:
"""
Validate the file discovery request according to business rules.
Args:
pattern: Pattern to validate
Raises:
ValueError: If validation fails
"""
# Ensure project is set up
self._require_project_setup()
# Validate pattern
if not pattern or not pattern.strip():
raise ValueError("Search pattern cannot be empty")

View File

@@ -0,0 +1,62 @@
"""
File Service - Simple file reading service for MCP resources.
This service provides simple file content reading functionality for MCP resources.
Complex file analysis has been moved to CodeIntelligenceService.
Usage:
- get_file_content() - used by files://{file_path} resource
"""
import os
from .base_service import BaseService
class FileService(BaseService):
"""
Simple service for file content reading.
This service handles basic file reading operations for MCP resources.
Complex analysis functionality has been moved to CodeIntelligenceService.
"""
def get_file_content(self, file_path: str) -> str:
"""
Get file content for MCP resource.
Args:
file_path: Path to the file (relative to project root)
Returns:
File content as string
Raises:
ValueError: If project is not set up or path is invalid
FileNotFoundError: If the file is not found or not readable
"""
self._require_project_setup()
self._require_valid_file_path(file_path)
# Build full path
full_path = os.path.join(self.base_path, file_path)
try:
# Try UTF-8 first (most common)
with open(full_path, 'r', encoding='utf-8') as f:
return f.read()
except UnicodeDecodeError:
# Try other encodings if UTF-8 fails
encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']
for encoding in encodings:
try:
with open(full_path, 'r', encoding=encoding) as f:
return f.read()
except UnicodeDecodeError:
continue
raise ValueError(
f"Could not decode file {file_path}. File may have "
f"unsupported encoding."
) from None
except (FileNotFoundError, PermissionError, OSError) as e:
raise FileNotFoundError(f"Error reading file: {e}") from e

View File

@@ -0,0 +1,418 @@
"""
File Watcher Service for automatic index rebuilds.
This module provides file system monitoring capabilities that automatically
trigger index rebuilds when relevant files are modified, created, or deleted.
It uses the watchdog library for cross-platform file system event monitoring.
"""
# pylint: disable=missing-function-docstring # Fallback stub methods don't need docstrings
import logging
import os
import traceback
from threading import Timer
from typing import Optional, Callable, List
from pathlib import Path
try:
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileSystemEvent
WATCHDOG_AVAILABLE = True
except ImportError:
# Fallback classes for when watchdog is not available
class Observer:
"""Fallback Observer class when watchdog library is not available."""
def __init__(self):
pass
def schedule(self, *args, **kwargs):
pass
def start(self):
pass
def stop(self):
pass
def join(self, *args, **kwargs):
pass
def is_alive(self):
return False
class FileSystemEventHandler:
"""Fallback FileSystemEventHandler class when watchdog library is not available."""
def __init__(self):
pass
class FileSystemEvent:
"""Fallback FileSystemEvent class when watchdog library is not available."""
def __init__(self):
self.is_directory = False
self.src_path = ""
self.event_type = ""
WATCHDOG_AVAILABLE = False
from .base_service import BaseService
from ..constants import SUPPORTED_EXTENSIONS
class FileWatcherService(BaseService):
"""
Service for monitoring file system changes and triggering index rebuilds.
This service uses the watchdog library to monitor file system events and
automatically triggers background index rebuilds when relevant files change.
It includes intelligent debouncing to batch rapid changes and filtering
to only monitor relevant file types.
"""
MAX_RESTART_ATTEMPTS = 3
def __init__(self, ctx):
"""
Initialize the file watcher service.
Args:
ctx: The MCP Context object
"""
super().__init__(ctx)
self.logger = logging.getLogger(__name__)
self.observer: Optional[Observer] = None
self.event_handler: Optional[DebounceEventHandler] = None
self.is_monitoring = False
self.restart_attempts = 0
self.rebuild_callback: Optional[Callable] = None
# Check if watchdog is available
if not WATCHDOG_AVAILABLE:
self.logger.warning("Watchdog library not available - file watcher disabled")
def start_monitoring(self, rebuild_callback: Callable) -> bool:
"""
Start file system monitoring.
Args:
rebuild_callback: Function to call when rebuild is needed
Returns:
True if monitoring started successfully, False otherwise
"""
if not WATCHDOG_AVAILABLE:
self.logger.warning("Cannot start file watcher - watchdog library not available")
return False
if self.is_monitoring:
self.logger.debug("File watcher already monitoring")
return True
# Validate project setup
error = self._validate_project_setup()
if error:
self.logger.error("Cannot start file watcher: %s", error)
return False
self.rebuild_callback = rebuild_callback
# Get debounce seconds from config
config = self.settings.get_file_watcher_config()
debounce_seconds = config.get('debounce_seconds', 6.0)
try:
self.observer = Observer()
self.event_handler = DebounceEventHandler(
debounce_seconds=debounce_seconds,
rebuild_callback=self.rebuild_callback,
base_path=Path(self.base_path),
logger=self.logger
)
# Log detailed Observer setup
watch_path = str(self.base_path)
self.logger.debug("Scheduling Observer for path: %s", watch_path)
self.observer.schedule(
self.event_handler,
watch_path,
recursive=True
)
# Log Observer start
self.logger.debug("Starting Observer...")
self.observer.start()
self.is_monitoring = True
self.restart_attempts = 0
# Log Observer thread info
if hasattr(self.observer, '_thread'):
self.logger.debug("Observer thread: %s", self.observer._thread)
# Verify observer is actually running
if self.observer.is_alive():
self.logger.info(
"File watcher started successfully",
extra={
"debounce_seconds": debounce_seconds,
"monitored_path": str(self.base_path),
"supported_extensions": len(SUPPORTED_EXTENSIONS)
}
)
# Diagnostic logging to verify the Observer is functioning
self.logger.debug("Observer thread is alive: %s", self.observer.is_alive())
self.logger.debug("Monitored path exists: %s", os.path.exists(str(self.base_path)))
self.logger.debug("Event handler is set: %s", self.event_handler is not None)
# Log current directory for comparison
current_dir = os.getcwd()
self.logger.debug("Current working directory: %s", current_dir)
self.logger.debug("Are paths same: %s", os.path.normpath(current_dir) == os.path.normpath(str(self.base_path)))
return True
else:
self.logger.error("File watcher failed to start - Observer not alive")
return False
except Exception as e:
self.logger.warning("Failed to start file watcher: %s", e)
self.logger.info("Falling back to reactive index refresh")
return False
def stop_monitoring(self) -> None:
"""
Stop file system monitoring and cleanup all resources.
This method ensures complete cleanup of:
- Observer thread
- Event handler
- Debounce timers
- Monitoring state
"""
if not self.observer and not self.is_monitoring:
# Already stopped or never started
return
self.logger.info("Stopping file watcher monitoring...")
try:
# Step 1: Stop the observer first
if self.observer:
self.logger.debug("Stopping observer...")
self.observer.stop()
# Step 2: Cancel any active debounce timer
if self.event_handler and self.event_handler.debounce_timer:
self.logger.debug("Cancelling debounce timer...")
self.event_handler.debounce_timer.cancel()
# Step 3: Wait for observer thread to finish (with timeout)
self.logger.debug("Waiting for observer thread to finish...")
self.observer.join(timeout=5.0)
# Step 4: Check if thread actually finished
if self.observer.is_alive():
self.logger.warning("Observer thread did not stop within timeout")
else:
self.logger.debug("Observer thread stopped successfully")
# Step 5: Clear all references
self.observer = None
self.event_handler = None
self.rebuild_callback = None
self.is_monitoring = False
self.logger.info("File watcher stopped and cleaned up successfully")
except Exception as e:
self.logger.error("Error stopping file watcher: %s", e)
# Force cleanup even if there were errors
self.observer = None
self.event_handler = None
self.rebuild_callback = None
self.is_monitoring = False
def is_active(self) -> bool:
"""
Check if file watcher is actively monitoring.
Returns:
True if actively monitoring, False otherwise
"""
return bool(self.is_monitoring and self.observer and self.observer.is_alive())
def restart_observer(self) -> bool:
"""
Attempt to restart the file system observer.
Returns:
True if restart successful, False otherwise
"""
if self.restart_attempts >= self.MAX_RESTART_ATTEMPTS:
self.logger.error("Max restart attempts reached, file watcher disabled")
return False
self.logger.info("Attempting to restart file watcher (attempt %d)",
self.restart_attempts + 1)
self.restart_attempts += 1
# Stop current observer if running
if self.observer:
try:
self.observer.stop()
self.observer.join(timeout=2.0)
except Exception as e:
self.logger.warning("Error stopping observer during restart: %s", e)
# Start new observer
try:
self.observer = Observer()
self.observer.schedule(
self.event_handler,
str(self.base_path),
recursive=True
)
self.observer.start()
self.is_monitoring = True
self.logger.info("File watcher restarted successfully")
return True
except Exception as e:
self.logger.error("Failed to restart file watcher: %s", e)
return False
def get_status(self) -> dict:
"""
Get current file watcher status information.
Returns:
Dictionary containing status information
"""
# Get current debounce seconds from config
config = self.settings.get_file_watcher_config()
debounce_seconds = config.get('debounce_seconds', 6.0)
return {
"available": WATCHDOG_AVAILABLE,
"active": self.is_active(),
"monitoring": self.is_monitoring,
"restart_attempts": self.restart_attempts,
"debounce_seconds": debounce_seconds,
"base_path": self.base_path if self.base_path else None,
"observer_alive": self.observer.is_alive() if self.observer else False
}
class DebounceEventHandler(FileSystemEventHandler):
"""
File system event handler with debouncing capability.
This handler filters file system events to only relevant files and
implements a debounce mechanism to batch rapid changes into single
rebuild operations.
"""
def __init__(self, debounce_seconds: float, rebuild_callback: Callable,
base_path: Path, logger: logging.Logger, additional_excludes: Optional[List[str]] = None):
"""
Initialize the debounce event handler.
Args:
debounce_seconds: Number of seconds to wait before triggering rebuild
rebuild_callback: Function to call when rebuild is needed
base_path: Base project path for filtering
logger: Logger instance for debug messages
additional_excludes: Additional patterns to exclude
"""
from ..utils import FileFilter
super().__init__()
self.debounce_seconds = debounce_seconds
self.rebuild_callback = rebuild_callback
self.base_path = base_path
self.debounce_timer: Optional[Timer] = None
self.logger = logger
# Use centralized file filtering
self.file_filter = FileFilter(additional_excludes)
def on_any_event(self, event: FileSystemEvent) -> None:
"""
Handle any file system event.
Args:
event: The file system event
"""
# Check if event should be processed
should_process = self.should_process_event(event)
if should_process:
self.logger.info("File changed: %s - %s", event.event_type, event.src_path)
self.reset_debounce_timer()
else:
# Only log at debug level for filtered events
self.logger.debug("Filtered: %s - %s", event.event_type, event.src_path)
def should_process_event(self, event: FileSystemEvent) -> bool:
"""
Determine if event should trigger index rebuild using centralized filtering.
Args:
event: The file system event to evaluate
Returns:
True if event should trigger rebuild, False otherwise
"""
# Skip directory events
if event.is_directory:
self.logger.debug("Skipping directory event: %s", event.src_path)
return False
# Select path to check: dest_path for moves, src_path for others
if event.event_type == 'moved':
if not hasattr(event, 'dest_path'):
return False
target_path = event.dest_path
else:
target_path = event.src_path
# Use centralized filtering logic
try:
path = Path(target_path)
should_process = self.file_filter.should_process_path(path, self.base_path)
# Skip temporary files using centralized logic
if not should_process or self.file_filter.is_temporary_file(path):
return False
return True
except Exception:
return False
def reset_debounce_timer(self) -> None:
"""Reset the debounce timer, canceling any existing timer."""
if self.debounce_timer:
self.debounce_timer.cancel()
self.debounce_timer = Timer(
self.debounce_seconds,
self.trigger_rebuild
)
self.debounce_timer.start()
def trigger_rebuild(self) -> None:
"""Trigger index rebuild after debounce period."""
self.logger.info("File changes detected, triggering rebuild")
if self.rebuild_callback:
try:
self.rebuild_callback()
except Exception as e:
self.logger.error("Rebuild callback failed: %s", e)
traceback_msg = traceback.format_exc()
self.logger.error("Traceback: %s", traceback_msg)
else:
self.logger.warning("No rebuild callback configured")

View File

@@ -0,0 +1,198 @@
"""
Index Management Service - Business logic for index lifecycle management.
This service handles the business logic for index rebuilding, status monitoring,
and index-related operations using the new JSON-based indexing system.
"""
import time
import logging
import os
import json
from typing import Dict, Any
from dataclasses import dataclass
from .base_service import BaseService
from ..indexing import get_index_manager, get_shallow_index_manager, DeepIndexManager

logger = logging.getLogger(__name__)
@dataclass
class IndexRebuildResult:
"""Business result for index rebuild operations."""
file_count: int
rebuild_time: float
status: str
message: str
class IndexManagementService(BaseService):
"""
Business service for index lifecycle management.
This service orchestrates index management workflows using the new
JSON-based indexing system for optimal LLM performance.
"""
def __init__(self, ctx):
super().__init__(ctx)
# Deep manager (symbols/files, legacy JSON index manager)
self._index_manager = get_index_manager()
# Shallow manager (file-list only) for default workflows
self._shallow_manager = get_shallow_index_manager()
# Optional wrapper for explicit deep builds
self._deep_wrapper = DeepIndexManager()
def rebuild_index(self) -> str:
"""
Rebuild the project index (DEFAULT: shallow file list).
For deep/symbol rebuilds, use build_deep_index() tool instead.
Returns:
Success message with rebuild information
Raises:
ValueError: If project not set up or rebuild fails
"""
# Business validation
self._validate_rebuild_request()
# Shallow rebuild only (fast path)
if not self._shallow_manager.set_project_path(self.base_path):
raise RuntimeError("Failed to set project path (shallow) in index manager")
if not self._shallow_manager.build_index():
raise RuntimeError("Failed to rebuild shallow index")
try:
count = len(self._shallow_manager.get_file_list())
except Exception:
count = 0
return f"Shallow index re-built with {count} files."
def get_rebuild_status(self) -> Dict[str, Any]:
"""
Get current index rebuild status information.
Returns:
Dictionary with rebuild status and metadata
"""
# Check if project is set up
if not self.base_path:
return {
'status': 'not_initialized',
'message': 'Project not initialized',
'is_rebuilding': False
}
# Get index stats from the new JSON system
stats = self._index_manager.get_index_stats()
return {
'status': 'ready' if stats.get('status') == 'loaded' else 'needs_rebuild',
'index_available': stats.get('status') == 'loaded',
'is_rebuilding': False,
'project_path': self.base_path,
'file_count': stats.get('indexed_files', 0),
'total_symbols': stats.get('total_symbols', 0),
'symbol_types': stats.get('symbol_types', {}),
'languages': stats.get('languages', [])
}
def _validate_rebuild_request(self) -> None:
"""
Validate the index rebuild request according to business rules.
Raises:
ValueError: If validation fails
"""
# Business rule: Project must be set up
self._require_project_setup()
def _execute_rebuild_workflow(self) -> IndexRebuildResult:
"""
Execute the core index rebuild business workflow.
Returns:
IndexRebuildResult with rebuild data
"""
start_time = time.time()
# Set project path in index manager
if not self._index_manager.set_project_path(self.base_path):
raise RuntimeError("Failed to set project path in index manager")
# Rebuild the index
if not self._index_manager.refresh_index():
raise RuntimeError("Failed to rebuild index")
# Get stats for result
stats = self._index_manager.get_index_stats()
file_count = stats.get('indexed_files', 0)
rebuild_time = time.time() - start_time
return IndexRebuildResult(
file_count=file_count,
rebuild_time=rebuild_time,
status='success',
message=f"Index rebuilt successfully with {file_count} files"
)
def _format_rebuild_result(self, result: IndexRebuildResult) -> str:
"""
Format the rebuild result according to business requirements.
Args:
result: Rebuild result data
Returns:
Formatted result string for MCP response
"""
return f"Project re-indexed. Found {result.file_count} files."
def build_shallow_index(self) -> str:
"""
Build and persist the shallow index (file list only).
Returns:
Success message including file count if available.
Raises:
ValueError/RuntimeError on validation or build failure
"""
# Ensure project is set up
self._require_project_setup()
# Initialize manager with current base path
if not self._shallow_manager.set_project_path(self.base_path):
raise RuntimeError("Failed to set project path in index manager")
# Build shallow index
if not self._shallow_manager.build_index():
raise RuntimeError("Failed to build shallow index")
# Try to report count
count = 0
try:
shallow_path = getattr(self._shallow_manager, 'index_path', None)
if shallow_path and os.path.exists(shallow_path):
with open(shallow_path, 'r', encoding='utf-8') as f:
data = json.load(f)
if isinstance(data, list):
count = len(data)
except Exception as e: # noqa: BLE001 - safe fallback to zero
logger.debug(f"Unable to read shallow index count: {e}")
return f"Shallow index built{f' with {count} files' if count else ''}."
def rebuild_deep_index(self) -> str:
"""Rebuild the deep index using the original workflow."""
# Business validation
self._validate_rebuild_request()
# Deep rebuild via existing workflow
result = self._execute_rebuild_workflow()
return self._format_rebuild_result(result)
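# Illustrative usage (sketch): given an MCP Context `ctx`, the default path
# rebuilds only the file list, while the deep path extracts symbols as well.
#   svc = IndexManagementService(ctx)
#   svc.rebuild_index()        # shallow: file list only (fast)
#   svc.rebuild_deep_index()   # deep: full symbol extraction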

View File

@@ -0,0 +1,375 @@
"""
Project Management Service - Business logic for project lifecycle management.
This service handles the business logic for project initialization, configuration,
and lifecycle management using the new JSON-based indexing system.
"""
import logging
from typing import Dict, Any
from dataclasses import dataclass
from contextlib import contextmanager
from .base_service import BaseService
from ..utils.response_formatter import ResponseFormatter
from ..constants import SUPPORTED_EXTENSIONS
from ..indexing import get_index_manager, get_shallow_index_manager
logger = logging.getLogger(__name__)
@dataclass
class ProjectInitializationResult:
"""Business result for project initialization operations."""
project_path: str
file_count: int
index_source: str # 'loaded_existing' or 'built_new'
search_capabilities: str
monitoring_status: str
message: str
class ProjectManagementService(BaseService):
"""
Business service for project lifecycle management.
This service orchestrates project initialization workflows by composing
technical tools to achieve business goals like setting up projects,
managing configurations, and coordinating system components.
"""
def __init__(self, ctx):
super().__init__(ctx)
# Deep index manager (legacy full index)
self._index_manager = get_index_manager()
# Shallow index manager (default for initialization)
self._shallow_manager = get_shallow_index_manager()
from ..tools.config import ProjectConfigTool
self._config_tool = ProjectConfigTool()
# Import FileWatcherTool locally to avoid circular import
from ..tools.monitoring import FileWatcherTool
self._watcher_tool = FileWatcherTool(ctx)
@contextmanager
def _noop_operation(self, *_args, **_kwargs):
yield
def initialize_project(self, path: str) -> str:
"""
Initialize a project with comprehensive business logic.
This is the main business method that orchestrates the project
initialization workflow, handling validation, cleanup, setup,
and coordination of all project components.
Args:
path: Project directory path to initialize
Returns:
Success message with project information
Raises:
ValueError: If path is invalid or initialization fails
"""
# Business validation
self._validate_initialization_request(path)
# Business workflow: Execute initialization
result = self._execute_initialization_workflow(path)
# Business result formatting
return self._format_initialization_result(result)
def _validate_initialization_request(self, path: str) -> None:
"""
Validate the project initialization request according to business rules.
Args:
path: Project path to validate
Raises:
ValueError: If validation fails
"""
# Business rule: Path must be valid
error = self._config_tool.validate_project_path(path)
if error:
raise ValueError(error)
def _execute_initialization_workflow(self, path: str) -> ProjectInitializationResult:
"""
Execute the core project initialization business workflow.
Args:
path: Project path to initialize
Returns:
ProjectInitializationResult with initialization data
"""
# Business step 1: Initialize config tool
self._config_tool.initialize_settings(path)
# Normalize path for consistent processing
normalized_path = self._config_tool.normalize_project_path(path)
# Business step 2: Cleanup existing project state
self._cleanup_existing_project()
# Business step 3: Initialize shallow index by default (fast path)
index_result = self._initialize_shallow_index_manager(normalized_path)
# Business step 3.1: Store index manager in context for other services
self.helper.update_index_manager(self._index_manager)
# Business step 4: Setup file monitoring
monitoring_result = self._setup_file_monitoring(normalized_path)
# Business step 5: Update system state
self._update_project_state(normalized_path, index_result['file_count'])
# Business step 6: Get search capabilities info
search_info = self._get_search_capabilities_info()
return ProjectInitializationResult(
project_path=normalized_path,
file_count=index_result['file_count'],
index_source=index_result['source'],
search_capabilities=search_info,
monitoring_status=monitoring_result,
message=f"Project initialized: {normalized_path}"
)
def _cleanup_existing_project(self) -> None:
"""Business logic to cleanup existing project state."""
with self._noop_operation():
# Stop existing file monitoring
self._watcher_tool.stop_existing_watcher()
# Clear existing index cache
self.helper.clear_index_cache()
# Clear any existing index state
pass
def _initialize_shallow_index_manager(self, project_path: str) -> Dict[str, Any]:
"""
Business logic to initialize the shallow index manager by default.
Args:
project_path: Project path
Returns:
Dictionary with initialization results
"""
# Set project path in shallow manager
if not self._shallow_manager.set_project_path(project_path):
raise RuntimeError(f"Failed to set project path (shallow): {project_path}")
# Update context
self.helper.update_base_path(project_path)
# Try to load existing shallow index or build new one
if self._shallow_manager.load_index():
source = "loaded_existing"
else:
if not self._shallow_manager.build_index():
raise RuntimeError("Failed to build shallow index")
source = "built_new"
# Determine file count from shallow list
try:
files = self._shallow_manager.get_file_list()
file_count = len(files)
except Exception: # noqa: BLE001 - safe fallback
file_count = 0
return {
'file_count': file_count,
'source': source,
'total_symbols': 0,
'languages': []
}
def _is_valid_existing_index(self, index_data: Dict[str, Any]) -> bool:
"""
Business rule to determine if existing index is valid and usable.
Args:
index_data: Index data to validate
Returns:
True if index is valid and usable, False otherwise
"""
if not index_data or not isinstance(index_data, dict):
return False
# Business rule: Must have new format metadata
if 'index_metadata' not in index_data:
return False
# Business rule: Must be a compatible version (major version >= 3).
# Compare the major component numerically; plain string comparison
# would mis-order versions such as '10.0' vs '3.0'.
version = index_data.get('index_metadata', {}).get('version', '')
try:
major = int(str(version).split('.')[0])
except (ValueError, TypeError):
return False
return major >= 3
def _load_existing_index(self, index_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Business logic to load and use existing index.
Args:
index_data: Existing index data
Returns:
Dictionary with loading results
"""
# Note: Legacy index loading is now handled by UnifiedIndexManager
# This method is kept for backward compatibility but functionality moved
# Extract file count from metadata
file_count = index_data.get('project_metadata', {}).get('total_files', 0)
return {
'file_count': file_count,
'source': 'loaded_existing'
}
def _setup_file_monitoring(self, project_path: str) -> str:
"""
Business logic to setup file monitoring for the project.
Args:
project_path: Project path to monitor
Returns:
String describing monitoring setup result
"""
try:
# Create rebuild callback that uses the JSON index manager
def rebuild_callback():
logger.info("File watcher triggered rebuild callback")
try:
logger.debug(f"Starting shallow index rebuild for: {project_path}")
# Business logic: file changed, rebuild using the SHALLOW index manager
if not self._shallow_manager.set_project_path(project_path):
logger.warning("Shallow manager set_project_path failed")
return False
if self._shallow_manager.build_index():
files = self._shallow_manager.get_file_list()
logger.info(f"File watcher shallow rebuild completed successfully - files {len(files)}")
return True
logger.warning("File watcher shallow rebuild failed")
return False
except Exception as e:
import traceback
logger.error(f"File watcher shallow rebuild failed: {e}")
logger.error(f"Traceback: {traceback.format_exc()}")
return False
# Start monitoring using watcher tool
success = self._watcher_tool.start_monitoring(project_path, rebuild_callback)
if success:
# Store watcher in context for later access
self._watcher_tool.store_in_context()
# No logging
return "monitoring_active"
else:
self._watcher_tool.record_error("Failed to start file monitoring")
return "monitoring_failed"
except Exception as e:
error_msg = f"File monitoring setup failed: {e}"
self._watcher_tool.record_error(error_msg)
return "monitoring_error"
def _update_project_state(self, project_path: str, file_count: int) -> None:
"""Business logic to update system state after project initialization."""
# Update context with file count
self.helper.update_file_count(file_count)
# No logging
def _get_search_capabilities_info(self) -> str:
"""Business logic to get search capabilities information."""
search_info = self._config_tool.get_search_tool_info()
if search_info['available']:
return f"Advanced search enabled ({search_info['name']})"
else:
return "Basic search available"
def _format_initialization_result(self, result: ProjectInitializationResult) -> str:
"""
Format the initialization result according to business requirements.
Args:
result: Initialization result data
Returns:
Formatted result string for MCP response
"""
if result.index_source == 'unified_manager':
message = (f"Project path set to: {result.project_path}. "
f"Initialized unified index with {result.file_count} files. "
f"{result.search_capabilities}.")
elif result.index_source == 'failed':
message = (f"Project path set to: {result.project_path}. "
f"Index initialization failed. Some features may be limited. "
f"{result.search_capabilities}.")
else:
message = (f"Project path set to: {result.project_path}. "
f"Indexed {result.file_count} files. "
f"{result.search_capabilities}.")
if result.monitoring_status != "monitoring_active":
message += " (File monitoring unavailable - use manual refresh)"
return message
def get_project_config(self) -> str:
"""
Get the current project configuration for MCP resource.
Returns:
JSON formatted configuration string
"""
# Check if project is configured
if not self.helper.base_path:
config_data = {
"status": "not_configured",
"message": ("Project path not set. Please use set_project_path "
"to set a project directory first."),
"supported_extensions": SUPPORTED_EXTENSIONS
}
return ResponseFormatter.config_response(config_data)
# Get settings stats
settings_stats = self.helper.settings.get_stats() if self.helper.settings else {}
config_data = {
"base_path": self.helper.base_path,
"supported_extensions": SUPPORTED_EXTENSIONS,
"file_count": self.helper.file_count,
"settings_directory": self.helper.settings.settings_path if self.helper.settings else "",
"settings_stats": settings_stats
}
return ResponseFormatter.config_response(config_data)
# Removed: get_project_structure; the project structure resource is deprecated

View File

@@ -0,0 +1,269 @@
"""
Search service for the Code Index MCP server.
This service handles code search operations, search tool management,
and search strategy selection.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from .base_service import BaseService
from ..utils import FileFilter, ResponseFormatter, ValidationHelper
from ..search.base import is_safe_regex_pattern
class SearchService(BaseService):
"""Service for managing code search operations."""
def __init__(self, ctx):
super().__init__(ctx)
self.file_filter = self._create_file_filter()
def search_code( # pylint: disable=too-many-arguments, too-many-locals
self,
pattern: str,
case_sensitive: bool = True,
context_lines: int = 0,
file_pattern: Optional[str] = None,
fuzzy: bool = False,
regex: Optional[bool] = None,
start_index: int = 0,
max_results: Optional[int] = 10
) -> Dict[str, Any]:
"""Search for code patterns in the project."""
self._require_project_setup()
if regex is None:
regex = is_safe_regex_pattern(pattern)
error = ValidationHelper.validate_search_pattern(pattern, regex)
if error:
raise ValueError(error)
if file_pattern:
error = ValidationHelper.validate_glob_pattern(file_pattern)
if error:
raise ValueError(f"Invalid file pattern: {error}")
pagination_error = ValidationHelper.validate_pagination(start_index, max_results)
if pagination_error:
raise ValueError(pagination_error)
if not self.settings:
raise ValueError("Settings not available")
strategy = self.settings.get_preferred_search_tool()
if not strategy:
raise ValueError("No search strategies available")
self._configure_strategy(strategy)
try:
results = strategy.search(
pattern=pattern,
base_path=self.base_path,
case_sensitive=case_sensitive,
context_lines=context_lines,
file_pattern=file_pattern,
fuzzy=fuzzy,
regex=regex
)
filtered = self._filter_results(results)
formatted_results, pagination = self._paginate_results(
filtered,
start_index=start_index,
max_results=max_results
)
return ResponseFormatter.search_results_response(
formatted_results,
pagination
)
except Exception as exc:
raise ValueError(f"Search failed using '{strategy.name}': {exc}") from exc
def refresh_search_tools(self) -> str:
"""Refresh the available search tools."""
if not self.settings:
raise ValueError("Settings not available")
self.settings.refresh_available_strategies()
config = self.settings.get_search_tools_config()
available = config['available_tools']
preferred = config['preferred_tool']
return f"Search tools refreshed. Available: {available}. Preferred: {preferred}."
def get_search_capabilities(self) -> Dict[str, Any]:
"""Get information about search capabilities and available tools."""
if not self.settings:
return {"error": "Settings not available"}
config = self.settings.get_search_tools_config()
capabilities = {
"available_tools": config.get('available_tools', []),
"preferred_tool": config.get('preferred_tool', 'basic'),
"supports_regex": True,
"supports_fuzzy": True,
"supports_case_sensitivity": True,
"supports_context_lines": True,
"supports_file_patterns": True
}
return capabilities
def _configure_strategy(self, strategy) -> None:
"""Apply shared exclusion configuration to the strategy if supported."""
configure = getattr(strategy, 'configure_excludes', None)
if not configure:
return
try:
configure(self.file_filter)
except Exception: # pragma: no cover - defensive fallback
pass
def _create_file_filter(self) -> FileFilter:
"""Build a shared file filter drawing from project settings."""
additional_dirs: List[str] = []
additional_file_patterns: List[str] = []
settings = self.settings
if settings:
try:
config = settings.get_file_watcher_config()
except Exception: # pragma: no cover - fallback if config fails
config = {}
for key in ('exclude_patterns', 'additional_exclude_patterns'):
patterns = config.get(key) or []
for pattern in patterns:
if not isinstance(pattern, str):
continue
normalized = pattern.strip()
if not normalized:
continue
additional_dirs.append(normalized)
additional_file_patterns.append(normalized)
file_filter = FileFilter(additional_dirs or None)
if additional_file_patterns:
file_filter.exclude_files.update(additional_file_patterns)
return file_filter
def _filter_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
"""Filter out matches that reside under excluded paths."""
if not isinstance(results, dict) or not results:
return results
if 'error' in results or not self.file_filter or not self.base_path:
return results
base_path = Path(self.base_path)
filtered: Dict[str, Any] = {}
for rel_path, matches in results.items():
if not isinstance(rel_path, str):
continue
normalized = Path(rel_path.replace('\\', '/'))
try:
absolute = (base_path / normalized).resolve()
except Exception: # pragma: no cover - invalid path safety
continue
try:
if self.file_filter.should_process_path(absolute, base_path):
filtered[rel_path] = matches
except Exception: # pragma: no cover - defensive fallback
continue
return filtered
def _paginate_results(
self,
results: Dict[str, Any],
start_index: int,
max_results: Optional[int]
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
"""Apply pagination to search results and format them for responses."""
total_matches = 0
for matches in results.values():
if isinstance(matches, (list, tuple)):
total_matches += len(matches)
effective_start = min(max(start_index, 0), total_matches)
if total_matches == 0 or effective_start >= total_matches:
pagination = self._build_pagination_metadata(
total_matches=total_matches,
returned=0,
start_index=effective_start,
max_results=max_results
)
return [], pagination
collected: List[Dict[str, Any]] = []
current_index = 0
sorted_items = sorted(
(
(path, matches)
for path, matches in results.items()
if isinstance(path, str) and isinstance(matches, (list, tuple))
),
key=lambda item: item[0]
)
for path, matches in sorted_items:
sorted_matches = sorted(
(match for match in matches if isinstance(match, (list, tuple)) and len(match) >= 2),
key=lambda pair: pair[0]
)
for line_number, content, *_ in sorted_matches:
if current_index >= effective_start:
if max_results is None or len(collected) < max_results:
collected.append({
"file": path,
"line": line_number,
"text": content
})
else:
break
current_index += 1
if max_results is not None and len(collected) >= max_results:
break
pagination = self._build_pagination_metadata(
total_matches=total_matches,
returned=len(collected),
start_index=effective_start,
max_results=max_results
)
return collected, pagination
@staticmethod
def _build_pagination_metadata(
total_matches: int,
returned: int,
start_index: int,
max_results: Optional[int]
) -> Dict[str, Any]:
"""Construct pagination metadata for search responses."""
end_index = start_index + returned
metadata: Dict[str, Any] = {
"total_matches": total_matches,
"returned": returned,
"start_index": start_index,
"has_more": end_index < total_matches
}
if max_results is not None:
metadata["max_results"] = max_results
metadata["end_index"] = end_index
return metadata
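# Worked example (sketch) for _build_pagination_metadata: 25 total matches,
# page size 10, second page:
#   _build_pagination_metadata(total_matches=25, returned=10,
#                              start_index=10, max_results=10)
#   -> {"total_matches": 25, "returned": 10, "start_index": 10,
#       "has_more": True, "max_results": 10, "end_index": 20}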

View File

@@ -0,0 +1,191 @@
"""
Settings management service for the Code Index MCP server.
This service handles settings information, statistics,
temporary directory management, and settings cleanup operations.
"""
import os
import tempfile
from typing import Dict, Any
from .base_service import BaseService
from ..utils import ResponseFormatter
from ..constants import SETTINGS_DIR
from ..project_settings import ProjectSettings
from ..indexing import get_index_manager
def manage_temp_directory(action: str) -> Dict[str, Any]:
"""
Manage temporary directory operations.
This is a standalone function that doesn't require project context.
Handles the logic for create_temp_directory and check_temp_directory MCP tools.
Args:
action: The action to perform ('create' or 'check')
Returns:
Dictionary with directory information and operation results
Raises:
ValueError: If action is invalid or operation fails
"""
if action not in ['create', 'check']:
raise ValueError(f"Invalid action: {action}. Must be 'create' or 'check'")
# Try to get the actual temp directory from index manager, fallback to default
try:
index_manager = get_index_manager()
temp_dir = index_manager.temp_dir if index_manager.temp_dir else os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
except Exception:
temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
if action == 'create':
existed_before = os.path.exists(temp_dir)
try:
# Use ProjectSettings to handle directory creation consistently
ProjectSettings("", skip_load=True)
result = ResponseFormatter.directory_info_response(
temp_directory=temp_dir,
exists=os.path.exists(temp_dir),
is_directory=os.path.isdir(temp_dir)
)
result["existed_before"] = existed_before
result["created"] = not existed_before
return result
except (OSError, IOError, ValueError) as e:
return ResponseFormatter.directory_info_response(
temp_directory=temp_dir,
exists=False,
error=str(e)
)
else: # action == 'check'
result = ResponseFormatter.directory_info_response(
temp_directory=temp_dir,
exists=os.path.exists(temp_dir),
is_directory=os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
)
result["temp_root"] = tempfile.gettempdir()
# If the directory exists, list its contents
if result["exists"] and result["is_directory"]:
try:
contents = os.listdir(temp_dir)
result["contents"] = contents
result["subdirectories"] = []
# Check each subdirectory
for item in contents:
item_path = os.path.join(temp_dir, item)
if os.path.isdir(item_path):
subdir_info = {
"name": item,
"path": item_path,
"contents": os.listdir(item_path) if os.path.exists(item_path) else []
}
result["subdirectories"].append(subdir_info)
except (OSError, PermissionError) as e:
result["error"] = str(e)
return result
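# Illustrative usage: 'check' inspects without creating, while 'create' is
# effectively idempotent and reports whether the directory already existed.
#   info = manage_temp_directory('check')
#   info = manage_temp_directory('create')   # info["created"] is False on reruns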
class SettingsService(BaseService):
"""
Service for managing settings and directory operations.
This service handles:
- Settings information and statistics
- Temporary directory management
- Settings cleanup operations
- Configuration data access
"""
def get_settings_info(self) -> Dict[str, Any]:
"""
Get comprehensive settings information.
Handles the logic for get_settings_info MCP tool.
Returns:
Dictionary with settings directory, config, stats, and status information
"""
temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
# Get the actual index directory from the index manager
index_manager = get_index_manager()
actual_temp_dir = index_manager.temp_dir if index_manager.temp_dir else temp_dir
# Check if base_path is set
if not self.base_path:
return ResponseFormatter.settings_info_response(
settings_directory="",
temp_directory=actual_temp_dir,
temp_directory_exists=os.path.exists(actual_temp_dir),
config={},
stats={},
exists=False,
status="not_configured",
message="Project path not set. Please use set_project_path to set a "
"project directory first."
)
# Get config and stats
config = self.settings.load_config() if self.settings else {}
stats = self.settings.get_stats() if self.settings else {}
settings_directory = actual_temp_dir
exists = os.path.exists(settings_directory) if settings_directory else False
return ResponseFormatter.settings_info_response(
settings_directory=settings_directory,
temp_directory=actual_temp_dir,
temp_directory_exists=os.path.exists(actual_temp_dir),
config=config,
stats=stats,
exists=exists
)
def clear_all_settings(self) -> str:
"""
Clear all settings and cached data.
Handles the logic for clear_settings MCP tool.
Returns:
Success message confirming settings were cleared
"""
if self.settings:
self.settings.clear()
return "Project settings, index, and cache have been cleared."
def get_settings_stats(self) -> str:
"""
Get settings statistics as JSON string.
Handles the logic for settings://stats MCP resource.
Returns:
JSON formatted settings statistics
"""
if not self.settings:
stats_data = {"error": "Settings not available"}
else:
stats_data = self.settings.get_stats()
return ResponseFormatter.stats_response(stats_data)

View File

@@ -0,0 +1,407 @@
"""
System Management Service - Business logic for system configuration and monitoring.
This service handles the business logic for system management operations including
file watcher status, configuration management, and system health monitoring.
It composes technical tools to achieve business goals.
"""
from typing import Dict, Any, Optional
from dataclasses import dataclass
from .index_management_service import IndexManagementService
from .base_service import BaseService
# FileWatcherTool will be imported locally to avoid circular import
from ..tools.config import ProjectConfigTool, SettingsTool
@dataclass
class FileWatcherStatus:
"""Business result for file watcher status operations."""
available: bool
active: bool
status: str
message: Optional[str]
error_info: Optional[Dict[str, Any]]
configuration: Dict[str, Any]
rebuild_status: Dict[str, Any]
recommendations: list[str]
class SystemManagementService(BaseService):
"""
Business service for system configuration and monitoring.
This service orchestrates system management workflows by composing
technical tools to achieve business goals like monitoring file watchers,
managing configurations, and providing system health insights.
"""
def __init__(self, ctx):
super().__init__(ctx)
# Import FileWatcherTool locally to avoid circular import
from ..tools.monitoring import FileWatcherTool
self._watcher_tool = FileWatcherTool(ctx)
self._config_tool = ProjectConfigTool()
self._settings_tool = SettingsTool()
def get_file_watcher_status(self) -> Dict[str, Any]:
"""
Get comprehensive file watcher status with business intelligence.
This is the main business method that orchestrates the file watcher
status workflow, analyzing system state, providing recommendations,
and formatting comprehensive status information.
Returns:
Dictionary with comprehensive file watcher status
"""
# Business workflow: Analyze system state
status_result = self._analyze_file_watcher_state()
# Business result formatting
return self._format_status_result(status_result)
def configure_file_watcher(self, enabled: Optional[bool] = None,
debounce_seconds: Optional[float] = None,
additional_exclude_patterns: Optional[list] = None) -> str:
"""
Configure file watcher settings with business validation.
Args:
enabled: Whether to enable file watcher
debounce_seconds: Debounce time in seconds
additional_exclude_patterns: Additional patterns to exclude
Returns:
Success message with configuration details
Raises:
ValueError: If configuration is invalid
"""
# Business validation
self._validate_configuration_request(enabled, debounce_seconds, additional_exclude_patterns)
# Business workflow: Apply configuration
result = self._apply_file_watcher_configuration(enabled, debounce_seconds, additional_exclude_patterns)
return result
def _analyze_file_watcher_state(self) -> FileWatcherStatus:
"""
Business logic to analyze comprehensive file watcher state.
Returns:
FileWatcherStatus with complete analysis
"""
# Business step 1: Check for error conditions
error_info = self._check_for_watcher_errors()
if error_info:
return self._create_error_status(error_info)
# Business step 2: Check initialization state
watcher_service = self._watcher_tool.get_from_context()
if not watcher_service:
return self._create_not_initialized_status()
# Business step 3: Get active status
return self._create_active_status(watcher_service)
def _check_for_watcher_errors(self) -> Optional[Dict[str, Any]]:
"""
Business logic to check for file watcher error conditions.
Returns:
Error information dictionary or None if no errors
"""
# Check context for recorded errors
if hasattr(self.ctx.request_context.lifespan_context, 'file_watcher_error'):
return self.ctx.request_context.lifespan_context.file_watcher_error
return None
def _create_error_status(self, error_info: Dict[str, Any]) -> FileWatcherStatus:
"""
Business logic to create error status with recommendations.
Args:
error_info: Error information from context
Returns:
FileWatcherStatus for error condition
"""
# Get configuration if available
configuration = self._get_file_watcher_configuration()
# Get rebuild status
rebuild_status = self._get_rebuild_status()
# Business logic: Generate error-specific recommendations
recommendations = [
"Use refresh_index tool for manual updates",
"File watcher auto-refresh is disabled due to errors",
"Consider restarting the project or checking system permissions"
]
return FileWatcherStatus(
available=True,
active=False,
status="error",
message=error_info.get('message', 'File watcher error occurred'),
error_info=error_info,
configuration=configuration,
rebuild_status=rebuild_status,
recommendations=recommendations
)
def _create_not_initialized_status(self) -> FileWatcherStatus:
"""
Business logic to create not-initialized status.
Returns:
FileWatcherStatus for not-initialized condition
"""
# Get basic configuration
configuration = self._get_file_watcher_configuration()
# Get rebuild status
rebuild_status = self._get_rebuild_status()
# Business logic: Generate initialization recommendations
recommendations = [
"Use set_project_path tool to initialize file watcher",
"File monitoring will be enabled after project initialization"
]
return FileWatcherStatus(
available=True,
active=False,
status="not_initialized",
message="File watcher service not initialized. Set project path to enable auto-refresh.",
error_info=None,
configuration=configuration,
rebuild_status=rebuild_status,
recommendations=recommendations
)
def _create_active_status(self, watcher_service) -> FileWatcherStatus:
"""
Business logic to create active status with comprehensive information.
Args:
watcher_service: Active file watcher service
Returns:
FileWatcherStatus for active condition
"""
# Get detailed status from watcher service
watcher_status = watcher_service.get_status()
# Get configuration
configuration = self._get_file_watcher_configuration()
# Get rebuild status
rebuild_status = self._get_rebuild_status()
# Business logic: Generate status-specific recommendations
recommendations = self._generate_active_recommendations(watcher_status)
return FileWatcherStatus(
available=watcher_status.get('available', True),
active=watcher_status.get('active', False),
status=watcher_status.get('status', 'active'),
message=watcher_status.get('message'),
error_info=None,
configuration=configuration,
rebuild_status=rebuild_status,
recommendations=recommendations
)
def _get_file_watcher_configuration(self) -> Dict[str, Any]:
"""
Business logic to get file watcher configuration safely.
Returns:
Configuration dictionary
"""
try:
# Try to get from project settings
if (hasattr(self.ctx.request_context.lifespan_context, 'settings') and
self.ctx.request_context.lifespan_context.settings):
return self.ctx.request_context.lifespan_context.settings.get_file_watcher_config()
# Fallback to default configuration
return {
'enabled': True,
'debounce_seconds': 6.0,
'additional_exclude_patterns': [],
'note': 'Default configuration - project not fully initialized'
}
except Exception as e:
return {
'error': f'Could not load configuration: {e}',
'enabled': True,
'debounce_seconds': 6.0
}
def _get_rebuild_status(self) -> Dict[str, Any]:
"""
Business logic to get index rebuild status safely.
Returns:
Rebuild status dictionary
"""
try:
index_service = IndexManagementService(self.ctx)
return index_service.get_rebuild_status()
except Exception as e:
return {
'status': 'unknown',
'error': f'Could not get rebuild status: {e}'
}
def _generate_active_recommendations(self, watcher_status: Dict[str, Any]) -> list[str]:
"""
Business logic to generate recommendations for active file watcher.
Args:
watcher_status: Current watcher status
Returns:
List of recommendations
"""
recommendations = []
if watcher_status.get('active', False):
recommendations.append("File watcher is active - automatic index updates enabled")
recommendations.append("Files will be re-indexed automatically when changed")
else:
recommendations.append("File watcher is available but not active")
recommendations.append("Use refresh_index for manual updates")
# Add performance recommendations
restart_attempts = watcher_status.get('restart_attempts', 0)
if restart_attempts > 0:
recommendations.append(f"File watcher has restarted {restart_attempts} times - monitor for stability")
return recommendations
def _validate_configuration_request(self, enabled: Optional[bool],
debounce_seconds: Optional[float],
additional_exclude_patterns: Optional[list]) -> None:
"""
Business validation for file watcher configuration.
Args:
enabled: Enable flag
debounce_seconds: Debounce time
additional_exclude_patterns: Exclude patterns
Raises:
ValueError: If validation fails
"""
# Business rule: Enabled flag must be boolean if provided
if enabled is not None and not isinstance(enabled, bool):
raise ValueError("Enabled flag must be a boolean value")
# Business rule: Debounce seconds must be reasonable
if debounce_seconds is not None:
if debounce_seconds < 0.1:
raise ValueError("Debounce seconds must be at least 0.1")
if debounce_seconds > 300: # 5 minutes
raise ValueError("Debounce seconds cannot exceed 300 (5 minutes)")
# Business rule: Exclude patterns must be valid
if additional_exclude_patterns is not None:
if not isinstance(additional_exclude_patterns, list):
raise ValueError("Additional exclude patterns must be a list")
for pattern in additional_exclude_patterns:
if not isinstance(pattern, str):
raise ValueError("All exclude patterns must be strings")
if not pattern.strip():
raise ValueError("Exclude patterns cannot be empty")
def _apply_file_watcher_configuration(self, enabled: Optional[bool],
debounce_seconds: Optional[float],
additional_exclude_patterns: Optional[list]) -> str:
"""
Business logic to apply file watcher configuration.
Args:
enabled: Enable flag
debounce_seconds: Debounce time
additional_exclude_patterns: Exclude patterns
Returns:
Success message
Raises:
ValueError: If configuration cannot be applied
"""
# Business rule: Settings must be available
if (not hasattr(self.ctx.request_context.lifespan_context, 'settings') or
not self.ctx.request_context.lifespan_context.settings):
raise ValueError("Settings not available - project path not set")
settings = self.ctx.request_context.lifespan_context.settings
# Build updates dictionary
updates = {}
if enabled is not None:
updates["enabled"] = enabled
if debounce_seconds is not None:
updates["debounce_seconds"] = debounce_seconds
if additional_exclude_patterns is not None:
updates["additional_exclude_patterns"] = additional_exclude_patterns
if not updates:
return "No configuration changes specified"
# Apply configuration
settings.update_file_watcher_config(updates)
# Business logic: Generate informative result message
changes_summary = []
if 'enabled' in updates:
changes_summary.append(f"enabled={updates['enabled']}")
if 'debounce_seconds' in updates:
changes_summary.append(f"debounce={updates['debounce_seconds']}s")
if 'additional_exclude_patterns' in updates:
pattern_count = len(updates['additional_exclude_patterns'])
changes_summary.append(f"exclude_patterns={pattern_count}")
changes_str = ", ".join(changes_summary)
return (f"File watcher configuration updated: {changes_str}. "
f"Restart may be required for changes to take effect.")
def _format_status_result(self, status_result: FileWatcherStatus) -> Dict[str, Any]:
"""
Format the status result according to business requirements.
Args:
status_result: Status analysis result
Returns:
Formatted result dictionary for MCP response
"""
result = {
'available': status_result.available,
'active': status_result.active,
'status': status_result.status,
'configuration': status_result.configuration,
'rebuild_status': status_result.rebuild_status,
'recommendations': status_result.recommendations
}
# Add optional fields
if status_result.message:
result['message'] = status_result.message
if status_result.error_info:
result['error'] = status_result.error_info
result['manual_refresh_required'] = True
return result
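A hedged sketch of driving SystemManagementService from a tool handler; ctx is assumed to be a FastMCP Context with initialized settings, and the configuration values are illustrative:

    def tune_watcher(ctx):
        service = SystemManagementService(ctx)
        status = service.get_file_watcher_status()    # includes recommendations list
        try:
            message = service.configure_file_watcher(
                enabled=True,
                debounce_seconds=10.0,                # validated to the 0.1-300s range
                additional_exclude_patterns=["*.log"],
            )
        except ValueError as exc:                     # business validation failures
            message = f"Rejected: {exc}"
        return status, message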

View File

@@ -0,0 +1,19 @@
"""
Tool Layer - Technical components for the Code Index MCP server.
This package contains pure technical components that provide specific
capabilities without business logic. These tools are composed by the
business layer to achieve business goals.
"""
from .filesystem import FileMatchingTool, FileSystemTool
from .config import ProjectConfigTool, SettingsTool
from .monitoring import FileWatcherTool
__all__ = [
'FileMatchingTool',
'FileSystemTool',
'ProjectConfigTool',
'SettingsTool',
'FileWatcherTool'
]

View File

@@ -0,0 +1,8 @@
"""
Configuration Tools - Technical components for configuration management.
"""
from .project_config_tool import ProjectConfigTool
from .settings_tool import SettingsTool
__all__ = ['ProjectConfigTool', 'SettingsTool']

View File

@@ -0,0 +1,308 @@
"""
Project Configuration Tool - Pure technical component for project configuration operations.
This tool handles low-level project configuration operations without any business logic.
"""
import os
from typing import Dict, Any, Optional
from pathlib import Path
from ...project_settings import ProjectSettings
class ProjectConfigTool:
"""
Pure technical component for project configuration operations.
This tool provides low-level configuration management capabilities
without any business logic or decision making.
"""
def __init__(self):
self._settings: Optional[ProjectSettings] = None
self._project_path: Optional[str] = None
def initialize_settings(self, project_path: str) -> ProjectSettings:
"""
Initialize project settings for the given path.
Args:
project_path: Absolute path to the project directory
Returns:
ProjectSettings instance
Raises:
ValueError: If project path is invalid
"""
if not Path(project_path).exists():
raise ValueError(f"Project path does not exist: {project_path}")
if not Path(project_path).is_dir():
raise ValueError(f"Project path is not a directory: {project_path}")
self._project_path = project_path
self._settings = ProjectSettings(project_path, skip_load=False)
return self._settings
def load_existing_index(self) -> Optional[Dict[str, Any]]:
"""
Load existing index data if available.
Returns:
Index data dictionary or None if not available
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized. Call initialize_settings() first.")
try:
return self._settings.load_index()
except Exception:
return None
def save_project_config(self, config_data: Dict[str, Any]) -> None:
"""
Save project configuration data.
Args:
config_data: Configuration data to save
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
self._settings.save_config(config_data)
def save_index_data(self, index_data: Dict[str, Any]) -> None:
"""
Save index data to persistent storage.
Args:
index_data: Index data to save
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
self._settings.save_index(index_data)
def check_index_version(self) -> bool:
"""
Check if JSON index is the latest version.
Returns:
True if JSON index exists and is recent, False if needs rebuild
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
# Check if JSON index exists and is fresh
from ...indexing import get_index_manager
index_manager = get_index_manager()
# Set project path if available
if self._settings.base_path:
index_manager.set_project_path(self._settings.base_path)
stats = index_manager.get_index_stats()
return stats.get('status') == 'loaded'
return False
def cleanup_legacy_files(self) -> None:
"""
Clean up legacy index files.
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
self._settings.cleanup_legacy_files()
def get_search_tool_info(self) -> Dict[str, Any]:
"""
Get information about available search tools.
Returns:
Dictionary with search tool information
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
search_tool = self._settings.get_preferred_search_tool()
return {
'available': search_tool is not None,
'name': search_tool.name if search_tool else None,
'description': "Advanced search enabled" if search_tool else "Basic search available"
}
def get_file_watcher_config(self) -> Dict[str, Any]:
"""
Get file watcher configuration.
Returns:
File watcher configuration dictionary
Raises:
RuntimeError: If settings not initialized
"""
if not self._settings:
raise RuntimeError("Settings not initialized")
return self._settings.get_file_watcher_config()
def create_default_config(self, project_path: str) -> Dict[str, Any]:
"""
Create default project configuration.
Args:
project_path: Project path for the configuration
Returns:
Default configuration dictionary
"""
from ...utils import FileFilter
file_filter = FileFilter()
return {
"base_path": project_path,
"supported_extensions": list(file_filter.supported_extensions),
"last_indexed": None,
"file_watcher": self.get_file_watcher_config() if self._settings else {}
}
def validate_project_path(self, path: str) -> Optional[str]:
"""
Validate project path.
Args:
path: Path to validate
Returns:
Error message if invalid, None if valid
"""
if not path or not path.strip():
return "Project path cannot be empty"
try:
norm_path = os.path.normpath(path)
abs_path = os.path.abspath(norm_path)
except (OSError, ValueError) as e:
return f"Invalid path format: {str(e)}"
if not os.path.exists(abs_path):
return f"Path does not exist: {abs_path}"
if not os.path.isdir(abs_path):
return f"Path is not a directory: {abs_path}"
return None
def normalize_project_path(self, path: str) -> str:
"""
Normalize and get absolute project path.
Args:
path: Path to normalize
Returns:
Normalized absolute path
"""
norm_path = os.path.normpath(path)
return os.path.abspath(norm_path)
def get_settings_path(self) -> Optional[str]:
"""
Get the settings directory path.
Returns:
Settings directory path or None if not initialized
"""
return self._settings.settings_path if self._settings else None
def get_project_path(self) -> Optional[str]:
"""
Get the current project path.
Returns:
Project path or None if not set
"""
return self._project_path
def get_basic_project_structure(self, project_path: str) -> Dict[str, Any]:
"""
Get basic project directory structure.
Args:
project_path: Path to analyze
Returns:
Basic directory structure dictionary
"""
from ...utils import FileFilter
file_filter = FileFilter()
def build_tree(path: str, max_depth: int = 3, current_depth: int = 0) -> Dict[str, Any]:
"""Build directory tree with limited depth using centralized filtering."""
if current_depth >= max_depth:
return {"type": "directory", "truncated": True}
try:
items = []
path_obj = Path(path)
for item in sorted(path_obj.iterdir()):
if item.is_dir():
# Use centralized directory filtering
if not file_filter.should_exclude_directory(item.name):
items.append({
"name": item.name,
"type": "directory",
"children": build_tree(str(item), max_depth, current_depth + 1)
})
else:
# Use centralized file filtering
if not file_filter.should_exclude_file(item):
items.append({
"name": item.name,
"type": "file",
"size": item.stat().st_size if item.exists() else 0
})
return {"type": "directory", "children": items}
except (OSError, PermissionError):
return {"type": "directory", "error": "Access denied"}
try:
root_name = Path(project_path).name
structure = {
"name": root_name,
"path": project_path,
"type": "directory",
"children": build_tree(project_path)["children"]
}
return structure
except Exception as e:
return {
"error": f"Failed to build project structure: {e}",
"path": project_path
}
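A minimal sketch of the intended call sequence, assuming the project directory exists; the path is illustrative:

    tool = ProjectConfigTool()
    path = tool.normalize_project_path("./demo-project")    # illustrative path
    error = tool.validate_project_path(path)
    if error is None:
        tool.initialize_settings(path)
        config = tool.create_default_config(path)
        tool.save_project_config(config)
        existing_index = tool.load_existing_index()         # None until an index is saved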

View File

@@ -0,0 +1,100 @@
"""
Settings Tool - Pure technical component for settings operations.
This tool handles low-level settings operations without any business logic.
"""
import os
import tempfile
from typing import Dict, Any
from ...constants import SETTINGS_DIR
class SettingsTool:
"""
Pure technical component for settings operations.
This tool provides low-level settings management capabilities
without any business logic or decision making.
"""
def __init__(self):
pass
def get_temp_directory_path(self) -> str:
"""
Get the path to the temporary directory for settings.
Returns:
Path to the temporary settings directory
"""
return os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
def create_temp_directory(self) -> Dict[str, Any]:
"""
Create the temporary directory for settings.
Returns:
Dictionary with creation results
"""
temp_dir = self.get_temp_directory_path()
existed_before = os.path.exists(temp_dir)
try:
os.makedirs(temp_dir, exist_ok=True)
return {
"temp_directory": temp_dir,
"exists": os.path.exists(temp_dir),
"is_directory": os.path.isdir(temp_dir),
"existed_before": existed_before,
"created": not existed_before
}
except (OSError, IOError) as e:
return {
"temp_directory": temp_dir,
"exists": False,
"error": str(e)
}
def check_temp_directory(self) -> Dict[str, Any]:
"""
Check the status of the temporary directory.
Returns:
Dictionary with directory status information
"""
temp_dir = self.get_temp_directory_path()
result = {
"temp_directory": temp_dir,
"temp_root": tempfile.gettempdir(),
"exists": os.path.exists(temp_dir),
"is_directory": os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
}
# If the directory exists, list its contents
if result["exists"] and result["is_directory"]:
try:
contents = os.listdir(temp_dir)
result["contents"] = contents
result["subdirectories"] = []
# Check each subdirectory
for item in contents:
item_path = os.path.join(temp_dir, item)
if os.path.isdir(item_path):
subdir_info = {
"name": item,
"path": item_path,
"contents": os.listdir(item_path) if os.path.exists(item_path) else []
}
result["subdirectories"].append(subdir_info)
except (OSError, PermissionError) as e:
result["error"] = str(e)
return result
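A self-contained usage sketch; both calls are idempotent and safe to run repeatedly:

    tool = SettingsTool()
    created = tool.create_temp_directory()   # reports existed_before / created flags
    status = tool.check_temp_directory()     # adds temp_root, contents, subdirectories
    print(created.get("created"), status["exists"])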

View File

@@ -0,0 +1,8 @@
"""
Filesystem Tools - Technical components for file system operations.
"""
from .file_matching_tool import FileMatchingTool
from .file_system_tool import FileSystemTool
__all__ = ['FileMatchingTool', 'FileSystemTool']

View File

@@ -0,0 +1,215 @@
"""
File Matching Tool - Pure technical component for pattern matching operations.
This tool handles file pattern matching without any business logic.
It provides technical capabilities for finding files based on various patterns.
"""
import fnmatch
from typing import List, Set
from pathlib import Path
# FileInfo defined locally for file matching operations
from dataclasses import dataclass
@dataclass
class FileInfo:
"""File information structure."""
relative_path: str
language: str
class FileMatchingTool:
"""
Pure technical component for file pattern matching.
This tool provides low-level pattern matching capabilities without
any business logic. It can match files using glob patterns, regex,
or other matching strategies.
"""
def __init__(self):
pass
def match_glob_pattern(self, files: List[FileInfo], pattern: str) -> List[FileInfo]:
"""
Match files using glob pattern.
Args:
files: List of FileInfo objects to search through
pattern: Glob pattern (e.g., "*.py", "test_*.js", "src/**/*.ts")
Returns:
List of FileInfo objects that match the pattern
"""
if not pattern:
return files
matched_files = []
for file_info in files:
# Try matching against full path
if fnmatch.fnmatch(file_info.relative_path, pattern):
matched_files.append(file_info)
continue
# Try matching against just the filename
filename = Path(file_info.relative_path).name
if fnmatch.fnmatch(filename, pattern):
matched_files.append(file_info)
return matched_files
def match_multiple_patterns(self, files: List[FileInfo], patterns: List[str]) -> List[FileInfo]:
"""
Match files using multiple glob patterns (OR logic).
Args:
files: List of FileInfo objects to search through
patterns: List of glob patterns
Returns:
List of FileInfo objects that match any of the patterns
"""
if not patterns:
return files
matched_files = set()
for pattern in patterns:
pattern_matches = self.match_glob_pattern(files, pattern)
matched_files.update(pattern_matches)
return list(matched_files)
def match_by_language(self, files: List[FileInfo], languages: List[str]) -> List[FileInfo]:
"""
Match files by programming language.
Args:
files: List of FileInfo objects to search through
languages: List of language names (e.g., ["python", "javascript"])
Returns:
List of FileInfo objects with matching languages
"""
if not languages:
return files
# Normalize language names for comparison
normalized_languages = {lang.lower() for lang in languages}
matched_files = []
for file_info in files:
if file_info.language.lower() in normalized_languages:
matched_files.append(file_info)
return matched_files
def match_by_directory(self, files: List[FileInfo], directory_patterns: List[str]) -> List[FileInfo]:
"""
Match files by directory patterns.
Args:
files: List of FileInfo objects to search through
directory_patterns: List of directory patterns (e.g., ["src/*", "test/**"])
Returns:
List of FileInfo objects in matching directories
"""
if not directory_patterns:
return files
matched_files = []
for file_info in files:
file_dir = str(Path(file_info.relative_path).parent)
for dir_pattern in directory_patterns:
if fnmatch.fnmatch(file_dir, dir_pattern):
matched_files.append(file_info)
break
return matched_files
def exclude_patterns(self, files: List[FileInfo], exclude_patterns: List[str]) -> List[FileInfo]:
"""
Exclude files matching the given patterns.
Args:
files: List of FileInfo objects to filter
exclude_patterns: List of patterns to exclude
Returns:
List of FileInfo objects that don't match any exclude pattern
"""
if not exclude_patterns:
return files
filtered_files = []
for file_info in files:
should_exclude = False
for exclude_pattern in exclude_patterns:
if (fnmatch.fnmatch(file_info.relative_path, exclude_pattern) or
fnmatch.fnmatch(Path(file_info.relative_path).name, exclude_pattern)):
should_exclude = True
break
if not should_exclude:
filtered_files.append(file_info)
return filtered_files
def sort_by_relevance(self, files: List[FileInfo], pattern: str) -> List[FileInfo]:
"""
Sort files by relevance to the search pattern.
Args:
files: List of FileInfo objects to sort
pattern: Original search pattern for relevance scoring
Returns:
List of FileInfo objects sorted by relevance (most relevant first)
"""
def relevance_score(file_info: FileInfo) -> int:
"""Calculate relevance score for a file."""
score = 0
filename = Path(file_info.relative_path).name
# Exact filename match gets highest score
if filename == pattern:
score += 100
# Filename starts with pattern
elif filename.startswith(pattern.replace('*', '')):
score += 50
# Pattern appears in filename
elif pattern.replace('*', '') in filename:
score += 25
# Shorter paths are generally more relevant
path_depth = len(Path(file_info.relative_path).parts)
score += max(0, 10 - path_depth)
return score
return sorted(files, key=relevance_score, reverse=True)
def limit_results(self, files: List[FileInfo], max_results: int) -> List[FileInfo]:
"""
Limit the number of results returned.
Args:
files: List of FileInfo objects
max_results: Maximum number of results to return (values <= 0 mean no limit)
Returns:
List of FileInfo objects limited to max_results
"""
if max_results <= 0:
return files
return files[:max_results]
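A small sketch of chaining the matchers; the FileInfo entries are illustrative:

    files = [
        FileInfo("src/app/main.py", "python"),
        FileInfo("tests/test_main.py", "python"),
        FileInfo("web/index.ts", "typescript"),
    ]
    tool = FileMatchingTool()
    hits = tool.match_glob_pattern(files, "test_*.py")    # matches path or bare filename
    hits = tool.exclude_patterns(hits, ["*_wip.py"])      # then drop unwanted patterns
    hits = tool.sort_by_relevance(hits, "test_main.py")   # exact-name hits float to top
    hits = tool.limit_results(hits, 10)                   # <= 0 would mean "no limit"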

View File

@@ -0,0 +1,234 @@
"""
File System Tool - Pure technical component for file system operations.
This tool handles low-level file system operations without any business logic.
"""
import os
from typing import Dict, Any, Optional
from pathlib import Path
class FileSystemTool:
"""
Pure technical component for file system operations.
This tool provides low-level file system capabilities without
any business logic or decision making.
"""
def __init__(self):
pass
def get_file_stats(self, file_path: str) -> Dict[str, Any]:
"""
Get basic file system statistics for a file.
Args:
file_path: Absolute path to the file
Returns:
Dictionary with file statistics
Raises:
FileNotFoundError: If file doesn't exist
OSError: If file cannot be accessed
"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
stat_info = os.stat(file_path)
path_obj = Path(file_path)
return {
'size_bytes': stat_info.st_size,
'modified_time': stat_info.st_mtime,
'created_time': stat_info.st_ctime,
'is_file': path_obj.is_file(),
'is_directory': path_obj.is_dir(),
'extension': path_obj.suffix,
'name': path_obj.name,
'parent': str(path_obj.parent)
}
except OSError as e:
raise OSError(f"Cannot access file {file_path}: {e}") from e
def read_file_content(self, file_path: str) -> str:
"""
Read file content with intelligent encoding detection.
Args:
file_path: Absolute path to the file
Returns:
File content as string
Raises:
FileNotFoundError: If file doesn't exist
ValueError: If file cannot be decoded
"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
# Try UTF-8 first (most common)
try:
with open(file_path, 'r', encoding='utf-8') as f:
return f.read()
except UnicodeDecodeError:
pass
# Try other common encodings; note that latin-1 decodes any byte
# sequence, so this fallback almost always succeeds (possibly as mojibake).
encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as f:
return f.read()
except UnicodeDecodeError:
continue
raise ValueError(f"Could not decode file {file_path} with any supported encoding")
def count_lines(self, file_path: str) -> int:
"""
Count the number of lines in a file.
Args:
file_path: Absolute path to the file
Returns:
Number of lines in the file, or 0 if the file cannot be read (errors are swallowed)
"""
try:
content = self.read_file_content(file_path)
return len(content.splitlines())
except Exception:
# If we can't read the file, return 0
return 0
def detect_language_from_extension(self, file_path: str) -> str:
"""
Detect programming language from file extension.
Args:
file_path: Path to the file
Returns:
Language name or 'unknown'
"""
extension = Path(file_path).suffix.lower()
lang_map = {
'.py': 'python',
'.js': 'javascript',
'.jsx': 'javascript',
'.ts': 'typescript',
'.tsx': 'typescript',
'.java': 'java',
'.cpp': 'cpp',
'.cxx': 'cpp',
'.cc': 'cpp',
'.c': 'c',
'.h': 'c',
'.hpp': 'cpp',
'.hxx': 'cpp',
'.cs': 'csharp',
'.go': 'go',
'.rs': 'rust',
'.php': 'php',
'.rb': 'ruby',
'.swift': 'swift',
'.kt': 'kotlin',
'.scala': 'scala',
'.m': 'objc',
'.mm': 'objc',
'.html': 'html',
'.htm': 'html',
'.css': 'css',
'.scss': 'scss',
'.sass': 'sass',
'.less': 'less',
'.json': 'json',
'.xml': 'xml',
'.yaml': 'yaml',
'.yml': 'yaml',
'.md': 'markdown',
'.txt': 'text',
'.sh': 'shell',
'.bash': 'shell',
'.zsh': 'shell',
'.fish': 'shell',
'.ps1': 'powershell',
'.bat': 'batch',
'.cmd': 'batch'
}
return lang_map.get(extension, 'unknown')
def is_text_file(self, file_path: str) -> bool:
"""
Check if a file is likely a text file.
Args:
file_path: Path to the file
Returns:
True if file appears to be text, False otherwise
"""
try:
# Try to read a small portion of the file
with open(file_path, 'rb') as f:
chunk = f.read(1024)
# Check for null bytes (common in binary files)
if b'\x00' in chunk:
return False
# Try to decode as UTF-8
try:
chunk.decode('utf-8')
return True
except UnicodeDecodeError:
# Try other encodings
for encoding in ['latin-1', 'cp1252']:
try:
chunk.decode(encoding)
return True
except UnicodeDecodeError:
continue
return False
except Exception:
return False
def get_file_size_category(self, file_path: str) -> str:
"""
Categorize file size for analysis purposes.
Args:
file_path: Path to the file
Returns:
Size category: 'tiny', 'small', 'medium', 'large', or 'very_large' ('unknown' if the size cannot be read)
"""
try:
size = os.path.getsize(file_path)
if size < 1024: # < 1KB
return 'tiny'
elif size < 10 * 1024: # < 10KB
return 'small'
elif size < 100 * 1024: # < 100KB
return 'medium'
elif size < 1024 * 1024: # < 1MB
return 'large'
else:
return 'very_large'
except Exception:
return 'unknown'
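A runnable sketch against the current script file (any readable text file works):

    tool = FileSystemTool()
    path = __file__                                    # any readable text file
    if tool.is_text_file(path):
        print(tool.detect_language_from_extension(path),
              tool.count_lines(path),
              tool.get_file_size_category(path))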

View File

@@ -0,0 +1,7 @@
"""
Monitoring Tools - Technical components for file monitoring operations.
"""
from .file_watcher_tool import FileWatcherTool
__all__ = ['FileWatcherTool']

View File

@@ -0,0 +1,134 @@
"""
File Watcher Tool - Pure technical component for file monitoring operations.
This tool handles low-level file watching operations without any business logic.
"""
import time
from typing import Optional, Callable
from ...utils import ContextHelper
from ...services.file_watcher_service import FileWatcherService
class FileWatcherTool:
"""
Pure technical component for file monitoring operations.
This tool provides low-level file watching capabilities without
any business logic or decision making.
"""
def __init__(self, ctx):
self._ctx = ctx
self._file_watcher_service: Optional[FileWatcherService] = None
def create_watcher(self) -> FileWatcherService:
"""
Create a new file watcher service instance.
Returns:
FileWatcherService instance
"""
self._file_watcher_service = FileWatcherService(self._ctx)
return self._file_watcher_service
def start_monitoring(self, project_path: str, rebuild_callback: Callable) -> bool:
"""
Start file monitoring for the given project path.
Args:
project_path: Path to monitor
rebuild_callback: Callback function for rebuild events
Returns:
True if monitoring started successfully, False otherwise
"""
if not self._file_watcher_service:
self._file_watcher_service = self.create_watcher()
# Sanity-check that the requested path matches the context's base path;
# a mismatch is tolerated because the watcher reads its root from the
# context, but it usually indicates a caller bug.
helper = ContextHelper(self._ctx)
if helper.base_path and helper.base_path != project_path:
    pass  # Intentional no-op: monitoring follows the context's base path.
return self._file_watcher_service.start_monitoring(rebuild_callback)
def stop_monitoring(self) -> None:
"""Stop file monitoring if active."""
if self._file_watcher_service:
self._file_watcher_service.stop_monitoring()
def is_monitoring_active(self) -> bool:
"""
Check if file monitoring is currently active.
Returns:
True if monitoring is active, False otherwise
"""
return (self._file_watcher_service is not None and
self._file_watcher_service.is_active())
def get_monitoring_status(self) -> dict:
"""
Get current monitoring status.
Returns:
Dictionary with monitoring status information
"""
if not self._file_watcher_service:
return {
'active': False,
'available': True,
'status': 'not_initialized'
}
return self._file_watcher_service.get_status()
def store_in_context(self) -> None:
"""Store the file watcher service in the MCP context."""
if (self._file_watcher_service and
hasattr(self._ctx.request_context.lifespan_context, '__dict__')):
self._ctx.request_context.lifespan_context.file_watcher_service = self._file_watcher_service
def get_from_context(self) -> Optional[FileWatcherService]:
"""
Get existing file watcher service from context.
Returns:
FileWatcherService instance or None if not found
"""
if hasattr(self._ctx.request_context.lifespan_context, 'file_watcher_service'):
return self._ctx.request_context.lifespan_context.file_watcher_service
return None
def stop_existing_watcher(self) -> None:
"""Stop any existing file watcher from context."""
existing_watcher = self.get_from_context()
if existing_watcher:
existing_watcher.stop_monitoring()
# Clear reference
if hasattr(self._ctx.request_context.lifespan_context, '__dict__'):
self._ctx.request_context.lifespan_context.file_watcher_service = None
def record_error(self, error_message: str) -> None:
"""
Record file watcher error in context for status reporting.
Args:
error_message: Error message to record
"""
error_info = {
'status': 'failed',
'message': f'{error_message}. Auto-refresh disabled. Please use manual refresh.',
'timestamp': time.time(),
'manual_refresh_required': True
}
# Store error in context for status reporting
if hasattr(self._ctx.request_context.lifespan_context, '__dict__'):
self._ctx.request_context.lifespan_context.file_watcher_error = error_info
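A hedged sketch of the start/replace flow; ctx is assumed to be a FastMCP Context, and the no-op lambda stands in for a real re-index callback:

    def enable_watching(ctx, project_path):
        tool = FileWatcherTool(ctx)
        tool.stop_existing_watcher()                   # replace any stale watcher first
        tool.create_watcher()
        if tool.start_monitoring(project_path, rebuild_callback=lambda: None):
            tool.store_in_context()                    # make it visible to other services
        else:
            tool.record_error("File watcher failed to start")
        return tool.get_monitoring_status()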

View File

@@ -0,0 +1,31 @@
"""
Utility modules for the Code Index MCP server.
This package contains shared utilities used across services:
- error_handler: Decorator-based error handling for MCP entry points
- context_helper: Context access utilities and helpers
- validation: Common validation logic
- response_formatter: Response formatting utilities
"""
from .error_handler import (
handle_mcp_errors,
handle_mcp_resource_errors,
handle_mcp_tool_errors,
MCPToolError,
)
from .context_helper import ContextHelper
from .validation import ValidationHelper
from .response_formatter import ResponseFormatter
from .file_filter import FileFilter
__all__ = [
'handle_mcp_errors',
'handle_mcp_resource_errors',
'handle_mcp_tool_errors',
'MCPToolError',
'ContextHelper',
'ValidationHelper',
'ResponseFormatter',
'FileFilter'
]

View File

@@ -0,0 +1,169 @@
"""
Context access utilities and helpers.
This module provides convenient access to MCP Context data and common
operations that services need to perform with the context.
"""
import os
from typing import Optional
from mcp.server.fastmcp import Context
from ..project_settings import ProjectSettings
class ContextHelper:
"""
Helper class for convenient access to MCP Context data.
This class wraps the MCP Context object and provides convenient properties
and methods for accessing commonly needed data like base_path, settings, etc.
"""
def __init__(self, ctx: Context):
"""
Initialize the context helper.
Args:
ctx: The MCP Context object
"""
self.ctx = ctx
@property
def base_path(self) -> str:
"""
Get the base project path from the context.
Returns:
The base project path, or empty string if not set
"""
try:
return self.ctx.request_context.lifespan_context.base_path
except AttributeError:
return ""
@property
def settings(self) -> Optional[ProjectSettings]:
"""
Get the project settings from the context.
Returns:
The ProjectSettings instance, or None if not available
"""
try:
return self.ctx.request_context.lifespan_context.settings
except AttributeError:
return None
@property
def file_count(self) -> int:
"""
Get the current file count from the context.
Returns:
The number of indexed files, or 0 if not available
"""
try:
return self.ctx.request_context.lifespan_context.file_count
except AttributeError:
return 0
@property
def index_manager(self):
"""
Get the unified index manager from the context.
Returns:
The UnifiedIndexManager instance, or None if not available
"""
try:
return getattr(self.ctx.request_context.lifespan_context, 'index_manager', None)
except AttributeError:
return None
def validate_base_path(self) -> bool:
"""
Check if the base path is set and valid.
Returns:
True if base path is set and exists, False otherwise
"""
base_path = self.base_path
return bool(base_path and os.path.exists(base_path))
def get_base_path_error(self) -> Optional[str]:
"""
Get an error message if base path is not properly set.
Returns:
Error message string if base path is invalid, None if valid
"""
if not self.base_path:
return ("Project path not set. Please use set_project_path to set a "
"project directory first.")
if not os.path.exists(self.base_path):
return f"Project path does not exist: {self.base_path}"
if not os.path.isdir(self.base_path):
return f"Project path is not a directory: {self.base_path}"
return None
def update_file_count(self, count: int) -> None:
"""
Update the file count in the context.
Args:
count: The new file count
"""
try:
self.ctx.request_context.lifespan_context.file_count = count
except AttributeError:
pass # Context not available or doesn't support this operation
def update_base_path(self, path: str) -> None:
"""
Update the base path in the context.
Args:
path: The new base path
"""
try:
self.ctx.request_context.lifespan_context.base_path = path
except AttributeError:
pass # Context not available or doesn't support this operation
def update_settings(self, settings: ProjectSettings) -> None:
"""
Update the settings in the context.
Args:
settings: The new ProjectSettings instance
"""
try:
self.ctx.request_context.lifespan_context.settings = settings
except AttributeError:
pass # Context not available or doesn't support this operation
def clear_index_cache(self) -> None:
"""
Clear the index through the unified index manager.
"""
try:
if self.index_manager:
self.index_manager.clear_index()
except AttributeError:
pass
def update_index_manager(self, index_manager) -> None:
"""
Update the index manager in the context.
Args:
index_manager: The new UnifiedIndexManager instance
"""
try:
self.ctx.request_context.lifespan_context.index_manager = index_manager
except AttributeError:
pass # Context not available or doesn't support this operation
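A short sketch of the typical guard-then-update pattern inside a handler; ctx is assumed to be a FastMCP Context:

    def guarded_handler(ctx):
        helper = ContextHelper(ctx)
        error = helper.get_base_path_error()     # None when the project path is valid
        if error:
            return {"error": error}
        helper.update_file_count(helper.file_count + 1)
        return {"base_path": helper.base_path}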

View File

@@ -0,0 +1,122 @@
"""
Decorator-based error handling for MCP entry points.
This module provides consistent error handling across all MCP tools, resources, and prompts.
"""
import functools
import json
from typing import Any, Callable
class MCPToolError(RuntimeError):
"""Exception raised when an MCP entry point fails."""
def __init__(self, message: str):
super().__init__(message)
def handle_mcp_errors(return_type: str = 'str') -> Callable:
"""
Decorator to handle exceptions in MCP entry points consistently.
This decorator catches all exceptions and re-raises them as MCPToolError after
formatting a consistent error message. FastMCP converts the raised exception
into a structured error response for the client.
Args:
return_type: Label used to format the error message for logging/consistency.
- 'str'/'list'/others: Prefixes message with "Error: ..."
- 'dict'/'json': Prefixes message with "Operation failed: ..."
Returns:
Decorator function that wraps MCP entry points with error handling
Example:
@mcp.tool()
@handle_mcp_errors(return_type='str')
def set_project_path(path: str, ctx: Context) -> str:
from ..services.project_management_service import ProjectManagementService
return ProjectManagementService(ctx).initialize_project(path)
@mcp.tool()
@handle_mcp_errors(return_type='dict')
def search_code_advanced(pattern: str, ctx: Context, **kwargs) -> Dict[str, Any]:
return SearchService(ctx).search_code(pattern, **kwargs)
"""
def decorator(func: Callable) -> Callable:
@functools.wraps(func)
def wrapper(*args, **kwargs) -> Any:
try:
return func(*args, **kwargs)
except MCPToolError:
raise
except Exception as exc:
error_message = str(exc)
formatted = _format_error_message(error_message, return_type)
raise MCPToolError(formatted) from exc
return wrapper
return decorator
def handle_mcp_resource_errors(func: Callable) -> Callable:
"""
Specialized error handler for MCP resources that always return strings.
This is a convenience decorator specifically for @mcp.resource decorated functions
which always return string responses.
Args:
func: The MCP resource function to wrap
Returns:
Wrapped function with error handling
Example:
@mcp.resource("config://code-indexer")
@handle_mcp_resource_errors
def get_config(ctx: Context) -> str:
from ..services.project_management_service import ProjectManagementService
return ProjectManagementService(ctx).get_project_config()
"""
return handle_mcp_errors(return_type='str')(func)
def handle_mcp_tool_errors(return_type: str = 'str') -> Callable:
"""
Specialized error handler for MCP tools with flexible return types.
This is a convenience decorator specifically for @mcp.tool decorated functions
which may return either strings or dictionaries.
Args:
return_type: Label describing the successful payload shape (e.g. 'str', 'dict', 'list').
Returns:
Decorator function for MCP tools
Example:
@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def find_files(pattern: str, ctx: Context) -> Dict[str, Any]:
from ..services.file_discovery_service import FileDiscoveryService
return FileDiscoveryService(ctx).find_files(pattern)
"""
return handle_mcp_errors(return_type=return_type)
def _format_error_message(error_message: str, return_type: str) -> str:
"""
Convert an exception message into a consistent string for MCP errors.
Args:
error_message: The raw exception message.
return_type: The declared return type for the decorated entry point.
Returns:
A string representation suitable for raising as MCPToolError.
"""
if return_type in {'dict', 'json'}:
return f"Operation failed: {error_message}"
return f"Error: {error_message}"

View File

@@ -0,0 +1,177 @@
"""
Centralized file filtering logic for the Code Index MCP server.
This module provides unified filtering capabilities used across all components
that need to determine which files and directories should be processed or excluded.
"""
import fnmatch
from pathlib import Path
from typing import List, Optional, Set
from ..constants import FILTER_CONFIG
class FileFilter:
"""Centralized file filtering logic."""
def __init__(self, additional_excludes: Optional[List[str]] = None):
"""
Initialize the file filter.
Args:
additional_excludes: Additional directory patterns to exclude
"""
self.exclude_dirs = set(FILTER_CONFIG["exclude_directories"])
self.exclude_files = set(FILTER_CONFIG["exclude_files"])
self.supported_extensions = set(FILTER_CONFIG["supported_extensions"])
# Add user-defined exclusions
if additional_excludes:
self.exclude_dirs.update(additional_excludes)
def should_exclude_directory(self, dir_name: str) -> bool:
"""
Check if directory should be excluded from processing.
Args:
dir_name: Directory name to check
Returns:
True if directory should be excluded, False otherwise
"""
# Skip hidden directories; the allow-set mirrors the hidden-file
# allow-list in should_exclude_file for consistency.
if dir_name.startswith('.') and dir_name not in {'.env', '.gitignore'}:
return True
# Check against exclude patterns
return dir_name in self.exclude_dirs
def should_exclude_file(self, file_path: Path) -> bool:
"""
Check if file should be excluded from processing.
Args:
file_path: Path object for the file to check
Returns:
True if file should be excluded, False otherwise
"""
# Extension check - only process supported file types
if file_path.suffix.lower() not in self.supported_extensions:
return True
# Hidden files (except specific allowed ones)
if file_path.name.startswith('.') and file_path.name not in {'.gitignore', '.env'}:
return True
# Filename pattern check using glob patterns
for pattern in self.exclude_files:
if fnmatch.fnmatch(file_path.name, pattern):
return True
return False
def should_process_path(self, path: Path, base_path: Path) -> bool:
"""
Unified path processing logic to determine if a file should be processed.
Args:
path: File path to check
base_path: Project base path for relative path calculation
Returns:
True if file should be processed, False otherwise
"""
try:
# Ensure we're working with absolute paths
if not path.is_absolute():
path = base_path / path
# Get relative path from base
relative_path = path.relative_to(base_path)
# Check each path component for excluded directories
for part in relative_path.parts[:-1]: # Exclude filename
if self.should_exclude_directory(part):
return False
# Check file itself
return not self.should_exclude_file(path)
except (ValueError, OSError):
# Path not relative to base_path or other path errors
return False
def is_supported_file_type(self, file_path: Path) -> bool:
"""
Check if file type is supported for indexing.
Args:
file_path: Path to check
Returns:
True if file type is supported, False otherwise
"""
return file_path.suffix.lower() in self.supported_extensions
def is_temporary_file(self, file_path: Path) -> bool:
"""
Check if file appears to be a temporary file.
Args:
file_path: Path to check
Returns:
True if file appears temporary, False otherwise
"""
name = file_path.name
# Common temporary file patterns
temp_patterns = ['*.tmp', '*.temp', '*.swp', '*.swo', '*~']
for pattern in temp_patterns:
if fnmatch.fnmatch(name, pattern):
return True
# Files ending in .bak or .orig
if name.endswith(('.bak', '.orig')):
return True
return False
def filter_file_list(self, files: List[str], base_path: str) -> List[str]:
"""
Filter a list of file paths, keeping only those that should be processed.
Args:
files: List of file paths (absolute or relative)
base_path: Project base path
Returns:
Filtered list of file paths that should be processed
"""
base = Path(base_path)
filtered = []
for file_path_str in files:
file_path = Path(file_path_str)
if self.should_process_path(file_path, base):
filtered.append(file_path_str)
return filtered
def get_exclude_summary(self) -> dict:
"""
Get summary of current exclusion configuration.
Returns:
Dictionary with exclusion configuration details
"""
return {
"exclude_directories_count": len(self.exclude_dirs),
"exclude_files_count": len(self.exclude_files),
"supported_extensions_count": len(self.supported_extensions),
"exclude_directories": sorted(self.exclude_dirs),
"exclude_files": sorted(self.exclude_files)
}
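A minimal sketch, assuming FILTER_CONFIG excludes node_modules and supports .py files (both typical, but defined in constants, not here):

    from pathlib import Path

    filt = FileFilter(additional_excludes=["generated"])       # extra dir to skip
    base = Path("/repo")                                       # illustrative root
    print(filt.should_exclude_directory("node_modules"))       # True under typical config
    print(filt.should_process_path(Path("src/app.py"), base))  # pure path logic, no I/O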

View File

@@ -0,0 +1,372 @@
"""
Response formatting utilities for the MCP server.
This module provides consistent response formatting functions used across
services to ensure uniform response structures and formats.
"""
import json
from typing import Any, Dict, List, Optional, Union
from ..indexing.qualified_names import generate_qualified_name
class ResponseFormatter:
"""
Helper class for formatting responses consistently across services.
This class provides static methods for formatting different types of
responses in a consistent manner.
"""
@staticmethod
def _resolve_qualified_names_in_relationships(
file_path: str,
relationship_list: List[str],
duplicate_names: set,
index_cache: Optional[Dict[str, Any]] = None
) -> List[str]:
"""
Convert simple names to qualified names when duplicates exist.
Args:
file_path: Current file path for context
relationship_list: List of function/class names that may need qualification
duplicate_names: Set of names that have duplicates in the project
index_cache: Optional index cache for duplicate detection
Returns:
List with qualified names where duplicates exist
"""
if not relationship_list or not duplicate_names:
return relationship_list
qualified_list = []
for name in relationship_list:
if name in duplicate_names:
# Convert to qualified name if this name has duplicates
if index_cache and 'files' in index_cache:
# Try to find the actual file where this name is defined
# For now, we'll use the current file path as context
qualified_name = generate_qualified_name(file_path, name)
qualified_list.append(qualified_name)
else:
# Fallback: keep original name if we can't resolve
qualified_list.append(name)
else:
# No duplicates, keep original name
qualified_list.append(name)
return qualified_list
@staticmethod
def _get_duplicate_names_from_index(index_cache: Optional[Dict[str, Any]] = None) -> Dict[str, set]:
"""
Extract duplicate function and class names from index cache.
Args:
index_cache: Optional index cache
Returns:
Dictionary with 'functions' and 'classes' sets of duplicate names
"""
duplicates = {'functions': set(), 'classes': set()}
if not index_cache:
return duplicates
# Duplicate detection functionality removed - was legacy code
# Return empty duplicates as this feature is no longer used
return duplicates
@staticmethod
def success_response(message: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Format a successful operation response.
Args:
message: Success message
data: Optional additional data to include
Returns:
Formatted success response dictionary
"""
response = {"status": "success", "message": message}
if data:
response.update(data)
return response
@staticmethod
def error_response(message: str, error_code: Optional[str] = None) -> Dict[str, Any]:
"""
Format an error response.
Args:
message: Error message
error_code: Optional error code for categorization
Returns:
Formatted error response dictionary
"""
response = {"error": message}
if error_code:
response["error_code"] = error_code
return response
@staticmethod
def file_list_response(files: List[str], status_message: str) -> Dict[str, Any]:
"""
Format a file list response for find_files operations.
Args:
files: List of file paths
status_message: Status message describing the operation result
Returns:
Formatted file list response
"""
return {
"files": files,
"status": status_message
}
@staticmethod
def search_results_response(
results: List[Dict[str, Any]],
pagination: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Format search results response.
Args:
results: List of search result dictionaries
pagination: Optional pagination metadata to attach to the response
Returns:
Formatted search results response
"""
response = {
"results": results
}
if pagination is not None:
response["pagination"] = pagination
return response
@staticmethod
def config_response(config_data: Dict[str, Any]) -> str:
"""
Format configuration data as JSON string.
Args:
config_data: Configuration data dictionary
Returns:
JSON formatted configuration string
"""
return json.dumps(config_data, indent=2)
@staticmethod
def stats_response(stats_data: Dict[str, Any]) -> str:
"""
Format statistics data as JSON string.
Args:
stats_data: Statistics data dictionary
Returns:
JSON formatted statistics string
"""
return json.dumps(stats_data, indent=2)
@staticmethod
def file_summary_response(
file_path: str,
line_count: int,
size_bytes: int,
extension: str,
language: str = "unknown",
functions: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
classes: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
imports: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
language_specific: Optional[Dict[str, Any]] = None,
error: Optional[str] = None,
index_cache: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Format file summary response from index data.
Args:
file_path: Path to the file
line_count: Number of lines in the file
size_bytes: File size in bytes
extension: File extension
language: Programming language detected
functions: List of function names (strings) or complete function objects (dicts)
classes: List of class names (strings) or complete class objects (dicts)
imports: List of import statements (strings) or complete import objects (dicts)
language_specific: Language-specific analysis data
error: Error message if analysis failed
index_cache: Optional index cache for duplicate name resolution
Returns:
Formatted file summary response
"""
# Get duplicate names from index for qualified name resolution
duplicate_names = ResponseFormatter._get_duplicate_names_from_index(index_cache)
# Handle backward compatibility for functions
processed_functions = []
if functions:
for func in functions:
if isinstance(func, str):
# Legacy format - convert string to basic object
processed_functions.append({"name": func})
elif isinstance(func, dict):
# New format - use complete object and resolve qualified names in relationships
processed_func = func.copy()
# Resolve qualified names in relationship fields
if 'calls' in processed_func and isinstance(processed_func['calls'], list):
processed_func['calls'] = ResponseFormatter._resolve_qualified_names_in_relationships(
file_path, processed_func['calls'], duplicate_names['functions'], index_cache
)
if 'called_by' in processed_func and isinstance(processed_func['called_by'], list):
processed_func['called_by'] = ResponseFormatter._resolve_qualified_names_in_relationships(
file_path, processed_func['called_by'], duplicate_names['functions'], index_cache
)
processed_functions.append(processed_func)
# Handle backward compatibility for classes
processed_classes = []
if classes:
for cls in classes:
if isinstance(cls, str):
# Legacy format - convert string to basic object
processed_classes.append({"name": cls})
elif isinstance(cls, dict):
# New format - use complete object and resolve qualified names in relationships
processed_cls = cls.copy()
# Resolve qualified names in relationship fields
if 'instantiated_by' in processed_cls and isinstance(processed_cls['instantiated_by'], list):
processed_cls['instantiated_by'] = ResponseFormatter._resolve_qualified_names_in_relationships(
file_path, processed_cls['instantiated_by'], duplicate_names['functions'], index_cache
)
processed_classes.append(processed_cls)
# Handle backward compatibility for imports
processed_imports = []
if imports:
for imp in imports:
if isinstance(imp, str):
# Legacy format - convert string to basic object
processed_imports.append({"module": imp, "import_type": "unknown"})
elif isinstance(imp, dict):
# New format - use complete object
processed_imports.append(imp)
response = {
"file_path": file_path,
"line_count": line_count,
"size_bytes": size_bytes,
"extension": extension,
"language": language,
"functions": processed_functions,
"classes": processed_classes,
"imports": processed_imports,
"language_specific": language_specific or {}
}
if error:
response["error"] = error
return response
@staticmethod
def directory_info_response(
temp_directory: str,
exists: bool,
is_directory: bool = False,
contents: Optional[List[str]] = None,
subdirectories: Optional[List[Dict[str, Any]]] = None,
error: Optional[str] = None
) -> Dict[str, Any]:
"""
Format directory information response.
Args:
temp_directory: Path to the directory
exists: Whether the directory exists
is_directory: Whether the path is a directory
contents: List of directory contents
subdirectories: List of subdirectory information
error: Error message if operation failed
Returns:
Formatted directory info response
"""
response = {
"temp_directory": temp_directory,
"exists": exists,
"is_directory": is_directory
}
if contents is not None:
response["contents"] = contents
if subdirectories is not None:
response["subdirectories"] = subdirectories
if error:
response["error"] = error
return response
@staticmethod
def settings_info_response(
settings_directory: str,
temp_directory: str,
temp_directory_exists: bool,
config: Dict[str, Any],
stats: Dict[str, Any],
exists: bool,
status: str = "configured",
message: Optional[str] = None
) -> Dict[str, Any]:
"""
Format settings information response.
Args:
settings_directory: Path to settings directory
temp_directory: Path to temp directory
temp_directory_exists: Whether temp directory exists
config: Configuration data
stats: Statistics data
exists: Whether settings directory exists
status: Status of the configuration
message: Optional status message
Returns:
Formatted settings info response
"""
response = {
"settings_directory": settings_directory,
"temp_directory": temp_directory,
"temp_directory_exists": temp_directory_exists,
"config": config,
"stats": stats,
"exists": exists
}
if status != "configured":
response["status"] = status
if message:
response["message"] = message
return response
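# --- Illustrative usage (sketch only; the MCP services are the real callers) ---
# file_summary_response accepts both the legacy format (plain strings) and the
# richer object format, and normalizes legacy entries into minimal objects:
#
#     summary = ResponseFormatter.file_summary_response(
#         file_path="models/user.py",
#         line_count=120,
#         size_bytes=4096,
#         extension=".py",
#         language="python",
#         functions=["create_user", {"name": "delete_user", "calls": []}],
#         classes=["User"],
#         imports=["os"],
#         language_specific=None,
#         index_cache=None,
#     )
#     # summary["functions"][0] -> {"name": "create_user"}
#     # summary["imports"][0]   -> {"module": "os", "import_type": "unknown"}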

View File

@@ -0,0 +1,239 @@
"""
Common validation logic for the MCP server.
This module provides shared validation functions used across services
to ensure consistent validation behavior and reduce code duplication.
"""
import os
import re
import fnmatch
from typing import Optional, List
from ..indexing.qualified_names import normalize_file_path
class ValidationHelper:
"""
Helper class containing common validation logic.
This class provides static methods for common validation operations
that are used across multiple services.
"""
@staticmethod
def validate_file_path(file_path: str, base_path: str) -> Optional[str]:
"""
Validate a file path for security and accessibility.
This method checks for:
- Path traversal attempts
- Absolute path usage (not allowed)
- Path existence within base directory
Args:
file_path: The file path to validate (should be relative)
base_path: The base project directory path
Returns:
Error message if validation fails, None if valid
"""
if not file_path:
return "File path cannot be empty"
if not base_path:
return "Base path not set"
# Handle absolute paths (especially Windows paths starting with drive letters)
if os.path.isabs(file_path) or (len(file_path) > 1 and file_path[1] == ':'):
return (f"Absolute file paths like '{file_path}' are not allowed. "
"Please use paths relative to the project root.")
# Normalize the file path
norm_path = os.path.normpath(file_path)
# Check for path traversal attempts
if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."):
return f"Invalid file path: {file_path} (directory traversal not allowed)"
# Construct the full path and verify it's within the project bounds
full_path = os.path.join(base_path, norm_path)
real_full_path = os.path.realpath(full_path)
real_base_path = os.path.realpath(base_path)
        # Compare against the base path plus a separator so that a sibling
        # directory such as "/repo2" is not accepted as being inside "/repo".
        if real_full_path != real_base_path and not real_full_path.startswith(real_base_path + os.sep):
            return "Access denied. File path must be within project directory."
return None
@staticmethod
def validate_directory_path(dir_path: str) -> Optional[str]:
"""
Validate a directory path for project initialization.
Args:
dir_path: The directory path to validate
Returns:
Error message if validation fails, None if valid
"""
if not dir_path:
return "Directory path cannot be empty"
# Normalize and get absolute path
try:
norm_path = os.path.normpath(dir_path)
abs_path = os.path.abspath(norm_path)
except (OSError, ValueError) as e:
return f"Invalid path format: {str(e)}"
if not os.path.exists(abs_path):
return f"Path does not exist: {abs_path}"
if not os.path.isdir(abs_path):
return f"Path is not a directory: {abs_path}"
return None
@staticmethod
def validate_glob_pattern(pattern: str) -> Optional[str]:
"""
Validate a glob pattern for file searching.
Args:
pattern: The glob pattern to validate
Returns:
Error message if validation fails, None if valid
"""
if not pattern:
return "Pattern cannot be empty"
# Check for potentially dangerous patterns
if pattern.startswith('/') or pattern.startswith('\\'):
return "Pattern cannot start with path separator"
# Test if the pattern is valid by trying to compile it
try:
# This will raise an exception if the pattern is malformed
fnmatch.translate(pattern)
except (ValueError, TypeError) as e:
return f"Invalid glob pattern: {str(e)}"
return None
@staticmethod
def validate_search_pattern(pattern: str, regex: bool = False) -> Optional[str]:
"""
Validate a search pattern for code searching.
Args:
pattern: The search pattern to validate
regex: Whether the pattern is a regex pattern
Returns:
Error message if validation fails, None if valid
"""
if not pattern:
return "Search pattern cannot be empty"
if regex:
# Basic regex validation - check for potentially dangerous patterns
try:
re.compile(pattern)
except re.error as e:
return (
f"Invalid regex pattern: {str(e)}. "
"If you intended a literal search, pass regex=False."
)
# Check for potentially expensive regex patterns (basic ReDoS protection)
dangerous_patterns = [
r'\(\?\=.*\)\+', # Positive lookahead with quantifier
r'\(\?\!.*\)\+', # Negative lookahead with quantifier
r'\(\?\<\=.*\)\+', # Positive lookbehind with quantifier
r'\(\?\<\!.*\)\+', # Negative lookbehind with quantifier
]
for dangerous in dangerous_patterns:
if re.search(dangerous, pattern):
return "Potentially dangerous regex pattern detected"
return None
@staticmethod
def validate_pagination(start_index: int, max_results: Optional[int]) -> Optional[str]:
"""
Validate pagination parameters for search queries.
Args:
start_index: The index of the first result to include.
max_results: The maximum number of results to return.
Returns:
Error message if validation fails, None if valid.
"""
if not isinstance(start_index, int):
return "start_index must be an integer"
if start_index < 0:
return "start_index cannot be negative"
if max_results is None:
return None
if not isinstance(max_results, int):
return "max_results must be an integer when provided"
if max_results <= 0:
return "max_results must be greater than zero when provided"
return None
@staticmethod
def validate_file_extensions(extensions: List[str]) -> Optional[str]:
"""
Validate a list of file extensions.
Args:
extensions: List of file extensions to validate
Returns:
Error message if validation fails, None if valid
"""
if not extensions:
return "Extensions list cannot be empty"
for ext in extensions:
if not isinstance(ext, str):
return "All extensions must be strings"
if not ext.startswith('.'):
return f"Extension '{ext}' must start with a dot"
if len(ext) < 2:
return f"Extension '{ext}' is too short"
return None
@staticmethod
def sanitize_file_path(file_path: str) -> str:
"""
Sanitize a file path by normalizing separators and removing dangerous elements.
Args:
file_path: The file path to sanitize
Returns:
Sanitized file path
"""
if not file_path:
return ""
# Normalize path separators and structure
sanitized = normalize_file_path(file_path)
# Remove any leading slashes to ensure relative path
sanitized = sanitized.lstrip('/')
return sanitized
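# --- Illustrative usage (sketch only; the services are the real callers) ---
# Every validator returns None when the input is acceptable, or a
# human-readable error string that the caller can surface directly:
#
#     err = ValidationHelper.validate_file_path("../etc/passwd", "/repo")
#     # -> "Invalid file path: ../etc/passwd (directory traversal not allowed)"
#
#     err = ValidationHelper.validate_glob_pattern("**/*.py")   # -> None
#
#     # sanitize_file_path never raises; it normalizes separators via
#     # normalize_file_path and strips leading slashes to force a relative path.
#     rel = ValidationHelper.sanitize_file_path("/src/main.py")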

View File

@@ -0,0 +1,247 @@
# Test Projects for Code Index MCP
This directory contains comprehensive test projects designed to validate and demonstrate the capabilities of the Code Index MCP server. Each project represents a realistic, enterprise-level codebase that showcases different programming languages, frameworks, and architectural patterns.
## Project Structure
```
test/
├── sample-projects/
│ ├── python/
│ │ └── user_management/ # Python user management system
│ ├── java/
│ │ └── user-management/ # Java Spring Boot user management
│ ├── go/
│ │ └── user-management/ # Go Gin user management API
│ ├── javascript/
│ │ └── user-management/ # Node.js Express user management
│ ├── typescript/
│ │ └── user-management/ # TypeScript Express user management
│ └── objective-c/ # Objective-C test files
└── README.md # This file
```
## Sample Projects Overview
Each sample project implements a comprehensive user management system with the following core features:
### Common Features Across All Projects
- **User Registration & Authentication**: Secure user registration with password hashing
- **Role-Based Access Control (RBAC)**: Admin, User, and Guest roles with permissions
- **CRUD Operations**: Complete Create, Read, Update, Delete functionality
- **Search & Filtering**: Full-text search and role/status-based filtering
- **Pagination**: Efficient pagination for large datasets
- **Input Validation**: Comprehensive validation and sanitization
- **Error Handling**: Structured error handling with custom error classes
- **Logging**: Structured logging for debugging and monitoring
- **Security**: Password hashing, rate limiting, and security headers
- **Data Export**: User data export functionality
- **Statistics**: User analytics and statistics
### Language-Specific Implementation Details
#### Python Project (`python/user_management/`)
- **Framework**: Flask-based web application
- **Database**: SQLAlchemy ORM with SQLite
- **Authentication**: JWT tokens with BCrypt password hashing
- **Structure**: Clean package structure with models, services, and utilities
- **Features**: CLI interface, comprehensive validation, and export functionality
**Key Files:**
- `models/person.py` - Base Person model
- `models/user.py` - User model with authentication
- `services/user_manager.py` - Business logic layer
- `services/auth_service.py` - Authentication service
- `utils/` - Validation, exceptions, and helper utilities
- `cli.py` - Command-line interface
#### Java Project (`java/user-management/`)
- **Framework**: Spring Boot with Spring Data JPA
- **Database**: H2 in-memory database with JPA
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Maven project with standard Java package structure
- **Features**: REST API, validation annotations, and comprehensive testing
**Key Files:**
- `model/User.java` - JPA entity with validation
- `service/UserService.java` - Business logic service
- `controller/UserController.java` - REST API endpoints
- `util/` - Validation, exceptions, and utilities
- `Application.java` - Spring Boot application entry point
#### Go Project (`go/user-management/`)
- **Framework**: Gin web framework with GORM
- **Database**: SQLite with GORM ORM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Clean Go module structure with internal packages
- **Features**: High-performance API, middleware, and concurrent processing
**Key Files:**
- `internal/models/user.go` - User model with GORM
- `internal/services/user_service.go` - Business logic
- `pkg/api/user_handler.go` - HTTP handlers
- `internal/utils/types.go` - Shared response, config, and validation types
- `cmd/server/main.go` - Application entry point
#### JavaScript Project (`javascript/user-management/`)
- **Framework**: Express.js with Mongoose
- **Database**: MongoDB with Mongoose ODM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Modern Node.js project with ES6+ features
- **Features**: Async/await, middleware, and comprehensive error handling
**Key Files:**
- `src/models/User.js` - Mongoose model with validation
- `src/services/UserService.js` - Business logic service
- `src/routes/userRoutes.js` - Express routes
- `src/middleware/` - Authentication and validation middleware
- `src/server.js` - Express application setup
#### TypeScript Project (`typescript/user-management/`)
- **Framework**: Express.js with Mongoose (TypeScript)
- **Database**: MongoDB with Mongoose ODM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Type-safe Node.js project with comprehensive interfaces
- **Features**: Full type safety, interfaces, and advanced TypeScript features
**Key Files:**
- `src/types/User.ts` - TypeScript interfaces and types
- `src/models/User.ts` - Mongoose model with TypeScript
- `src/services/UserService.ts` - Typed business logic service
- `src/routes/userRoutes.ts` - Typed Express routes
- `src/server.ts` - TypeScript Express application
#### Objective-C Project (`objective-c/`)
- **Framework**: Foundation classes
- **Features**: Classes, properties, methods, protocols
- **Structure**: Traditional .h/.m file structure
**Key Files:**
- `Person.h/.m` - Person class with properties
- `UserManager.h/.m` - User management functionality
- `main.m` - Application entry point
## Testing the Code Index MCP
These projects are designed to test various aspects of the Code Index MCP:
### File Analysis Capabilities
- **Language Detection**: Automatic detection of programming languages
- **Syntax Parsing**: Parsing of different syntax structures
- **Import/Dependency Analysis**: Understanding of module dependencies
- **Code Structure**: Recognition of classes, functions, and interfaces
### Search and Navigation
- **Symbol Search**: Finding functions, classes, and variables
- **Cross-Reference**: Finding usage of symbols across files
- **Fuzzy Search**: Approximate matching for typos and partial queries
- **Pattern Matching**: Regular expression and pattern-based searches
### Code Intelligence
- **Function Signatures**: Understanding of function parameters and return types
- **Variable Types**: Type inference and tracking
- **Scope Analysis**: Understanding of variable and function scope
- **Documentation**: Parsing of comments and documentation
### Performance Testing
- **Large Codebases**: Testing with realistic project sizes
- **Complex Structures**: Nested packages and deep directory structures
- **Multiple File Types**: Mixed file types within projects
- **Concurrent Access**: Multiple simultaneous search operations
## Running the Projects
Each project includes comprehensive setup instructions in its respective README.md file. General steps:
1. Navigate to the project directory
2. Install dependencies using the appropriate package manager
3. Set up environment variables (see .env.example files)
4. Run the application using the provided scripts
5. Test the API endpoints using the provided examples
### Quick Start Examples
```bash
# Python project
cd test/sample-projects/python/user_management
pip install -r requirements.txt
python cli.py
# Java project
cd test/sample-projects/java/user-management
mvn spring-boot:run
# Go project
cd test/sample-projects/go/user-management
go run cmd/server/main.go
# JavaScript project
cd test/sample-projects/javascript/user-management
npm install
npm run dev
# TypeScript project
cd test/sample-projects/typescript/user-management
npm install
npm run dev
```
## MCP Server Testing
To test the Code Index MCP server with these projects:
1. **Set Project Path**: Use the `set_project_path` tool to point to a project directory
2. **Index Files**: The server will automatically index all files in the project
3. **Search Testing**: Test various search queries and patterns
4. **Analysis Testing**: Use the analysis tools to examine code structure
5. **Performance Testing**: Measure response times and resource usage
### Example MCP Commands
```bash
# Set project path
set_project_path /path/to/test/sample-projects/python/user_management
# Search for user-related functions
search_code_advanced "def create_user" --file-pattern "*.py"
# Find all authentication-related code
search_code_advanced "auth" --fuzzy true
# Get file summary
get_file_summary models/user.py
# Find TypeScript interfaces
search_code_advanced "interface.*User" --regex true --file-pattern "*.ts"
```
## Contributing
When adding new test projects:
1. Follow the established patterns and structure
2. Implement all core features consistently
3. Include comprehensive documentation
4. Add appropriate test cases
5. Update this README with project details
## Security Considerations
All test projects include:
- Secure password hashing (BCrypt)
- Input validation and sanitization
- Rate limiting and security headers
- JWT token-based authentication
- Environment variable configuration
- Proper error handling without information disclosure
## Future Enhancements
Potential additions to the test suite:
- **Rust Project**: Systems programming language example
- **C++ Project**: Complex C++ codebase with templates
- **C# Project**: .NET Core application
- **PHP Project**: Laravel-based web application
- **Ruby Project**: Rails application
- **Swift Project**: iOS application structure
- **Kotlin Project**: Android/JVM application

View File

@@ -0,0 +1,324 @@
# User Management System (Go)
A comprehensive user management system built in Go for testing Code Index MCP's analysis capabilities.
## Features
- **User Management**: Create, update, delete, and search users
- **REST API**: Full HTTP API with JSON responses
- **Authentication**: BCrypt password hashing and JWT tokens
- **Authorization**: Role-based access control (Admin, User, Guest)
- **Database**: SQLite with GORM ORM
- **Pagination**: Efficient pagination for large datasets
- **Search**: Full-text search across users
- **Export**: JSON export functionality
- **Logging**: Structured logging with middleware
- **CORS**: Cross-origin resource sharing support
## Project Structure
```
user-management/
├── cmd/
│ ├── server/
│ │ └── main.go # HTTP server entry point
│ └── cli/
│ └── main.go # CLI application
├── internal/
│ ├── models/
│ │ └── user.go # User model and types
│ ├── services/
│ │ └── user_service.go # Business logic
│ └── utils/
│ └── types.go # Utility types and helpers
├── pkg/
│ └── api/
│ └── user_handler.go # HTTP handlers
├── go.mod # Go module file
├── go.sum # Go dependencies
└── README.md # This file
```
## Technologies Used
- **Go 1.21**: Modern Go with generics and latest features
- **Gin**: HTTP web framework
- **GORM**: ORM for database operations
- **SQLite**: Embedded database
- **UUID**: Unique identifiers
- **BCrypt**: Password hashing
- **JWT**: JSON Web Tokens (planned)
- **Viper**: Configuration management
- **Cobra**: CLI framework
## Build and Run
### Prerequisites
- Go 1.21 or higher
### Install Dependencies
```bash
go mod tidy
```
### Run HTTP Server
```bash
go run cmd/server/main.go
```
The server will start on `http://localhost:8080`
### Run CLI
```bash
go run cmd/cli/main.go
```
### Build
```bash
# Build server
go build -o bin/server cmd/server/main.go
# Build CLI
go build -o bin/cli cmd/cli/main.go
```
## API Endpoints
### Users
| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/users` | Create a new user |
| `GET` | `/api/v1/users` | Get all users (paginated) |
| `GET` | `/api/v1/users/:id` | Get user by ID |
| `PUT` | `/api/v1/users/:id` | Update user |
| `DELETE` | `/api/v1/users/:id` | Delete user |
| `GET` | `/api/v1/users/search` | Search users |
| `GET` | `/api/v1/users/stats` | Get user statistics |
| `GET` | `/api/v1/users/export` | Export users |
### Authentication
| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/auth/login` | User login |
| `POST` | `/api/v1/auth/logout` | User logout |
| `POST` | `/api/v1/auth/change-password` | Change password |
### Admin
| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/admin/users/:id/reset-password` | Reset user password |
| `POST` | `/api/v1/admin/users/:id/permissions` | Add permission |
| `DELETE` | `/api/v1/admin/users/:id/permissions` | Remove permission |
## Usage Examples
### Create User
```bash
curl -X POST http://localhost:8080/api/v1/users \
-H "Content-Type: application/json" \
-d '{
"username": "johndoe",
"email": "john@example.com",
"name": "John Doe",
"age": 30,
"password": "password123"
}'
```
### Get Users
```bash
curl "http://localhost:8080/api/v1/users?page=1&page_size=10"
```
### Search Users
```bash
curl "http://localhost:8080/api/v1/users/search?q=john&page=1&page_size=10"
```
### Login
```bash
curl -X POST http://localhost:8080/api/v1/auth/login \
-H "Content-Type: application/json" \
-d '{
"username": "admin",
"password": "admin123"
}'
```
### Get Statistics
```bash
curl http://localhost:8080/api/v1/users/stats
```
## Programmatic Usage
```go
package main
import (
    "fmt"
    "github.com/example/user-management/internal/models"
    "github.com/example/user-management/internal/services"
    "gorm.io/driver/sqlite"
    "gorm.io/gorm"
)
func main() {
// Initialize database
db, err := gorm.Open(sqlite.Open("users.db"), &gorm.Config{})
if err != nil {
panic(err)
}
// Auto migrate
db.AutoMigrate(&models.User{})
// Initialize service
userService := services.NewUserService(db)
// Create user
req := &models.UserRequest{
Username: "alice",
Email: "alice@example.com",
Name: "Alice Smith",
Age: 25,
Password: "password123",
Role: models.RoleUser,
}
    user, err := userService.CreateUser(req)
    if err != nil {
        panic(err)
    }
    fmt.Println("created:", user.Username)
    // Authenticate user
    authUser, err := userService.AuthenticateUser("alice", "password123")
    if err != nil {
        panic(err)
    }
    fmt.Println("authenticated:", authUser.Username)
    // Get statistics
    stats, err := userService.GetUserStats()
    if err != nil {
        panic(err)
    }
    fmt.Printf("total users: %d\n", stats.Total)
}
```
## Testing Features
This project tests the following Go language features:
### Core Language Features
- **Structs and Methods**: User model with associated methods
- **Interfaces**: Service and handler interfaces
- **Pointers**: Efficient memory management
- **Error Handling**: Comprehensive error handling patterns
- **Packages**: Modular code organization
- **Imports**: Internal and external package imports
### Modern Go Features
- **Generics**: Type-safe collections (Go 1.18+)
- **Modules**: Dependency management with go.mod
- **Context**: Request context handling
- **Channels**: Concurrent programming (in background tasks)
- **Goroutines**: Concurrent execution
- **JSON Tags**: Struct field mapping
### Advanced Features
- **Reflection**: GORM model reflection
- **Build Tags**: Conditional compilation
- **Embedding**: Struct embedding for composition
- **Type Assertions**: Interface type checking
- **Panic/Recover**: Error recovery mechanisms
### Framework Integration
- **Gin**: HTTP router and middleware
- **GORM**: ORM with hooks and associations
- **UUID**: Unique identifier generation
- **BCrypt**: Cryptographic hashing
- **SQLite**: Embedded database
### Design Patterns
- **Repository Pattern**: Data access layer
- **Service Layer**: Business logic separation
- **Dependency Injection**: Service composition (see the sketch after this list)
- **Middleware Pattern**: HTTP request processing
- **Factory Pattern**: Service creation
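A condensed sketch of how these patterns compose; the names below are the project's own, and the full wiring lives in `cmd/server/main.go`:
```go
package main
import (
    "log"
    "github.com/example/user-management/internal/services"
    "github.com/example/user-management/pkg/api"
    "gorm.io/driver/sqlite"
    "gorm.io/gorm"
)
func main() {
    // Constructor-based dependency injection: each layer receives its
    // dependency explicitly instead of reaching for a global.
    db, err := gorm.Open(sqlite.Open("users.db"), &gorm.Config{})
    if err != nil {
        log.Fatal(err)
    }
    svc := services.NewUserService(db) // service layer: business logic only
    handler := api.NewUserHandler(svc) // handlers see the service, never the DB
    _ = handler                        // wired into the Gin router by setupRoutes
}
```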
## Dependencies
### Core Dependencies
- **gin-gonic/gin**: Web framework
- **gorm.io/gorm**: ORM
- **gorm.io/driver/sqlite**: SQLite driver
- **google/uuid**: UUID generation
- **golang.org/x/crypto**: Cryptographic functions
### CLI Dependencies
- **spf13/cobra**: CLI framework
- **spf13/viper**: Configuration management
### Development Dependencies
- **testify**: Testing framework
- **mockery**: Mock generation
## Configuration
The application can be configured using environment variables or a configuration file:
```yaml
database:
  driver: sqlite
  database: users.db
server:
  port: 8080
  host: localhost
jwt:
  secret_key: your-secret-key
  expiration_hours: 24
```
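Viper (already listed under CLI dependencies) can load this file. A minimal sketch, assuming the YAML above is saved as `config.yaml` in the working directory:
```go
package main
import (
    "fmt"
    "log"
    "github.com/spf13/viper"
)
func main() {
    viper.SetConfigName("config") // finds config.yaml; the extension is inferred
    viper.SetConfigType("yaml")
    viper.AddConfigPath(".")
    viper.AutomaticEnv() // env vars override file values (nested keys need a key replacer)
    if err := viper.ReadInConfig(); err != nil {
        log.Fatalf("read config: %v", err)
    }
    fmt.Println("driver:", viper.GetString("database.driver")) // sqlite
    fmt.Println("port:", viper.GetInt("server.port"))          // 8080
    fmt.Println("jwt ttl:", viper.GetInt("jwt.expiration_hours"))
}
```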
## Development
### Run Tests
```bash
go test ./...
```
### Generate Mocks
```bash
mockery --all
```
### Format Code
```bash
gofmt -w .
```
### Lint Code
```bash
golangci-lint run
```
## License
MIT License - This is a sample project for testing purposes.

View File

@@ -0,0 +1,294 @@
package main
import (
"fmt"
"log"
"net/http"
"time"
"github.com/example/user-management/internal/models"
"github.com/example/user-management/internal/services"
"github.com/example/user-management/internal/utils"
"github.com/example/user-management/pkg/api"
"github.com/gin-gonic/gin"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
func main() {
// Initialize database
db, err := initDatabase()
if err != nil {
log.Fatal("Failed to initialize database:", err)
}
// Initialize services
userService := services.NewUserService(db)
// Initialize API handlers
userHandler := api.NewUserHandler(userService)
// Setup routes
router := setupRoutes(userHandler)
// Create sample data
createSampleData(userService)
// Start server
log.Println("Starting server on :8080")
if err := router.Run(":8080"); err != nil {
log.Fatal("Failed to start server:", err)
}
}
func initDatabase() (*gorm.DB, error) {
db, err := gorm.Open(sqlite.Open("users.db"), &gorm.Config{})
if err != nil {
return nil, err
}
// Auto migrate
if err := db.AutoMigrate(&models.User{}); err != nil {
return nil, err
}
return db, nil
}
func setupRoutes(userHandler *api.UserHandler) *gin.Engine {
router := gin.Default()
// Middleware
router.Use(corsMiddleware())
router.Use(loggingMiddleware())
// Health check
router.GET("/health", healthCheck)
// API routes
v1 := router.Group("/api/v1")
{
users := v1.Group("/users")
{
users.POST("", userHandler.CreateUser)
users.GET("", userHandler.GetUsers)
users.GET("/:id", userHandler.GetUser)
users.PUT("/:id", userHandler.UpdateUser)
users.DELETE("/:id", userHandler.DeleteUser)
users.GET("/search", userHandler.SearchUsers)
users.GET("/stats", userHandler.GetUserStats)
users.GET("/export", userHandler.ExportUsers)
}
auth := v1.Group("/auth")
{
auth.POST("/login", userHandler.Login)
auth.POST("/logout", userHandler.Logout)
auth.POST("/change-password", userHandler.ChangePassword)
}
admin := v1.Group("/admin")
{
admin.POST("/users/:id/reset-password", userHandler.ResetPassword)
admin.POST("/users/:id/permissions", userHandler.AddPermission)
admin.DELETE("/users/:id/permissions", userHandler.RemovePermission)
}
}
return router
}
func healthCheck(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"status": "healthy",
"timestamp": time.Now().UTC(),
"version": "1.0.0",
})
}
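// corsMiddleware allows any origin ("*"); fine for this sample, too permissive for production.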
func corsMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
c.Header("Access-Control-Allow-Origin", "*")
c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization")
if c.Request.Method == "OPTIONS" {
c.AbortWithStatus(http.StatusOK)
return
}
c.Next()
}
}
func loggingMiddleware() gin.HandlerFunc {
return gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string {
return fmt.Sprintf("%s - [%s] \"%s %s %s %d %s \"%s\" %s\"\n",
param.ClientIP,
param.TimeStamp.Format(time.RFC1123),
param.Method,
param.Path,
param.Request.Proto,
param.StatusCode,
param.Latency,
param.Request.UserAgent(),
param.ErrorMessage,
)
})
}
func createSampleData(userService *services.UserService) {
// Check if admin user already exists
if _, err := userService.GetUserByUsername("admin"); err == nil {
return // Admin user already exists
}
// Create admin user
adminReq := &models.UserRequest{
Username: "admin",
Email: "admin@example.com",
Name: "System Administrator",
Age: 30,
Password: "admin123",
Role: models.RoleAdmin,
}
admin, err := userService.CreateUser(adminReq)
if err != nil {
log.Printf("Failed to create admin user: %v", err)
return
}
// Add admin permissions
permissions := []string{
"user_management",
"system_admin",
"user_read",
"user_write",
"user_delete",
}
for _, perm := range permissions {
if err := userService.AddPermission(admin.ID, perm); err != nil {
log.Printf("Failed to add permission %s to admin: %v", perm, err)
}
}
// Create sample users
sampleUsers := []*models.UserRequest{
{
Username: "john_doe",
Email: "john@example.com",
Name: "John Doe",
Age: 25,
Password: "password123",
Role: models.RoleUser,
},
{
Username: "jane_smith",
Email: "jane@example.com",
Name: "Jane Smith",
Age: 28,
Password: "password123",
Role: models.RoleUser,
},
{
Username: "guest_user",
Email: "guest@example.com",
Name: "Guest User",
Age: 22,
Password: "password123",
Role: models.RoleGuest,
},
}
for _, userReq := range sampleUsers {
if _, err := userService.CreateUser(userReq); err != nil {
log.Printf("Failed to create user %s: %v", userReq.Username, err)
}
}
log.Println("Sample data created successfully")
}
// Helper functions for demo
func printUserStats(userService *services.UserService) {
stats, err := userService.GetUserStats()
if err != nil {
log.Printf("Failed to get user stats: %v", err)
return
}
log.Printf("User Statistics:")
log.Printf(" Total: %d", stats.Total)
log.Printf(" Active: %d", stats.Active)
log.Printf(" Admin: %d", stats.Admin)
log.Printf(" User: %d", stats.User)
log.Printf(" Guest: %d", stats.Guest)
log.Printf(" With Email: %d", stats.WithEmail)
}
func demonstrateUserOperations(userService *services.UserService) {
log.Println("\n=== User Management Demo ===")
// Get all users
users, total, err := userService.GetAllUsers(1, 10)
if err != nil {
log.Printf("Failed to get users: %v", err)
return
}
log.Printf("Found %d users (total: %d):", len(users), total)
for _, user := range users {
log.Printf(" - %s (%s) - %s [%s]",
user.Username, user.Name, user.Role, user.Status)
}
// Test authentication
log.Println("\n=== Authentication Test ===")
user, err := userService.AuthenticateUser("admin", "admin123")
if err != nil {
log.Printf("Authentication failed: %v", err)
} else {
log.Printf("Authentication successful for: %s", user.Username)
log.Printf("Last login: %v", user.LastLogin)
}
// Test search
log.Println("\n=== Search Test ===")
searchResults, _, err := userService.SearchUsers("john", 1, 10)
if err != nil {
log.Printf("Search failed: %v", err)
} else {
log.Printf("Search results for 'john': %d users", len(searchResults))
for _, user := range searchResults {
log.Printf(" - %s (%s)", user.Username, user.Name)
}
}
// Print stats
log.Println("\n=== Statistics ===")
printUserStats(userService)
}
// runDemo exercises the service layer end to end; it is not wired into main and is intended for manual invocation.
func runDemo() {
log.Println("Running User Management Demo...")
// Initialize database
db, err := initDatabase()
if err != nil {
log.Fatal("Failed to initialize database:", err)
}
// Initialize services
userService := services.NewUserService(db)
// Create sample data
createSampleData(userService)
// Demonstrate operations
demonstrateUserOperations(userService)
log.Println("\nDemo completed!")
}

View File

@@ -0,0 +1,53 @@
module github.com/example/user-management
go 1.21
require (
github.com/gin-gonic/gin v1.9.1
github.com/golang-jwt/jwt/v5 v5.0.0
github.com/google/uuid v1.3.0
github.com/spf13/cobra v1.7.0
github.com/spf13/viper v1.16.0
golang.org/x/crypto v0.11.0
gorm.io/driver/sqlite v1.5.2
gorm.io/gorm v1.25.2
)
require (
github.com/bytedance/sonic v1.9.1 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.14.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-sqlite3 v1.14.17 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/spf13/afero v1.9.5 // indirect
github.com/spf13/cast v1.5.1 // indirect
github.com/spf13/jwalterweatherman v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/subosito/gotenv v1.4.2 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.10.0 // indirect
golang.org/x/text v0.11.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

View File

@@ -0,0 +1,310 @@
package models
import (
"encoding/json"
"errors"
"time"
"github.com/google/uuid"
"golang.org/x/crypto/bcrypt"
"gorm.io/gorm"
)
// UserRole represents the role of a user
type UserRole string
const (
RoleAdmin UserRole = "admin"
RoleUser UserRole = "user"
RoleGuest UserRole = "guest"
)
// UserStatus represents the status of a user
type UserStatus string
const (
StatusActive UserStatus = "active"
StatusInactive UserStatus = "inactive"
StatusSuspended UserStatus = "suspended"
StatusDeleted UserStatus = "deleted"
)
// User represents a user in the system
type User struct {
ID uuid.UUID `json:"id" gorm:"type:uuid;primary_key"`
Username string `json:"username" gorm:"uniqueIndex;not null"`
Email string `json:"email" gorm:"uniqueIndex"`
Name string `json:"name" gorm:"not null"`
Age int `json:"age"`
PasswordHash string `json:"-" gorm:"not null"`
Role UserRole `json:"role" gorm:"default:user"`
Status UserStatus `json:"status" gorm:"default:active"`
LastLogin *time.Time `json:"last_login"`
LoginAttempts int `json:"login_attempts" gorm:"default:0"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
DeletedAt gorm.DeletedAt `json:"-" gorm:"index"`
// Permissions is a JSON field containing user permissions
    Permissions []string `json:"permissions" gorm:"type:json;serializer:json"`
// Metadata for additional user information
    Metadata map[string]interface{} `json:"metadata" gorm:"type:json;serializer:json"`
}
// UserRequest represents a request to create or update a user
type UserRequest struct {
Username string `json:"username" binding:"required,min=3,max=20"`
Email string `json:"email" binding:"omitempty,email"`
Name string `json:"name" binding:"required,min=1,max=100"`
Age int `json:"age" binding:"min=0,max=150"`
Password string `json:"password" binding:"required,min=8"`
Role UserRole `json:"role" binding:"omitempty,oneof=admin user guest"`
Metadata map[string]interface{} `json:"metadata"`
}
// UserResponse represents a user response (without sensitive data)
type UserResponse struct {
ID uuid.UUID `json:"id"`
Username string `json:"username"`
Email string `json:"email"`
Name string `json:"name"`
Age int `json:"age"`
Role UserRole `json:"role"`
Status UserStatus `json:"status"`
LastLogin *time.Time `json:"last_login"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
Permissions []string `json:"permissions"`
Metadata map[string]interface{} `json:"metadata"`
}
// BeforeCreate is a GORM hook that runs before creating a user
func (u *User) BeforeCreate(tx *gorm.DB) error {
if u.ID == uuid.Nil {
u.ID = uuid.New()
}
if u.Permissions == nil {
u.Permissions = []string{}
}
if u.Metadata == nil {
u.Metadata = make(map[string]interface{})
}
return nil
}
// SetPassword hashes and sets the user's password
func (u *User) SetPassword(password string) error {
if len(password) < 8 {
return errors.New("password must be at least 8 characters long")
}
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
if err != nil {
return err
}
u.PasswordHash = string(hash)
return nil
}
// VerifyPassword checks if the provided password matches the user's password
func (u *User) VerifyPassword(password string) bool {
err := bcrypt.CompareHashAndPassword([]byte(u.PasswordHash), []byte(password))
return err == nil
}
// HasPermission checks if the user has a specific permission
func (u *User) HasPermission(permission string) bool {
for _, p := range u.Permissions {
if p == permission {
return true
}
}
return false
}
// AddPermission adds a permission to the user
func (u *User) AddPermission(permission string) {
if !u.HasPermission(permission) {
u.Permissions = append(u.Permissions, permission)
}
}
// RemovePermission removes a permission from the user
func (u *User) RemovePermission(permission string) {
for i, p := range u.Permissions {
if p == permission {
u.Permissions = append(u.Permissions[:i], u.Permissions[i+1:]...)
break
}
}
}
// IsActive checks if the user is active
func (u *User) IsActive() bool {
return u.Status == StatusActive
}
// IsAdmin checks if the user is an admin
func (u *User) IsAdmin() bool {
return u.Role == RoleAdmin
}
// IsLocked checks if the user is locked due to too many failed login attempts
func (u *User) IsLocked() bool {
return u.LoginAttempts >= 5 || u.Status == StatusSuspended
}
// Login records a successful login
func (u *User) Login() error {
if !u.IsActive() {
return errors.New("user is not active")
}
if u.IsLocked() {
return errors.New("user is locked")
}
now := time.Now()
u.LastLogin = &now
u.LoginAttempts = 0
return nil
}
// FailedLoginAttempt records a failed login attempt
func (u *User) FailedLoginAttempt() {
u.LoginAttempts++
if u.LoginAttempts >= 5 {
u.Status = StatusSuspended
}
}
// ResetLoginAttempts resets the login attempts counter
func (u *User) ResetLoginAttempts() {
u.LoginAttempts = 0
}
// Activate activates the user account
func (u *User) Activate() {
u.Status = StatusActive
u.LoginAttempts = 0
}
// Deactivate deactivates the user account
func (u *User) Deactivate() {
u.Status = StatusInactive
}
// Suspend suspends the user account
func (u *User) Suspend() {
u.Status = StatusSuspended
}
// Delete marks the user as deleted
func (u *User) Delete() {
u.Status = StatusDeleted
}
// ToResponse converts a User to a UserResponse
func (u *User) ToResponse() *UserResponse {
return &UserResponse{
ID: u.ID,
Username: u.Username,
Email: u.Email,
Name: u.Name,
Age: u.Age,
Role: u.Role,
Status: u.Status,
LastLogin: u.LastLogin,
CreatedAt: u.CreatedAt,
UpdatedAt: u.UpdatedAt,
Permissions: u.Permissions,
Metadata: u.Metadata,
}
}
// FromRequest creates a User from a UserRequest
func (u *User) FromRequest(req *UserRequest) error {
u.Username = req.Username
u.Email = req.Email
u.Name = req.Name
u.Age = req.Age
    if req.Role != "" { // keep the caller-supplied default when the request omits a role
        u.Role = req.Role
    }
u.Metadata = req.Metadata
if req.Password != "" {
return u.SetPassword(req.Password)
}
return nil
}
// MarshalJSON customizes JSON marshaling for User
func (u *User) MarshalJSON() ([]byte, error) {
return json.Marshal(u.ToResponse())
}
// Validate validates the user model
func (u *User) Validate() error {
if len(u.Username) < 3 || len(u.Username) > 20 {
return errors.New("username must be between 3 and 20 characters")
}
if len(u.Name) == 0 || len(u.Name) > 100 {
return errors.New("name must be between 1 and 100 characters")
}
if u.Age < 0 || u.Age > 150 {
return errors.New("age must be between 0 and 150")
}
if u.Role != RoleAdmin && u.Role != RoleUser && u.Role != RoleGuest {
return errors.New("invalid role")
}
if u.Status != StatusActive && u.Status != StatusInactive &&
u.Status != StatusSuspended && u.Status != StatusDeleted {
return errors.New("invalid status")
}
return nil
}
// TableName returns the table name for GORM
func (u *User) TableName() string {
return "users"
}
// GetMetadata gets a metadata value by key
func (u *User) GetMetadata(key string) (interface{}, bool) {
if u.Metadata == nil {
return nil, false
}
value, exists := u.Metadata[key]
return value, exists
}
// SetMetadata sets a metadata value
func (u *User) SetMetadata(key string, value interface{}) {
if u.Metadata == nil {
u.Metadata = make(map[string]interface{})
}
u.Metadata[key] = value
}
// RemoveMetadata removes a metadata key
func (u *User) RemoveMetadata(key string) {
if u.Metadata != nil {
delete(u.Metadata, key)
}
}
// String returns a string representation of the user
func (u *User) String() string {
return u.Username + " (" + u.Name + ")"
}
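// Illustrative sketch (not part of the API): SetPassword stores only the
// bcrypt hash, so VerifyPassword is the only way to check a candidate:
//
//	u := &User{Username: "demo", Name: "Demo User"}
//	_ = u.SetPassword("correct-horse-battery")
//	ok := u.VerifyPassword("correct-horse-battery") // true
//	bad := u.VerifyPassword("wrong")                // false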

View File

@@ -0,0 +1,419 @@
package services
import (
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"github.com/example/user-management/internal/models"
"github.com/example/user-management/internal/utils"
"github.com/google/uuid"
"gorm.io/gorm"
)
// UserService handles user-related business logic
type UserService struct {
db *gorm.DB
}
// NewUserService creates a new user service
func NewUserService(db *gorm.DB) *UserService {
return &UserService{db: db}
}
// CreateUser creates a new user
func (s *UserService) CreateUser(req *models.UserRequest) (*models.User, error) {
// Check if username already exists
var existingUser models.User
if err := s.db.Where("username = ?", req.Username).First(&existingUser).Error; err == nil {
return nil, errors.New("username already exists")
}
// Check if email already exists (if provided)
if req.Email != "" {
if err := s.db.Where("email = ?", req.Email).First(&existingUser).Error; err == nil {
return nil, errors.New("email already exists")
}
}
// Create new user
user := &models.User{
Role: models.RoleUser,
Status: models.StatusActive,
}
if err := user.FromRequest(req); err != nil {
return nil, fmt.Errorf("failed to create user from request: %w", err)
}
if err := user.Validate(); err != nil {
return nil, fmt.Errorf("user validation failed: %w", err)
}
if err := s.db.Create(user).Error; err != nil {
return nil, fmt.Errorf("failed to create user: %w", err)
}
return user, nil
}
// GetUserByID retrieves a user by ID
func (s *UserService) GetUserByID(id uuid.UUID) (*models.User, error) {
var user models.User
if err := s.db.First(&user, "id = ?", id).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, errors.New("user not found")
}
return nil, fmt.Errorf("failed to get user: %w", err)
}
return &user, nil
}
// GetUserByUsername retrieves a user by username
func (s *UserService) GetUserByUsername(username string) (*models.User, error) {
var user models.User
if err := s.db.Where("username = ?", username).First(&user).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, errors.New("user not found")
}
return nil, fmt.Errorf("failed to get user: %w", err)
}
return &user, nil
}
// GetUserByEmail retrieves a user by email
func (s *UserService) GetUserByEmail(email string) (*models.User, error) {
var user models.User
if err := s.db.Where("email = ?", email).First(&user).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, errors.New("user not found")
}
return nil, fmt.Errorf("failed to get user: %w", err)
}
return &user, nil
}
// UpdateUser updates an existing user
func (s *UserService) UpdateUser(id uuid.UUID, updates map[string]interface{}) (*models.User, error) {
user, err := s.GetUserByID(id)
if err != nil {
return nil, err
}
// Apply updates
for key, value := range updates {
switch key {
case "name":
if name, ok := value.(string); ok {
user.Name = name
}
case "age":
if age, ok := value.(int); ok {
user.Age = age
}
case "email":
if email, ok := value.(string); ok {
user.Email = email
}
case "role":
if role, ok := value.(models.UserRole); ok {
user.Role = role
}
case "status":
if status, ok := value.(models.UserStatus); ok {
user.Status = status
}
case "metadata":
if metadata, ok := value.(map[string]interface{}); ok {
user.Metadata = metadata
}
}
}
if err := user.Validate(); err != nil {
return nil, fmt.Errorf("user validation failed: %w", err)
}
if err := s.db.Save(user).Error; err != nil {
return nil, fmt.Errorf("failed to update user: %w", err)
}
return user, nil
}
// DeleteUser soft deletes a user
func (s *UserService) DeleteUser(id uuid.UUID) error {
user, err := s.GetUserByID(id)
if err != nil {
return err
}
user.Delete()
if err := s.db.Save(user).Error; err != nil {
return fmt.Errorf("failed to delete user: %w", err)
}
return nil
}
// HardDeleteUser permanently deletes a user
func (s *UserService) HardDeleteUser(id uuid.UUID) error {
if err := s.db.Unscoped().Delete(&models.User{}, id).Error; err != nil {
return fmt.Errorf("failed to hard delete user: %w", err)
}
return nil
}
// GetAllUsers retrieves all users with pagination
func (s *UserService) GetAllUsers(page, pageSize int) ([]*models.User, int64, error) {
var users []*models.User
var total int64
// Count total users
if err := s.db.Model(&models.User{}).Count(&total).Error; err != nil {
return nil, 0, fmt.Errorf("failed to count users: %w", err)
}
// Get users with pagination
offset := (page - 1) * pageSize
if err := s.db.Limit(pageSize).Offset(offset).Find(&users).Error; err != nil {
return nil, 0, fmt.Errorf("failed to get users: %w", err)
}
return users, total, nil
}
// GetActiveUsers retrieves all active users
func (s *UserService) GetActiveUsers() ([]*models.User, error) {
var users []*models.User
if err := s.db.Where("status = ?", models.StatusActive).Find(&users).Error; err != nil {
return nil, fmt.Errorf("failed to get active users: %w", err)
}
return users, nil
}
// GetUsersByRole retrieves users by role
func (s *UserService) GetUsersByRole(role models.UserRole) ([]*models.User, error) {
var users []*models.User
if err := s.db.Where("role = ?", role).Find(&users).Error; err != nil {
return nil, fmt.Errorf("failed to get users by role: %w", err)
}
return users, nil
}
// SearchUsers searches for users by name or username
func (s *UserService) SearchUsers(query string, page, pageSize int) ([]*models.User, int64, error) {
var users []*models.User
var total int64
searchQuery := "%" + strings.ToLower(query) + "%"
// Count total matching users
if err := s.db.Model(&models.User{}).Where(
"LOWER(name) LIKE ? OR LOWER(username) LIKE ? OR LOWER(email) LIKE ?",
searchQuery, searchQuery, searchQuery,
).Count(&total).Error; err != nil {
return nil, 0, fmt.Errorf("failed to count search results: %w", err)
}
// Get matching users with pagination
offset := (page - 1) * pageSize
if err := s.db.Where(
"LOWER(name) LIKE ? OR LOWER(username) LIKE ? OR LOWER(email) LIKE ?",
searchQuery, searchQuery, searchQuery,
).Limit(pageSize).Offset(offset).Find(&users).Error; err != nil {
return nil, 0, fmt.Errorf("failed to search users: %w", err)
}
return users, total, nil
}
// GetUserStats returns user statistics
func (s *UserService) GetUserStats() (*utils.UserStats, error) {
var stats utils.UserStats
// Total users
if err := s.db.Model(&models.User{}).Count(&stats.Total).Error; err != nil {
return nil, fmt.Errorf("failed to count total users: %w", err)
}
// Active users
if err := s.db.Model(&models.User{}).Where("status = ?", models.StatusActive).Count(&stats.Active).Error; err != nil {
return nil, fmt.Errorf("failed to count active users: %w", err)
}
// Admin users
if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleAdmin).Count(&stats.Admin).Error; err != nil {
return nil, fmt.Errorf("failed to count admin users: %w", err)
}
// Regular users
if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleUser).Count(&stats.User).Error; err != nil {
return nil, fmt.Errorf("failed to count regular users: %w", err)
}
// Guest users
if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleGuest).Count(&stats.Guest).Error; err != nil {
return nil, fmt.Errorf("failed to count guest users: %w", err)
}
// Users with email
if err := s.db.Model(&models.User{}).Where("email != ''").Count(&stats.WithEmail).Error; err != nil {
return nil, fmt.Errorf("failed to count users with email: %w", err)
}
return &stats, nil
}
// AuthenticateUser authenticates a user with username and password
func (s *UserService) AuthenticateUser(username, password string) (*models.User, error) {
user, err := s.GetUserByUsername(username)
if err != nil {
return nil, errors.New("invalid username or password")
}
if !user.IsActive() {
return nil, errors.New("user account is not active")
}
if user.IsLocked() {
return nil, errors.New("user account is locked")
}
if !user.VerifyPassword(password) {
user.FailedLoginAttempt()
if err := s.db.Save(user).Error; err != nil {
return nil, fmt.Errorf("failed to update failed login attempt: %w", err)
}
return nil, errors.New("invalid username or password")
}
// Successful login
if err := user.Login(); err != nil {
return nil, fmt.Errorf("login failed: %w", err)
}
if err := s.db.Save(user).Error; err != nil {
return nil, fmt.Errorf("failed to update login info: %w", err)
}
return user, nil
}
// ChangePassword changes a user's password
func (s *UserService) ChangePassword(id uuid.UUID, currentPassword, newPassword string) error {
user, err := s.GetUserByID(id)
if err != nil {
return err
}
if !user.VerifyPassword(currentPassword) {
return errors.New("current password is incorrect")
}
if err := user.SetPassword(newPassword); err != nil {
return fmt.Errorf("failed to set new password: %w", err)
}
if err := s.db.Save(user).Error; err != nil {
return fmt.Errorf("failed to update password: %w", err)
}
return nil
}
// ResetPassword resets a user's password (admin function)
func (s *UserService) ResetPassword(id uuid.UUID, newPassword string) error {
user, err := s.GetUserByID(id)
if err != nil {
return err
}
if err := user.SetPassword(newPassword); err != nil {
return fmt.Errorf("failed to set new password: %w", err)
}
user.ResetLoginAttempts()
if err := s.db.Save(user).Error; err != nil {
return fmt.Errorf("failed to update password: %w", err)
}
return nil
}
// AddPermission adds a permission to a user
func (s *UserService) AddPermission(id uuid.UUID, permission string) error {
user, err := s.GetUserByID(id)
if err != nil {
return err
}
user.AddPermission(permission)
if err := s.db.Save(user).Error; err != nil {
return fmt.Errorf("failed to add permission: %w", err)
}
return nil
}
// RemovePermission removes a permission from a user
func (s *UserService) RemovePermission(id uuid.UUID, permission string) error {
user, err := s.GetUserByID(id)
if err != nil {
return err
}
user.RemovePermission(permission)
if err := s.db.Save(user).Error; err != nil {
return fmt.Errorf("failed to remove permission: %w", err)
}
return nil
}
// ExportUsers exports users to JSON
func (s *UserService) ExportUsers() ([]byte, error) {
users, _, err := s.GetAllUsers(1, 1000) // Get all users (limit to 1000 for safety)
if err != nil {
return nil, fmt.Errorf("failed to get users for export: %w", err)
}
var responses []*models.UserResponse
for _, user := range users {
responses = append(responses, user.ToResponse())
}
data, err := json.MarshalIndent(responses, "", " ")
if err != nil {
return nil, fmt.Errorf("failed to marshal users: %w", err)
}
return data, nil
}
// GetUserActivity returns user activity information
func (s *UserService) GetUserActivity(id uuid.UUID) (*utils.UserActivity, error) {
user, err := s.GetUserByID(id)
if err != nil {
return nil, err
}
activity := &utils.UserActivity{
UserID: user.ID,
Username: user.Username,
LastLogin: user.LastLogin,
LoginAttempts: user.LoginAttempts,
IsActive: user.IsActive(),
IsLocked: user.IsLocked(),
CreatedAt: user.CreatedAt,
UpdatedAt: user.UpdatedAt,
}
return activity, nil
}

View File

@@ -0,0 +1,250 @@
package utils
import (
"time"
"github.com/google/uuid"
)
// UserStats represents user statistics
type UserStats struct {
Total int64 `json:"total"`
Active int64 `json:"active"`
Admin int64 `json:"admin"`
User int64 `json:"user"`
Guest int64 `json:"guest"`
WithEmail int64 `json:"with_email"`
}
// UserActivity represents user activity information
type UserActivity struct {
UserID uuid.UUID `json:"user_id"`
Username string `json:"username"`
LastLogin *time.Time `json:"last_login"`
LoginAttempts int `json:"login_attempts"`
IsActive bool `json:"is_active"`
IsLocked bool `json:"is_locked"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// PaginatedResponse represents a paginated response
type PaginatedResponse struct {
Data interface{} `json:"data"`
Page int `json:"page"`
PageSize int `json:"page_size"`
Total int64 `json:"total"`
TotalPages int `json:"total_pages"`
}
// NewPaginatedResponse creates a new paginated response
func NewPaginatedResponse(data interface{}, page, pageSize int, total int64) *PaginatedResponse {
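    // Ceiling division: adding (pageSize - 1) before dividing rounds up,
    // so a final partial page still counts as a page.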
totalPages := int((total + int64(pageSize) - 1) / int64(pageSize))
return &PaginatedResponse{
Data: data,
Page: page,
PageSize: pageSize,
Total: total,
TotalPages: totalPages,
}
}
// APIResponse represents a standard API response
type APIResponse struct {
Success bool `json:"success"`
Message string `json:"message"`
Data interface{} `json:"data,omitempty"`
Error string `json:"error,omitempty"`
}
// NewSuccessResponse creates a new success response
func NewSuccessResponse(message string, data interface{}) *APIResponse {
return &APIResponse{
Success: true,
Message: message,
Data: data,
}
}
// NewErrorResponse creates a new error response
func NewErrorResponse(message string, err error) *APIResponse {
resp := &APIResponse{
Success: false,
Message: message,
}
if err != nil {
resp.Error = err.Error()
}
return resp
}
// ValidationError represents a validation error
type ValidationError struct {
Field string `json:"field"`
Message string `json:"message"`
}
// ValidationErrors represents multiple validation errors
type ValidationErrors struct {
Errors []ValidationError `json:"errors"`
}
// NewValidationErrors creates a new validation errors instance
func NewValidationErrors() *ValidationErrors {
return &ValidationErrors{
Errors: make([]ValidationError, 0),
}
}
// Add adds a validation error
func (ve *ValidationErrors) Add(field, message string) {
ve.Errors = append(ve.Errors, ValidationError{
Field: field,
Message: message,
})
}
// HasErrors returns true if there are validation errors
func (ve *ValidationErrors) HasErrors() bool {
return len(ve.Errors) > 0
}
// Error implements the error interface
func (ve *ValidationErrors) Error() string {
if len(ve.Errors) == 0 {
return ""
}
if len(ve.Errors) == 1 {
return ve.Errors[0].Message
}
return "multiple validation errors"
}
// DatabaseConfig represents database configuration
type DatabaseConfig struct {
Driver string `json:"driver"`
Host string `json:"host"`
Port int `json:"port"`
Database string `json:"database"`
Username string `json:"username"`
Password string `json:"password"`
SSLMode string `json:"ssl_mode"`
}
// ServerConfig represents server configuration
type ServerConfig struct {
Port int `json:"port"`
Host string `json:"host"`
ReadTimeout int `json:"read_timeout"`
WriteTimeout int `json:"write_timeout"`
IdleTimeout int `json:"idle_timeout"`
}
// JWTConfig represents JWT configuration
type JWTConfig struct {
SecretKey string `json:"secret_key"`
ExpirationHours int `json:"expiration_hours"`
RefreshHours int `json:"refresh_hours"`
Issuer string `json:"issuer"`
SigningAlgorithm string `json:"signing_algorithm"`
}
// Config represents application configuration
type Config struct {
Database DatabaseConfig `json:"database"`
Server ServerConfig `json:"server"`
JWT JWTConfig `json:"jwt"`
LogLevel string `json:"log_level"`
Debug bool `json:"debug"`
}
// SearchParams represents search parameters
type SearchParams struct {
Query string `json:"query"`
Page int `json:"page"`
PageSize int `json:"page_size"`
SortBy string `json:"sort_by"`
SortDir string `json:"sort_dir"`
}
// NewSearchParams creates new search parameters with defaults
func NewSearchParams() *SearchParams {
return &SearchParams{
Page: 1,
PageSize: 20,
SortBy: "created_at",
SortDir: "desc",
}
}
// Validate validates search parameters
func (sp *SearchParams) Validate() error {
if sp.Page < 1 {
sp.Page = 1
}
if sp.PageSize < 1 {
sp.PageSize = 20
}
if sp.PageSize > 100 {
sp.PageSize = 100
}
if sp.SortBy == "" {
sp.SortBy = "created_at"
}
if sp.SortDir != "asc" && sp.SortDir != "desc" {
sp.SortDir = "desc"
}
return nil
}
// FilterParams represents filter parameters
type FilterParams struct {
Role string `json:"role"`
Status string `json:"status"`
AgeMin int `json:"age_min"`
AgeMax int `json:"age_max"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// AuditLog represents an audit log entry
type AuditLog struct {
ID uuid.UUID `json:"id"`
UserID uuid.UUID `json:"user_id"`
Action string `json:"action"`
Resource string `json:"resource"`
Details map[string]interface{} `json:"details"`
IPAddress string `json:"ip_address"`
UserAgent string `json:"user_agent"`
CreatedAt time.Time `json:"created_at"`
}
// Session represents a user session
type Session struct {
ID uuid.UUID `json:"id"`
UserID uuid.UUID `json:"user_id"`
Token string `json:"token"`
ExpiresAt time.Time `json:"expires_at"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// IsExpired checks if the session is expired
func (s *Session) IsExpired() bool {
return time.Now().After(s.ExpiresAt)
}
// ExtendSession extends the session expiration
func (s *Session) ExtendSession(duration time.Duration) {
s.ExpiresAt = time.Now().Add(duration)
s.UpdatedAt = time.Now()
}

View File

@@ -0,0 +1,309 @@
package api
import (
"net/http"
"strconv"
"github.com/example/user-management/internal/models"
"github.com/example/user-management/internal/services"
"github.com/example/user-management/internal/utils"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// UserHandler handles user-related HTTP requests
type UserHandler struct {
userService *services.UserService
}
// NewUserHandler creates a new user handler
func NewUserHandler(userService *services.UserService) *UserHandler {
return &UserHandler{
userService: userService,
}
}
// CreateUser handles user creation
func (h *UserHandler) CreateUser(c *gin.Context) {
var req models.UserRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
user, err := h.userService.CreateUser(&req)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to create user", err))
return
}
c.JSON(http.StatusCreated, utils.NewSuccessResponse("User created successfully", user.ToResponse()))
}
// GetUser handles getting a single user
func (h *UserHandler) GetUser(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
user, err := h.userService.GetUserByID(id)
if err != nil {
c.JSON(http.StatusNotFound, utils.NewErrorResponse("User not found", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("User retrieved successfully", user.ToResponse()))
}
// GetUsers handles getting users with pagination
func (h *UserHandler) GetUsers(c *gin.Context) {
// Parse failures are deliberately ignored; malformed or out-of-range
// values fall back to the defaults enforced below.
page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
pageSize, _ := strconv.Atoi(c.DefaultQuery("page_size", "20"))
if page < 1 {
page = 1
}
if pageSize < 1 || pageSize > 100 {
pageSize = 20
}
users, total, err := h.userService.GetAllUsers(page, pageSize)
if err != nil {
c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to get users", err))
return
}
var responses []*models.UserResponse
for _, user := range users {
responses = append(responses, user.ToResponse())
}
paginatedResponse := utils.NewPaginatedResponse(responses, page, pageSize, total)
c.JSON(http.StatusOK, utils.NewSuccessResponse("Users retrieved successfully", paginatedResponse))
}
// UpdateUser handles user updates
func (h *UserHandler) UpdateUser(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
var updates map[string]interface{}
if err := c.ShouldBindJSON(&updates); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
user, err := h.userService.UpdateUser(id, updates)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to update user", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("User updated successfully", user.ToResponse()))
}
// DeleteUser handles user deletion
func (h *UserHandler) DeleteUser(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
if err := h.userService.DeleteUser(id); err != nil {
c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to delete user", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("User deleted successfully", nil))
}
// SearchUsers handles user search
func (h *UserHandler) SearchUsers(c *gin.Context) {
query := c.Query("q")
page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
pageSize, _ := strconv.Atoi(c.DefaultQuery("page_size", "20"))
if page < 1 {
page = 1
}
if pageSize < 1 || pageSize > 100 {
pageSize = 20
}
users, total, err := h.userService.SearchUsers(query, page, pageSize)
if err != nil {
c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to search users", err))
return
}
var responses []*models.UserResponse
for _, user := range users {
responses = append(responses, user.ToResponse())
}
paginatedResponse := utils.NewPaginatedResponse(responses, page, pageSize, total)
c.JSON(http.StatusOK, utils.NewSuccessResponse("Search completed successfully", paginatedResponse))
}
// GetUserStats handles getting user statistics
func (h *UserHandler) GetUserStats(c *gin.Context) {
stats, err := h.userService.GetUserStats()
if err != nil {
c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to get user statistics", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Statistics retrieved successfully", stats))
}
// ExportUsers handles user export
func (h *UserHandler) ExportUsers(c *gin.Context) {
data, err := h.userService.ExportUsers()
if err != nil {
c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to export users", err))
return
}
c.Header("Content-Type", "application/json")
c.Header("Content-Disposition", "attachment; filename=users.json")
c.Data(http.StatusOK, "application/json", data)
}
// Login handles user authentication
func (h *UserHandler) Login(c *gin.Context) {
var req struct {
Username string `json:"username" binding:"required"`
Password string `json:"password" binding:"required"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
user, err := h.userService.AuthenticateUser(req.Username, req.Password)
if err != nil {
c.JSON(http.StatusUnauthorized, utils.NewErrorResponse("Authentication failed", err))
return
}
// In a real application, you would generate a JWT token here
response := map[string]interface{}{
"user": user.ToResponse(),
"token": "dummy-jwt-token", // This would be a real JWT token
"expires": "2024-12-31T23:59:59Z",
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Login successful", response))
}
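// A minimal sketch of what real token issuance could look like, driven by the
// JWTConfig defined in the models package. github.com/golang-jwt/jwt/v5 and the
// user.ID field are assumptions here; this project does not declare a JWT
// dependency:
//
//	claims := jwt.MapClaims{
//	    "sub": user.ID.String(),
//	    "iss": cfg.Issuer,
//	    "exp": time.Now().Add(time.Duration(cfg.ExpirationHours) * time.Hour).Unix(),
//	}
//	token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
//	signed, err := token.SignedString([]byte(cfg.SecretKey))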
// Logout handles user logout
func (h *UserHandler) Logout(c *gin.Context) {
// In a real application, you would invalidate the JWT token here
c.JSON(http.StatusOK, utils.NewSuccessResponse("Logout successful", nil))
}
// ChangePassword handles password change
func (h *UserHandler) ChangePassword(c *gin.Context) {
var req struct {
UserID uuid.UUID `json:"user_id" binding:"required"`
CurrentPassword string `json:"current_password" binding:"required"`
NewPassword string `json:"new_password" binding:"required,min=8"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
if err := h.userService.ChangePassword(req.UserID, req.CurrentPassword, req.NewPassword); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to change password", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Password changed successfully", nil))
}
// ResetPassword handles password reset (admin only)
func (h *UserHandler) ResetPassword(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
var req struct {
NewPassword string `json:"new_password" binding:"required,min=8"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
if err := h.userService.ResetPassword(id, req.NewPassword); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to reset password", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Password reset successfully", nil))
}
// AddPermission handles adding permission to user
func (h *UserHandler) AddPermission(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
var req struct {
Permission string `json:"permission" binding:"required"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
return
}
if err := h.userService.AddPermission(id, req.Permission); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to add permission", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Permission added successfully", nil))
}
// RemovePermission handles removing permission from user
func (h *UserHandler) RemovePermission(c *gin.Context) {
idStr := c.Param("id")
id, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
return
}
permission := c.Query("permission")
if permission == "" {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Permission parameter is required", nil))
return
}
if err := h.userService.RemovePermission(id, permission); err != nil {
c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to remove permission", err))
return
}
c.JSON(http.StatusOK, utils.NewSuccessResponse("Permission removed successfully", nil))
}

View File

@@ -0,0 +1,183 @@
# User Management System (Java)
A comprehensive user management system built in Java for testing Code Index MCP's analysis capabilities.
## Features
- **User Management**: Create, update, delete, and search users
- **Authentication**: BCrypt password hashing and verification
- **Authorization**: Role-based access control (Admin, User, Guest)
- **Data Validation**: Input validation and sanitization
- **Export/Import**: JSON and CSV export capabilities
- **Persistence**: File-based storage with JSON serialization
- **Logging**: SLF4J logging with Logback
## Project Structure
```
src/main/java/com/example/usermanagement/
├── models/
│ ├── Person.java # Base person model
│ ├── User.java # User model with auth features
│ ├── UserRole.java # User role enumeration
│ └── UserStatus.java # User status enumeration
├── services/
│ └── UserManager.java # User management service
├── utils/
│ ├── ValidationUtils.java # Validation utilities
│ ├── UserNotFoundException.java # Custom exception
│ └── DuplicateUserException.java # Custom exception
└── Main.java # Main demo application
```
## Technologies Used
- **Java 11**: Modern Java features and APIs
- **Jackson**: JSON processing and serialization
- **BCrypt**: Secure password hashing
- **Apache Commons**: Utility libraries (Lang3, CSV)
- **SLF4J + Logback**: Logging framework
- **Maven**: Build and dependency management
- **JUnit 5**: Testing framework
## Build and Run
### Prerequisites
- Java 11 or higher
- Maven 3.6+
### Build
```bash
mvn clean compile
```
### Run
```bash
mvn exec:java -Dexec.mainClass="com.example.usermanagement.Main"
```
### Test
```bash
mvn test
```
### Package
```bash
mvn package
```
## Usage
### Creating Users
```java
UserManager userManager = new UserManager();
// Create a basic user
User user = userManager.createUser("John Doe", 30, "john_doe", "john@example.com");
user.setPassword("SecurePass123!");
// Create an admin user
User admin = userManager.createUser("Jane Smith", 35, "jane_admin",
"jane@example.com", UserRole.ADMIN);
admin.setPassword("AdminPass123!");
admin.addPermission("user_management");
```
### User Authentication
```java
// Verify password
boolean isValid = user.verifyPassword("SecurePass123!");
// Login
if (user.login()) {
System.out.println("Login successful!");
System.out.println("Last login: " + user.getLastLogin());
}
```
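The `User` model also implements a simple lockout: five failed attempts suspend the account. A short sketch of that behavior:
```java
for (int i = 0; i < 5; i++) {
    user.failedLoginAttempt();
}
System.out.println(user.isLocked()); // true - account is now suspended
user.activate(); // clears the attempt counter and reactivates the account
```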
### User Management
```java
// Search users
List<User> results = userManager.searchUsers("john");
// Filter users
List<User> activeUsers = userManager.getActiveUsers();
List<User> adminUsers = userManager.getUsersByRole(UserRole.ADMIN);
List<User> olderUsers = userManager.getUsersOlderThan(25);
// Update user
Map<String, Object> updates = Map.of("age", 31, "email", "newemail@example.com");
userManager.updateUser("john_doe", updates);
// Export users
String jsonData = userManager.exportUsers("json");
String csvData = userManager.exportUsers("csv");
```
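### Persistence
`UserManager` can also persist users to a JSON file. A minimal sketch; the path below is illustrative:
```java
// Loads existing users from the file (if present) and saves after each change
UserManager persistentManager = new UserManager("data/users.json");
```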
## Testing Features
This project tests the following Java language features:
### Core Language Features
- **Classes and Inheritance**: Person and User class hierarchy
- **Enums**: UserRole and UserStatus with methods
- **Interfaces**: Custom exceptions and validation
- **Generics**: Collections with type safety
- **Annotations**: Jackson JSON annotations
- **Exception Handling**: Custom exceptions and try-catch blocks
### Modern Java Features
- **Streams API**: Filtering, mapping, and collecting (see the example after this list)
- **Lambda Expressions**: Functional programming
- **Method References**: Stream operations
- **Optional**: Null-safe operations
- **Time API**: LocalDateTime usage
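For example, `UserManager.getUserByEmail` combines streams, lambdas, and a null-safe `Optional` lookup (taken from the service class):
```java
public User getUserByEmail(String email) {
    return users.values().stream()
            .filter(user -> Objects.equals(user.getEmail(), email))
            .findFirst()
            .orElse(null);
}
```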
### Advanced Features
- **Concurrent Collections**: ConcurrentHashMap
- **Reflection**: Jackson serialization
- **File I/O**: NIO.2 Path and Files
- **Logging**: SLF4J with parameterized messages
- **Validation**: Input validation and sanitization
### Framework Integration
- **Maven**: Build lifecycle and dependency management
- **Jackson**: JSON serialization/deserialization
- **BCrypt**: Password hashing
- **Apache Commons**: Utility libraries
- **SLF4J**: Structured logging
### Design Patterns
- **Builder Pattern**: Object construction
- **Factory Pattern**: User creation
- **Repository Pattern**: Data access
- **Service Layer**: Business logic separation
## Dependencies
### Core Dependencies
- **Jackson Databind**: JSON processing
- **Jackson JSR310**: Java 8 time support
- **BCrypt**: Password hashing
- **Apache Commons Lang3**: Utilities
- **Apache Commons CSV**: CSV processing
### Logging
- **SLF4J API**: Logging facade
- **Logback Classic**: Logging implementation
### Testing
- **JUnit 5**: Testing framework
- **Mockito**: Mocking framework
## License
MIT License - This is a sample project for testing purposes.

View File

@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>user-management</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<name>User Management System</name>
<description>A sample user management system for testing Code Index MCP</description>
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>5.9.2</junit.version>
<jackson.version>2.15.2</jackson.version>
<slf4j.version>2.0.7</slf4j.version>
<logback.version>1.4.7</logback.version>
</properties>
<dependencies>
<!-- Jackson for JSON processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId>
<version>${jackson.version}</version>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>${logback.version}</version>
</dependency>
<!-- Apache Commons -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.9.0</version>
</dependency>
<!-- BCrypt for password hashing -->
<dependency>
<groupId>org.mindrot</groupId>
<artifactId>jbcrypt</artifactId>
<version>0.4</version>
</dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>5.3.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.1.2</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<mainClass>com.example.usermanagement.Main</mainClass>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,220 @@
package com.example.usermanagement;
import com.example.usermanagement.models.User;
import com.example.usermanagement.models.UserRole;
import com.example.usermanagement.services.UserManager;
import com.example.usermanagement.utils.UserNotFoundException;
import com.example.usermanagement.utils.DuplicateUserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
* Main class demonstrating the User Management System.
*/
public class Main {
private static final Logger logger = LoggerFactory.getLogger(Main.class);
public static void main(String[] args) {
System.out.println("=".repeat(50));
System.out.println("User Management System Demo (Java)");
System.out.println("=".repeat(50));
// Create user manager
UserManager userManager = new UserManager();
// Create sample users
System.out.println("\n1. Creating sample users...");
createSampleUsers(userManager);
// Display all users
System.out.println("\n2. Listing all users...");
listAllUsers(userManager);
// Test user retrieval
System.out.println("\n3. Testing user retrieval...");
testUserRetrieval(userManager);
// Test user search
System.out.println("\n4. Testing user search...");
testUserSearch(userManager);
// Test user filtering
System.out.println("\n5. Testing user filtering...");
testUserFiltering(userManager);
// Test user updates
System.out.println("\n6. Testing user updates...");
testUserUpdates(userManager);
// Test authentication
System.out.println("\n7. Testing authentication...");
testAuthentication(userManager);
// Display statistics
System.out.println("\n8. User statistics...");
displayStatistics(userManager);
// Test export functionality
System.out.println("\n9. Testing export functionality...");
testExport(userManager);
// Test user permissions
System.out.println("\n10. Testing user permissions...");
testPermissions(userManager);
System.out.println("\n" + "=".repeat(50));
System.out.println("Demo completed successfully!");
System.out.println("=".repeat(50));
}
private static void createSampleUsers(UserManager userManager) {
try {
// Create admin user
User admin = userManager.createUser("Alice Johnson", 30, "alice_admin",
"alice@example.com", UserRole.ADMIN);
admin.setPassword("AdminPass123!");
admin.addPermission("user_management");
admin.addPermission("system_admin");
// Create regular users
User user1 = userManager.createUser("Bob Smith", 25, "bob_user", "bob@example.com");
user1.setPassword("UserPass123!");
User user2 = userManager.createUser("Charlie Brown", 35, "charlie", "charlie@example.com");
user2.setPassword("CharliePass123!");
User user3 = userManager.createUser("Diana Prince", 28, "diana", "diana@example.com");
user3.setPassword("DianaPass123!");
System.out.println("✓ Created " + userManager.getUserCount() + " users");
} catch (DuplicateUserException e) {
System.out.println("✗ Error creating users: " + e.getMessage());
} catch (Exception e) {
System.out.println("✗ Unexpected error: " + e.getMessage());
logger.error("Error creating sample users", e);
}
}
private static void listAllUsers(UserManager userManager) {
List<User> users = userManager.getAllUsers();
System.out.println("Found " + users.size() + " users:");
users.forEach(user ->
System.out.println("" + user.getUsername() + " (" + user.getName() +
") - " + user.getRole().getDisplayName() +
" [" + user.getStatus().getDisplayName() + "]")
);
}
private static void testUserRetrieval(UserManager userManager) {
try {
User user = userManager.getUser("alice_admin");
System.out.println("✓ Retrieved user: " + user.getUsername() + " (" + user.getName() + ")");
User userByEmail = userManager.getUserByEmail("bob@example.com");
if (userByEmail != null) {
System.out.println("✓ Found user by email: " + userByEmail.getUsername());
}
} catch (UserNotFoundException e) {
System.out.println("✗ User retrieval failed: " + e.getMessage());
}
}
private static void testUserSearch(UserManager userManager) {
List<User> searchResults = userManager.searchUsers("alice");
System.out.println("Search results for 'alice': " + searchResults.size() + " users found");
searchResults.forEach(user ->
System.out.println("" + user.getUsername() + " (" + user.getName() + ")")
);
}
private static void testUserFiltering(UserManager userManager) {
List<User> olderUsers = userManager.getUsersOlderThan(30);
System.out.println("Users older than 30: " + olderUsers.size() + " users");
olderUsers.forEach(user ->
System.out.println("" + user.getUsername() + " (" + user.getName() + ") - age " + user.getAge())
);
List<User> adminUsers = userManager.getUsersByRole(UserRole.ADMIN);
System.out.println("Admin users: " + adminUsers.size() + " users");
}
private static void testUserUpdates(UserManager userManager) {
try {
Map<String, Object> updates = Map.of("age", 26);
User updatedUser = userManager.updateUser("bob_user", updates);
System.out.println("✓ Updated " + updatedUser.getUsername() + "'s age to " + updatedUser.getAge());
} catch (UserNotFoundException e) {
System.out.println("✗ Update failed: " + e.getMessage());
}
}
private static void testAuthentication(UserManager userManager) {
try {
User user = userManager.getUser("alice_admin");
// Test password verification
boolean isValid = user.verifyPassword("AdminPass123!");
System.out.println("✓ Password verification: " + (isValid ? "SUCCESS" : "FAILED"));
// Test login
boolean loginSuccess = user.login();
System.out.println("✓ Login attempt: " + (loginSuccess ? "SUCCESS" : "FAILED"));
if (loginSuccess) {
System.out.println("✓ Last login: " + user.getLastLogin());
}
} catch (UserNotFoundException e) {
System.out.println("✗ Authentication test failed: " + e.getMessage());
}
}
private static void displayStatistics(UserManager userManager) {
Map<String, Integer> stats = userManager.getUserStats();
stats.forEach((key, value) ->
System.out.println(" " + key.replace("_", " ").toUpperCase() + ": " + value)
);
}
private static void testExport(UserManager userManager) {
try {
String jsonExport = userManager.exportUsers("json");
System.out.println("✓ JSON export: " + jsonExport.length() + " characters");
String csvExport = userManager.exportUsers("csv");
System.out.println("✓ CSV export: " + csvExport.split("\n").length + " lines");
} catch (Exception e) {
System.out.println("✗ Export failed: " + e.getMessage());
}
}
private static void testPermissions(UserManager userManager) {
try {
User admin = userManager.getUser("alice_admin");
System.out.println("Admin permissions: " + admin.getPermissions());
System.out.println("Has user_management permission: " + admin.hasPermission("user_management"));
System.out.println("Is admin: " + admin.isAdmin());
// Test role privileges
System.out.println("Admin role can act on USER role: " +
admin.getRole().canActOn(UserRole.USER));
} catch (UserNotFoundException e) {
System.out.println("✗ Permission test failed: " + e.getMessage());
}
}
}

View File

@@ -0,0 +1,284 @@
package com.example.usermanagement.models;
import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.commons.lang3.StringUtils;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
* Represents a person with basic information.
* This class serves as the base class for more specific person types.
*/
public class Person {
@JsonProperty("name")
private String name;
@JsonProperty("age")
private int age;
@JsonProperty("email")
private String email;
@JsonProperty("created_at")
@JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss")
private LocalDateTime createdAt;
@JsonProperty("metadata")
private Map<String, Object> metadata;
/**
* Default constructor for Jackson deserialization.
*/
public Person() {
this.createdAt = LocalDateTime.now();
this.metadata = new HashMap<>();
}
/**
* Constructor with name and age.
*
* @param name The person's name
* @param age The person's age
* @throws IllegalArgumentException if validation fails
*/
public Person(String name, int age) {
this();
setName(name);
setAge(age);
}
/**
* Constructor with name, age, and email.
*
* @param name The person's name
* @param age The person's age
* @param email The person's email address
* @throws IllegalArgumentException if validation fails
*/
public Person(String name, int age, String email) {
this(name, age);
setEmail(email);
}
// Getters and Setters
public String getName() {
return name;
}
public void setName(String name) {
if (StringUtils.isBlank(name)) {
throw new IllegalArgumentException("Name cannot be null or empty");
}
if (name.length() > 100) {
throw new IllegalArgumentException("Name cannot exceed 100 characters");
}
this.name = name.trim();
}
public int getAge() {
return age;
}
public void setAge(int age) {
if (age < 0) {
throw new IllegalArgumentException("Age cannot be negative");
}
if (age > 150) {
throw new IllegalArgumentException("Age cannot exceed 150");
}
this.age = age;
}
public String getEmail() {
return email;
}
public void setEmail(String email) {
if (StringUtils.isNotBlank(email) && !isValidEmail(email)) {
throw new IllegalArgumentException("Invalid email format");
}
this.email = StringUtils.isBlank(email) ? null : email.trim();
}
public LocalDateTime getCreatedAt() {
return createdAt;
}
public void setCreatedAt(LocalDateTime createdAt) {
this.createdAt = createdAt;
}
public Map<String, Object> getMetadata() {
return new HashMap<>(metadata);
}
public void setMetadata(Map<String, Object> metadata) {
this.metadata = metadata == null ? new HashMap<>() : new HashMap<>(metadata);
}
// Business methods
/**
* Returns a greeting message for the person.
*
* @return A personalized greeting
*/
public String greet() {
return String.format("Hello, I'm %s and I'm %d years old.", name, age);
}
/**
* Checks if the person has an email address.
*
* @return true if email is present and not empty
*/
public boolean hasEmail() {
return StringUtils.isNotBlank(email);
}
/**
* Updates the person's email address.
*
* @param newEmail The new email address
* @throws IllegalArgumentException if email format is invalid
*/
public void updateEmail(String newEmail) {
setEmail(newEmail);
}
/**
* Adds metadata to the person.
*
* @param key The metadata key
* @param value The metadata value
*/
public void addMetadata(String key, Object value) {
if (StringUtils.isNotBlank(key)) {
metadata.put(key, value);
}
}
/**
* Gets metadata value by key.
*
* @param key The metadata key
* @return The metadata value or null if not found
*/
public Object getMetadata(String key) {
return metadata.get(key);
}
/**
* Gets metadata value by key with default value.
*
* @param key The metadata key
* @param defaultValue The default value if key is not found
* @return The metadata value or default value
*/
public Object getMetadata(String key, Object defaultValue) {
return metadata.getOrDefault(key, defaultValue);
}
/**
* Removes metadata by key.
*
* @param key The metadata key to remove
* @return The removed value or null if not found
*/
public Object removeMetadata(String key) {
return metadata.remove(key);
}
/**
* Clears all metadata.
*/
public void clearMetadata() {
metadata.clear();
}
/**
* Validates email format using a simple regex.
*
* @param email The email to validate
* @return true if email format is valid
*/
private boolean isValidEmail(String email) {
String emailPattern = "^[A-Za-z0-9+_.-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$";
return email.matches(emailPattern);
}
/**
* Creates a Person instance from a map of data.
*
* @param data The data map
* @return A new Person instance
*/
public static Person fromMap(Map<String, Object> data) {
Person person = new Person();
if (data.containsKey("name")) {
person.setName((String) data.get("name"));
}
if (data.containsKey("age")) {
person.setAge((Integer) data.get("age"));
}
if (data.containsKey("email")) {
person.setEmail((String) data.get("email"));
}
if (data.containsKey("metadata")) {
@SuppressWarnings("unchecked")
Map<String, Object> metadata = (Map<String, Object>) data.get("metadata");
person.setMetadata(metadata);
}
return person;
}
/**
* Converts the person to a map representation.
*
* @return A map containing person data
*/
public Map<String, Object> toMap() {
Map<String, Object> map = new HashMap<>();
map.put("name", name);
map.put("age", age);
map.put("email", email);
map.put("created_at", createdAt);
map.put("metadata", new HashMap<>(metadata));
return map;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
Person person = (Person) obj;
return age == person.age &&
Objects.equals(name, person.name) &&
Objects.equals(email, person.email) &&
Objects.equals(createdAt, person.createdAt) &&
Objects.equals(metadata, person.metadata);
}
@Override
public int hashCode() {
return Objects.hash(name, age, email, createdAt, metadata);
}
@Override
public String toString() {
return String.format("Person{name='%s', age=%d, email='%s', createdAt=%s}",
name, age, email, createdAt);
}
}

View File

@@ -0,0 +1,363 @@
package com.example.usermanagement.models;
import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.commons.lang3.StringUtils;
import org.mindrot.jbcrypt.BCrypt;
import java.time.LocalDateTime;
import java.util.HashSet;
import java.util.Set;
import java.util.Objects;
/**
* User class extending Person with authentication and authorization features.
*/
public class User extends Person {
@JsonProperty("username")
private String username;
@JsonProperty("password_hash")
private String passwordHash;
@JsonProperty("role")
private UserRole role;
@JsonProperty("status")
private UserStatus status;
@JsonProperty("last_login")
@JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss")
private LocalDateTime lastLogin;
@JsonProperty("login_attempts")
private int loginAttempts;
@JsonProperty("permissions")
private Set<String> permissions;
/**
* Default constructor for Jackson deserialization.
*/
public User() {
super();
this.role = UserRole.USER;
this.status = UserStatus.ACTIVE;
this.loginAttempts = 0;
this.permissions = new HashSet<>();
}
/**
* Constructor with basic information.
*
* @param name The user's name
* @param age The user's age
* @param username The username
*/
public User(String name, int age, String username) {
super(name, age);
setUsername(username);
this.role = UserRole.USER;
this.status = UserStatus.ACTIVE;
this.loginAttempts = 0;
this.permissions = new HashSet<>();
}
/**
* Constructor with email.
*
* @param name The user's name
* @param age The user's age
* @param username The username
* @param email The email address
*/
public User(String name, int age, String username, String email) {
super(name, age, email);
setUsername(username);
this.role = UserRole.USER;
this.status = UserStatus.ACTIVE;
this.loginAttempts = 0;
this.permissions = new HashSet<>();
}
/**
* Constructor with role.
*
* @param name The user's name
* @param age The user's age
* @param username The username
* @param email The email address
* @param role The user role
*/
public User(String name, int age, String username, String email, UserRole role) {
this(name, age, username, email);
this.role = role;
}
// Getters and Setters
public String getUsername() {
return username;
}
public void setUsername(String username) {
if (StringUtils.isBlank(username)) {
throw new IllegalArgumentException("Username cannot be null or empty");
}
if (username.length() < 3 || username.length() > 20) {
throw new IllegalArgumentException("Username must be between 3 and 20 characters");
}
if (!username.matches("^[a-zA-Z0-9_]+$")) {
throw new IllegalArgumentException("Username can only contain letters, numbers, and underscores");
}
this.username = username.trim();
}
public String getPasswordHash() {
return passwordHash;
}
public void setPasswordHash(String passwordHash) {
this.passwordHash = passwordHash;
}
public UserRole getRole() {
return role;
}
public void setRole(UserRole role) {
this.role = role != null ? role : UserRole.USER;
}
public UserStatus getStatus() {
return status;
}
public void setStatus(UserStatus status) {
this.status = status != null ? status : UserStatus.ACTIVE;
}
public LocalDateTime getLastLogin() {
return lastLogin;
}
public void setLastLogin(LocalDateTime lastLogin) {
this.lastLogin = lastLogin;
}
public int getLoginAttempts() {
return loginAttempts;
}
public void setLoginAttempts(int loginAttempts) {
this.loginAttempts = Math.max(0, loginAttempts);
}
public Set<String> getPermissions() {
return new HashSet<>(permissions);
}
public void setPermissions(Set<String> permissions) {
this.permissions = permissions != null ? new HashSet<>(permissions) : new HashSet<>();
}
// Authentication methods
/**
* Sets the user's password using BCrypt hashing.
*
* @param password The plain text password
* @throws IllegalArgumentException if password is invalid
*/
public void setPassword(String password) {
if (StringUtils.isBlank(password)) {
throw new IllegalArgumentException("Password cannot be null or empty");
}
if (password.length() < 8) {
throw new IllegalArgumentException("Password must be at least 8 characters long");
}
// Hash the password with BCrypt
this.passwordHash = BCrypt.hashpw(password, BCrypt.gensalt());
}
/**
* Verifies a password against the stored hash.
*
* @param password The plain text password to verify
* @return true if password matches
*/
public boolean verifyPassword(String password) {
if (StringUtils.isBlank(password) || StringUtils.isBlank(passwordHash)) {
return false;
}
try {
return BCrypt.checkpw(password, passwordHash);
} catch (IllegalArgumentException e) {
return false;
}
}
// Permission methods
/**
* Adds a permission to the user.
*
* @param permission The permission to add
*/
public void addPermission(String permission) {
if (StringUtils.isNotBlank(permission)) {
permissions.add(permission.trim());
}
}
/**
* Removes a permission from the user.
*
* @param permission The permission to remove
*/
public void removePermission(String permission) {
permissions.remove(permission);
}
/**
* Checks if the user has a specific permission.
*
* @param permission The permission to check
* @return true if user has the permission
*/
public boolean hasPermission(String permission) {
return permissions.contains(permission);
}
/**
* Clears all permissions.
*/
public void clearPermissions() {
permissions.clear();
}
// Status and role methods
/**
* Checks if the user is an admin.
*
* @return true if user is admin
*/
public boolean isAdmin() {
return role == UserRole.ADMIN;
}
/**
* Checks if the user is active.
*
* @return true if user is active
*/
public boolean isActive() {
return status == UserStatus.ACTIVE;
}
/**
* Checks if the user is locked due to too many failed login attempts.
*
* @return true if user is locked
*/
public boolean isLocked() {
return status == UserStatus.SUSPENDED || loginAttempts >= 5;
}
// Login methods
/**
* Records a successful login.
*
* @return true if login was successful
*/
public boolean login() {
if (!isActive() || isLocked()) {
return false;
}
this.lastLogin = LocalDateTime.now();
this.loginAttempts = 0;
return true;
}
/**
* Records a failed login attempt.
*/
public void failedLoginAttempt() {
this.loginAttempts++;
if (this.loginAttempts >= 5) {
this.status = UserStatus.SUSPENDED;
}
}
/**
* Resets login attempts.
*/
public void resetLoginAttempts() {
this.loginAttempts = 0;
}
// Status change methods
/**
* Activates the user account.
*/
public void activate() {
this.status = UserStatus.ACTIVE;
this.loginAttempts = 0;
}
/**
* Deactivates the user account.
*/
public void deactivate() {
this.status = UserStatus.INACTIVE;
}
/**
* Suspends the user account.
*/
public void suspend() {
this.status = UserStatus.SUSPENDED;
}
/**
* Marks the user as deleted.
*/
public void delete() {
this.status = UserStatus.DELETED;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
if (!super.equals(obj)) return false;
User user = (User) obj;
return loginAttempts == user.loginAttempts &&
Objects.equals(username, user.username) &&
Objects.equals(passwordHash, user.passwordHash) &&
role == user.role &&
status == user.status &&
Objects.equals(lastLogin, user.lastLogin) &&
Objects.equals(permissions, user.permissions);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), username, passwordHash, role, status,
lastLogin, loginAttempts, permissions);
}
@Override
public String toString() {
return String.format("User{username='%s', name='%s', role=%s, status=%s, lastLogin=%s}",
username, getName(), role, status, lastLogin);
}
}

View File

@@ -0,0 +1,134 @@
package com.example.usermanagement.models;
import com.fasterxml.jackson.annotation.JsonValue;
/**
* Enumeration for user roles in the system.
*/
public enum UserRole {
/**
* Administrator role with full system access.
*/
ADMIN("admin", "Administrator", "Full system access"),
/**
* Regular user role with standard permissions.
*/
USER("user", "User", "Standard user permissions"),
/**
* Guest role with limited permissions.
*/
GUEST("guest", "Guest", "Limited guest permissions");
private final String code;
private final String displayName;
private final String description;
/**
* Constructor for UserRole enum.
*
* @param code The role code
* @param displayName The display name
* @param description The role description
*/
UserRole(String code, String displayName, String description) {
this.code = code;
this.displayName = displayName;
this.description = description;
}
/**
* Gets the role code.
*
* @return The role code
*/
@JsonValue
public String getCode() {
return code;
}
/**
* Gets the display name.
*
* @return The display name
*/
public String getDisplayName() {
return displayName;
}
/**
* Gets the role description.
*
* @return The role description
*/
public String getDescription() {
return description;
}
/**
* Finds a UserRole by its code.
*
* @param code The role code to search for
* @return The UserRole or null if not found
*/
public static UserRole fromCode(String code) {
if (code == null) {
return null;
}
for (UserRole role : values()) {
if (role.code.equalsIgnoreCase(code)) {
return role;
}
}
return null;
}
/**
* Checks if this role has higher privilege than another role.
*
* @param other The other role to compare with
* @return true if this role has higher privilege
*/
public boolean hasHigherPrivilegeThan(UserRole other) {
// Relies on declaration order: ADMIN, USER, GUEST (lower ordinal = higher privilege)
return this.ordinal() < other.ordinal();
}
/**
* Checks if this role has lower privilege than another role.
*
* @param other The other role to compare with
* @return true if this role has lower privilege
*/
public boolean hasLowerPrivilegeThan(UserRole other) {
return this.ordinal() > other.ordinal();
}
/**
* Checks if this role can perform actions on another role.
*
* @param targetRole The target role
* @return true if this role can act on the target role
*/
public boolean canActOn(UserRole targetRole) {
// Admin can act on all roles
if (this == ADMIN) {
return true;
}
// Users can only act on guests
if (this == USER) {
return targetRole == GUEST;
}
// Guests cannot act on anyone
return false;
}
@Override
public String toString() {
return displayName;
}
}

View File

@@ -0,0 +1,146 @@
package com.example.usermanagement.models;
import com.fasterxml.jackson.annotation.JsonValue;
/**
* Enumeration for user status in the system.
*/
public enum UserStatus {
/**
* Active status - user can login and use the system.
*/
ACTIVE("active", "Active", "User can login and use the system"),
/**
* Inactive status - user account is temporarily disabled.
*/
INACTIVE("inactive", "Inactive", "User account is temporarily disabled"),
/**
* Suspended status - user account is suspended due to violations.
*/
SUSPENDED("suspended", "Suspended", "User account is suspended due to violations"),
/**
* Deleted status - user account is marked for deletion.
*/
DELETED("deleted", "Deleted", "User account is marked for deletion");
private final String code;
private final String displayName;
private final String description;
/**
* Constructor for UserStatus enum.
*
* @param code The status code
* @param displayName The display name
* @param description The status description
*/
UserStatus(String code, String displayName, String description) {
this.code = code;
this.displayName = displayName;
this.description = description;
}
/**
* Gets the status code.
*
* @return The status code
*/
@JsonValue
public String getCode() {
return code;
}
/**
* Gets the display name.
*
* @return The display name
*/
public String getDisplayName() {
return displayName;
}
/**
* Gets the status description.
*
* @return The status description
*/
public String getDescription() {
return description;
}
/**
* Finds a UserStatus by its code.
*
* @param code The status code to search for
* @return The UserStatus or null if not found
*/
public static UserStatus fromCode(String code) {
if (code == null) {
return null;
}
for (UserStatus status : values()) {
if (status.code.equalsIgnoreCase(code)) {
return status;
}
}
return null;
}
/**
* Checks if this status allows user login.
*
* @return true if user can login with this status
*/
public boolean allowsLogin() {
return this == ACTIVE;
}
/**
* Checks if this status indicates the user is disabled.
*
* @return true if user is disabled
*/
public boolean isDisabled() {
return this == INACTIVE || this == SUSPENDED || this == DELETED;
}
/**
* Checks if this status indicates the user is deleted.
*
* @return true if user is deleted
*/
public boolean isDeleted() {
return this == DELETED;
}
/**
* Checks if this status can be changed to another status.
*
* @param targetStatus The target status
* @return true if status change is allowed
*/
public boolean canChangeTo(UserStatus targetStatus) {
// Cannot change from deleted status
if (this == DELETED) {
return false;
}
// Cannot change to same status
if (this == targetStatus) {
return false;
}
// All other changes are allowed
return true;
}
@Override
public String toString() {
return displayName;
}
}

View File

@@ -0,0 +1,488 @@
package com.example.usermanagement.services;
import com.example.usermanagement.models.User;
import com.example.usermanagement.models.UserRole;
import com.example.usermanagement.models.UserStatus;
import com.example.usermanagement.utils.UserNotFoundException;
import com.example.usermanagement.utils.DuplicateUserException;
import com.example.usermanagement.utils.ValidationUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
* Service class for managing users in the system.
* Provides CRUD operations, search functionality, and data persistence.
*/
public class UserManager {
private static final Logger logger = LoggerFactory.getLogger(UserManager.class);
private final Map<String, User> users;
private final ObjectMapper objectMapper;
private final String storagePath;
/**
* Constructor with default storage path.
*/
public UserManager() {
this(null);
}
/**
* Constructor with custom storage path.
*
* @param storagePath The file path for user data storage
*/
public UserManager(String storagePath) {
this.users = new ConcurrentHashMap<>();
this.objectMapper = new ObjectMapper();
this.objectMapper.registerModule(new JavaTimeModule());
this.storagePath = storagePath;
if (StringUtils.isNotBlank(storagePath)) {
loadUsersFromFile();
}
}
/**
* Creates a new user in the system.
*
* @param name The user's name
* @param age The user's age
* @param username The username
* @param email The email address (optional)
* @param role The user role
* @return The created user
* @throws DuplicateUserException if username already exists
* @throws IllegalArgumentException if validation fails
*/
public User createUser(String name, int age, String username, String email, UserRole role) {
logger.debug("Creating user with username: {}", username);
if (users.containsKey(username)) {
throw new DuplicateUserException("User with username '" + username + "' already exists");
}
// Validate inputs
ValidationUtils.validateUsername(username);
if (StringUtils.isNotBlank(email)) {
ValidationUtils.validateEmail(email);
}
User user = new User(name, age, username, email, role);
// putIfAbsent guards against a concurrent create slipping past the check above
if (users.putIfAbsent(username, user) != null) {
throw new DuplicateUserException("User with username '" + username + "' already exists");
}
saveUsersToFile();
logger.info("User created successfully: {}", username);
return user;
}
/**
* Creates a new user with default role.
*
* @param name The user's name
* @param age The user's age
* @param username The username
* @param email The email address (optional)
* @return The created user
*/
public User createUser(String name, int age, String username, String email) {
return createUser(name, age, username, email, UserRole.USER);
}
/**
* Creates a new user with minimal information.
*
* @param name The user's name
* @param age The user's age
* @param username The username
* @return The created user
*/
public User createUser(String name, int age, String username) {
return createUser(name, age, username, null, UserRole.USER);
}
/**
* Retrieves a user by username.
*
* @param username The username
* @return The user
* @throws UserNotFoundException if user is not found
*/
public User getUser(String username) {
User user = users.get(username);
if (user == null) {
throw new UserNotFoundException("User with username '" + username + "' not found");
}
return user;
}
/**
* Retrieves a user by email address.
*
* @param email The email address
* @return The user or null if not found
*/
public User getUserByEmail(String email) {
return users.values().stream()
.filter(user -> Objects.equals(user.getEmail(), email))
.findFirst()
.orElse(null);
}
/**
* Updates user information.
*
* @param username The username
* @param updates A map of field updates
* @return The updated user
* @throws UserNotFoundException if user is not found
*/
public User updateUser(String username, Map<String, Object> updates) {
User user = getUser(username);
updates.forEach((field, value) -> {
switch (field.toLowerCase()) {
case "name":
user.setName((String) value);
break;
case "age":
user.setAge((Integer) value);
break;
case "email":
user.setEmail((String) value);
break;
case "role":
if (value instanceof UserRole) {
user.setRole((UserRole) value);
} else if (value instanceof String) {
user.setRole(UserRole.fromCode((String) value));
}
break;
case "status":
if (value instanceof UserStatus) {
user.setStatus((UserStatus) value);
} else if (value instanceof String) {
user.setStatus(UserStatus.fromCode((String) value));
}
break;
default:
logger.warn("Unknown field for update: {}", field);
}
});
saveUsersToFile();
logger.info("User updated successfully: {}", username);
return user;
}
/**
* Deletes a user (soft delete).
*
* @param username The username
* @return true if user was deleted
* @throws UserNotFoundException if user is not found
*/
public boolean deleteUser(String username) {
User user = getUser(username);
user.delete();
saveUsersToFile();
logger.info("User deleted successfully: {}", username);
return true;
}
/**
* Removes a user completely from the system.
*
* @param username The username
* @return true if user was removed
* @throws UserNotFoundException if user is not found
*/
public boolean removeUser(String username) {
if (!users.containsKey(username)) {
throw new UserNotFoundException("User with username '" + username + "' not found");
}
users.remove(username);
saveUsersToFile();
logger.info("User removed completely: {}", username);
return true;
}
/**
* Gets all users in the system.
*
* @return A list of all users
*/
public List<User> getAllUsers() {
return new ArrayList<>(users.values());
}
/**
* Gets all active users.
*
* @return A list of active users
*/
public List<User> getActiveUsers() {
return users.values().stream()
.filter(User::isActive)
.collect(Collectors.toList());
}
/**
* Gets users by role.
*
* @param role The user role
* @return A list of users with the specified role
*/
public List<User> getUsersByRole(UserRole role) {
return users.values().stream()
.filter(user -> user.getRole() == role)
.collect(Collectors.toList());
}
/**
* Filters users using a custom predicate.
*
* @param predicate The filter predicate
* @return A list of filtered users
*/
public List<User> filterUsers(Predicate<User> predicate) {
return users.values().stream()
.filter(predicate)
.collect(Collectors.toList());
}
/**
* Searches users by name or username.
*
* @param query The search query
* @return A list of matching users
*/
public List<User> searchUsers(String query) {
if (StringUtils.isBlank(query)) {
return new ArrayList<>();
}
String lowercaseQuery = query.toLowerCase();
return users.values().stream()
.filter(user ->
user.getName().toLowerCase().contains(lowercaseQuery) ||
user.getUsername().toLowerCase().contains(lowercaseQuery) ||
(user.getEmail() != null && user.getEmail().toLowerCase().contains(lowercaseQuery)))
.collect(Collectors.toList());
}
/**
* Gets users older than specified age.
*
* @param age The age threshold
* @return A list of users older than the specified age
*/
public List<User> getUsersOlderThan(int age) {
return filterUsers(user -> user.getAge() > age);
}
/**
* Gets users with email addresses.
*
* @return A list of users with email addresses
*/
public List<User> getUsersWithEmail() {
return filterUsers(User::hasEmail);
}
/**
* Gets users with specific permission.
*
* @param permission The permission to check
* @return A list of users with the specified permission
*/
public List<User> getUsersWithPermission(String permission) {
return filterUsers(user -> user.hasPermission(permission));
}
/**
* Gets the total number of users.
*
* @return The user count
*/
public int getUserCount() {
return users.size();
}
/**
* Gets user statistics.
*
* @return A map of user statistics
*/
public Map<String, Integer> getUserStats() {
Map<String, Integer> stats = new HashMap<>();
stats.put("total", users.size());
stats.put("active", getActiveUsers().size());
stats.put("admin", getUsersByRole(UserRole.ADMIN).size());
stats.put("user", getUsersByRole(UserRole.USER).size());
stats.put("guest", getUsersByRole(UserRole.GUEST).size());
stats.put("with_email", getUsersWithEmail().size());
return stats;
}
/**
* Exports users to specified format.
*
* @param format The export format ("json" or "csv")
* @return The exported data as string
* @throws IllegalArgumentException if format is unsupported
*/
public String exportUsers(String format) {
switch (format.toLowerCase()) {
case "json":
return exportToJson();
case "csv":
return exportToCsv();
default:
throw new IllegalArgumentException("Unsupported export format: " + format);
}
}
/**
* Exports users to JSON format.
*
* @return JSON string representation of users
*/
private String exportToJson() {
try {
return objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(users.values());
} catch (JsonProcessingException e) {
logger.error("Error exporting users to JSON", e);
return "[]";
}
}
/**
* Exports users to CSV format.
*
* @return CSV string representation of users
*/
private String exportToCsv() {
try (StringWriter writer = new StringWriter();
CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withHeader(
"Username", "Name", "Age", "Email", "Role", "Status", "Last Login"))) {
for (User user : users.values()) {
printer.printRecord(
user.getUsername(),
user.getName(),
user.getAge(),
user.getEmail(),
user.getRole().getCode(),
user.getStatus().getCode(),
user.getLastLogin()
);
}
return writer.toString();
} catch (IOException e) {
logger.error("Error exporting users to CSV", e);
return "Username,Name,Age,Email,Role,Status,Last Login\n";
}
}
/**
* Checks if a username exists in the system.
*
* @param username The username to check
* @return true if username exists
*/
public boolean userExists(String username) {
return users.containsKey(username);
}
/**
* Clears all users from the system.
*/
public void clearAllUsers() {
users.clear();
saveUsersToFile();
logger.info("All users cleared from system");
}
/**
* Loads users from file storage.
*/
private void loadUsersFromFile() {
if (StringUtils.isBlank(storagePath)) {
return;
}
try {
Path path = Paths.get(storagePath);
if (!Files.exists(path)) {
logger.debug("User storage file does not exist: {}", storagePath);
return;
}
String content = Files.readString(path);
List<User> userList = Arrays.asList(objectMapper.readValue(content, User[].class));
users.clear();
for (User user : userList) {
users.put(user.getUsername(), user);
}
logger.info("Loaded {} users from file: {}", users.size(), storagePath);
} catch (IOException e) {
logger.error("Error loading users from file: {}", storagePath, e);
}
}
/**
* Saves users to file storage.
*/
private void saveUsersToFile() {
if (StringUtils.isBlank(storagePath)) {
return;
}
try {
Path path = Paths.get(storagePath);
Files.createDirectories(path.getParent());
String content = objectMapper.writerWithDefaultPrettyPrinter()
.writeValueAsString(users.values());
Files.writeString(path, content);
logger.debug("Saved {} users to file: {}", users.size(), storagePath);
} catch (IOException e) {
logger.error("Error saving users to file: {}", storagePath, e);
}
}
// CI marker method to verify auto-reindex on change
public String ciAddedSymbolMarker() {
return "ci_symbol_java";
}
}

View File

@@ -0,0 +1,26 @@
package com.example.usermanagement.utils;
/**
* Exception thrown when attempting to create a user that already exists.
*/
public class DuplicateUserException extends RuntimeException {
/**
* Constructs a new DuplicateUserException with the specified detail message.
*
* @param message the detail message
*/
public DuplicateUserException(String message) {
super(message);
}
/**
* Constructs a new DuplicateUserException with the specified detail message and cause.
*
* @param message the detail message
* @param cause the cause
*/
public DuplicateUserException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@@ -0,0 +1,26 @@
package com.example.usermanagement.utils;
/**
* Exception thrown when a user is not found in the system.
*/
public class UserNotFoundException extends RuntimeException {
/**
* Constructs a new UserNotFoundException with the specified detail message.
*
* @param message the detail message
*/
public UserNotFoundException(String message) {
super(message);
}
/**
* Constructs a new UserNotFoundException with the specified detail message and cause.
*
* @param message the detail message
* @param cause the cause
*/
public UserNotFoundException(String message, Throwable cause) {
super(message, cause);
}
}

Some files were not shown because too many files have changed in this diff.