Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-05 01:50:27 +08:00

Commit: Refactor code structure for improved readability and maintainability

2099  .claude/docs/CODEXLENS_TECHNICAL_SPEC.md  (new file)
File diff suppressed because it is too large
@@ -1,177 +0,0 @@
import { readFileSync, writeFileSync } from 'fs';

const filePath = 'ccw/src/tools/smart-search.js';
let content = readFileSync(filePath, 'utf8');

// 1. Add buildFuzzyRegex function after detectRelationship
const buildFuzzyRegexFunc = `
/**
 * Build fuzzy regex pattern for approximate matching
 * @param {string} query - Search query string
 * @param {number} maxDistance - Edit distance tolerance (default: 1)
 * @returns {string} - Regex pattern suitable for ripgrep -e flag
 */
function buildFuzzyRegex(query, maxDistance = 1) {
  const escaped = query.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');
  let pattern;
  if (maxDistance === 1) {
    pattern = escaped.split('').map(c => {
      const upper = c.toUpperCase();
      const lower = c.toLowerCase();
      if (upper !== lower) {
        return \`[\${upper}\${lower}]\`;
      }
      return c;
    }).join('');
  } else if (maxDistance === 2) {
    pattern = escaped.split('').map(c => \`\${c}?\`).join('.*');
  } else {
    pattern = escaped;
  }
  if (/^[a-zA-Z0-9_]+$/.test(query)) {
    pattern = \`\\\\b\${pattern}\\\\b\`;
  }
  return pattern;
}
`;

content = content.replace(
  /(function detectRelationship\(query\) \{[\s\S]*?\n\})\n\n(\/\*\*\n \* Classify query intent)/,
  `$1\n${buildFuzzyRegexFunc}\n$2`
);

// 2. Add fuzzy parameter to buildRipgrepCommand
content = content.replace(
  'const { query, paths = [\'.\'], contextLines = 0, maxResults = 100, includeHidden = false } = params;',
  'const { query, paths = [\'.\'], contextLines = 0, maxResults = 100, includeHidden = false, fuzzy = false } = params;'
);

// 3. Replace literal matching line with fuzzy conditional
content = content.replace(
  /\/\/ Use literal\/fixed string matching for exact mode\n    args\.push\('-F', query\);/,
  `// Use fuzzy regex or literal matching based on mode
    if (fuzzy) {
      args.push('-i', '-e', buildFuzzyRegex(query));
    } else {
      args.push('-F', query);
    }`
);

// 4. Add fuzzy case in executeAutoMode
content = content.replace(
  /(case 'exact':[\s\S]*?\};\n\n)(    case 'fuzzy':\n    case 'semantic':)/,
  `$1    case 'fuzzy':
      // Execute fuzzy mode and enrich result with classification metadata
      const fuzzyResult = await executeFuzzyMode(params);
      return {
        ...fuzzyResult,
        metadata: {
          ...fuzzyResult.metadata,
          classified_as: classification.mode,
          confidence: classification.confidence,
          reasoning: classification.reasoning
        }
      };

    case 'semantic':`
);

// 5. Replace executeFuzzyMode implementation
const fuzzyModeImpl = `async function executeFuzzyMode(params) {
  const { query, paths = [], contextLines = 0, maxResults = 100, includeHidden = false } = params;

  // Check ripgrep availability
  if (!checkToolAvailability('rg')) {
    return {
      success: false,
      error: 'ripgrep not available - please install ripgrep (rg) to use fuzzy search mode'
    };
  }

  // Build ripgrep command with fuzzy=true
  const { command, args } = buildRipgrepCommand({
    query,
    paths: paths.length > 0 ? paths : ['.'],
    contextLines,
    maxResults,
    includeHidden,
    fuzzy: true
  });

  return new Promise((resolve) => {
    const child = spawn(command, args, {
      cwd: process.cwd(),
      stdio: ['ignore', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      const results = [];

      if (code === 0 || (code === 1 && stdout.trim())) {
        const lines = stdout.split('\\n').filter(line => line.trim());

        for (const line of lines) {
          try {
            const item = JSON.parse(line);
            if (item.type === 'match') {
              const match = {
                file: item.data.path.text,
                line: item.data.line_number,
                column: item.data.submatches && item.data.submatches[0] ? item.data.submatches[0].start + 1 : 1,
                content: item.data.lines.text.trim()
              };
              results.push(match);
            }
          } catch (err) {
            continue;
          }
        }

        resolve({
          success: true,
          results,
          metadata: {
            mode: 'fuzzy',
            backend: 'ripgrep-regex',
            fuzzy_strategy: 'approximate regex',
            count: results.length,
            query
          }
        });
      } else {
        resolve({
          success: false,
          error: \`ripgrep execution failed with code \${code}: \${stderr}\`,
          results: []
        });
      }
    });

    child.on('error', (error) => {
      resolve({
        success: false,
        error: \`Failed to spawn ripgrep: \${error.message}\`,
        results: []
      });
    });
  });
}`;

content = content.replace(
  /async function executeFuzzyMode\(params\) \{[\s\S]*?  \}\n\}/,
  fuzzyModeImpl
);

writeFileSync(filePath, content, 'utf8');
console.log('Fuzzy mode implementation applied successfully');
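For reference, a minimal sketch (not part of the commit) of what the `buildFuzzyRegex` helper above returns, assuming the function is in scope:

```js
// maxDistance = 1 (the default the patch wires in): case-insensitive
// character classes, word-bounded because the query is purely alphanumeric.
console.log(buildFuzzyRegex('authUser'));
// -> "\b[Aa][Uu][Tt][Hh][Uu][Ss][Ee][Rr]\b"

// maxDistance = 2: every character becomes optional, with arbitrary gaps allowed.
console.log(buildFuzzyRegex('getData', 2));
// -> "\bg?.*e?.*t?.*D?.*a?.*t?.*a?\b"
```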
187  apply-fuzzy.py  (deleted)
@@ -1,187 +0,0 @@
#!/usr/bin/env python3
import re

with open('ccw/src/tools/smart-search.js', 'r', encoding='utf-8') as f:
    content = f.read()

# Step 1: Add buildFuzzyRegex after detectRelationship
fuzzy_regex_func = r'''
/**
 * Build fuzzy regex pattern for approximate matching
 * @param {string} query - Search query string
 * @param {number} maxDistance - Edit distance tolerance (default: 1)
 * @returns {string} - Regex pattern suitable for ripgrep -e flag
 */
function buildFuzzyRegex(query, maxDistance = 1) {
  const escaped = query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  let pattern;
  if (maxDistance === 1) {
    pattern = escaped.split('').map(c => {
      const upper = c.toUpperCase();
      const lower = c.toLowerCase();
      if (upper !== lower) {
        return `[${upper}${lower}]`;
      }
      return c;
    }).join('');
  } else if (maxDistance === 2) {
    pattern = escaped.split('').map(c => `${c}?`).join('.*');
  } else {
    pattern = escaped;
  }
  if (/^[a-zA-Z0-9_]+$/.test(query)) {
    pattern = `\\b${pattern}\\b`;
  }
  return pattern;
}
'''

content = re.sub(
    r'(function detectRelationship\(query\) \{[^}]+\})\n\n(/\*\*\n \* Classify)',
    r'\1' + fuzzy_regex_func + r'\n\2',
    content
)

# Step 2: Add fuzzy param to buildRipgrepCommand
content = content.replace(
    "const { query, paths = ['.'], contextLines = 0, maxResults = 100, includeHidden = false } = params;",
    "const { query, paths = ['.'], contextLines = 0, maxResults = 100, includeHidden = false, fuzzy = false } = params;"
)

# Step 3: Replace literal matching with fuzzy conditional
content = re.sub(
    r'    // Use literal/fixed string matching for exact mode\n    args\.push\(\'-F\', query\);',
    r'''    // Use fuzzy regex or literal matching based on mode
    if (fuzzy) {
      args.push('-i', '-e', buildFuzzyRegex(query));
    } else {
      args.push('-F', query);
    }''',
    content
)

# Step 4: Update executeAutoMode fuzzy case
fuzzy_case = '''    case 'fuzzy':
      // Execute fuzzy mode and enrich result with classification metadata
      const fuzzyResult = await executeFuzzyMode(params);
      return {
        ...fuzzyResult,
        metadata: {
          ...fuzzyResult.metadata,
          classified_as: classification.mode,
          confidence: classification.confidence,
          reasoning: classification.reasoning
        }
      };

    case 'semantic':'''

content = re.sub(
    r"    case 'fuzzy':\n    case 'semantic':",
    fuzzy_case,
    content
)

# Step 5: Replace executeFuzzyMode
fuzzy_impl = '''async function executeFuzzyMode(params) {
  const { query, paths = [], contextLines = 0, maxResults = 100, includeHidden = false } = params;

  // Check ripgrep availability
  if (!checkToolAvailability('rg')) {
    return {
      success: false,
      error: 'ripgrep not available - please install ripgrep (rg) to use fuzzy search mode'
    };
  }

  // Build ripgrep command with fuzzy=true
  const { command, args } = buildRipgrepCommand({
    query,
    paths: paths.length > 0 ? paths : ['.'],
    contextLines,
    maxResults,
    includeHidden,
    fuzzy: true
  });

  return new Promise((resolve) => {
    const child = spawn(command, args, {
      cwd: process.cwd(),
      stdio: ['ignore', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      const results = [];

      if (code === 0 || (code === 1 && stdout.trim())) {
        const lines = stdout.split('\\n').filter(line => line.trim());

        for (const line of lines) {
          try {
            const item = JSON.parse(line);
            if (item.type === 'match') {
              const match = {
                file: item.data.path.text,
                line: item.data.line_number,
                column: item.data.submatches && item.data.submatches[0] ? item.data.submatches[0].start + 1 : 1,
                content: item.data.lines.text.trim()
              };
              results.push(match);
            }
          } catch (err) {
            continue;
          }
        }

        resolve({
          success: true,
          results,
          metadata: {
            mode: 'fuzzy',
            backend: 'ripgrep-regex',
            fuzzy_strategy: 'approximate regex',
            count: results.length,
            query
          }
        });
      } else {
        resolve({
          success: false,
          error: `ripgrep execution failed with code ${code}: ${stderr}`,
          results: []
        });
      }
    });

    child.on('error', (error) => {
      resolve({
        success: false,
        error: `Failed to spawn ripgrep: ${error.message}`,
        results: []
      });
    });
  });
}'''

content = re.sub(
    r'async function executeFuzzyMode\(params\) \{.*?  \}\n\}',
    fuzzy_impl,
    content,
    flags=re.DOTALL
)

with open('ccw/src/tools/smart-search.js', 'w', encoding='utf-8') as f:
    f.write(content)

print('Fuzzy mode implementation applied successfully')
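A caveat worth noting about both patch scripts above (a sketch, not part of the commit): replacement strings are not inert. JavaScript's `String.prototype.replace` expands `$&`, `$1`, … inside the replacement, and Python's `re.sub` rejects unknown escapes such as `\]` in the replacement - and `buildFuzzyRegexFunc` contains both kinds of sequence. A function replacement sidesteps the substitution entirely:

```js
// Safer variant of step 1: the callback's return value is used verbatim,
// so '$&' and backslashes inside buildFuzzyRegexFunc survive untouched.
content = content.replace(
  /(function detectRelationship\(query\) \{[\s\S]*?\n\})\n\n(\/\*\*\n \* Classify query intent)/,
  (match, fn, doc) => `${fn}\n${buildFuzzyRegexFunc}\n${doc}`
);
```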
@@ -34,7 +34,7 @@ function detectLiteral(query) {
  * Detect regex pattern (contains regex metacharacters)
  */
 function detectRegex(query) {
-  return /[.*+?^${}()|[\]\]/.test(query);
+  return /[.*+?^${}()|[\]\\]/.test(query);
 }
 
 /**
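As printed, the one-character change above is load-bearing (the snippet below is an illustration, not part of the commit): the old character class contains no unescaped closing `]`, so the regex literal never terminates and the file fails to parse; the fixed class closes properly and also counts a lone backslash as a metacharacter.

```js
// new RegExp('[.*+?^${}()|[\\]\\]')      // old form: unterminated class, throws
const metachars = /[.*+?^${}()|[\]\\]/;   // fixed form
console.log(metachars.test('get.*Data')); // true  - contains '.' and '*'
console.log(metachars.test('plainWord')); // false - no regex metacharacters
```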
@@ -111,10 +111,6 @@ function classifyIntent(query) {
 }
 
-
-// Classification confidence threshold
-const CONFIDENCE_THRESHOLD = 0.7;
-
 /**
  * Check if a tool is available in PATH
  * @param {string} toolName - Tool executable name
1  reference/codanna  (submodule)
Submodule reference/codanna added at 80ed5e3d5f
47  reference/code-index-mcp-master/.dockerignore  (new file)
@@ -0,0 +1,47 @@
# Git
.git
.gitignore

# Python cache files
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
venv/
env/
ENV/

# IDE files
.idea/
.vscode/
*.swp
*.swo

# OS specific files
.DS_Store
Thumbs.db

# Code Index MCP specific files
.code_indexer/

# Docker files
Dockerfile
.dockerignore
26  reference/code-index-mcp-master/.gitattributes  (vendored, new file)
@@ -0,0 +1,26 @@
# Set default behavior to automatically normalize line endings
* text=auto

# Force specific file types to use LF line endings
*.py text eol=lf
*.js text eol=lf
*.ts text eol=lf
*.json text eol=lf
*.md text eol=lf
*.yml text eol=lf
*.yaml text eol=lf
*.toml text eol=lf
*.txt text eol=lf

# Force specific file types to use CRLF line endings
*.bat text eol=crlf
*.cmd text eol=crlf

# Binary files should be left untouched
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.zip binary
*.tar.gz binary
96  reference/code-index-mcp-master/.github/workflows/publish-to-pypi.yml  (vendored, new file)
@@ -0,0 +1,96 @@
name: Release

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag (vX.Y.Z) to re-run publish flow'
        required: true
        type: string

concurrency:
  group: release-${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', github.event.inputs.tag) || github.ref }}
  cancel-in-progress: false

jobs:
  verify-and-build:
    runs-on: ubuntu-latest
    env:
      RELEASE_REF: ${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', github.event.inputs.tag) || github.ref }}
      RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ env.RELEASE_REF }}

      - name: Ensure tag points to default branch
        run: |
          git fetch origin
          TARGET_BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')
          if [ -z "$TARGET_BRANCH" ]; then
            TARGET_BRANCH=master
          fi
          if ! git merge-base --is-ancestor "$(git rev-parse HEAD)" "origin/${TARGET_BRANCH}"; then
            echo "::error::Release tag must point to a commit reachable from ${TARGET_BRANCH}"
            exit 1
          fi

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install uv
        run: python -m pip install --upgrade pip uv

      - name: Cache uv environments
        uses: actions/cache@v4
        with:
          path: |
            .venv
            .uv-cache
          key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
          restore-keys: |
            uv-${{ runner.os }}-

      - name: Install dependencies
        run: uv sync --frozen

      - name: Install build tooling
        run: uv pip install build twine

      - name: Build distributions
        run: uv run python -m build

      - name: Twine check
        run: uv run twine check dist/*

      - name: Upload dist artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-${{ env.RELEASE_TAG }}
          path: dist/*
          retention-days: 7

  publish:
    needs: verify-and-build
    runs-on: ubuntu-latest
    environment:
      name: production
    env:
      RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist-${{ env.RELEASE_TAG }}
          path: dist

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          password: ${{ secrets.PYPI_API_TOKEN }}
51  reference/code-index-mcp-master/.gitignore  (vendored, new file)
@@ -0,0 +1,51 @@
# Python cache files
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
venv/
env/
ENV/

# IDE files
.idea/
.vscode/
*.swp
*.swo

# OS specific files
.DS_Store
Thumbs.db

# Code Index MCP specific files
.code_indexer/


# Claude Code generated files
CLAUDE.local.md
.claude/
.claude_chat/
claude_*
COMMIT_MESSAGE.txt
RELEASE_NOTE.txt

.llm-context/
AGENTS.md
24  reference/code-index-mcp-master/.pylintrc  (new file)
@@ -0,0 +1,24 @@
[MAIN]
# Ignore auto-generated protobuf files
ignore-paths=src/code_index_mcp/scip/proto/scip_pb2.py

[MESSAGES CONTROL]
# Disable specific warnings for protobuf generated code
disable=
    # Generated code warnings
    protected-access,
    bad-indentation,
    line-too-long,
    # Other common warnings we might want to disable globally
    unused-import,
    logging-fstring-interpolation

[FORMAT]
# Maximum number of characters on a single line
max-line-length=100

[DESIGN]
# Maximum number of arguments for function / method
max-args=7
# Maximum number of locals for function / method body
max-locals=20
1  reference/code-index-mcp-master/.python-version  (new file)
@@ -0,0 +1 @@
3.11
28  reference/code-index-mcp-master/.well-known/mcp.json  (new file)
@@ -0,0 +1,28 @@
{
  "$schema": "https://modelcontextprotocol.io/schemas/mcp.json",
  "mcpServers": {
    "code-index": {
      "command": "uv",
      "args": [
        "run",
        "code-index-mcp"
      ],
      "transport": {
        "type": "stdio"
      },
      "metadata": {
        "name": "Code Index MCP",
        "description": "Local code-aware MCP server with project indexing, search, and file tools.",
        "homepage": "https://github.com/johnhuang316/code-index-mcp",
        "capabilities": [
          "code-search",
          "symbol-indexing",
          "file-system"
        ]
      }
    }
  },
  "llmfeed_extension": {
    "path": ".well-known/mcp.llmfeed.json"
  }
}
32  reference/code-index-mcp-master/.well-known/mcp.llmfeed.json  (new file)
@@ -0,0 +1,32 @@
{
  "$schema": "https://modelcontextprotocol.io/schemas/mcp-llmfeed.json",
  "feed_type": "mcp_server_list",
  "servers": [
    {
      "id": "code-index",
      "name": "Code Index MCP",
      "description": "Exposes project-aware indexing, search, and file utilities for LLM agents via MCP transports.",
      "version": "2.9.1",
      "transport": "stdio",
      "command": "uv",
      "args": [
        "run",
        "code-index-mcp"
      ],
      "links": {
        "documentation": "https://github.com/johnhuang316/code-index-mcp#readme",
        "source": "https://github.com/johnhuang316/code-index-mcp"
      },
      "capabilities": [
        "code-search",
        "symbol-indexing",
        "file-system"
      ],
      "tags": [
        "fastmcp",
        "code-intelligence",
        "watcher"
      ]
    }
  ]
}
24  reference/code-index-mcp-master/Dockerfile  (new file)
@@ -0,0 +1,24 @@
# Use lightweight Python image
FROM python:3.11-slim

# Install git (for code analysis)
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy dependency list and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy code
COPY . .

# Set Python path
ENV PYTHONPATH="${PYTHONPATH}:/app:/app/src"

# No default project directory mount point needed, user will explicitly set project path

# Run MCP tool
# MCP server uses stdio mode by default
ENTRYPOINT ["python", "-m", "code_index_mcp.server"]
21  reference/code-index-mcp-master/LICENSE  (new file)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2015 johnhuang316

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
412  reference/code-index-mcp-master/README.md  (new file)
@@ -0,0 +1,412 @@
# Code Index MCP

<div align="center">

[Model Context Protocol](https://modelcontextprotocol.io)
[Python](https://www.python.org/)
[License](LICENSE)

**Intelligent code indexing and analysis for Large Language Models**

Transform how AI understands your codebase with advanced search, analysis, and navigation capabilities.

</div>

<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
  <img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>

## Overview

Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) server that bridges the gap between AI models and complex codebases. It provides intelligent indexing, advanced search capabilities, and detailed code analysis to help AI assistants understand and navigate your projects effectively.

**Perfect for:** Code review, refactoring, documentation generation, debugging assistance, and architectural analysis.

## Quick Start

### 🚀 **Recommended Setup (Most Users)**

The easiest way to get started with any MCP-compatible application:

**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv)

1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`):
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uvx",
         "args": ["code-index-mcp"]
       }
     }
   }
   ```
   > Optional: append `--project-path /absolute/path/to/repo` to the `args` array so the server
   > initializes with that repository automatically (equivalent to calling `set_project_path`
   > after startup).

2. **Restart your application** – `uvx` automatically handles installation and execution

3. **Start using** (give these prompts to your AI assistant):
   ```
   Set the project path to /Users/dev/my-react-app
   Find all TypeScript files in this project
   Search for "authentication" functions
   Analyze the main App.tsx file
   ```
   *If you launch with `--project-path`, you can skip the first command above - the server already knows the project location.*

### Codex CLI Configuration

If you are using OpenAI's Codex CLI, add the server to `~/.codex/config.toml`.
On Windows the file lives at `C:\Users\<you>\.codex\config.toml`:

```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```

> You can append `--project-path C:/absolute/path/to/repo` to the `args` list to set the project
> automatically on startup (same effect as running the `set_project_path` tool).

On Windows, `uvx` needs the standard profile directories to be present.
Keep the environment override in the same block so the MCP starts reliably:

```toml
env = {
  HOME = "C:\\Users\\<you>",
  APPDATA = "C:\\Users\\<you>\\AppData\\Roaming",
  LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local",
  SystemRoot = "C:\\Windows"
}
```

Linux and macOS already expose the required XDG paths and `HOME`, so you can usually omit the `env` table there.
Add overrides only if you run the CLI inside a restricted container.

### FastMCP & Discovery Manifests

- Run `fastmcp run fastmcp.json` to launch the server via [FastMCP](https://fastmcp.wiki/) with the correct source entrypoint and dependency metadata. Pass `--project-path` (or call the `set_project_path` tool after startup) so the index boots against the right repository.
- Serve or copy `.well-known/mcp.json` to share a standards-compliant MCP manifest. Clients that support the `.well-known` convention (e.g., Claude Desktop, Codex CLI) can import this file directly instead of crafting configs manually.
- Publish `.well-known/mcp.llmfeed.json` when you want to expose the richer LLM Feed metadata. It references the same `code-index` server definition plus documentation/source links, which helps registries present descriptions, tags, and capabilities automatically.

When sharing the manifests, remind consumers to supply `--project-path` (or to call `set_project_path`) so the server indexes the intended repository.

## Typical Use Cases

**Code Review**: "Find all places using the old API"
**Refactoring Help**: "Where is this function called?"
**Learning Projects**: "Show me the main components of this React project"
**Debugging**: "Search for all error handling related code"

## Key Features

### 🔍 **Intelligent Search & Analysis**
- **Dual-Strategy Architecture**: Specialized tree-sitter parsing for 7 core languages, fallback strategy for 50+ file types
- **Direct Tree-sitter Integration**: No regex fallbacks for specialized languages - fail fast with clear errors
- **Advanced Search**: Auto-detects and uses the best available tool (ugrep, ripgrep, ag, or grep)
- **Universal File Support**: Comprehensive coverage from advanced AST parsing to basic file indexing
- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics after running `build_deep_index`

### 🗂️ **Multi-Language Support**
- **7 Languages with Tree-sitter AST Parsing**: Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig
- **50+ File Types with Fallback Strategy**: C/C++, Rust, Ruby, PHP, and all other programming languages
- **Document & Config Files**: Markdown, JSON, YAML, XML with appropriate handling
- **Web Frontend**: Vue, React, Svelte, HTML, CSS, SCSS
- **Java Web & Build**: JSP/Tag files (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`), Grails/GSP (`.gsp`), Gradle & Groovy builds (`.gradle`, `.groovy`), `.properties`, and Protocol Buffers (`.proto`)
- **Database**: SQL variants, NoSQL, stored procedures, migrations
- **Configuration**: JSON, YAML, XML, Markdown
- **[View complete list](#supported-file-types)**

### ⚡ **Real-time Monitoring & Auto-refresh**
- **File Watcher**: Automatic index updates when files change
- **Cross-platform**: Native OS file system monitoring
- **Smart Processing**: Batches rapid changes to prevent excessive rebuilds (see the sketch after this list)
- **Shallow Index Refresh**: Watches file changes and keeps the file list current; run a deep rebuild when you need symbol metadata
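To make the batching just mentioned concrete, a minimal debounce sketch (illustrative only - not the server's actual implementation; `refreshIndex` is assumed to exist):

```js
// Rapid file events collapse into a single index refresh after a quiet period.
let timer = null;
function onFileChanged() {
  clearTimeout(timer);
  timer = setTimeout(() => refreshIndex(), 500); // 500 ms debounce window
}
```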

### ⚡ **Performance & Efficiency**
- **Tree-sitter AST Parsing**: Native syntax parsing for accurate symbol extraction
- **Persistent Caching**: Stores indexes for lightning-fast subsequent access
- **Smart Filtering**: Intelligent exclusion of build directories and temporary files
- **Memory Efficient**: Optimized for large codebases
- **Direct Dependencies**: No fallback mechanisms - fail fast with clear error messages

## Supported File Types

<details>
<summary><strong>📁 Programming Languages (Click to expand)</strong></summary>

**Languages with Specialized Tree-sitter Strategies:**
- **Python** (`.py`, `.pyw`) - Full AST analysis with class/method extraction and call tracking
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ class and function parsing with tree-sitter
- **TypeScript** (`.ts`, `.tsx`) - Complete type-aware symbol extraction with interfaces
- **Java** (`.java`) - Full class hierarchy, method signatures, and call relationships
- **Go** (`.go`) - Struct methods, receiver types, and function analysis
- **Objective-C** (`.m`, `.mm`) - Class/instance method distinction with +/- notation
- **Zig** (`.zig`, `.zon`) - Function and struct parsing with tree-sitter AST

**All Other Programming Languages:**
All other programming languages use the **FallbackParsingStrategy**, which provides basic file indexing and metadata extraction. This includes:
- **System & Low-Level:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
- **Object-Oriented:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
- **Scripting & Dynamic:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
- **And 40+ more file types** - All handled through the fallback strategy for basic indexing

</details>

<details>
<summary><strong>🌐 Web & Frontend (Click to expand)</strong></summary>

**Frameworks & Libraries:**
- Vue (`.vue`)
- Svelte (`.svelte`)
- Astro (`.astro`)

**Styling:**
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
- HTML (`.html`)

**Templates:**
- Handlebars (`.hbs`, `.handlebars`)
- EJS (`.ejs`)
- Pug (`.pug`)
- FreeMarker (`.ftl`)
- Mustache (`.mustache`)
- Liquid (`.liquid`)
- ERB (`.erb`)

</details>

<details>
<summary><strong>🗄️ Database & SQL (Click to expand)</strong></summary>

**SQL Variants:**
- Standard SQL (`.sql`, `.ddl`, `.dml`)
- Database-specific (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)

**Database Objects:**
- Procedures & Functions (`.proc`, `.procedure`, `.func`, `.function`)
- Views & Triggers (`.view`, `.trigger`, `.index`)

**Migration & Tools:**
- Migration files (`.migration`, `.seed`, `.fixture`, `.schema`)
- Tool-specific (`.liquibase`, `.flyway`)

**NoSQL & Modern:**
- Graph & Query (`.cql`, `.cypher`, `.sparql`, `.gql`)

</details>

<details>
<summary><strong>📄 Documentation & Config (Click to expand)</strong></summary>

- Markdown (`.md`, `.mdx`)
- Configuration (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)

</details>

### 🛠️ **Development Setup**

For contributing or local development:

1. **Clone and install:**
   ```bash
   git clone https://github.com/johnhuang316/code-index-mcp.git
   cd code-index-mcp
   uv sync
   ```

2. **Configure for local development:**
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uv",
         "args": ["run", "code-index-mcp"]
       }
     }
   }
   ```

3. **Debug with MCP Inspector:**
   ```bash
   npx @modelcontextprotocol/inspector uv run code-index-mcp
   ```

<details>
<summary><strong>Alternative: Manual pip Installation</strong></summary>

If you prefer traditional pip management:

```bash
pip install code-index-mcp
```

Then configure:
```json
{
  "mcpServers": {
    "code-index": {
      "command": "code-index-mcp",
      "args": []
    }
  }
}
```

</details>

## Available Tools

### 🏗️ **Project Management**
| Tool | Description |
|------|-------------|
| **`set_project_path`** | Initialize indexing for a project directory |
| **`refresh_index`** | Rebuild the shallow file index after file changes |
| **`build_deep_index`** | Generate the full symbol index used by deep analysis |
| **`get_settings_info`** | View current project configuration and status |

*Run `build_deep_index` when you need symbol-level data; the default shallow index powers quick file discovery.*

### 🔍 **Search & Discovery**
| Tool | Description |
|------|-------------|
| **`search_code_advanced`** | Smart search with regex, fuzzy matching, file filtering, and paginated results (10 per page by default) |
| **`find_files`** | Locate files using glob patterns (e.g., `**/*.py`) |
| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity (requires deep index) |

### 🔄 **Monitoring & Auto-refresh**
| Tool | Description |
|------|-------------|
| **`get_file_watcher_status`** | Check file watcher status and configuration |
| **`configure_file_watcher`** | Enable/disable auto-refresh and configure settings |

### 🛠️ **System & Maintenance**
| Tool | Description |
|------|-------------|
| **`create_temp_directory`** | Set up storage directory for index data |
| **`check_temp_directory`** | Verify index storage location and permissions |
| **`clear_settings`** | Reset all cached data and configurations |
| **`refresh_search_tools`** | Re-detect available search tools (ugrep, ripgrep, etc.) |

## Usage Examples

### 🎯 **Quick Start Workflow**

**1. Initialize Your Project**
```
Set the project path to /Users/dev/my-react-app
```
*Automatically indexes your codebase and creates a searchable cache*

**2. Explore Project Structure**
```
Find all TypeScript component files in src/components
```
*Uses: `find_files` with pattern `src/components/**/*.tsx`*

**3. Analyze Key Files**
```
Give me a summary of src/api/userService.ts
```
*Uses: `get_file_summary` to show functions, imports, and complexity*
*Tip: run `build_deep_index` first if you get a `needs_deep_index` response.*

### 🔍 **Advanced Search Examples**

<details>
<summary><strong>Code Pattern Search</strong></summary>

```
Search for all function calls matching "get.*Data" using regex
```
*Finds: `getData()`, `getUserData()`, `getFormData()`, etc.*

</details>

<details>
<summary><strong>Fuzzy Function Search</strong></summary>

```
Find authentication-related functions with fuzzy search for 'authUser'
```
*Matches: `authenticateUser`, `authUserToken`, `userAuthCheck`, etc.*

</details>

<details>
<summary><strong>Language-Specific Search</strong></summary>

```
Search for "API_ENDPOINT" only in Python files
```
*Uses: `search_code_advanced` with `file_pattern: "*.py"` (defaults to 10 matches; use `max_results` to expand or `start_index` to page)*

</details>

<details>
<summary><strong>Auto-refresh Configuration</strong></summary>

```
Configure automatic index updates when files change
```
*Uses: `configure_file_watcher` to enable/disable monitoring and set debounce timing*

</details>

<details>
<summary><strong>Project Maintenance</strong></summary>

```
I added new components, please refresh the project index
```
*Uses: `refresh_index` to update the searchable cache*

</details>

## Troubleshooting

### 🔄 **Auto-refresh Not Working**

If automatic index updates aren't working when files change, try:
- `pip install watchdog` (may resolve environment isolation issues)
- Use manual refresh: call the `refresh_index` tool after making file changes
- Check file watcher status: use `get_file_watcher_status` to verify monitoring is active

## Development & Contributing

### 🔧 **Building from Source**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```

### 🐛 **Debugging**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```

### 🤝 **Contributing**
Contributions are welcome! Please feel free to submit a Pull Request.

---

### 📜 **License**
[MIT License](LICENSE)

### 🌐 **Translations**
- [繁體中文](README_zh.md)
- [日本語](README_ja.md)
419  reference/code-index-mcp-master/README_ja.md  (new file)
@@ -0,0 +1,419 @@
# Code Index MCP

<div align="center">

[Model Context Protocol](https://modelcontextprotocol.io)
[Python](https://www.python.org/)
[License](LICENSE)

**Intelligent code indexing and analysis for Large Language Models**

Fundamentally transform how AI understands your codebase with advanced search, analysis, and navigation capabilities.

</div>

<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
  <img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>

## Overview

Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) server that bridges AI models and complex codebases. It provides intelligent indexing, advanced search, and detailed code analysis so that AI assistants can understand and navigate projects effectively.

**Best suited for:** code review, refactoring, documentation generation, debugging assistance, and architectural analysis.

## Quick Start

### 🚀 **Recommended Setup (Most Users)**

The easiest way to get started with any MCP-compatible application:

**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv)

1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`):
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uvx",
         "args": ["code-index-mcp"]
       }
     }
   }
   ```

   > To have the server pick up a project automatically at startup, append
   > `--project-path /absolute/path/to/repo` to the end of the `args` array. This leaves the
   > server in the same state as calling `set_project_path` right after startup.
2. **Restart your application** – `uvx` handles installation and execution automatically

3. **Start using** (give these prompts to your AI assistant):
   ```
   Set the project path to /Users/dev/my-react-app
   Find all TypeScript files in this project
   Search for "authentication"-related functions
   Analyze the main App.tsx file
   ```
   *If you launched with `--project-path`, the first command is unnecessary - the server already knows the path.*

### Codex CLI Configuration

If you are using OpenAI's Codex CLI, add the following server entry to `~/.codex/config.toml`.
On Windows the file is stored at `C:\Users\<you>\.codex\config.toml`.

```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> To set the project at startup, add `--project-path C:/absolute/path/to/repo` to the `args` list.
> This has the same effect as calling the `set_project_path` tool after startup.

`uvx` on Windows requires the standard user profile directories.
Keep the following environment variables in the same block so the MCP starts reliably.

```toml
env = {
  HOME = "C:\\Users\\<you>",
  APPDATA = "C:\\Users\\<you>\\AppData\\Roaming",
  LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local",
  SystemRoot = "C:\\Windows"
}
```

On Linux and macOS the OS exposes `HOME` and the XDG paths by default, so the `env` section is usually unnecessary.
Override them only when running inside a restricted container or when you want to manually relocate the cache/config directories.
For the list of environment variables, see the [`uv` environment variable reference](https://docs.astral.sh/uv/reference/environment/) (`HOME`, `XDG_CACHE_HOME`, `XDG_CONFIG_HOME`, `APPDATA`, and so on).


## Typical Use Cases

**Code Review**: "Find every place that uses the old API"
**Refactoring Help**: "Where is this function called?"
**Learning Projects**: "Show me the main components of this React project"
**Debugging**: "Search for all error-handling-related code"

## Key Features

### 🔍 **Intelligent Search & Analysis**
- **Dual-Strategy Architecture**: specialized tree-sitter parsing for 7 core languages, fallback strategy for 50+ file types
- **Direct Tree-sitter Integration**: no regex fallbacks for the specialized languages - fail fast with clear error messages
- **Advanced Search**: auto-detects and uses the best available tool (ugrep, ripgrep, ag, grep)
- **Universal File Support**: comprehensive coverage from advanced AST parsing down to basic file indexing
- **File Analysis**: deep insight into structure, imports, classes, methods, and complexity metrics after running `build_deep_index`

### 🗂️ **Multi-Language Support**
- **Tree-sitter AST parsing for 7 languages**: Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig
- **Fallback strategy for 50+ file types**: C/C++, Rust, Ruby, PHP, and all other programming languages
- **Document & config files**: Markdown, JSON, YAML, XML with appropriate handling
- **Web frontend**: Vue, React, Svelte, HTML, CSS, SCSS
- **Java web & build**: JSP/tag files (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`), Grails/GSP (`.gsp`), Gradle/Groovy scripts (`.gradle`, `.groovy`), `.properties`, Protocol Buffers (`.proto`)
- **Database**: SQL variants, NoSQL, stored procedures, migrations
- **Configuration files**: JSON, YAML, XML, Markdown
- **[View the complete list](#supported-file-types)**

### ⚡ **Real-time Monitoring & Auto-refresh**
- **File watcher**: automatic index updates when files change
- **Cross-platform**: native OS file system monitoring
- **Smart processing**: batches rapid changes to prevent excessive rebuilds
- **Shallow index refresh**: watches file changes to keep the file list current; run `build_deep_index` when you need symbols

### ⚡ **Performance & Efficiency**
- **Tree-sitter AST parsing**: native syntax parsing for accurate symbol extraction
- **Persistent caching**: stores indexes for extremely fast subsequent access
- **Smart filtering**: intelligent exclusion of build directories and temporary files
- **Memory efficient**: optimized for large codebases
- **Direct dependencies**: no fallback mechanisms - fail fast with clear error messages

## Supported File Types

<details>
<summary><strong>📁 Programming Languages (Click to expand)</strong></summary>

**Languages with specialized tree-sitter strategies:**
- **Python** (`.py`, `.pyw`) - full AST analysis with class/method extraction and call tracking
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ class and function parsing with tree-sitter
- **TypeScript** (`.ts`, `.tsx`) - complete type-aware symbol extraction including interfaces
- **Java** (`.java`) - full class hierarchy, method signatures, and call relationships
- **Go** (`.go`) - struct methods, receiver types, and function analysis
- **Objective-C** (`.m`, `.mm`) - class/instance method distinction with +/- notation
- **Zig** (`.zig`, `.zon`) - function and struct parsing with the tree-sitter AST

**All other programming languages:**
All other programming languages use the **fallback parsing strategy**, which provides basic file indexing and metadata extraction. This includes:
- **System & low-level:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
- **Object-oriented:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
- **Scripting & dynamic:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
- **And 40+ more file types** - all handled through the fallback strategy for basic indexing

</details>

<details>
<summary><strong>🌐 Web & Frontend (Click to expand)</strong></summary>

**Frameworks & libraries:**
- Vue (`.vue`)
- Svelte (`.svelte`)
- Astro (`.astro`)

**Styling:**
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
- HTML (`.html`)

**Templates:**
- Handlebars (`.hbs`, `.handlebars`)
- EJS (`.ejs`)
- Pug (`.pug`)
- FreeMarker (`.ftl`)
- Mustache (`.mustache`)
- Liquid (`.liquid`)
- ERB (`.erb`)

</details>

<details>
<summary><strong>🗄️ Database & SQL (Click to expand)</strong></summary>

**SQL variants:**
- Standard SQL (`.sql`, `.ddl`, `.dml`)
- Database-specific (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)

**Database objects:**
- Procedures & functions (`.proc`, `.procedure`, `.func`, `.function`)
- Views & triggers (`.view`, `.trigger`, `.index`)

**Migration & tools:**
- Migration files (`.migration`, `.seed`, `.fixture`, `.schema`)
- Tool-specific (`.liquibase`, `.flyway`)

**NoSQL & modern:**
- Graph & query (`.cql`, `.cypher`, `.sparql`, `.gql`)

</details>

<details>
<summary><strong>📄 Documentation & Config (Click to expand)</strong></summary>

- Markdown (`.md`, `.mdx`)
- Configuration (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)

</details>

## Quick Start

### 🚀 **Recommended Setup (Most Users)**

The easiest way to get started with any MCP-compatible application:

**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv)

1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`):
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uvx",
         "args": ["code-index-mcp"]
       }
     }
   }
   ```

2. **Restart your application** – `uvx` handles installation and execution automatically

### 🛠️ **Development Setup**

For contributing or local development:

1. **Clone and install:**
   ```bash
   git clone https://github.com/johnhuang316/code-index-mcp.git
   cd code-index-mcp
   uv sync
   ```

2. **Configure for local development:**
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uv",
         "args": ["run", "code-index-mcp"]
       }
     }
   }
   ```

3. **Debug with MCP Inspector:**
   ```bash
   npx @modelcontextprotocol/inspector uv run code-index-mcp
   ```

<details>
<summary><strong>Alternative: Manual pip Installation</strong></summary>

If you prefer traditional pip management:

```bash
pip install code-index-mcp
```

Then configure:
```json
{
  "mcpServers": {
    "code-index": {
      "command": "code-index-mcp",
      "args": []
    }
  }
}
```

</details>

## Available Tools

### 🏗️ **Project Management**
| Tool | Description |
|------|-------------|
| **`set_project_path`** | Initialize indexing for a project directory |
| **`refresh_index`** | Rebuild the shallow file index after file changes |
| **`build_deep_index`** | Generate the full symbol index used for deep analysis |
| **`get_settings_info`** | View the current project configuration and status |

*Run `build_deep_index` when you need symbol-level data; the default shallow index handles fast file discovery.*

### 🔍 **Search & Discovery**
| Tool | Description |
|------|-------------|
| **`search_code_advanced`** | Smart search with regex, fuzzy matching, and file filtering; returns 10 results per page by default, adjustable via `max_results` and `start_index` |
| **`find_files`** | Locate files using glob patterns (e.g., `**/*.py`) |
| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity (requires the deep index) |

### 🔄 **Monitoring & Auto-refresh**
| Tool | Description |
|------|-------------|
| **`get_file_watcher_status`** | Check the file watcher status and configuration |
| **`configure_file_watcher`** | Enable/disable auto-refresh and configure its settings |

### 🛠️ **System & Maintenance**
| Tool | Description |
|------|-------------|
| **`create_temp_directory`** | Set up the storage directory for index data |
| **`check_temp_directory`** | Verify the index storage location and permissions |
| **`clear_settings`** | Reset all cached data and configuration |
| **`refresh_search_tools`** | Re-detect the available search tools (ugrep, ripgrep, etc.) |

## Usage Examples

### 🎯 **Quick Start Workflow**

**1. Initialize Your Project**
```
Set the project path to /Users/dev/my-react-app
```
*Automatically indexes the codebase and builds a searchable cache*

**2. Explore Project Structure**
```
Find all TypeScript component files in src/components
```
*Uses: `find_files` with pattern `src/components/**/*.tsx`*

**3. Analyze Key Files**
```
Give me a summary of src/api/userService.ts
```
*Uses: `get_file_summary` to show functions, imports, and complexity*
*Tip: if you get a `needs_deep_index` response, run `build_deep_index` first.*

### 🔍 **Advanced Search Examples**

<details>
<summary><strong>Code Pattern Search</strong></summary>

```
Search for all function calls matching "get.*Data" using a regular expression
```
*Finds: `getData()`, `getUserData()`, `getFormData()`, etc.*

</details>

<details>
<summary><strong>Fuzzy Function Search</strong></summary>

```
Do a fuzzy search for 'authUser' and find the authentication-related functions
```
*Matches: `authenticateUser`, `authUserToken`, `userAuthCheck`, etc.*

</details>

<details>
<summary><strong>Language-Specific Search</strong></summary>

```
Search for "API_ENDPOINT" only in Python files
```
*Uses: `search_code_advanced` with `file_pattern: "*.py"` (10 results by default; raise `max_results` or page with `start_index`)*

</details>

<details>
<summary><strong>Auto-refresh Configuration</strong></summary>

```
Configure automatic index updates on file changes
```
*Uses: `configure_file_watcher` to enable/disable monitoring and set the debounce interval*

</details>

<details>
<summary><strong>Project Maintenance</strong></summary>

```
I added new components, please refresh the project index
```
*Uses: `refresh_index` to update the searchable cache*

</details>

## Troubleshooting

### 🔄 **Auto-refresh Not Working**

If automatic index updates are not happening when files change, try the following:
- `pip install watchdog` (may resolve environment isolation issues)
- Use a manual refresh: call the `refresh_index` tool after making file changes
- Check the file watcher status: use `get_file_watcher_status` to verify that monitoring is active

## Development & Contributing

### 🔧 **Building from Source**
```bash
git clone https://github.com/johnhuang316/code-index-mcp.git
cd code-index-mcp
uv sync
uv run code-index-mcp
```

### 🐛 **Debugging**
```bash
npx @modelcontextprotocol/inspector uvx code-index-mcp
```

### 🤝 **Contributing**
Contributions are welcome! Feel free to submit a pull request.

---

### 📜 **License**
[MIT License](LICENSE)

### 🌐 **Translations**
- [English](README.md)
- [繁體中文](README_zh.md)
319  reference/code-index-mcp-master/README_ko.md  (new file)
@@ -0,0 +1,319 @@
# Code Index MCP

<div align="center">

[Model Context Protocol](https://modelcontextprotocol.io)
[Python](https://www.python.org/)
[License](LICENSE)

**Intelligent code indexing and analysis for Large Language Models**

Revolutionize how AI understands and works with your codebase through advanced search, precise analysis, and flexible navigation.

</div>

<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
  <img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
</a>

## Overview

Code Index MCP is an MCP server based on the [Model Context Protocol](https://modelcontextprotocol.io) that connects AI assistants with complex codebases. It offers fast indexing, powerful search, and precise code analysis, helping AI grasp project structure accurately and assist effectively.

**A great fit for:** code review, refactoring, documentation, debugging assistance, architectural analysis

## Quick Start

### 🚀 **Recommended Setup (Most Users)**

You can get started in just a few steps with any MCP-compatible application.

**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv)

1. **Add the server to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`)
   ```json
   {
     "mcpServers": {
       "code-index": {
         "command": "uvx",
         "args": ["code-index-mcp"]
       }
     }
   }
   ```

   > To point the server at a project automatically on startup, append
   > `--project-path /absolute/path/to/repo` to the end of the `args` array. This leaves the server in the same state as calling `set_project_path` right after startup.
2. **Restart the application** – `uvx` handles installation and execution automatically.

3. **Start using it** (give these prompts to your AI assistant)
   ```
   Set the project path to /Users/dev/my-react-app
   Find all TypeScript files in this project
   Search for "authentication"-related functions
   Analyze the src/App.tsx file
   ```
   *If you launched with the `--project-path` option, you can skip the first command - the server already knows the path.*

### Codex CLI Configuration

If you are using OpenAI's Codex CLI, add the following MCP server settings to `~/.codex/config.toml`.
On Windows it is located at `C:\Users\<you>\.codex\config.toml`.

```toml
[mcp_servers.code-index]
type = "stdio"
command = "uvx"
args = ["code-index-mcp"]
```
> To point at a project automatically at launch, add `--project-path C:/absolute/path/to/repo` to the `args` list.
> This has the same effect as calling the `set_project_path` tool afterwards.

`uvx` on Windows requires the default user profile directories.
Keep the environment-variable overrides below in the same block so the MCP starts reliably.

```toml
env = {
  HOME = "C:\\Users\\<you>",
  APPDATA = "C:\\Users\\<you>\\AppData\\Roaming",
  LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local",
  SystemRoot = "C:\\Windows"
}
```

Linux and macOS provide `HOME` and the XDG paths by default, so a separate `env` section is usually unnecessary.
Override them only when running in a restricted container or when you want to change the cache/settings locations manually.
For the full list of environment variables, see the [`uv` environment variables documentation](https://docs.astral.sh/uv/reference/environment/) (`HOME`, `XDG_CACHE_HOME`, `XDG_CONFIG_HOME`, `APPDATA`, and so on).


## Representative Use Cases

**Code review:** "Find every place that uses the old API"
**Refactoring support:** "Where is this function called?"
**Learning a project:** "Show me the core components of this React project"
**Debugging:** "Find the files that contain error-handling logic"

## Key Features

### 🧠 **Intelligent Search & Analysis**
- **Dual-strategy architecture:** dedicated tree-sitter parsers for 7 core languages; the other 50+ file types are handled by a fallback strategy
- **Direct tree-sitter integration:** no regex fallback for the specialized languages - fail fast with clear error messages
- **Advanced search:** automatically selects and uses the best of ugrep, ripgrep, ag, and grep
- **Universal file support:** broad coverage from sophisticated AST analysis down to basic file indexing
- **File analysis:** after running `build_deep_index`, get deep insight into structure, imports, classes, methods, and complexity metrics

### 🗂️ **Multi-Language Support**
- **Tree-sitter AST analysis (7 languages):** Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig
- **Fallback strategy (50+ formats):** C/C++, Rust, Ruby, PHP, and most other programming languages
- **Document & config files:** Markdown, JSON, YAML, XML, each handled appropriately
- **Web frontend:** Vue, React, Svelte, HTML, CSS, SCSS
- **Java web & build:** JSP/tags (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`), Grails/GSP (`.gsp`), Gradle/Groovy scripts (`.gradle`, `.groovy`), `.properties`, Protocol Buffers (`.proto`)
- **Data layer:** SQL, NoSQL, stored procedures, migration scripts
- **Configuration files:** JSON, YAML, XML, Markdown
- **[View the full list of supported files](#supported-file-types)**

### 🔄 **Real-time Monitoring & Auto-refresh**
- **File watcher:** automatically refreshes the shallow index (file list) when files change
- **Cross-platform:** uses the operating system's native filesystem events
- **Smart processing:** batches rapid changes to prevent excessive rebuilds
- **Shallow index refresh:** keeps the file list up to date; run `build_deep_index` when you need symbol data

### ⚡ **Performance & Efficiency**
- **Tree-sitter AST parsing:** native syntax analysis for accurate symbol extraction
- **Persistent caching:** stores indexes to maximize the speed of subsequent responses
- **Smart filtering:** automatically excludes build directories and temporary files
- **Memory efficient:** designed with large codebases in mind
- **Direct dependencies:** clear error messages, with no unnecessary fallbacks

## Supported File Types

<details>
<summary><strong>💻 Programming Languages (Click to expand)</strong></summary>

**Languages with dedicated tree-sitter strategies:**
- **Python** (`.py`, `.pyw`) – full AST analysis including class/method extraction and call tracking
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) – ES6+ classes and functions parsed with tree-sitter
- **TypeScript** (`.ts`, `.tsx`) – type-aware symbol extraction including interfaces
- **Java** (`.java`) – class hierarchy, method signatures, and call relationships
- **Go** (`.go`) – struct methods, receiver types, and function analysis
- **Objective-C** (`.m`, `.mm`) – class/instance methods distinguished with +/- notation
- **Zig** (`.zig`, `.zon`) – functions and structs analyzed via the tree-sitter AST

**All other programming languages:**
The remaining languages use the **fallback parsing strategy**, which provides basic metadata and file indexing. For example:
- **System/low-level:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
- **Object-oriented:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
- **Scripting:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
- **40+ more formats** – quickly explorable via the fallback strategy

</details>

<details>
<summary><strong>🌐 Web Frontend & UI</strong></summary>

- Frameworks: Vue (`.vue`), Svelte (`.svelte`), Astro (`.astro`)
- Styling: CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`), HTML (`.html`)
- Templates: Handlebars (`.hbs`, `.handlebars`), EJS (`.ejs`), Pug (`.pug`), FreeMarker (`.ftl`), Mustache (`.mustache`), Liquid (`.liquid`), ERB (`.erb`)

</details>

<details>
<summary><strong>🗄️ Data Layer & SQL</strong></summary>

- **SQL variants:** standard SQL (`.sql`, `.ddl`, `.dml`), database-specific dialects (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
- **DB objects:** procedures/functions (`.proc`, `.procedure`, `.func`, `.function`), views/triggers/indexes (`.view`, `.trigger`, `.index`)
- **Migration tools:** migration files (`.migration`, `.seed`, `.fixture`, `.schema`), tool configs (`.liquibase`, `.flyway`)
- **NoSQL & graph:** query languages (`.cql`, `.cypher`, `.sparql`, `.gql`)

</details>

<details>
<summary><strong>📄 Documents & Config Files</strong></summary>

- Markdown (`.md`, `.mdx`)
- Configuration files (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)

</details>

## Available Tools

### 🏗️ **Project Management**
| Tool | Description |
|------|------|
| **`set_project_path`** | Initialize the index for a project directory |
| **`refresh_index`** | Regenerate the shallow file index after file changes |
| **`build_deep_index`** | Build the full symbol index used for deep analysis |
| **`get_settings_info`** | Check the current project settings and status |

*Run `build_deep_index` when you need symbol-level data; the default shallow index handles fast file discovery.*

### 🔍 **Search & Exploration**
| Tool | Description |
|------|------|
| **`search_code_advanced`** | Smart search with regex, fuzzy matching, and file filtering (returns 10 results per page by default; adjustable via `max_results` and `start_index`) |
|
||||
| **`find_files`** | 글롭 패턴으로 파일 찾기 (예: `**/*.py`) |
|
||||
| **`get_file_summary`** | 파일 구조, 함수, 임포트, 복잡도를 분석 (심층 인덱스 필요) |
|
||||
|
||||
### 🔄 **모니터링 & 자동 새로고침**
|
||||
| 도구 | 설명 |
|
||||
|------|------|
|
||||
| **`get_file_watcher_status`** | 파일 워처 상태와 구성을 확인 |
|
||||
| **`configure_file_watcher`** | 자동 새로고침 설정 (활성/비활성, 지연 시간, 추가 제외 패턴) |
|
||||
|
||||
### 🛠️ **시스템 & 유지 관리**
|
||||
| 도구 | 설명 |
|
||||
|------|------|
|
||||
| **`create_temp_directory`** | 인덱스 저장용 임시 디렉터리를 생성 |
|
||||
| **`check_temp_directory`** | 인덱스 저장 위치와 권한을 확인 |
|
||||
| **`clear_settings`** | 모든 설정과 캐시 데이터를 초기화 |
|
||||
| **`refresh_search_tools`** | 사용 가능한 검색 도구를 재검색 (ugrep, ripgrep 등) |
|
||||
|
||||
## 사용 예시
|
||||
|
||||
### 🧭 **빠른 시작 워크플로**
|
||||
|
||||
**1. 프로젝트 초기화**
|
||||
```
|
||||
프로젝트 경로를 /Users/dev/my-react-app 으로 설정해줘
|
||||
```
|
||||
*프로젝트를 설정하고 얕은 인덱스를 생성합니다.*
|
||||
|
||||
**2. 프로젝트 구조 탐색**
|
||||
```
|
||||
src/components 안의 TypeScript 컴포넌트 파일을 모두 찾아줘
|
||||
```
|
||||
*사용 도구: `find_files` (`src/components/**/*.tsx`)*
|
||||
|
||||
**3. 핵심 파일 분석**
|
||||
```
|
||||
src/api/userService.ts 요약을 알려줘
|
||||
```
|
||||
*사용 도구: `get_file_summary` (함수, 임포트, 복잡도 표시)*
|
||||
*팁: `needs_deep_index` 응답이 나오면 먼저 `build_deep_index`를 실행하세요.*
|
||||
|
||||
### 🔍 **고급 검색 예시**
|
||||
|
||||
<details>
|
||||
<summary><strong>코드 패턴 검색</strong></summary>
|
||||
|
||||
```
|
||||
"get.*Data"에 해당하는 함수 호출을 정규식으로 찾아줘
|
||||
```
|
||||
*예: `getData()`, `getUserData()`, `getFormData()`*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>퍼지 함수 검색</strong></summary>
|
||||
|
||||
```
|
||||
'authUser'와 유사한 인증 관련 함수를 찾아줘
|
||||
```
|
||||
*예: `authenticateUser`, `authUserToken`, `userAuthCheck`*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>언어별 검색</strong></summary>
|
||||
|
||||
```
|
||||
Python 파일에서만 "API_ENDPOINT" 를 찾아줘
|
||||
```
|
||||
*`search_code_advanced` + `file_pattern="*.py"` (기본 10개 결과, `max_results`로 확장하고 `start_index`로 페이지 이동)*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>자동 새로고침 설정</strong></summary>
|
||||
|
||||
```
|
||||
파일 변경 시 자동으로 인덱스를 새로고침하도록 설정해줘
|
||||
```
|
||||
*`configure_file_watcher`로 활성화 및 지연 시간 설정*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>프로젝트 유지 관리</strong></summary>
|
||||
|
||||
```
|
||||
새 컴포넌트를 추가했어. 프로젝트 인덱스를 다시 빌드해줘
|
||||
```
|
||||
*`refresh_index`로 빠르게 얕은 인덱스를 업데이트*
|
||||
|
||||
</details>
|
||||
|
||||
## 문제 해결
|
||||
|
||||
### 🔄 **자동 새로고침이 동작하지 않을 때**
|
||||
- 환경 문제로 `watchdog`가 빠졌다면 설치: `pip install watchdog`
|
||||
- 수동 새로고침: 변경 후 `refresh_index` 도구 실행
|
||||
- 워처 상태 확인: `get_file_watcher_status` 도구로 활성 여부 점검
|
||||
|
||||
## 개발 & 기여
|
||||
|
||||
### 🛠️ **소스에서 실행하기**
|
||||
```bash
|
||||
git clone https://github.com/johnhuang316/code-index-mcp.git
|
||||
cd code-index-mcp
|
||||
uv sync
|
||||
uv run code-index-mcp
|
||||
```
|
||||
|
||||
### 🧪 **디버깅 도구**
|
||||
```bash
|
||||
npx @modelcontextprotocol/inspector uvx code-index-mcp
|
||||
```
|
||||
|
||||
### 🤝 **기여 안내**
|
||||
Pull Request를 언제든 환영합니다. 변경 사항과 테스트 방법을 함께 공유해주세요.
|
||||
|
||||
---
|
||||
|
||||
### 📄 **라이선스**
|
||||
[MIT License](LICENSE)
|
||||
|
||||
### 🌍 **번역본**
|
||||
- [English](README.md)
|
||||
- [繁體中文](README_zh.md)
|
||||
- [日本語](README_ja.md)
|
||||
416
reference/code-index-mcp-master/README_zh.md
Normal file
@@ -0,0 +1,416 @@
|
||||
# 程式碼索引 MCP
|
||||
|
||||
<div align="center">
|
||||
|
||||
[Model Context Protocol](https://modelcontextprotocol.io)
|
||||
[Python 3.10+](https://www.python.org/)
|
||||
[MIT License](LICENSE)
|
||||
|
||||
**為大型語言模型提供智慧程式碼索引與分析**
|
||||
|
||||
以先進的搜尋、分析和導航功能,徹底改變 AI 對程式碼庫的理解方式。
|
||||
|
||||
</div>
|
||||
|
||||
<a href="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp">
|
||||
<img width="380" height="200" src="https://glama.ai/mcp/servers/@johnhuang316/code-index-mcp/badge" alt="code-index-mcp MCP server" />
|
||||
</a>
|
||||
|
||||
## 概述
|
||||
|
||||
程式碼索引 MCP 是一個 [模型上下文協定](https://modelcontextprotocol.io) 伺服器,架起 AI 模型與複雜程式碼庫之間的橋樑。它提供智慧索引、先進搜尋功能和詳細程式碼分析,幫助 AI 助理有效地理解和導航您的專案。
|
||||
|
||||
**適用於:**程式碼審查、重構、文件生成、除錯協助和架構分析。
|
||||
|
||||
## 快速開始
|
||||
|
||||
### 🚀 **推薦設定(大多數使用者)**
|
||||
|
||||
與任何 MCP 相容應用程式開始的最簡單方式:
|
||||
|
||||
**前置需求:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv)
|
||||
|
||||
1. **新增到您的 MCP 設定** (例如 `claude_desktop_config.json` 或 `~/.claude.json`):
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"code-index": {
|
||||
"command": "uvx",
|
||||
"args": ["code-index-mcp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
> 若想在啟動時自動設定專案路徑,可在 `args` 陣列末尾加入
|
||||
> `--project-path /絕對/路徑`,效果等同於啟動後呼叫 `set_project_path`。
|
||||
|
||||
2. **重新啟動應用程式** – `uvx` 會自動處理安裝和執行
|
||||
|
||||
3. **開始使用**(向您的 AI 助理提供這些提示):
|
||||
```
|
||||
設定專案路徑為 /Users/dev/my-react-app
|
||||
在這個專案中找到所有 TypeScript 檔案
|
||||
搜尋「authentication」相關函數
|
||||
分析主要的 App.tsx 檔案
|
||||
```
|
||||
*如果啟動時已提供 `--project-path`,可以略過第一個指令,伺服器會自動記住路徑。*
|
||||
|
||||
### Codex CLI 設定
|
||||
|
||||
如果你使用 OpenAI 的 Codex CLI,請在 `~/.codex/config.toml` 中加入下列伺服器設定。
|
||||
Windows 的設定檔位於 `C:\Users\<you>\.codex\config.toml`:
|
||||
|
||||
```toml
|
||||
[mcp_servers.code-index]
|
||||
type = "stdio"
|
||||
command = "uvx"
|
||||
args = ["code-index-mcp"]
|
||||
```
|
||||
> 如需要啟動時自動設定專案,請把 `--project-path C:/絕對/路徑` 加到 `args`
|
||||
> 清單中,與手動呼叫 `set_project_path` 的結果相同。
|
||||
|
||||
在 Windows 中,`uvx` 需要找到標準的使用者目錄。保留下列環境變數覆寫設定,才能讓 MCP 穩定啟動:
|
||||
|
||||
```toml
|
||||
env = { HOME = "C:\\Users\\<you>", APPDATA = "C:\\Users\\<you>\\AppData\\Roaming", LOCALAPPDATA = "C:\\Users\\<you>\\AppData\\Local", SystemRoot = "C:\\Windows" }
|
||||
```
|
||||
|
||||
Linux 與 macOS 預設會提供 `HOME` 與 XDG 路徑,因此通常不需要額外的 `env` 區塊;只有在受限的容器環境或想手動調整快取/設定位置時才需要覆寫。
|
||||
完整的環境變數清單請參考 [`uv` 環境變數說明](https://docs.astral.sh/uv/reference/environment/)(包含 `HOME`、`XDG_CACHE_HOME`、`XDG_CONFIG_HOME`、`APPDATA` 等)。
|
||||
|
||||
|
||||
## 典型使用場景
|
||||
|
||||
**程式碼審查**:「找出所有使用舊 API 的地方」
|
||||
**重構協助**:「這個函數在哪裡被呼叫?」
|
||||
**學習專案**:「顯示這個 React 專案的主要元件」
|
||||
**除錯協助**:「搜尋所有錯誤處理相關的程式碼」
|
||||
|
||||
## 主要特性
|
||||
|
||||
### 🔍 **智慧搜尋與分析**
|
||||
- **雙策略架構**:7 種核心語言使用專業化 Tree-sitter 解析,50+ 種檔案類型使用備用策略
|
||||
- **直接 Tree-sitter 整合**:專業化語言無正則表達式備用 - 快速失敗並提供清晰錯誤訊息
|
||||
- **進階搜尋**:自動偵測並使用最佳工具(ugrep、ripgrep、ag 或 grep)
|
||||
- **通用檔案支援**:從進階 AST 解析到基本檔案索引的全面覆蓋
|
||||
- **檔案分析**:執行 `build_deep_index` 後深入了解結構、匯入、類別、方法和複雜度指標
|
||||
|
||||
### 🗂️ **多語言支援**
|
||||
- **7 種語言使用 Tree-sitter AST 解析**:Python、JavaScript、TypeScript、Java、Go、Objective-C、Zig
|
||||
- **50+ 種檔案類型使用備用策略**:C/C++、Rust、Ruby、PHP 和所有其他程式語言
|
||||
- **文件與配置檔案**:Markdown、JSON、YAML、XML 適當處理
|
||||
- **網頁前端**:Vue、React、Svelte、HTML、CSS、SCSS
|
||||
- **Java Web 與建置**:JSP/Tag (`.jsp`, `.jspx`, `.jspf`, `.tag`, `.tagx`)、Grails/GSP (`.gsp`)、Gradle/Groovy 腳本 (`.gradle`, `.groovy`)、`.properties`、Protocol Buffers (`.proto`)
|
||||
- **資料庫**:SQL 變體、NoSQL、存儲過程、遷移腳本
|
||||
- **配置檔案**:JSON、YAML、XML、Markdown
|
||||
- **[查看完整列表](#支援的檔案類型)**
|
||||
|
||||
### ⚡ **即時監控與自動刷新**
|
||||
- **檔案監控器**:檔案變更時自動更新索引
|
||||
- **跨平台**:原生作業系統檔案系統監控
|
||||
- **智慧處理**:批次處理快速變更以防止過度重建
|
||||
- **淺層索引更新**:監控檔案變更並維持檔案清單最新;需要符號資料時請執行 `build_deep_index`
|
||||
|
||||
### ⚡ **效能與效率**
|
||||
- **Tree-sitter AST 解析**:原生語法解析以實現準確的符號提取
|
||||
- **持久快取**:儲存索引以實現超快速的後續存取
|
||||
- **智慧篩選**:智能排除建構目錄和暫存檔案
|
||||
- **記憶體高效**:針對大型程式碼庫優化
|
||||
- **直接依賴**:無備用機制 - 快速失敗並提供清晰錯誤訊息
|
||||
|
||||
## 支援的檔案類型
|
||||
|
||||
<details>
|
||||
<summary><strong>📁 程式語言(點擊展開)</strong></summary>
|
||||
|
||||
**專業化 Tree-sitter 策略語言:**
|
||||
- **Python** (`.py`, `.pyw`) - 完整 AST 分析,包含類別/方法提取和呼叫追蹤
|
||||
- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ 類別和函數解析使用 Tree-sitter
|
||||
- **TypeScript** (`.ts`, `.tsx`) - 完整類型感知符號提取,包含介面
|
||||
- **Java** (`.java`) - 完整類別階層、方法簽名和呼叫關係
|
||||
- **Go** (`.go`) - 結構方法、接收者類型和函數分析
|
||||
- **Objective-C** (`.m`, `.mm`) - 類別/實例方法區分,使用 +/- 標記法
|
||||
- **Zig** (`.zig`, `.zon`) - 函數和結構解析使用 Tree-sitter AST
|
||||
|
||||
**所有其他程式語言:**
|
||||
所有其他程式語言使用 **備用解析策略**,提供基本檔案索引和元資料提取。包括:
|
||||
- **系統與低階語言:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)、Rust (`.rs`)
|
||||
- **物件導向語言:** C# (`.cs`)、Kotlin (`.kt`)、Scala (`.scala`)、Swift (`.swift`)
|
||||
- **腳本與動態語言:** Ruby (`.rb`)、PHP (`.php`)、Shell (`.sh`, `.bash`)
|
||||
- **以及 40+ 種檔案類型** - 全部通過備用策略處理進行基本索引
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>🌐 網頁與前端(點擊展開)</strong></summary>
|
||||
|
||||
**框架與函式庫:**
|
||||
- Vue (`.vue`)
|
||||
- Svelte (`.svelte`)
|
||||
- Astro (`.astro`)
|
||||
|
||||
**樣式:**
|
||||
- CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`)
|
||||
- HTML (`.html`)
|
||||
|
||||
**模板:**
|
||||
- Handlebars (`.hbs`, `.handlebars`)
|
||||
- EJS (`.ejs`)
|
||||
- Pug (`.pug`)
|
||||
- FreeMarker (`.ftl`)
|
||||
- Mustache (`.mustache`)
|
||||
- Liquid (`.liquid`)
|
||||
- ERB (`.erb`)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>🗄️ 資料庫與 SQL(點擊展開)</strong></summary>
|
||||
|
||||
**SQL 變體:**
|
||||
- 標準 SQL (`.sql`, `.ddl`, `.dml`)
|
||||
- 資料庫特定 (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
|
||||
|
||||
**資料庫物件:**
|
||||
- 程序與函式 (`.proc`, `.procedure`, `.func`, `.function`)
|
||||
- 檢視與觸發器 (`.view`, `.trigger`, `.index`)
|
||||
|
||||
**遷移與工具:**
|
||||
- 遷移檔案 (`.migration`, `.seed`, `.fixture`, `.schema`)
|
||||
- 工具特定 (`.liquibase`, `.flyway`)
|
||||
|
||||
**NoSQL 與現代資料庫:**
|
||||
- 圖形與查詢 (`.cql`, `.cypher`, `.sparql`, `.gql`)
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>📄 文件與配置(點擊展開)</strong></summary>
|
||||
|
||||
- Markdown (`.md`, `.mdx`)
|
||||
- 配置 (`.json`, `.xml`, `.yml`, `.yaml`, `.properties`)
|
||||
|
||||
</details>
|
||||
|
||||
## 快速開始
|
||||
|
||||
### 🚀 **建議設定(適用於大多數使用者)**
|
||||
|
||||
在任何相容 MCP 的應用程式中開始使用的最簡單方法:
|
||||
|
||||
**先決條件:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv)
|
||||
|
||||
1. **新增到您的 MCP 配置**(例如 `claude_desktop_config.json` 或 `~/.claude.json`):
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"code-index": {
|
||||
"command": "uvx",
|
||||
"args": ["code-index-mcp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **重新啟動您的應用程式** – `uvx` 會自動處理安裝和執行
|
||||
|
||||
### 🛠️ **開發設定**
|
||||
|
||||
適用於貢獻或本地開發:
|
||||
|
||||
1. **克隆並安裝:**
|
||||
```bash
|
||||
git clone https://github.com/johnhuang316/code-index-mcp.git
|
||||
cd code-index-mcp
|
||||
uv sync
|
||||
```
|
||||
|
||||
2. **配置本地開發:**
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"code-index": {
|
||||
"command": "uv",
|
||||
"args": ["run", "code-index-mcp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. **使用 MCP Inspector 除錯:**
|
||||
```bash
|
||||
npx @modelcontextprotocol/inspector uv run code-index-mcp
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><strong>替代方案:手動 pip 安裝</strong></summary>
|
||||
|
||||
如果您偏好傳統的 pip 管理:
|
||||
|
||||
```bash
|
||||
pip install code-index-mcp
|
||||
```
|
||||
|
||||
然後配置:
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"code-index": {
|
||||
"command": "code-index-mcp",
|
||||
"args": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
## 可用工具
|
||||
|
||||
### 🏗️ **專案管理**
|
||||
| 工具 | 描述 |
|
||||
|------|------|
|
||||
| **`set_project_path`** | 為專案目錄初始化索引 |
|
||||
| **`refresh_index`** | 在檔案變更後重建淺層檔案索引 |
|
||||
| **`build_deep_index`** | 產生供深度分析使用的完整符號索引 |
|
||||
| **`get_settings_info`** | 檢視目前專案配置和狀態 |
|
||||
|
||||
*需要符號層級資料時,請執行 `build_deep_index`;預設的淺層索引提供快速檔案探索。*
|
||||
|
||||
### 🔍 **搜尋與探索**
|
||||
| 工具 | 描述 |
|
||||
|------|------|
|
||||
| **`search_code_advanced`** | 智慧搜尋,支援正規表達式、模糊匹配和檔案篩選,預設每頁回傳 10 筆結果,可透過 `max_results` 與 `start_index` 調整 |
|
||||
| **`find_files`** | 使用萬用字元模式尋找檔案(例如 `**/*.py`) |
|
||||
| **`get_file_summary`** | 分析檔案結構、函式、匯入和複雜度(需要深度索引) |
|
||||
|
||||
### 🔄 **監控與自動刷新**
|
||||
| 工具 | 描述 |
|
||||
|------|------|
|
||||
| **`get_file_watcher_status`** | 檢查檔案監控器狀態和配置 |
|
||||
| **`configure_file_watcher`** | 啟用/停用自動刷新並配置設定 |
|
||||
|
||||
### 🛠️ **系統與維護**
|
||||
| 工具 | 描述 |
|
||||
|------|------|
|
||||
| **`create_temp_directory`** | 設定索引資料的儲存目錄 |
|
||||
| **`check_temp_directory`** | 驗證索引儲存位置和權限 |
|
||||
| **`clear_settings`** | 重設所有快取資料和配置 |
|
||||
| **`refresh_search_tools`** | 重新偵測可用的搜尋工具(ugrep、ripgrep 等) |
|
||||
|
||||
## 使用範例
|
||||
|
||||
### 🎯 **快速開始工作流程**
|
||||
|
||||
**1. 初始化您的專案**
|
||||
```
|
||||
將專案路徑設定為 /Users/dev/my-react-app
|
||||
```
|
||||
*自動索引您的程式碼庫並建立可搜尋的快取*
|
||||
|
||||
**2. 探索專案結構**
|
||||
```
|
||||
在 src/components 中尋找所有 TypeScript 元件檔案
|
||||
```
|
||||
*使用:`find_files`,模式為 `src/components/**/*.tsx`*
|
||||
|
||||
**3. 分析關鍵檔案**
|
||||
```
|
||||
給我 src/api/userService.ts 的摘要
|
||||
```
|
||||
*使用:`get_file_summary` 顯示函式、匯入和複雜度*
|
||||
*提示:若收到 `needs_deep_index` 回應,請先執行 `build_deep_index`。*
|
||||
|
||||
### 🔍 **進階搜尋範例**
|
||||
|
||||
<details>
|
||||
<summary><strong>程式碼模式搜尋</strong></summary>
|
||||
|
||||
```
|
||||
使用正規表達式搜尋所有符合 "get.*Data" 的函式呼叫
|
||||
```
|
||||
*找到:`getData()`、`getUserData()`、`getFormData()` 等*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>模糊函式搜尋</strong></summary>
|
||||
|
||||
```
|
||||
使用 'authUser' 的模糊搜尋尋找驗證相關函式
|
||||
```
|
||||
*匹配:`authenticateUser`、`authUserToken`、`userAuthCheck` 等*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>特定語言搜尋</strong></summary>
|
||||
|
||||
```
|
||||
只在 Python 檔案中搜尋 "API_ENDPOINT"
|
||||
```
|
||||
*使用:`search_code_advanced`,`file_pattern: "*.py"`(預設回傳 10 筆;使用 `max_results` 擴充或 `start_index` 換頁)*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>自動刷新配置</strong></summary>
|
||||
|
||||
```
|
||||
配置檔案變更時的自動索引更新
|
||||
```
|
||||
*使用:`configure_file_watcher` 啟用/停用監控並設定防抖時間*
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>專案維護</strong></summary>
|
||||
|
||||
```
|
||||
我新增了新元件,請重新整理專案索引
|
||||
```
|
||||
*使用:`refresh_index` 更新可搜尋的快取*
|
||||
|
||||
</details>
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 🔄 **自動刷新無法運作**
|
||||
|
||||
如果檔案變更時自動索引更新無法運作,請嘗試:
|
||||
- `pip install watchdog`(可能解決環境隔離問題)
|
||||
- 使用手動刷新:在檔案變更後呼叫 `refresh_index` 工具
|
||||
- 檢查檔案監視器狀態:使用 `get_file_watcher_status` 驗證監控是否處於活動狀態
|
||||
|
||||
## 開發與貢獻
|
||||
|
||||
### 🔧 **從原始碼建構**
|
||||
```bash
|
||||
git clone https://github.com/johnhuang316/code-index-mcp.git
|
||||
cd code-index-mcp
|
||||
uv sync
|
||||
uv run code-index-mcp
|
||||
```
|
||||
|
||||
### 🐛 **除錯**
|
||||
```bash
|
||||
npx @modelcontextprotocol/inspector uvx code-index-mcp
|
||||
```
|
||||
|
||||
### 🤝 **貢獻**
|
||||
歡迎貢獻!請隨時提交拉取請求。
|
||||
|
||||
---
|
||||
|
||||
### 📜 **授權條款**
|
||||
[MIT 授權條款](LICENSE)
|
||||
|
||||
### 🌐 **翻譯**
|
||||
- [English](README.md)
|
||||
- [日本語](README_ja.md)
|
||||
83
reference/code-index-mcp-master/docs/mcp-restart-playbook.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# MCP Restart Playbook (November 10, 2025)
|
||||
|
||||
This runbook is for the first LLM/agent session *after* the MCP server restarts (for example, after bumping dependencies or recycling the FastMCP process). Follow every step in order so we quickly regain context, validate the upgraded toolchain, and communicate status to the rest of the team.
|
||||
|
||||
---
|
||||
|
||||
## 1. Current Snapshot
|
||||
- **Branch**: `mcp-upgrade-notes`
|
||||
- **Python**: 3.13.2 (uv-managed)
|
||||
- **Key dependency**: `mcp>=1.21.0,<2.0.0` (synced across `pyproject.toml`, `requirements.txt`, and `uv.lock`)
|
||||
- **Latest validation**: `uv run pytest` — 16 tests passed on **November 10, 2025 @ 02:05 UTC**
|
||||
- **Reference doc**: `docs/mcp-upgrade-notes.md` (rationale, API deltas, validation checklist)
|
||||
|
||||
If any of these details drift (new branch, newer SDK, etc.) update this file before handing off.
|
||||
|
||||
---
|
||||
|
||||
## 2. Post-Restart MCP Calls (must run all tools)
|
||||
Run through every exposed MCP primitive to guarantee parity after restart. Use the table below as a checklist and record each response summary.
|
||||
|
||||
| # | Tool | Minimum Input | Expected outcome |
|
||||
|---|------|---------------|------------------|
|
||||
| 1 | `set_project_path` | `path="C:\Users\p10362321\project\code-index-mcp"` | Indexed ~149 files; watcher initialized. |
|
||||
| 2 | `build_deep_index` | - | Project re-indexed. Found ~149 files / ~1,070 symbols. |
|
||||
| 3 | `search_code_advanced` | `pattern="FastMCP", file_pattern="src/**/*.py", max_results=20` | Hits in `server.py` plus pagination metadata. |
|
||||
| 4 | `find_files` | `pattern="tests/**/*.py"` | Returns 10 test modules. |
|
||||
| 5 | `get_file_summary` | `file_path="src/code_index_mcp/server.py"` | ~390 lines, 20+ functions reported. |
|
||||
| 6 | `refresh_index` | - | Shallow index re-built with ~149 files. |
|
||||
| 7 | `get_settings_info` | - | Shows temp/settings dirs, writable=true. |
|
||||
| 8 | `create_temp_directory` | - | Confirms directory exists/created. |
|
||||
| 9 | `check_temp_directory` | - | Lists `index.db`, `index.msgpack`, `index.shallow.json`. |
|
||||
|10 | `clear_settings` | - | Project settings, index, and cache have been cleared (rerun #1 + #2). |
|
||||
|11 | `refresh_search_tools` | - | Available: ['ripgrep', 'basic']; preferred: ripgrep. |
|
||||
|12 | `get_file_watcher_status` | - | status: active, debounce_seconds=6. |
|
||||
|13 | `configure_file_watcher` | `enabled=True, debounce_seconds=6` | Confirmation message (restart may be required). |
|
||||
|
||||
Notes:
|
||||
- After running `clear_settings`, immediately repeat `set_project_path` + `build_deep_index` to restore context before proceeding.
|
||||
- If any tool fails, stop the playbook, capture output, and escalate before continuing.
|
||||
|
||||
Log each response summary in the session notes so the next engineer knows everything is green.
|
||||
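For repeatable runs you can script the same checklist with the MCP Python client. This is a sketch rather than part of the repo: it assumes the official `mcp` SDK client surface (`ClientSession`, `stdio_client`) and reuses the project path from the table; the remaining tools follow the same `call_tool` pattern.

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

PROJECT_PATH = r"C:\Users\p10362321\project\code-index-mcp"  # path from the table above

async def run_checklist() -> None:
    # Launch the server over stdio the same way uvx does.
    server = StdioServerParameters(command="uvx", args=["code-index-mcp"])
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Steps 1-2: restore project context, then rebuild the deep index.
            print(await session.call_tool("set_project_path", {"path": PROJECT_PATH}))
            print(await session.call_tool("build_deep_index", {}))
            # Step 3: spot-check search; extend with steps 4-13 in the same way.
            print(await session.call_tool(
                "search_code_advanced",
                {"pattern": "FastMCP", "file_pattern": "src/**/*.py", "max_results": 20},
            ))

asyncio.run(run_checklist())
```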
|
||||
---
|
||||
|
||||
## 3. CLI / End-to-End Smoke
|
||||
Run these in the repo root once the MCP tools succeed:
|
||||
|
||||
```powershell
|
||||
uv run code-index-mcp --project-path C:\Users\p10362321\project\code-index-mcp
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
- Treat any warning or stderr output as a blocker.
|
||||
- Capture timestamps + durations; attach to release prep if we are close to tagging.
|
||||
|
||||
---
|
||||
|
||||
## 4. Communicate Status
|
||||
When handing the session back to the team, summarize:
|
||||
|
||||
- **SDK state**: Confirm we are still on MCP 1.21.0 (with context injection + capability helpers).
|
||||
- **Tool cache**: Mention that clients should re-cache tool lists after restart (FastMCP now enforces metadata changes).
|
||||
- **Known issues**: Note any skipped steps, flaky tests, or manual interventions.
|
||||
- **Next action**: “Ready for release prep” or “Need follow-up on X” — whichever applies after the smoke tests.
|
||||
|
||||
---
|
||||
|
||||
## 5. Troubleshooting Quick Reference
|
||||
- **`set_project_path` fails** → Ensure the repo path is accessible (sandbox permissions) and no other agent locked `index.db`. Run `clear_settings()` then retry.
|
||||
- **Search returns zero results** → Run `refresh_search_tools()`; if ripgrep missing, fall back to `basic` and flag the infra team.
|
||||
- **Watcher inactive** → Call `configure_file_watcher(enabled=True)` and `refresh_index()`. Document if it remains inactive.
|
||||
- **CLI smoke exits non-zero** → Capture full stdout/stderr, file an issue linked to `docs/mcp-upgrade-notes.md`, and pause release work.
|
||||
|
||||
Keep this section updated with any new gotchas discovered during restarts.
|
||||
|
||||
---
|
||||
|
||||
## 6. Hand-off Checklist
|
||||
- [ ] Steps 1–4 executed and logged in the current session.
|
||||
- [ ] Any deviations documented (include timestamps + command output).
|
||||
- [ ] This playbook reviewed/updated if procedures changed.
|
||||
|
||||
If all boxes are checked, the MCP server is considered healthy and ready for normal development or release activities.
|
||||
28
reference/code-index-mcp-master/docs/mcp-upgrade-notes.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# MCP Upgrade Notes (November 2025)
|
||||
|
||||
## Why this upgrade matters
|
||||
- `mcp` 1.21.0 was published to PyPI on 2025-11-06, so we are at least 17 point releases behind the current SDK and missing recent transport, auth, and client-surface fixes.
|
||||
- The MCP governance group will cut the next specification release on 2025-11-25 (RC on 2025-11-11), so validating 1.21.0 now keeps us aligned ahead of another protocol bump.
|
||||
|
||||
## Dependency & packaging considerations
|
||||
1. Run `uv lock --upgrade mcp` (or equivalent) so `uv.lock` stops pinning 1.4.1 and picks up the 1.21.0 wheels plus their refreshed transitive set (Starlette 0.49.1, AnyIO/HTTPX upgrades, etc.).
|
||||
2. Re-run `uv run pytest` and our smoke commands (`uv run code-index-mcp --project-path <repo>`) because AnyIO cancellation semantics and Starlette ASGI changes can surface subtle regressions in watcher services.
|
||||
3. Publish the lockfile and version bumps together; our release checklist requires pyproject + package __init__ + uv.lock to stay in sync.
|
||||
|
||||
## API & runtime changes to verify
|
||||
- SEP-985 landed in 1.21.0, adding OAuth-protected resource metadata fallback: confirm our SettingsService handles `WWW-Authenticate` responses and that CLI flags surface any required bearer tokens.
|
||||
- `ClientSession.get_server_capabilities()` is new; if clients or integration tests introspect capabilities manually, migrate to this helper (see the sketch after this list).
|
||||
- Starlette 0.49.1 ships tighter ASGI scope validation; double-check our SSE transport and progress notifications.
|
||||
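A migration sketch for the capability helper, assuming an already-initialized `ClientSession` named `session`; the exact call shape is an assumption based on these notes, and the `tools` field mirrors the spec's `ServerCapabilities`:

```python
# Sketch only: replace manual capability introspection with the 1.21.0 helper.
caps = session.get_server_capabilities()
if caps is not None and caps.tools is not None:
    print("server advertises tool support; list_tools() is safe to call")
```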
|
||||
## Recommended practices for 1.21.x
|
||||
1. **Depend on Context injection, not globals.** Annotate `ctx: Context` parameters so FastMCP injects the request context automatically instead of calling `mcp.get_context()` directly; this keeps us compatible with async-only handlers and future dependency-injection changes (see the sketch after this list).
|
||||
2. **Cache expensive tool listings in clients.** Newer agents (OpenAI Agents SDK, Claude Desktop) call `list_tools()` on every run; set `cache_tools_list=True` only when our tool roster is static and call `invalidate_tools_cache()` after deployments.
|
||||
3. **Respect capability negotiation each session.** Protocol version 2025-06-18 remains current, and version negotiation happens during `initialize`; ensure our server exposes accurate `capabilities` metadata and gracefully errors when clients offer only future versions.
|
||||
4. **Stay ahead of November spec changes.** The upcoming 2025-11-25 spec focuses on additional security hardening. Schedule time to exercise the RC (available 2025-11-11) so we can absorb any required surface changes early.
|
||||
5. **Document OAuth and transport choices.** With SEP-985 and other auth SEPs in flight, record which flows (`device`, `jwt-bearer`, etc.) each deployment expects, and prefer the Streamable HTTP transport when exposing remote servers to benefit from the latest security guidance.
|
||||
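A minimal sketch of practice #1, using the FastMCP decorator API from the `mcp` SDK; the tool name and log message are illustrative only:

```python
from mcp.server.fastmcp import Context, FastMCP

mcp = FastMCP("code-index")

@mcp.tool()
async def ping_index(ctx: Context) -> str:
    """FastMCP injects the request context via the ctx annotation; no globals needed."""
    await ctx.info("checking index status")  # context-scoped logging back to the client
    return "ok"
```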
|
||||
## Validation checklist before merging
|
||||
- [ ] Lockfile regenerated (`uv lock --upgrade mcp`) and `uv run python -m code_index_mcp.server --help` still succeeds.
|
||||
- [ ] `uv run code-index-mcp --project-path <repo>` exercises `set_project_path`, `build_deep_index`, and `search_code_advanced` end-to-end.
|
||||
- [ ] Smoke Claude Desktop / Codex CLI against the upgraded server; confirm resources + tools enumerate and that tool caching behaves as expected.
|
||||
- [ ] Update release notes + AGENTS.md summary once 1.21.x is verified in staging.
|
||||
43
reference/code-index-mcp-master/fastmcp.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"$schema": "https://fastmcp.wiki/en/schemas/fastmcp.json",
|
||||
"name": "Code Index MCP",
|
||||
"description": "Indexes a local repository and exposes search, indexing, and file utilities via the Model Context Protocol.",
|
||||
"license": "MIT",
|
||||
"keywords": [
|
||||
"mcp",
|
||||
"code-index",
|
||||
"search",
|
||||
"fastmcp"
|
||||
],
|
||||
"links": [
|
||||
{
|
||||
"rel": "source",
|
||||
"href": "https://github.com/johnhuang316/code-index-mcp"
|
||||
},
|
||||
{
|
||||
"rel": "documentation",
|
||||
"href": "https://github.com/johnhuang316/code-index-mcp#readme"
|
||||
}
|
||||
],
|
||||
"source": {
|
||||
"path": "src/code_index_mcp/server.py",
|
||||
"entrypoint": "mcp"
|
||||
},
|
||||
"environment": {
|
||||
"python": ">=3.10",
|
||||
"dependencies": [
|
||||
"mcp>=1.21.0,<2.0.0",
|
||||
"watchdog>=3.0.0",
|
||||
"tree-sitter>=0.20.0",
|
||||
"tree-sitter-javascript>=0.20.0",
|
||||
"tree-sitter-typescript>=0.20.0",
|
||||
"tree-sitter-java>=0.20.0",
|
||||
"tree-sitter-zig>=0.20.0",
|
||||
"pathspec>=0.12.1",
|
||||
"msgpack>=1.0.0"
|
||||
]
|
||||
},
|
||||
"deployment": {
|
||||
"transport": "stdio"
|
||||
}
|
||||
}
|
||||
35
reference/code-index-mcp-master/pyproject.toml
Normal file
@@ -0,0 +1,35 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "code-index-mcp"
|
||||
version = "2.9.4"
|
||||
description = "Code indexing and analysis tools for LLMs using MCP"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
{name = "johnhuang316"}
|
||||
]
|
||||
dependencies = [
|
||||
"mcp>=1.21.0,<2.0.0",
|
||||
"watchdog>=3.0.0",
|
||||
"tree-sitter>=0.20.0",
|
||||
"tree-sitter-javascript>=0.20.0",
|
||||
"tree-sitter-typescript>=0.20.0",
|
||||
"tree-sitter-java>=0.20.0",
|
||||
"tree-sitter-zig>=0.20.0",
|
||||
"pathspec>=0.12.1",
|
||||
"msgpack>=1.0.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/johnhuang316/code-index-mcp"
|
||||
"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues"
|
||||
|
||||
[project.scripts]
|
||||
code-index-mcp = "code_index_mcp.server:main"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
10
reference/code-index-mcp-master/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
mcp>=1.21.0,<2.0.0
|
||||
watchdog>=3.0.0
|
||||
protobuf>=4.21.0
|
||||
tree-sitter>=0.20.0
|
||||
tree-sitter-javascript>=0.20.0
|
||||
tree-sitter-typescript>=0.20.0
|
||||
tree-sitter-java>=0.20.0
|
||||
tree-sitter-zig>=0.20.0
|
||||
pathspec>=0.12.1
|
||||
libclang>=16.0.0
|
||||
19
reference/code-index-mcp-master/run.py
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Development convenience script to run the Code Index MCP server.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add src directory to path
|
||||
src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src')
|
||||
sys.path.insert(0, src_path)
|
||||
|
||||
try:
|
||||
from code_index_mcp.server import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
except Exception:
|
||||
# Exit silently on failure without printing any messages
|
||||
raise SystemExit(1)
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Code Index MCP package.
|
||||
|
||||
A Model Context Protocol server for code indexing, searching, and analysis.
|
||||
"""
|
||||
|
||||
__version__ = "2.9.4"
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Main entry point for the code-index-mcp package."""
|
||||
|
||||
from code_index_mcp.server import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
128
reference/code-index-mcp-master/src/code_index_mcp/constants.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""
|
||||
Shared constants for the Code Index MCP server.
|
||||
"""
|
||||
|
||||
# Directory and file names
|
||||
SETTINGS_DIR = "code_indexer"
|
||||
CONFIG_FILE = "config.json"
|
||||
INDEX_FILE = "index.json" # JSON index file (deep index)
|
||||
INDEX_FILE_SHALLOW = "index.shallow.json" # Minimal shallow index (file list)
|
||||
INDEX_FILE_DB = "index.db" # SQLite deep index file
|
||||
|
||||
# Supported file extensions for code analysis
|
||||
# This is the authoritative list used by both old and new indexing systems
|
||||
SUPPORTED_EXTENSIONS = [
|
||||
# Core programming languages
|
||||
'.py', '.pyw', # Python
|
||||
'.js', '.jsx', '.ts', '.tsx', # JavaScript/TypeScript
|
||||
'.mjs', '.cjs', # Modern JavaScript
|
||||
'.java', # Java
|
||||
'.c', '.cpp', '.h', '.hpp', # C/C++
|
||||
'.cxx', '.cc', '.hxx', '.hh', # C++ variants
|
||||
'.cs', # C#
|
||||
'.go', # Go
|
||||
'.m', '.mm', # Objective-C
|
||||
'.rb', # Ruby
|
||||
'.php', # PHP
|
||||
'.swift', # Swift
|
||||
'.kt', '.kts', # Kotlin
|
||||
'.rs', # Rust
|
||||
'.scala', # Scala
|
||||
'.sh', '.bash', '.zsh', # Shell scripts
|
||||
'.ps1', # PowerShell
|
||||
'.bat', '.cmd', # Windows batch
|
||||
'.r', '.R', # R
|
||||
'.pl', '.pm', # Perl
|
||||
'.lua', # Lua
|
||||
'.dart', # Dart
|
||||
'.hs', # Haskell
|
||||
'.ml', '.mli', # OCaml
|
||||
'.fs', '.fsx', # F#
|
||||
'.clj', '.cljs', # Clojure
|
||||
'.vim', # Vim script
|
||||
'.zig', '.zon', # Zig
|
||||
|
||||
# Web and markup
|
||||
'.html', '.htm', # HTML
|
||||
'.css', '.scss', '.sass', # Stylesheets
|
||||
'.less', '.stylus', '.styl', # Style languages
|
||||
'.md', '.mdx', # Markdown
|
||||
'.json', '.jsonc', # JSON
|
||||
'.xml', # XML
|
||||
'.yml', '.yaml', # YAML
|
||||
|
||||
# Frontend frameworks
|
||||
'.vue', # Vue.js
|
||||
'.svelte', # Svelte
|
||||
'.astro', # Astro
|
||||
|
||||
# Java web & build artifacts
|
||||
'.jsp', '.jspx', '.jspf', # JSP pages
|
||||
'.tag', '.tagx', # JSP tag files
|
||||
'.gsp', # Grails templates
|
||||
'.properties', # Java .properties configs
|
||||
'.gradle', '.groovy', # Gradle/Groovy build scripts
|
||||
'.proto', # Protocol Buffers
|
||||
|
||||
# Template engines
|
||||
'.hbs', '.handlebars', # Handlebars
|
||||
'.ejs', # EJS
|
||||
'.pug', # Pug
|
||||
'.ftl', # FreeMarker
|
||||
'.mustache', '.liquid', '.erb', # Additional template engines
|
||||
|
||||
# Database and SQL
|
||||
'.sql', '.ddl', '.dml', # SQL
|
||||
'.mysql', '.postgresql', '.psql', # Database-specific SQL
|
||||
'.sqlite', '.mssql', '.oracle', # More databases
|
||||
'.ora', '.db2', # Oracle and DB2
|
||||
'.proc', '.procedure', # Stored procedures
|
||||
'.func', '.function', # Functions
|
||||
'.view', '.trigger', '.index', # Database objects
|
||||
'.migration', '.seed', '.fixture', # Migration files
|
||||
'.schema', # Schema files
|
||||
'.cql', '.cypher', '.sparql', # NoSQL query languages
|
||||
'.gql', # GraphQL
|
||||
'.liquibase', '.flyway', # Migration tools
|
||||
]
|
||||
|
||||
# Centralized filtering configuration
|
||||
FILTER_CONFIG = {
|
||||
"exclude_directories": {
|
||||
# Version control
|
||||
'.git', '.svn', '.hg', '.bzr',
|
||||
|
||||
# Package managers & dependencies
|
||||
'node_modules', '__pycache__', '.venv', 'venv',
|
||||
'vendor', 'bower_components',
|
||||
|
||||
# Build outputs
|
||||
'dist', 'build', 'target', 'out', 'bin', 'obj',
|
||||
|
||||
# IDE & editors
|
||||
'.idea', '.vscode', '.vs', '.sublime-workspace',
|
||||
|
||||
# Testing & coverage
|
||||
'.pytest_cache', '.coverage', '.tox', '.nyc_output',
|
||||
'coverage', 'htmlcov',
|
||||
|
||||
# OS artifacts
|
||||
'.DS_Store', 'Thumbs.db', 'desktop.ini'
|
||||
},
|
||||
|
||||
"exclude_files": {
|
||||
# Temporary files
|
||||
'*.tmp', '*.temp', '*.swp', '*.swo',
|
||||
|
||||
# Backup files
|
||||
'*.bak', '*~', '*.orig',
|
||||
|
||||
# Log files
|
||||
'*.log',
|
||||
|
||||
# Lock files
|
||||
'package-lock.json', 'yarn.lock', 'Pipfile.lock'
|
||||
},
|
||||
|
||||
"supported_extensions": SUPPORTED_EXTENSIONS
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
"""
|
||||
Code indexing utilities for the MCP server.
|
||||
|
||||
Deep indexing now relies exclusively on the SQLite backend.
|
||||
"""
|
||||
|
||||
from .qualified_names import generate_qualified_name, normalize_file_path
|
||||
from .json_index_builder import JSONIndexBuilder, IndexMetadata
|
||||
from .sqlite_index_builder import SQLiteIndexBuilder
|
||||
from .sqlite_index_manager import SQLiteIndexManager
|
||||
from .shallow_index_manager import ShallowIndexManager, get_shallow_index_manager
|
||||
from .deep_index_manager import DeepIndexManager
|
||||
from .models import SymbolInfo, FileInfo
|
||||
|
||||
_sqlite_index_manager = SQLiteIndexManager()
|
||||
|
||||
|
||||
def get_index_manager() -> SQLiteIndexManager:
|
||||
"""Return the singleton SQLite index manager."""
|
||||
return _sqlite_index_manager
|
||||
|
||||
|
||||
__all__ = [
|
||||
"generate_qualified_name",
|
||||
"normalize_file_path",
|
||||
"JSONIndexBuilder",
|
||||
"IndexMetadata",
|
||||
"SQLiteIndexBuilder",
|
||||
"SQLiteIndexManager",
|
||||
"get_index_manager",
|
||||
"ShallowIndexManager",
|
||||
"get_shallow_index_manager",
|
||||
"DeepIndexManager",
|
||||
"SymbolInfo",
|
||||
"FileInfo",
|
||||
]
|
||||
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Deep Index Manager - Wrapper around the SQLite index manager.
|
||||
|
||||
This class provides a clear semantic separation from the shallow manager while
|
||||
delegating operations to the SQLite-backed implementation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from .sqlite_index_manager import SQLiteIndexManager
|
||||
|
||||
|
||||
class DeepIndexManager:
|
||||
"""Thin wrapper over SQLiteIndexManager to expose deep-index API."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._mgr = SQLiteIndexManager()
|
||||
|
||||
# Expose a subset of API to keep callers simple
|
||||
def set_project_path(self, project_path: str) -> bool:
|
||||
return self._mgr.set_project_path(project_path)
|
||||
|
||||
def build_index(self, force_rebuild: bool = False) -> bool:
|
||||
return self._mgr.build_index(force_rebuild=force_rebuild)
|
||||
|
||||
def load_index(self) -> bool:
|
||||
return self._mgr.load_index()
|
||||
|
||||
def refresh_index(self) -> bool:
|
||||
return self._mgr.refresh_index()
|
||||
|
||||
def find_files(self, pattern: str = "*") -> List[str]:
|
||||
return self._mgr.find_files(pattern)
|
||||
|
||||
def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]:
|
||||
return self._mgr.get_file_summary(file_path)
|
||||
|
||||
def get_index_stats(self) -> Dict[str, Any]:
|
||||
return self._mgr.get_index_stats()
|
||||
|
||||
def cleanup(self) -> None:
|
||||
self._mgr.cleanup()
|
||||
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
Index provider interface definitions.
|
||||
|
||||
Defines standard interfaces for all index access, ensuring consistency across different implementations.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Dict, Any, Protocol
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .models import SymbolInfo, FileInfo
|
||||
|
||||
|
||||
@dataclass
|
||||
class IndexMetadata:
|
||||
"""Standard index metadata structure."""
|
||||
version: str
|
||||
format_type: str
|
||||
created_at: float
|
||||
last_updated: float
|
||||
file_count: int
|
||||
project_root: str
|
||||
tool_version: str
|
||||
|
||||
|
||||
class IIndexProvider(Protocol):
|
||||
"""
|
||||
Standard index provider interface.
|
||||
|
||||
All index implementations must follow this interface to ensure consistent access patterns.
|
||||
"""
|
||||
|
||||
def get_file_list(self) -> List[FileInfo]:
|
||||
"""
|
||||
Get list of all indexed files.
|
||||
|
||||
Returns:
|
||||
List of file information objects
|
||||
"""
|
||||
...
|
||||
|
||||
def get_file_info(self, file_path: str) -> Optional[FileInfo]:
|
||||
"""
|
||||
Get information for a specific file.
|
||||
|
||||
Args:
|
||||
file_path: Relative file path
|
||||
|
||||
Returns:
|
||||
File information, or None if file is not in index
|
||||
"""
|
||||
...
|
||||
|
||||
def query_symbols(self, file_path: str) -> List[SymbolInfo]:
|
||||
"""
|
||||
Query symbol information in a file.
|
||||
|
||||
Args:
|
||||
file_path: Relative file path
|
||||
|
||||
Returns:
|
||||
List of symbol information objects
|
||||
"""
|
||||
...
|
||||
|
||||
def search_files(self, pattern: str) -> List[str]:
|
||||
"""
|
||||
Search files by pattern.
|
||||
|
||||
Args:
|
||||
pattern: Glob pattern or regular expression
|
||||
|
||||
Returns:
|
||||
List of matching file paths
|
||||
"""
|
||||
...
|
||||
|
||||
def get_metadata(self) -> IndexMetadata:
|
||||
"""
|
||||
Get index metadata.
|
||||
|
||||
Returns:
|
||||
Index metadata information
|
||||
"""
|
||||
...
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""
|
||||
Check if index is available.
|
||||
|
||||
Returns:
|
||||
True if index is available and functional
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
class IIndexManager(Protocol):
|
||||
"""
|
||||
Index manager interface.
|
||||
|
||||
Defines standard interface for index lifecycle management.
|
||||
"""
|
||||
|
||||
def initialize(self) -> bool:
|
||||
"""Initialize the index manager."""
|
||||
...
|
||||
|
||||
def get_provider(self) -> Optional[IIndexProvider]:
|
||||
"""Get the current active index provider."""
|
||||
...
|
||||
|
||||
def refresh_index(self, force: bool = False) -> bool:
|
||||
"""Refresh the index."""
|
||||
...
|
||||
|
||||
def save_index(self) -> bool:
|
||||
"""Save index state."""
|
||||
...
|
||||
|
||||
def clear_index(self) -> None:
|
||||
"""Clear index state."""
|
||||
...
|
||||
|
||||
def get_index_status(self) -> Dict[str, Any]:
|
||||
"""Get index status information."""
|
||||
...
|
||||
@@ -0,0 +1,472 @@
|
||||
"""
|
||||
JSON Index Builder - Clean implementation using Strategy pattern.
|
||||
|
||||
This replaces the monolithic parser implementation with a clean,
|
||||
maintainable Strategy pattern architecture.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import dataclass, asdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
|
||||
from .strategies import StrategyFactory
|
||||
from .models import SymbolInfo, FileInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class IndexMetadata:
|
||||
"""Metadata for the JSON index."""
|
||||
project_path: str
|
||||
indexed_files: int
|
||||
index_version: str
|
||||
timestamp: str
|
||||
languages: List[str]
|
||||
total_symbols: int = 0
|
||||
specialized_parsers: int = 0
|
||||
fallback_files: int = 0
|
||||
|
||||
|
||||
class JSONIndexBuilder:
|
||||
"""
|
||||
Main index builder using Strategy pattern for language parsing.
|
||||
|
||||
This class orchestrates the index building process by:
|
||||
1. Discovering files in the project
|
||||
2. Using StrategyFactory to get appropriate parsers
|
||||
3. Extracting symbols and metadata
|
||||
4. Assembling the final JSON index
|
||||
"""
|
||||
|
||||
def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = None):
|
||||
from ..utils import FileFilter
|
||||
|
||||
# Input validation
|
||||
if not isinstance(project_path, str):
|
||||
raise ValueError(f"Project path must be a string, got {type(project_path)}")
|
||||
|
||||
project_path = project_path.strip()
|
||||
if not project_path:
|
||||
raise ValueError("Project path cannot be empty")
|
||||
|
||||
if not os.path.isdir(project_path):
|
||||
raise ValueError(f"Project path does not exist: {project_path}")
|
||||
|
||||
self.project_path = project_path
|
||||
self.in_memory_index: Optional[Dict[str, Any]] = None
|
||||
self.strategy_factory = StrategyFactory()
|
||||
self.file_filter = FileFilter(additional_excludes)
|
||||
|
||||
logger.info(f"Initialized JSON index builder for {project_path}")
|
||||
strategy_info = self.strategy_factory.get_strategy_info()
|
||||
logger.info(f"Available parsing strategies: {len(strategy_info)} types")
|
||||
|
||||
# Log specialized vs fallback coverage
|
||||
specialized = len(self.strategy_factory.get_specialized_extensions())
|
||||
fallback = len(self.strategy_factory.get_fallback_extensions())
|
||||
logger.info(f"Specialized parsers: {specialized} extensions, Fallback coverage: {fallback} extensions")
|
||||
|
||||
def _process_file(self, file_path: str, specialized_extensions: set) -> Optional[Tuple[Dict, Dict, str, bool]]:
|
||||
"""
|
||||
Process a single file - designed for parallel execution.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file to process
|
||||
specialized_extensions: Set of extensions with specialized parsers
|
||||
|
||||
Returns:
|
||||
Tuple of (symbols, file_info, language, is_specialized) or None on error
|
||||
"""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
ext = Path(file_path).suffix.lower()
|
||||
rel_path = os.path.relpath(file_path, self.project_path).replace('\\', '/')
|
||||
|
||||
# Get appropriate strategy
|
||||
strategy = self.strategy_factory.get_strategy(ext)
|
||||
|
||||
# Track strategy usage
|
||||
is_specialized = ext in specialized_extensions
|
||||
|
||||
# Parse file using strategy
|
||||
symbols, file_info = strategy.parse_file(rel_path, content)
|
||||
|
||||
logger.debug(f"Parsed {rel_path}: {len(symbols)} symbols ({file_info.language})")
|
||||
|
||||
return (symbols, {rel_path: file_info}, file_info.language, is_specialized)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing {file_path}: {e}")
|
||||
return None
|
||||
|
||||
def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Build the complete index using Strategy pattern with parallel processing.
|
||||
|
||||
Args:
|
||||
parallel: Whether to use parallel processing (default: True)
|
||||
max_workers: Maximum number of worker processes/threads (default: CPU count)
|
||||
|
||||
Returns:
|
||||
Complete JSON index with metadata, symbols, and file information
|
||||
"""
|
||||
logger.info(f"Building JSON index using Strategy pattern (parallel={parallel})...")
|
||||
start_time = time.time()
|
||||
|
||||
all_symbols = {}
|
||||
all_files = {}
|
||||
languages = set()
|
||||
specialized_count = 0
|
||||
fallback_count = 0
|
||||
pending_calls: List[Tuple[str, str]] = []
|
||||
|
||||
# Get specialized extensions for tracking
|
||||
specialized_extensions = set(self.strategy_factory.get_specialized_extensions())
|
||||
|
||||
# Get list of files to process
|
||||
files_to_process = self._get_supported_files()
|
||||
total_files = len(files_to_process)
|
||||
|
||||
if total_files == 0:
|
||||
logger.warning("No files to process")
|
||||
return self._create_empty_index()
|
||||
|
||||
logger.info(f"Processing {total_files} files...")
|
||||
|
||||
def process_result(result):
|
||||
nonlocal specialized_count, fallback_count
|
||||
if not result:
|
||||
return
|
||||
symbols, file_info_dict, language, is_specialized = result
|
||||
for symbol_id, symbol_info in symbols.items():
|
||||
all_symbols[symbol_id] = symbol_info
|
||||
for rel_path, file_info in file_info_dict.items():
|
||||
all_files[rel_path] = file_info
|
||||
file_pending = getattr(file_info, "pending_calls", [])
|
||||
if file_pending:
|
||||
pending_calls.extend(file_pending)
|
||||
languages.add(language)
|
||||
if is_specialized:
|
||||
specialized_count += 1
|
||||
else:
|
||||
fallback_count += 1
|
||||
|
||||
if parallel and total_files > 1:
|
||||
# Use ThreadPoolExecutor for I/O-bound file reading
|
||||
# ProcessPoolExecutor has issues with strategy sharing
|
||||
if max_workers is None:
|
||||
max_workers = min(os.cpu_count() or 4, total_files)
|
||||
|
||||
logger.info(f"Using parallel processing with {max_workers} workers")
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all tasks
|
||||
future_to_file = {
|
||||
executor.submit(self._process_file, file_path, specialized_extensions): file_path
|
||||
for file_path in files_to_process
|
||||
}
|
||||
|
||||
# Process completed tasks
|
||||
processed = 0
|
||||
for future in as_completed(future_to_file):
|
||||
file_path = future_to_file[future]
|
||||
result = future.result()
|
||||
|
||||
process_result(result)
|
||||
|
||||
processed += 1
|
||||
if processed % 100 == 0:
|
||||
logger.debug(f"Processed {processed}/{total_files} files")
|
||||
else:
|
||||
# Sequential processing
|
||||
logger.info("Using sequential processing")
|
||||
for file_path in files_to_process:
|
||||
result = self._process_file(file_path, specialized_extensions)
|
||||
process_result(result)
|
||||
|
||||
self._resolve_pending_calls(all_symbols, pending_calls)
|
||||
|
||||
# Build index metadata
|
||||
metadata = IndexMetadata(
|
||||
project_path=self.project_path,
|
||||
indexed_files=len(all_files),
|
||||
index_version="2.0.0-strategy",
|
||||
timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
languages=sorted(list(languages)),
|
||||
total_symbols=len(all_symbols),
|
||||
specialized_parsers=specialized_count,
|
||||
fallback_files=fallback_count
|
||||
)
|
||||
|
||||
# Assemble final index
|
||||
index = {
|
||||
"metadata": asdict(metadata),
|
||||
"symbols": {k: asdict(v) for k, v in all_symbols.items()},
|
||||
"files": {k: asdict(v) for k, v in all_files.items()}
|
||||
}
|
||||
|
||||
# Cache in memory
|
||||
self.in_memory_index = index
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
logger.info(f"Built index with {len(all_symbols)} symbols from {len(all_files)} files in {elapsed:.2f}s")
|
||||
logger.info(f"Languages detected: {sorted(languages)}")
|
||||
logger.info(f"Strategy usage: {specialized_count} specialized, {fallback_count} fallback")
|
||||
|
||||
return index
|
||||
|
||||
def _resolve_pending_calls(
|
||||
self,
|
||||
all_symbols: Dict[str, SymbolInfo],
|
||||
pending_calls: List[Tuple[str, str]]
|
||||
) -> None:
|
||||
"""Resolve cross-file call relationships using global symbol index."""
|
||||
if not pending_calls:
|
||||
return
|
||||
|
||||
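# Map each symbol's short name (the last '::' segment) to every
# fully-qualified id sharing it, so bare-name calls can be resolved.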
short_index: Dict[str, List[str]] = defaultdict(list)
|
||||
for symbol_id in all_symbols:
|
||||
short_name = symbol_id.split("::")[-1]
|
||||
short_index[short_name].append(symbol_id)
|
||||
|
||||
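# Resolution order: exact qualified id, then short-name lookup,
# then a unique '.<name>' suffix scan for dotted calls.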
for caller, called in pending_calls:
|
||||
target_ids: List[str] = []
|
||||
if called in all_symbols:
|
||||
target_ids = [called]
|
||||
else:
|
||||
if called in short_index:
|
||||
target_ids = short_index[called]
|
||||
if not target_ids and "." in called:
|
||||
target_ids = short_index.get(called, [])
|
||||
if not target_ids:
|
||||
matches: List[str] = []
|
||||
suffix = f".{called}"
|
||||
for short_name, ids in short_index.items():
|
||||
if short_name.endswith(suffix):
|
||||
matches.extend(ids)
|
||||
target_ids = matches
|
||||
|
||||
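# Only a unique match creates a call edge; ambiguous targets are skipped.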
if len(target_ids) != 1:
|
||||
continue
|
||||
|
||||
symbol_info = all_symbols[target_ids[0]]
|
||||
if caller not in symbol_info.called_by:
|
||||
symbol_info.called_by.append(caller)
|
||||
|
||||
def _create_empty_index(self) -> Dict[str, Any]:
|
||||
"""Create an empty index structure."""
|
||||
metadata = IndexMetadata(
|
||||
project_path=self.project_path,
|
||||
indexed_files=0,
|
||||
index_version="2.0.0-strategy",
|
||||
timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
languages=[],
|
||||
total_symbols=0,
|
||||
specialized_parsers=0,
|
||||
fallback_files=0
|
||||
)
|
||||
|
||||
return {
|
||||
"metadata": asdict(metadata),
|
||||
"symbols": {},
|
||||
"files": {}
|
||||
}
|
||||
|
||||
def get_index(self) -> Optional[Dict[str, Any]]:
|
||||
"""Get the current in-memory index."""
|
||||
return self.in_memory_index
|
||||
|
||||
def clear_index(self):
|
||||
"""Clear the in-memory index."""
|
||||
self.in_memory_index = None
|
||||
logger.debug("Cleared in-memory index")
|
||||
|
||||
def _get_supported_files(self) -> List[str]:
|
||||
"""
|
||||
Get all supported files in the project using centralized filtering.
|
||||
|
||||
Returns:
|
||||
List of file paths that can be parsed
|
||||
"""
|
||||
supported_files = []
|
||||
base_path = Path(self.project_path)
|
||||
|
||||
try:
|
||||
for root, dirs, files in os.walk(self.project_path):
|
||||
# Filter directories in-place using centralized logic
|
||||
dirs[:] = [d for d in dirs if not self.file_filter.should_exclude_directory(d)]
|
||||
|
||||
# Filter files using centralized logic
|
||||
for file in files:
|
||||
file_path = Path(root) / file
|
||||
if self.file_filter.should_process_path(file_path, base_path):
|
||||
supported_files.append(str(file_path))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error scanning directory {self.project_path}: {e}")
|
||||
|
||||
logger.debug(f"Found {len(supported_files)} supported files")
|
||||
return supported_files
|
||||
|
||||
def build_shallow_file_list(self) -> List[str]:
|
||||
"""
|
||||
Build a minimal shallow index consisting of relative file paths only.
|
||||
|
||||
This method does not read file contents. It enumerates supported files
|
||||
using centralized filtering and returns normalized relative paths with
|
||||
forward slashes for cross-platform consistency.
|
||||
|
||||
Returns:
|
||||
            List of relative file paths (using '/').
        """
        try:
            absolute_files = self._get_supported_files()
            result: List[str] = []
            for abs_path in absolute_files:
                rel_path = os.path.relpath(abs_path, self.project_path).replace('\\', '/')
                # Normalize leading './'
                if rel_path.startswith('./'):
                    rel_path = rel_path[2:]
                result.append(rel_path)
            return result
        except Exception as e:
            logger.error(f"Failed to build shallow file list: {e}")
            return []

    def save_index(self, index: Dict[str, Any], index_path: str) -> bool:
        """
        Save index to disk.

        Args:
            index: Index data to save
            index_path: Path where to save the index

        Returns:
            True if successful, False otherwise
        """
        try:
            import json
            with open(index_path, 'w', encoding='utf-8') as f:
                json.dump(index, f, indent=2, ensure_ascii=False)
            logger.info(f"Saved index to {index_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to save index to {index_path}: {e}")
            return False

    def load_index(self, index_path: str) -> Optional[Dict[str, Any]]:
        """
        Load index from disk.

        Args:
            index_path: Path to the index file

        Returns:
            Index data if successful, None otherwise
        """
        try:
            if not os.path.exists(index_path):
                logger.debug(f"Index file not found: {index_path}")
                return None

            import json
            with open(index_path, 'r', encoding='utf-8') as f:
                index = json.load(f)

            # Cache in memory
            self.in_memory_index = index
            logger.info(f"Loaded index from {index_path}")
            return index

        except Exception as e:
            logger.error(f"Failed to load index from {index_path}: {e}")
            return None

    def get_parsing_statistics(self) -> Dict[str, Any]:
        """
        Get detailed statistics about parsing capabilities.

        Returns:
            Dictionary with parsing statistics and strategy information
        """
        strategy_info = self.strategy_factory.get_strategy_info()

        return {
            "total_strategies": len(strategy_info),
            "specialized_languages": [lang for lang in strategy_info.keys() if not lang.startswith('fallback_')],
            "fallback_languages": [lang.replace('fallback_', '') for lang in strategy_info.keys() if lang.startswith('fallback_')],
            "total_extensions": len(self.strategy_factory.get_all_supported_extensions()),
            "specialized_extensions": len(self.strategy_factory.get_specialized_extensions()),
            "fallback_extensions": len(self.strategy_factory.get_fallback_extensions()),
            "strategy_details": strategy_info
        }

    def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Get symbols for a specific file.

        Args:
            file_path: Relative path to the file

        Returns:
            List of symbols in the file
        """
        if not self.in_memory_index:
            logger.warning("Index not loaded")
            return []

        try:
            # Normalize file path
            file_path = file_path.replace('\\', '/')
            if file_path.startswith('./'):
                file_path = file_path[2:]

            # Get file info
            file_info = self.in_memory_index["files"].get(file_path)
            if not file_info:
                logger.warning(f"File not found in index: {file_path}")
                return []

            # Work directly with global symbols for this file
            global_symbols = self.in_memory_index.get("symbols", {})
            result = []

            # Find all symbols for this file directly from global symbols
            for symbol_id, symbol_data in global_symbols.items():
                symbol_file = symbol_data.get("file", "").replace("\\", "/")

                # Check if this symbol belongs to our file
                if symbol_file == file_path:
                    symbol_type = symbol_data.get("type", "unknown")
                    symbol_name = symbol_id.split("::")[-1]  # Extract symbol name from ID

                    # Create symbol info
                    symbol_info = {
                        "name": symbol_name,
                        "called_by": symbol_data.get("called_by", []),
                        "line": symbol_data.get("line"),
                        "signature": symbol_data.get("signature")
                    }

                    # Keep functions, methods, and classes
                    if symbol_type in ("function", "method", "class"):
                        result.append(symbol_info)

            # Sort by line number for consistent ordering (None-safe)
            result.sort(key=lambda x: x.get("line") or 0)

            return result

        except Exception as e:
            logger.error(f"Error getting file symbols for {file_path}: {e}")
            return []
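The save/load pair above is a plain JSON round-trip; a minimal usage sketch (illustrative only — `builder` stands in for an already-initialized JSONIndexBuilder, and the path is hypothetical):

# Sketch: persist an index, reload it, then query one file.
index = {"files": {}, "symbols": {}}
if builder.save_index(index, "/tmp/index.json"):
    loaded = builder.load_index("/tmp/index.json")    # also cached in in_memory_index
    symbols = builder.get_file_symbols("src/app.py")  # [] until real entries exist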
@@ -0,0 +1,8 @@
"""
Model classes for the indexing system.
"""

from .symbol_info import SymbolInfo
from .file_info import FileInfo

__all__ = ['SymbolInfo', 'FileInfo']
@@ -0,0 +1,24 @@
"""
FileInfo model for representing file metadata.
"""

from dataclasses import dataclass
from typing import Dict, List, Optional, Any


@dataclass
class FileInfo:
    """Information about a source code file."""

    language: str                          # programming language
    line_count: int                        # total lines in file
    symbols: Dict[str, List[str]]          # symbol categories (functions, classes, etc.)
    imports: List[str]                     # imported modules/packages
    exports: Optional[List[str]] = None    # exported symbols (for JS/TS modules)
    package: Optional[str] = None          # package name (for Java, Go, etc.)
    docstring: Optional[str] = None        # file-level documentation

    def __post_init__(self):
        """Initialize mutable defaults."""
        if self.exports is None:
            self.exports = []
@@ -0,0 +1,23 @@
"""
SymbolInfo model for representing code symbols.
"""

from dataclasses import dataclass
from typing import Optional, List


@dataclass
class SymbolInfo:
    """Information about a code symbol (function, class, method, etc.)."""

    type: str                              # function, class, method, interface, etc.
    file: str                              # file path where symbol is defined
    line: int                              # line number where symbol starts
    signature: Optional[str] = None        # function/method signature
    docstring: Optional[str] = None        # documentation string
    called_by: Optional[List[str]] = None  # list of symbols that call this symbol

    def __post_init__(self):
        """Initialize mutable defaults."""
        if self.called_by is None:
            self.called_by = []
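A quick sketch of how the two dataclasses normalize their mutable defaults (the import path is illustrative and depends on the package layout):

from .models import SymbolInfo, FileInfo  # hypothetical import path

sym = SymbolInfo(type="function", file="src/app.py", line=10, signature="def run():")
assert sym.called_by == []  # __post_init__ swaps the None default for a fresh list

info = FileInfo(language="python", line_count=120,
                symbols={"functions": ["run"], "classes": []}, imports=["os"])
assert info.exports == []   # same normalization for FileInfo.exports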
@@ -0,0 +1,49 @@
"""
Qualified name generation utilities.
"""
import os
from typing import Optional


def normalize_file_path(file_path: str) -> str:
    """
    Normalize a file path to use forward slashes and relative paths.

    Args:
        file_path: The file path to normalize

    Returns:
        Normalized file path
    """
    # Convert to forward slashes and make relative
    normalized = file_path.replace('\\', '/')

    # Remove leading slash if present
    if normalized.startswith('/'):
        normalized = normalized[1:]

    return normalized


def generate_qualified_name(file_path: str, symbol_name: str, namespace: Optional[str] = None) -> str:
    """
    Generate a qualified name for a symbol.

    Args:
        file_path: Path to the file containing the symbol
        symbol_name: Name of the symbol
        namespace: Optional namespace/module context

    Returns:
        Qualified name for the symbol
    """
    normalized_path = normalize_file_path(file_path)

    # Remove file extension for module-like name
    base_name = os.path.splitext(normalized_path)[0]
    module_path = base_name.replace('/', '.')

    if namespace:
        return f"{module_path}.{namespace}.{symbol_name}"
    else:
        return f"{module_path}.{symbol_name}"
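Expected behavior of the two helpers, as a doctest-style sketch (paths are illustrative):

# normalize_file_path('src\\utils\\helpers.py')                      -> 'src/utils/helpers.py'
# generate_qualified_name('src/utils/helpers.py', 'parse')           -> 'src.utils.helpers.parse'
# generate_qualified_name('src/utils/helpers.py', 'parse', 'Lexer')  -> 'src.utils.helpers.Lexer.parse'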
@@ -0,0 +1,194 @@
"""
Shallow Index Manager - Manages a minimal file-list-only index.

This manager builds and loads a shallow index consisting of relative file
paths only. It is optimized for fast initialization and filename-based
search/browsing. Content parsing and symbol extraction are not performed.
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import re
import tempfile
import threading
from typing import List, Optional

from .json_index_builder import JSONIndexBuilder
from ..constants import SETTINGS_DIR, INDEX_FILE_SHALLOW

logger = logging.getLogger(__name__)


class ShallowIndexManager:
    """Manage shallow (file-list) index lifecycle and storage."""

    def __init__(self) -> None:
        self.project_path: Optional[str] = None
        self.index_builder: Optional[JSONIndexBuilder] = None
        self.temp_dir: Optional[str] = None
        self.index_path: Optional[str] = None
        self._file_list: Optional[List[str]] = None
        self._lock = threading.RLock()

    def set_project_path(self, project_path: str) -> bool:
        with self._lock:
            try:
                if not isinstance(project_path, str) or not project_path.strip():
                    logger.error("Invalid project path for shallow index")
                    return False
                project_path = project_path.strip()
                if not os.path.isdir(project_path):
                    logger.error(f"Project path does not exist: {project_path}")
                    return False

                self.project_path = project_path
                self.index_builder = JSONIndexBuilder(project_path)

                project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12]
                self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash)
                os.makedirs(self.temp_dir, exist_ok=True)
                self.index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW)
                return True
            except Exception as e:  # noqa: BLE001 - centralized logging
                logger.error(f"Failed to set project path (shallow): {e}")
                return False

    def build_index(self) -> bool:
        """Build and persist the shallow file list index."""
        with self._lock:
            if not self.index_builder or not self.index_path:
                logger.error("ShallowIndexManager not initialized")
                return False
            try:
                file_list = self.index_builder.build_shallow_file_list()
                with open(self.index_path, 'w', encoding='utf-8') as f:
                    json.dump(file_list, f, ensure_ascii=False)
                self._file_list = file_list
                logger.info(f"Built shallow index with {len(file_list)} files")
                return True
            except Exception as e:  # noqa: BLE001
                logger.error(f"Failed to build shallow index: {e}")
                return False

    def load_index(self) -> bool:
        """Load shallow index from disk to memory."""
        with self._lock:
            try:
                if not self.index_path or not os.path.exists(self.index_path):
                    return False
                with open(self.index_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if isinstance(data, list):
                    # Normalize slashes/prefix
                    normalized: List[str] = []
                    for p in data:
                        if isinstance(p, str):
                            q = p.replace('\\\\', '/').replace('\\', '/')
                            if q.startswith('./'):
                                q = q[2:]
                            normalized.append(q)
                    self._file_list = normalized
                    return True
                return False
            except Exception as e:  # noqa: BLE001
                logger.error(f"Failed to load shallow index: {e}")
                return False

    def get_file_list(self) -> List[str]:
        with self._lock:
            return list(self._file_list or [])

    def find_files(self, pattern: str = "*") -> List[str]:
        with self._lock:
            if not isinstance(pattern, str):
                return []
            norm = (pattern.strip() or "*").replace('\\\\', '/').replace('\\', '/')
            files = self._file_list or []

            # Fast path: wildcard all
            if norm == "*":
                return list(files)

            # 1) Exact, case-sensitive
            exact_regex = self._compile_glob_regex(norm)
            exact_hits = [f for f in files if exact_regex.match(f) is not None]
            if exact_hits or '/' in norm:
                return exact_hits

            # 2) Recursive **/ fallback (case-sensitive)
            recursive_pattern = f"**/{norm}"
            rec_regex = self._compile_glob_regex(recursive_pattern)
            rec_hits = [f for f in files if rec_regex.match(f) is not None]
            if rec_hits:
                return self._dedupe_preserve_order(exact_hits + rec_hits)

            # 3) Case-insensitive (root only)
            ci_regex = self._compile_glob_regex(norm, ignore_case=True)
            ci_hits = [f for f in files if ci_regex.match(f) is not None]
            if ci_hits:
                return self._dedupe_preserve_order(exact_hits + rec_hits + ci_hits)

            # 4) Case-insensitive recursive
            rec_ci_regex = self._compile_glob_regex(recursive_pattern, ignore_case=True)
            rec_ci_hits = [f for f in files if rec_ci_regex.match(f) is not None]
            if rec_ci_hits:
                return self._dedupe_preserve_order(
                    exact_hits + rec_hits + ci_hits + rec_ci_hits
                )

            return []

    @staticmethod
    def _compile_glob_regex(pattern: str, ignore_case: bool = False) -> re.Pattern:
        i = 0
        out = []
        special = ".^$+{}[]|()"
        while i < len(pattern):
            c = pattern[i]
            if c == '*':
                if i + 1 < len(pattern) and pattern[i + 1] == '*':
                    out.append('.*')
                    i += 2
                    continue
                else:
                    out.append('[^/]*')
            elif c == '?':
                out.append('[^/]')
            elif c in special:
                out.append('\\' + c)
            else:
                out.append(c)
            i += 1
        flags = re.IGNORECASE if ignore_case else 0
        return re.compile('^' + ''.join(out) + '$', flags=flags)

    @staticmethod
    def _dedupe_preserve_order(items: List[str]) -> List[str]:
        seen = set()
        result = []
        for item in items:
            if item not in seen:
                seen.add(item)
                result.append(item)
        return result

    def cleanup(self) -> None:
        with self._lock:
            self.project_path = None
            self.index_builder = None
            self.temp_dir = None
            self.index_path = None
            self._file_list = None


# Global singleton
_shallow_manager = ShallowIndexManager()


def get_shallow_index_manager() -> ShallowIndexManager:
    return _shallow_manager
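The four-step fallback ladder in find_files is easiest to see against a concrete file list; a hedged sketch (poking the private _file_list directly for illustration):

# Sketch: exercising the matching ladder on an in-memory manager.
mgr = ShallowIndexManager()
mgr._file_list = ["src/Main.py", "docs/readme.md"]

mgr.find_files("src/*.py")  # step 1 only: pattern contains '/', no fallback -> ['src/Main.py']
mgr.find_files("Main.py")   # step 2: retried as '**/Main.py' -> ['src/Main.py']
mgr.find_files("main.py")   # steps 3-4: case-insensitive retries -> ['src/Main.py']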
@@ -0,0 +1,327 @@
"""
SQLite-backed index builder leveraging the existing strategy pipeline.
"""

from __future__ import annotations

import json
import logging
import os
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Iterable, List, Optional, Tuple

from .json_index_builder import JSONIndexBuilder
from .sqlite_store import SQLiteIndexStore
from .models import FileInfo, SymbolInfo

logger = logging.getLogger(__name__)


class SQLiteIndexBuilder(JSONIndexBuilder):
    """
    Build the deep index directly into SQLite storage.

    Inherits scanning/strategy utilities from JSONIndexBuilder but writes rows
    to the provided SQLiteIndexStore instead of assembling large dictionaries.
    """

    def __init__(
        self,
        project_path: str,
        store: SQLiteIndexStore,
        additional_excludes: Optional[List[str]] = None,
    ):
        super().__init__(project_path, additional_excludes)
        self.store = store

    def build_index(
        self,
        parallel: bool = True,
        max_workers: Optional[int] = None,
    ) -> Dict[str, int]:
        """
        Build the SQLite index and return lightweight statistics.

        Args:
            parallel: Whether to parse files in parallel.
            max_workers: Optional override for worker count.

        Returns:
            Dictionary with totals for files, symbols, and languages.
        """
        logger.info("Building SQLite index (parallel=%s)...", parallel)
        start_time = time.time()

        files_to_process = self._get_supported_files()
        total_files = len(files_to_process)
        if total_files == 0:
            logger.warning("No files to process")
            with self.store.connect(for_build=True) as conn:
                self._reset_database(conn)
                self._persist_metadata(conn, 0, 0, [], 0, 0, {})
            return {
                "files": 0,
                "symbols": 0,
                "languages": 0,
            }

        specialized_extensions = set(self.strategy_factory.get_specialized_extensions())

        results_iter: Iterable[Tuple[Dict[str, SymbolInfo], Dict[str, FileInfo], str, bool]]

        executor = None

        if parallel and total_files > 1:
            if max_workers is None:
                max_workers = min(os.cpu_count() or 4, total_files)
            logger.info("Using ThreadPoolExecutor with %s workers", max_workers)
            executor = ThreadPoolExecutor(max_workers=max_workers)
            future_to_file = {
                executor.submit(self._process_file, file_path, specialized_extensions): file_path
                for file_path in files_to_process
            }

            def _iter_results():
                for future in as_completed(future_to_file):
                    result = future.result()
                    if result:
                        yield result

            results_iter = _iter_results()
        else:
            logger.info("Using sequential processing")

            def _iter_results_sequential():
                for file_path in files_to_process:
                    result = self._process_file(file_path, specialized_extensions)
                    if result:
                        yield result

            results_iter = _iter_results_sequential()

        languages = set()
        specialized_count = 0
        fallback_count = 0
        pending_calls: List[Tuple[str, str]] = []
        total_symbols = 0
        symbol_types: Dict[str, int] = {}
        processed_files = 0

        self.store.initialize_schema()
        with self.store.connect(for_build=True) as conn:
            conn.execute("PRAGMA foreign_keys=ON")
            self._reset_database(conn)

            for symbols, file_info_dict, language, is_specialized in results_iter:
                file_path, file_info = next(iter(file_info_dict.items()))
                file_id = self._insert_file(conn, file_path, file_info)
                file_pending = getattr(file_info, "pending_calls", [])
                if file_pending:
                    pending_calls.extend(file_pending)
                symbol_rows = self._prepare_symbol_rows(symbols, file_id)

                if symbol_rows:
                    conn.executemany(
                        """
                        INSERT INTO symbols(
                            symbol_id,
                            file_id,
                            type,
                            line,
                            signature,
                            docstring,
                            called_by,
                            short_name
                        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        """,
                        symbol_rows,
                    )

                languages.add(language)
                processed_files += 1
                total_symbols += len(symbol_rows)

                if is_specialized:
                    specialized_count += 1
                else:
                    fallback_count += 1

                for _, _, symbol_type, _, _, _, _, _ in symbol_rows:
                    key = symbol_type or "unknown"
                    symbol_types[key] = symbol_types.get(key, 0) + 1

            self._persist_metadata(
                conn,
                processed_files,
                total_symbols,
                sorted(languages),
                specialized_count,
                fallback_count,
                symbol_types,
            )
            self._resolve_pending_calls_sqlite(conn, pending_calls)
            try:
                conn.execute("PRAGMA optimize")
            except Exception:  # pragma: no cover - best effort
                pass

        if executor:
            executor.shutdown(wait=True)

        elapsed = time.time() - start_time
        logger.info(
            "SQLite index built: files=%s symbols=%s languages=%s elapsed=%.2fs",
            processed_files,
            total_symbols,
            len(languages),
            elapsed,
        )

        return {
            "files": processed_files,
            "symbols": total_symbols,
            "languages": len(languages),
        }

    # Internal helpers -------------------------------------------------

    def _reset_database(self, conn):
        conn.execute("DELETE FROM symbols")
        conn.execute("DELETE FROM files")
        conn.execute(
            "DELETE FROM metadata WHERE key NOT IN ('schema_version')"
        )

    def _insert_file(self, conn, path: str, file_info: FileInfo) -> int:
        params = (
            path,
            file_info.language,
            file_info.line_count,
            json.dumps(file_info.imports or []),
            json.dumps(file_info.exports or []),
            file_info.package,
            file_info.docstring,
        )
        cur = conn.execute(
            """
            INSERT INTO files(
                path,
                language,
                line_count,
                imports,
                exports,
                package,
                docstring
            ) VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            params,
        )
        return cur.lastrowid

    def _prepare_symbol_rows(
        self,
        symbols: Dict[str, SymbolInfo],
        file_id: int,
    ) -> List[Tuple[str, int, Optional[str], Optional[int], Optional[str], Optional[str], str, str]]:
        rows: List[Tuple[str, int, Optional[str], Optional[int], Optional[str], Optional[str], str, str]] = []
        for symbol_id, symbol_info in symbols.items():
            called_by = json.dumps(symbol_info.called_by or [])
            short_name = symbol_id.split("::")[-1]
            rows.append(
                (
                    symbol_id,
                    file_id,
                    symbol_info.type,
                    symbol_info.line,
                    symbol_info.signature,
                    symbol_info.docstring,
                    called_by,
                    short_name,
                )
            )
        return rows

    def _persist_metadata(
        self,
        conn,
        file_count: int,
        symbol_count: int,
        languages: List[str],
        specialized_count: int,
        fallback_count: int,
        symbol_types: Dict[str, int],
    ) -> None:
        metadata = {
            "project_path": self.project_path,
            "indexed_files": file_count,
            "index_version": "3.0.0-sqlite",
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "languages": languages,
            "total_symbols": symbol_count,
            "specialized_parsers": specialized_count,
            "fallback_files": fallback_count,
            "symbol_types": symbol_types,
        }
        self.store.set_metadata(conn, "project_path", self.project_path)
        self.store.set_metadata(conn, "index_metadata", metadata)

    def _resolve_pending_calls_sqlite(
        self,
        conn,
        pending_calls: List[Tuple[str, str]]
    ) -> None:
        """Resolve cross-file call relationships directly in SQLite storage."""
        if not pending_calls:
            return

        rows = list(
            conn.execute(
                "SELECT symbol_id, short_name, called_by FROM symbols"
            )
        )
        symbol_map = {row["symbol_id"]: row for row in rows}
        short_index: Dict[str, List[str]] = defaultdict(list)
        for row in rows:
            short_name = row["short_name"]
            if short_name:
                short_index[short_name].append(row["symbol_id"])

        updates: Dict[str, set] = defaultdict(set)

        for caller, called in pending_calls:
            target_ids: List[str] = []
            if called in symbol_map:
                target_ids = [called]
            else:
                if called in short_index:
                    target_ids = short_index[called]
                if not target_ids:
                    suffix = f".{called}"
                    matches: List[str] = []
                    for short_name, ids in short_index.items():
                        if short_name and short_name.endswith(suffix):
                            matches.extend(ids)
                    target_ids = matches

            if len(target_ids) != 1:
                continue

            updates[target_ids[0]].add(caller)

        for symbol_id, callers in updates.items():
            row = symbol_map.get(symbol_id)
            if not row:
                continue
            existing = []
            if row["called_by"]:
                try:
                    existing = json.loads(row["called_by"])
                except json.JSONDecodeError:
                    existing = []
            merged = list(dict.fromkeys(existing + list(callers)))
            conn.execute(
                "UPDATE symbols SET called_by=? WHERE symbol_id=?",
                (json.dumps(merged), symbol_id),
            )
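For reference, each tuple produced by _prepare_symbol_rows lines up positionally with the INSERT INTO symbols statement above; a sketch with illustrative values:

# Positional layout of one symbols row:
row = (
    "src/app.py::App.run",  # symbol_id
    1,                      # file_id (FK into files)
    "method",               # type
    42,                     # line
    "def run(self):",       # signature
    None,                   # docstring
    "[]",                   # called_by (JSON-encoded list)
    "App.run",              # short_name (tail of symbol_id after '::')
)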
@@ -0,0 +1,354 @@
"""
SQLite-backed index manager coordinating builder and store.
"""

from __future__ import annotations

import json
import logging
import os
import re
import tempfile
import threading
from typing import Any, Dict, List, Optional

from .sqlite_index_builder import SQLiteIndexBuilder
from .sqlite_store import SQLiteIndexStore, SQLiteSchemaMismatchError
from ..constants import INDEX_FILE_DB, INDEX_FILE, INDEX_FILE_SHALLOW, SETTINGS_DIR

logger = logging.getLogger(__name__)


class SQLiteIndexManager:
    """Manage lifecycle of the SQLite-backed deep index."""

    def __init__(self) -> None:
        self.project_path: Optional[str] = None
        self.index_builder: Optional[SQLiteIndexBuilder] = None
        self.store: Optional[SQLiteIndexStore] = None
        self.temp_dir: Optional[str] = None
        self.index_path: Optional[str] = None
        self.shallow_index_path: Optional[str] = None
        self._shallow_file_list: Optional[List[str]] = None
        self._is_loaded = False
        self._lock = threading.RLock()
        logger.info("Initialized SQLite Index Manager")

    def set_project_path(self, project_path: str) -> bool:
        """Configure project path and underlying storage location."""
        with self._lock:
            if not project_path or not isinstance(project_path, str):
                logger.error("Invalid project path: %s", project_path)
                return False

            project_path = project_path.strip()
            if not project_path or not os.path.isdir(project_path):
                logger.error("Project path does not exist: %s", project_path)
                return False

            self.project_path = project_path
            project_hash = _hash_project_path(project_path)
            self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash)
            os.makedirs(self.temp_dir, exist_ok=True)

            self.index_path = os.path.join(self.temp_dir, INDEX_FILE_DB)
            legacy_path = os.path.join(self.temp_dir, INDEX_FILE)
            if os.path.exists(legacy_path):
                try:
                    os.remove(legacy_path)
                    logger.info("Removed legacy JSON index at %s", legacy_path)
                except OSError as exc:  # pragma: no cover - best effort
                    logger.warning("Failed to remove legacy index %s: %s", legacy_path, exc)

            self.shallow_index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW)
            self.store = SQLiteIndexStore(self.index_path)
            self.index_builder = SQLiteIndexBuilder(project_path, self.store)
            self._is_loaded = False
            logger.info("SQLite index storage: %s", self.index_path)
            return True

    def build_index(self, force_rebuild: bool = False) -> bool:
        """Build or rebuild the SQLite index."""
        with self._lock:
            if not self.index_builder:
                logger.error("Index builder not initialized")
                return False
            try:
                stats = self.index_builder.build_index()
                logger.info(
                    "SQLite index build complete: %s files, %s symbols",
                    stats.get("files"),
                    stats.get("symbols"),
                )
                self._is_loaded = True
                return True
            except SQLiteSchemaMismatchError:
                logger.warning("Schema mismatch detected; recreating database")
                self.store.clear()  # type: ignore[union-attr]
                stats = self.index_builder.build_index()
                logger.info(
                    "SQLite index rebuild after schema reset: %s files, %s symbols",
                    stats.get("files"),
                    stats.get("symbols"),
                )
                self._is_loaded = True
                return True
            except Exception as exc:  # pragma: no cover - defensive
                logger.error("Failed to build SQLite index: %s", exc)
                self._is_loaded = False
                return False

    def load_index(self) -> bool:
        """Validate that an index database exists and schema is current."""
        with self._lock:
            if not self.store:
                logger.error("Index store not initialized")
                return False
            try:
                self.store.initialize_schema()
                with self.store.connect() as conn:
                    metadata = self.store.get_metadata(conn, "index_metadata")
            except SQLiteSchemaMismatchError:
                logger.info("Schema mismatch on load; forcing rebuild on next build_index()")
                self._is_loaded = False
                return False
            except Exception as exc:  # pragma: no cover
                logger.error("Failed to load SQLite index: %s", exc)
                self._is_loaded = False
                return False
            self._is_loaded = metadata is not None
            return self._is_loaded

    def refresh_index(self) -> bool:
        """Force rebuild of the SQLite index."""
        with self._lock:
            logger.info("Refreshing SQLite deep index...")
            if self.build_index(force_rebuild=True):
                return self.load_index()
            return False

    def build_shallow_index(self) -> bool:
        """Build the shallow index file list using the existing builder helper."""
        with self._lock:
            if not self.index_builder or not self.project_path or not self.shallow_index_path:
                logger.error("Index builder not initialized for shallow index")
                return False
            try:
                file_list = self.index_builder.build_shallow_file_list()
                with open(self.shallow_index_path, "w", encoding="utf-8") as handle:
                    json.dump(file_list, handle, ensure_ascii=False)
                self._shallow_file_list = file_list
                return True
            except Exception as exc:  # pragma: no cover
                logger.error("Failed to build shallow index: %s", exc)
                return False

    def load_shallow_index(self) -> bool:
        """Load shallow index from disk."""
        with self._lock:
            if not self.shallow_index_path or not os.path.exists(self.shallow_index_path):
                return False
            try:
                with open(self.shallow_index_path, "r", encoding="utf-8") as handle:
                    data = json.load(handle)
                if isinstance(data, list):
                    self._shallow_file_list = [_normalize_path(p) for p in data if isinstance(p, str)]
                    return True
            except Exception as exc:  # pragma: no cover
                logger.error("Failed to load shallow index: %s", exc)
            return False

    def find_files(self, pattern: str = "*") -> List[str]:
        """Find files from the shallow index using glob semantics."""
        with self._lock:
            if not isinstance(pattern, str):
                logger.error("Pattern must be a string, got %s", type(pattern))
                return []
            pattern = pattern.strip() or "*"
            norm_pattern = pattern.replace("\\\\", "/").replace("\\", "/")
            regex = _compile_glob_regex(norm_pattern)

            if self._shallow_file_list is None:
                if not self.load_shallow_index():
                    if self.build_shallow_index():
                        self.load_shallow_index()

            files = list(self._shallow_file_list or [])
            if norm_pattern == "*":
                return files
            return [f for f in files if regex.match(f)]

    def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Return summary information for a file from SQLite storage."""
        with self._lock:
            if not isinstance(file_path, str):
                logger.error("File path must be a string, got %s", type(file_path))
                return None
            if not self.store or not self._is_loaded:
                if not self.load_index():
                    return None

            normalized = _normalize_path(file_path)
            with self.store.connect() as conn:
                row = conn.execute(
                    """
                    SELECT id, language, line_count, imports, exports, docstring
                    FROM files WHERE path = ?
                    """,
                    (normalized,),
                ).fetchone()

                if not row:
                    logger.warning("File not found in index: %s", normalized)
                    return None

                symbol_rows = conn.execute(
                    """
                    SELECT type, line, signature, docstring, called_by, short_name
                    FROM symbols
                    WHERE file_id = ?
                    ORDER BY line ASC
                    """,
                    (row["id"],),
                ).fetchall()

            imports = _safe_json_loads(row["imports"])
            exports = _safe_json_loads(row["exports"])

            categorized = _categorize_symbols(symbol_rows)

            return {
                "file_path": normalized,
                "language": row["language"],
                "line_count": row["line_count"],
                "symbol_count": len(symbol_rows),
                "functions": categorized["functions"],
                "classes": categorized["classes"],
                "methods": categorized["methods"],
                "imports": imports,
                "exports": exports,
                "docstring": row["docstring"],
            }

    def get_index_stats(self) -> Dict[str, Any]:
        """Return basic statistics for the current index."""
        with self._lock:
            if not self.store:
                return {"status": "not_loaded"}
            try:
                with self.store.connect() as conn:
                    metadata = self.store.get_metadata(conn, "index_metadata")
            except SQLiteSchemaMismatchError:
                return {"status": "not_loaded"}
            if not metadata:
                return {"status": "not_loaded"}
            return {
                "status": "loaded" if self._is_loaded else "not_loaded",
                "indexed_files": metadata.get("indexed_files", 0),
                "total_symbols": metadata.get("total_symbols", 0),
                "symbol_types": metadata.get("symbol_types", {}),
                "languages": metadata.get("languages", []),
                "project_path": metadata.get("project_path"),
                "timestamp": metadata.get("timestamp"),
            }

    def cleanup(self) -> None:
        """Reset internal state."""
        with self._lock:
            self.project_path = None
            self.index_builder = None
            self.store = None
            self.temp_dir = None
            self.index_path = None
            self._shallow_file_list = None
            self._is_loaded = False


def _hash_project_path(project_path: str) -> str:
    import hashlib

    return hashlib.md5(project_path.encode()).hexdigest()[:12]


def _compile_glob_regex(pattern: str):
    i = 0
    out = []
    special = ".^$+{}[]|()"
    while i < len(pattern):
        c = pattern[i]
        if c == "*":
            if i + 1 < len(pattern) and pattern[i + 1] == "*":
                out.append(".*")
                i += 2
                continue
            out.append("[^/]*")
        elif c == "?":
            out.append("[^/]")
        elif c in special:
            out.append("\\" + c)
        else:
            out.append(c)
        i += 1
    return re.compile("^" + "".join(out) + "$")


def _normalize_path(path: str) -> str:
    result = path.replace("\\\\", "/").replace("\\", "/")
    if result.startswith("./"):
        result = result[2:]
    return result


def _safe_json_loads(value: Any) -> List[Any]:
    if not value:
        return []
    if isinstance(value, list):
        return value
    try:
        parsed = json.loads(value)
        return parsed if isinstance(parsed, list) else []
    except json.JSONDecodeError:
        return []


def _categorize_symbols(symbol_rows) -> Dict[str, List[Dict[str, Any]]]:
    functions: List[Dict[str, Any]] = []
    classes: List[Dict[str, Any]] = []
    methods: List[Dict[str, Any]] = []

    for row in symbol_rows:
        symbol_type = row["type"]
        called_by = _safe_json_loads(row["called_by"])
        info = {
            "name": row["short_name"],
            "called_by": called_by,
            "line": row["line"],
            "signature": row["signature"],
            "docstring": row["docstring"],
        }

        signature = row["signature"] or ""
        if signature.startswith("def ") and "::" in signature:
            methods.append(info)
        elif signature.startswith("def "):
            functions.append(info)
        elif signature.startswith("class ") or symbol_type == "class":
            classes.append(info)
        else:
            if symbol_type == "method":
                methods.append(info)
            else:
                functions.append(info)

    functions.sort(key=lambda item: item.get("line") or 0)
    classes.sort(key=lambda item: item.get("line") or 0)
    methods.sort(key=lambda item: item.get("line") or 0)

    return {
        "functions": functions,
        "classes": classes,
        "methods": methods,
    }
@@ -0,0 +1,173 @@
"""
SQLite storage layer for deep code index data.

This module centralizes SQLite setup, schema management, and connection
pragmas so higher-level builders/managers can focus on data orchestration.
"""

from __future__ import annotations

import json
import os
import sqlite3
import threading
from contextlib import contextmanager
from typing import Any, Dict, Generator, Optional

SCHEMA_VERSION = 1


class SQLiteSchemaMismatchError(RuntimeError):
    """Raised when the on-disk schema cannot be used safely."""


class SQLiteIndexStore:
    """Utility wrapper around an on-disk SQLite database for the deep index."""

    def __init__(self, db_path: str) -> None:
        if not db_path or not isinstance(db_path, str):
            raise ValueError("db_path must be a non-empty string")
        self.db_path = db_path
        self._lock = threading.RLock()

    def initialize_schema(self) -> None:
        """Create database schema if needed and validate schema version."""
        with self._lock:
            os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
            with self.connect(for_build=True) as conn:
                self._create_tables(conn)
                self._ensure_schema_version(conn)
                # Ensure metadata contains the canonical project path placeholder
                if self.get_metadata(conn, "project_path") is None:
                    self.set_metadata(conn, "project_path", "")

    @contextmanager
    def connect(self, *, for_build: bool = False) -> Generator[sqlite3.Connection, None, None]:
        """
        Context manager yielding a configured SQLite connection.

        Args:
            for_build: Apply write-optimized pragmas (synchronous mode, cache size).
        """
        with self._lock:
            conn = sqlite3.connect(self.db_path, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            self._apply_pragmas(conn, for_build)
            try:
                yield conn
                conn.commit()
            except Exception:
                conn.rollback()
                raise
            finally:
                conn.close()

    def clear(self) -> None:
        """Remove the existing database file."""
        with self._lock:
            if os.path.exists(self.db_path):
                os.remove(self.db_path)

    # Metadata helpers -------------------------------------------------

    def set_metadata(self, conn: sqlite3.Connection, key: str, value: Any) -> None:
        """Persist a metadata key/value pair (value stored as JSON string)."""
        conn.execute(
            """
            INSERT INTO metadata(key, value)
            VALUES(?, ?)
            ON CONFLICT(key) DO UPDATE SET value=excluded.value
            """,
            (key, json.dumps(value)),
        )

    def get_metadata(self, conn: sqlite3.Connection, key: str) -> Optional[Any]:
        """Retrieve a metadata value (deserialized from JSON)."""
        row = conn.execute("SELECT value FROM metadata WHERE key=?", (key,)).fetchone()
        if not row:
            return None
        try:
            return json.loads(row["value"])
        except json.JSONDecodeError:
            return row["value"]

    # Internal helpers -------------------------------------------------

    def _create_tables(self, conn: sqlite3.Connection) -> None:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS metadata (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            )
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS files (
                id INTEGER PRIMARY KEY,
                path TEXT UNIQUE NOT NULL,
                language TEXT,
                line_count INTEGER,
                imports TEXT,
                exports TEXT,
                package TEXT,
                docstring TEXT
            )
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS symbols (
                id INTEGER PRIMARY KEY,
                symbol_id TEXT UNIQUE NOT NULL,
                file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
                type TEXT,
                line INTEGER,
                signature TEXT,
                docstring TEXT,
                called_by TEXT,
                short_name TEXT
            )
            """
        )
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)"
        )
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_short_name ON symbols(short_name)"
        )

    def _ensure_schema_version(self, conn: sqlite3.Connection) -> None:
        stored = self.get_metadata(conn, "schema_version")
        if stored is None:
            self.set_metadata(conn, "schema_version", SCHEMA_VERSION)
            return

        if int(stored) != SCHEMA_VERSION:
            raise SQLiteSchemaMismatchError(
                f"Unexpected schema version {stored} (expected {SCHEMA_VERSION})"
            )

    def _apply_pragmas(self, conn: sqlite3.Connection, for_build: bool) -> None:
        pragmas: Dict[str, Any] = {
            "journal_mode": "WAL",  # WAL for both read and build connections
            "synchronous": "NORMAL" if for_build else "FULL",
            "cache_size": -262144,  # negative => size in KiB, ~256 MiB
        }
        for pragma, value in pragmas.items():
            try:
                conn.execute(f"PRAGMA {pragma}={value}")
            except sqlite3.DatabaseError:
                # PRAGMA not supported or rejected; continue best-effort.
                continue
        if for_build:
            try:
                conn.execute("PRAGMA temp_store=MEMORY")
            except sqlite3.DatabaseError:
                pass
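A short usage sketch of the store (the path is illustrative):

store = SQLiteIndexStore("/tmp/ccw/index.db")
store.initialize_schema()
with store.connect() as conn:  # commits on success, rolls back on error
    store.set_metadata(conn, "example", {"hello": "world"})
    assert store.get_metadata(conn, "example") == {"hello": "world"}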
@@ -0,0 +1,8 @@
"""
Parsing strategies for different programming languages.
"""

from .base_strategy import ParsingStrategy
from .strategy_factory import StrategyFactory

__all__ = ['ParsingStrategy', 'StrategyFactory']
@@ -0,0 +1,91 @@
"""
Abstract base class for language parsing strategies.
"""

import os
from abc import ABC, abstractmethod
from typing import Dict, List, Tuple, Optional
from ..models import SymbolInfo, FileInfo


class ParsingStrategy(ABC):
    """Abstract base class for language parsing strategies."""

    @abstractmethod
    def get_language_name(self) -> str:
        """Return the language name this strategy handles."""

    @abstractmethod
    def get_supported_extensions(self) -> List[str]:
        """Return list of file extensions this strategy supports."""

    @abstractmethod
    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """
        Parse file content and extract symbols.

        Args:
            file_path: Path to the file being parsed
            content: File content as string

        Returns:
            Tuple of (symbols_dict, file_info)
            - symbols_dict: Maps symbol_id -> SymbolInfo
            - file_info: FileInfo with metadata about the file
        """

    def _create_symbol_id(self, file_path: str, symbol_name: str) -> str:
        """
        Create a unique symbol ID.

        Args:
            file_path: Path to the file containing the symbol
            symbol_name: Name of the symbol

        Returns:
            Unique symbol identifier in format "relative_path::symbol_name"
        """
        relative_path = self._get_relative_path(file_path)
        return f"{relative_path}::{symbol_name}"

    def _get_relative_path(self, file_path: str) -> str:
        """Normalize path for symbol identifiers relative to project root."""
        if not file_path:
            return ""

        normalized = os.path.normpath(file_path)
        if normalized == ".":
            return ""

        normalized = normalized.replace("\\", "/")
        if normalized.startswith("./"):
            normalized = normalized[2:]

        if not os.path.isabs(file_path):
            normalized = normalized.lstrip("/")

        return normalized or os.path.basename(file_path)

    def _extract_line_number(self, content: str, symbol_position: int) -> int:
        """
        Extract line number from character position in content.

        Args:
            content: File content
            symbol_position: Character position in content

        Returns:
            Line number (1-based)
        """
        return content[:symbol_position].count('\n') + 1

    def _get_file_name(self, file_path: str) -> str:
        """Get just the filename from a full path."""
        return os.path.basename(file_path)

    def _safe_extract_text(self, content: str, start: int, end: int) -> str:
        """Safely extract text from content, handling bounds."""
        try:
            return content[start:end].strip()
        except (IndexError, TypeError):
            return ""
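A minimal concrete strategy only has to fill in the three abstract methods; an illustrative sketch (not a parser that ships with the project):

class PlainTextStrategy(ParsingStrategy):
    """Illustrative subclass: indexes .txt files without extracting symbols."""

    def get_language_name(self) -> str:
        return "text"

    def get_supported_extensions(self) -> List[str]:
        return ['.txt']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        file_info = FileInfo(language="text", line_count=len(content.splitlines()),
                             symbols={"functions": [], "classes": []}, imports=[])
        return {}, file_info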
@@ -0,0 +1,46 @@
"""
Fallback parsing strategy for unsupported languages and file types.
"""

import os
from typing import Dict, List, Tuple
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo


class FallbackParsingStrategy(ParsingStrategy):
    """Fallback parser for unsupported languages and file types."""

    def __init__(self, language_name: str = "unknown"):
        self.language_name = language_name

    def get_language_name(self) -> str:
        return self.language_name

    def get_supported_extensions(self) -> List[str]:
        return []  # Fallback supports any extension

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Basic parsing: extract file information without symbol parsing."""
        symbols = {}

        # For document files, we can at least index their existence
        file_info = FileInfo(
            language=self.language_name,
            line_count=len(content.splitlines()),
            symbols={"functions": [], "classes": []},
            imports=[]
        )

        # For document files (e.g. .md, .txt, .json), add a symbol representing the file itself
        if self.language_name in ['markdown', 'text', 'json', 'yaml', 'xml', 'config', 'css', 'html']:
            filename = os.path.basename(file_path)
            symbol_id = self._create_symbol_id(file_path, f"file:{filename}")
            symbols[symbol_id] = SymbolInfo(
                type="file",
                file=file_path,
                line=1,
                signature=f"{self.language_name} file: {filename}"
            )

        return symbols, file_info
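A sketch of what the fallback produces for a markdown file:

strategy = FallbackParsingStrategy("markdown")
symbols, info = strategy.parse_file("docs/guide.md", "# Guide\nHello\n")
# info.line_count == 2, and symbols holds one "file" entry keyed like
# "docs/guide.md::file:guide.md" with signature "markdown file: guide.md"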
@@ -0,0 +1,359 @@
"""
Go parsing strategy using regex patterns.
"""

import re
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo


class GoParsingStrategy(ParsingStrategy):
    """Go-specific parsing strategy using regex patterns."""

    def get_language_name(self) -> str:
        return "go"

    def get_supported_extensions(self) -> List[str]:
        return ['.go']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Go file using regex patterns."""
        symbols = {}
        functions = []
        lines = content.splitlines()
        classes = []  # Go doesn't have classes, but we track structs/interfaces
        imports = self._extract_go_imports(lines)
        package = None

        for i, line in enumerate(lines):
            line = line.strip()

            # Package declaration
            if line.startswith('package '):
                package = line.split('package ')[1].strip()

            # Function declarations
            elif line.startswith('func '):
                func_match = re.match(r'func\s+(\w+)\s*\(', line)
                if func_match:
                    func_name = func_match.group(1)
                    docstring = self._extract_go_comment(lines, i)
                    symbol_id = self._create_symbol_id(file_path, func_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="function",
                        file=file_path,
                        line=i + 1,
                        signature=line,
                        docstring=docstring
                    )
                    functions.append(func_name)

                # Method declarations (func (receiver) methodName)
                method_match = re.match(r'func\s+\([^)]+\)\s+(\w+)\s*\(', line)
                if method_match:
                    method_name = method_match.group(1)
                    docstring = self._extract_go_comment(lines, i)
                    symbol_id = self._create_symbol_id(file_path, method_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="method",
                        file=file_path,
                        line=i + 1,
                        signature=line,
                        docstring=docstring
                    )
                    functions.append(method_name)

            # Struct declarations
            elif re.match(r'type\s+\w+\s+struct\s*\{', line):
                struct_match = re.match(r'type\s+(\w+)\s+struct', line)
                if struct_match:
                    struct_name = struct_match.group(1)
                    docstring = self._extract_go_comment(lines, i)
                    symbol_id = self._create_symbol_id(file_path, struct_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="struct",
                        file=file_path,
                        line=i + 1,
                        docstring=docstring
                    )
                    classes.append(struct_name)

            # Interface declarations
            elif re.match(r'type\s+\w+\s+interface\s*\{', line):
                interface_match = re.match(r'type\s+(\w+)\s+interface', line)
                if interface_match:
                    interface_name = interface_match.group(1)
                    docstring = self._extract_go_comment(lines, i)
                    symbol_id = self._create_symbol_id(file_path, interface_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="interface",
                        file=file_path,
                        line=i + 1,
                        docstring=docstring
                    )
                    classes.append(interface_name)

        # Phase 2: Add call relationship analysis
        self._analyze_go_calls(content, symbols, file_path)

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(lines),
            symbols={"functions": functions, "classes": classes},
            imports=imports,
            package=package
        )

        return symbols, file_info

    def _analyze_go_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str):
        """Analyze Go function calls for relationships."""
        lines = content.splitlines()
        current_function = None
        is_function_declaration_line = False

        for line in lines:
            line = line.strip()

            # Track current function context
            if line.startswith('func '):
                func_name = self._extract_go_function_name(line)
                if func_name:
                    current_function = self._create_symbol_id(file_path, func_name)
                    is_function_declaration_line = True
            else:
                is_function_declaration_line = False

            # Find function calls: functionName() or obj.methodName()
            # Skip the function declaration line itself to avoid false self-calls
            if current_function and not is_function_declaration_line and ('(' in line and ')' in line):
                called_functions = self._extract_go_called_functions(line)
                for called_func in called_functions:
                    # Find the called function in symbols and add relationship
                    for symbol_id, symbol_info in symbols.items():
                        if called_func in symbol_id.split("::")[-1]:
                            if current_function not in symbol_info.called_by:
                                symbol_info.called_by.append(current_function)

    def _extract_go_function_name(self, line: str) -> Optional[str]:
        """Extract function name from Go function declaration."""
        try:
            # func functionName(...) or func (receiver) methodName(...)
            match = re.match(r'func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(', line)
            if match:
                return match.group(1)
        except Exception:
            pass
        return None

    def _extract_go_imports(self, lines: List[str]) -> List[str]:
        """Extract Go import paths, handling multi-line blocks and comments."""
        imports: List[str] = []
        in_block_comment = False
        paren_depth = 0

        for raw_line in lines:
            clean_line, in_block_comment = self._strip_go_comments(raw_line, in_block_comment)
            stripped = clean_line.strip()

            if not stripped:
                continue

            if paren_depth == 0:
                if not stripped.startswith('import '):
                    continue

                remainder = stripped[len('import '):].strip()
                if not remainder:
                    continue

                imports.extend(self._extract_string_literals(remainder))

                paren_depth = (
                    self._count_unquoted_characters(remainder, '(')
                    - self._count_unquoted_characters(remainder, ')')
                )
                if paren_depth <= 0:
                    paren_depth = 0
                continue

            imports.extend(self._extract_string_literals(clean_line))
            paren_depth += self._count_unquoted_characters(clean_line, '(')
            paren_depth -= self._count_unquoted_characters(clean_line, ')')
            if paren_depth <= 0:
                paren_depth = 0

        return imports

    def _strip_go_comments(self, line: str, in_block_comment: bool) -> Tuple[str, bool]:
        """Remove Go comments from a line while tracking block comment state."""
        result: List[str] = []
        i = 0
        length = len(line)

        while i < length:
            if in_block_comment:
                if line.startswith('*/', i):
                    in_block_comment = False
                    i += 2
                else:
                    i += 1
                continue

            if line.startswith('//', i):
                break

            if line.startswith('/*', i):
                in_block_comment = True
                i += 2
                continue

            result.append(line[i])
            i += 1

        return ''.join(result), in_block_comment

    def _extract_string_literals(self, line: str) -> List[str]:
        """Return string literal values found in a line (supports " and `)."""
        literals: List[str] = []
        i = 0
        length = len(line)

        while i < length:
            char = line[i]
            if char not in ('"', '`'):
                i += 1
                continue

            delimiter = char
            i += 1
            buffer: List[str] = []
            while i < length:
                current = line[i]
                if delimiter == '"':
                    if current == '\\':
                        if i + 1 < length:
                            buffer.append(line[i + 1])
                            i += 2
                            continue
                    elif current == '"':
                        literals.append(''.join(buffer))
                        i += 1
                        break
                else:  # Raw string delimited by backticks
                    if current == '`':
                        literals.append(''.join(buffer))
                        i += 1
                        break

                buffer.append(current)
                i += 1
            else:
                break

        return literals

    def _count_unquoted_characters(self, line: str, target: str) -> int:
        """Count occurrences of a character outside string literals."""
        count = 0
        i = 0
        length = len(line)
        delimiter: Optional[str] = None

        while i < length:
            char = line[i]
            if delimiter is None:
                if char in ('"', '`'):
                    delimiter = char
                elif char == target:
                    count += 1
            else:
                if delimiter == '"':
                    if char == '\\':
                        i += 2
                        continue
                    if char == '"':
                        delimiter = None
                elif delimiter == '`' and char == '`':
                    delimiter = None

            i += 1

        return count

    def _extract_go_comment(self, lines: List[str], line_index: int) -> Optional[str]:
        """Extract Go comment (docstring) from lines preceding the given line.

        Go documentation comments are regular comments that appear immediately
        before the declaration, with no blank line in between.
        """
        comment_lines = []

        # Look backwards from the line before the declaration
        i = line_index - 1
        while i >= 0:
            stripped = lines[i].strip()

            # Stop at empty line
            if not stripped:
                break

            # Single-line comment
            if stripped.startswith('//'):
                comment_text = stripped[2:].strip()
                comment_lines.insert(0, comment_text)
                i -= 1
            # Multi-line comment block
            elif stripped.startswith('/*') or stripped.endswith('*/'):
                # Handle single-line /* comment */
                if stripped.startswith('/*') and stripped.endswith('*/'):
                    comment_text = stripped[2:-2].strip()
                    comment_lines.insert(0, comment_text)
                    i -= 1
                # Handle multi-line comment block
                elif stripped.endswith('*/'):
                    # Found end of multi-line comment, collect until start
                    temp_lines = []
                    temp_lines.insert(0, stripped[:-2].strip())
                    i -= 1
                    while i >= 0:
                        temp_stripped = lines[i].strip()
                        if temp_stripped.startswith('/*'):
                            temp_lines.insert(0, temp_stripped[2:].strip())
                            comment_lines = temp_lines + comment_lines
                            i -= 1
                            break
                        else:
                            temp_lines.insert(0, temp_stripped)
                            i -= 1
                    break
                else:
                    break
            else:
                # Not a comment, stop looking
                break

        if comment_lines:
            # Join with newlines and clean up
            docstring = '\n'.join(comment_lines)
            return docstring if docstring else None

        return None

    def _extract_go_called_functions(self, line: str) -> List[str]:
        """Extract function names that are being called in this line."""
        called_functions = []

        # Find patterns like: functionName( or obj.methodName(
        patterns = [
            r'(\w+)\s*\(',    # functionName(
            r'\.(\w+)\s*\(',  # .methodName(
        ]

        for pattern in patterns:
            matches = re.findall(pattern, line)
            called_functions.extend(matches)

        return called_functions
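A sketch of the regex parser applied to a small Go source string:

go_src = (
    "package main\n"
    "\n"
    "// Greet prints a greeting.\n"
    "func Greet() {}\n"
)
strategy = GoParsingStrategy()
symbols, info = strategy.parse_file("cmd/main.go", go_src)
# info.package == "main"; one "function" symbol for Greet, with the
# preceding // comment captured as its docstring.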
@@ -0,0 +1,209 @@
"""
Java parsing strategy using tree-sitter - Optimized single-pass version.
"""

import logging
from typing import Dict, List, Tuple, Optional, Set
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo

logger = logging.getLogger(__name__)

import tree_sitter
from tree_sitter_java import language


class JavaParsingStrategy(ParsingStrategy):
    """Java-specific parsing strategy - Single Pass Optimized."""

    def __init__(self):
        self.java_language = tree_sitter.Language(language())

    def get_language_name(self) -> str:
        return "java"

    def get_supported_extensions(self) -> List[str]:
        return ['.java']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Java file using tree-sitter with single-pass optimization."""
        symbols = {}
        functions = []
        classes = []
        imports = []
        package = None

        # Symbol lookup index for O(1) access
        symbol_lookup = {}  # name -> symbol_id mapping

        parser = tree_sitter.Parser(self.java_language)

        try:
            tree = parser.parse(content.encode('utf8'))

            # Extract package info first
            for node in tree.root_node.children:
                if node.type == 'package_declaration':
                    package = self._extract_java_package(node, content)
                    break

            # Single-pass traversal that handles everything
            context = TraversalContext(
                content=content,
                file_path=file_path,
                symbols=symbols,
                functions=functions,
                classes=classes,
                imports=imports,
                symbol_lookup=symbol_lookup
            )

            self._traverse_node_single_pass(tree.root_node, context)

        except Exception as e:
            logger.warning(f"Error parsing Java file {file_path}: {e}")

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(content.splitlines()),
            symbols={"functions": functions, "classes": classes},
            imports=imports,
            package=package
        )

        return symbols, file_info

    def _traverse_node_single_pass(self, node, context: 'TraversalContext',
                                   current_class: Optional[str] = None,
                                   current_method: Optional[str] = None):
        """Single-pass traversal that extracts symbols and analyzes calls."""

        # Handle class declarations
        if node.type == 'class_declaration':
            name = self._get_java_class_name(node, context.content)
            if name:
                symbol_id = self._create_symbol_id(context.file_path, name)
                symbol_info = SymbolInfo(
                    type="class",
                    file=context.file_path,
                    line=node.start_point[0] + 1
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[name] = symbol_id
                context.classes.append(name)

                # Traverse class body with updated context
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_class=name,
                                                    current_method=current_method)
                return

        # Handle method declarations
        elif node.type == 'method_declaration':
            name = self._get_java_method_name(node, context.content)
            if name:
                # Build full method name with class context
                if current_class:
                    full_name = f"{current_class}.{name}"
                else:
                    full_name = name

                symbol_id = self._create_symbol_id(context.file_path, full_name)
                symbol_info = SymbolInfo(
                    type="method",
                    file=context.file_path,
                    line=node.start_point[0] + 1,
                    signature=self._get_java_method_signature(node, context.content)
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[full_name] = symbol_id
                context.symbol_lookup[name] = symbol_id  # Also index by method name alone
                context.functions.append(full_name)

                # Traverse method body with updated context
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_class=current_class,
                                                    current_method=symbol_id)
                return

        # Handle method invocations (calls)
        elif node.type == 'method_invocation':
            if current_method:
                called_method = self._get_called_method_name(node, context.content)
                if called_method:
                    # Use O(1) lookup instead of O(n) iteration
                    if called_method in context.symbol_lookup:
                        symbol_id = context.symbol_lookup[called_method]
                        symbol_info = context.symbols[symbol_id]
                        if current_method not in symbol_info.called_by:
                            symbol_info.called_by.append(current_method)
                    else:
                        # Try to find method with class prefix
                        for name, sid in context.symbol_lookup.items():
                            if name.endswith(f".{called_method}"):
                                symbol_info = context.symbols[sid]
                                if current_method not in symbol_info.called_by:
                                    symbol_info.called_by.append(current_method)
                                break

        # Handle import declarations
        elif node.type == 'import_declaration':
            import_text = context.content[node.start_byte:node.end_byte]
            # Extract the import path (remove 'import' keyword and semicolon)
            import_path = import_text.replace('import', '').replace(';', '').strip()
            if import_path:
                context.imports.append(import_path)

        # Continue traversing children for other node types
        for child in node.children:
            self._traverse_node_single_pass(child, context, current_class=current_class,
                                            current_method=current_method)

    def _get_java_class_name(self, node, content: str) -> Optional[str]:
        for child in node.children:
            if child.type == 'identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_java_method_name(self, node, content: str) -> Optional[str]:
        for child in node.children:
            if child.type == 'identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_java_method_signature(self, node, content: str) -> str:
        return content[node.start_byte:node.end_byte].split('\n')[0].strip()

    def _extract_java_package(self, node, content: str) -> Optional[str]:
        for child in node.children:
            if child.type == 'scoped_identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_called_method_name(self, node, content: str) -> Optional[str]:
        """Extract called method name from method invocation node."""
        # Handle obj.method() pattern - look for the method name after the dot
        for child in node.children:
            if child.type == 'field_access':
                # For field_access nodes, get the field (method) name
                for subchild in child.children:
                    if subchild.type == 'identifier' and subchild.start_byte > child.start_byte:
                        # Get the rightmost identifier (the method name)
                        return content[subchild.start_byte:subchild.end_byte]
            elif child.type == 'identifier':
                # Direct method call without object reference
                return content[child.start_byte:child.end_byte]
        return None


class TraversalContext:
    """Context object to pass state during single-pass traversal."""

    def __init__(self, content: str, file_path: str, symbols: Dict,
                 functions: List, classes: List, imports: List, symbol_lookup: Dict):
        self.content = content
        self.file_path = file_path
        self.symbols = symbols
        self.functions = functions
        self.classes = classes
        self.imports = imports
        self.symbol_lookup = symbol_lookup
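A brief, hedged usage sketch for the strategy above; the file name and Java source are invented for illustration and are not part of the commit:

java_src = """
package demo;
public class Greeter {
    private String hello(String name) { return "Hi " + name; }
    public String greet(String name) { return hello(name); }
}
"""
strategy = JavaParsingStrategy()
symbols, info = strategy.parse_file("Greeter.java", java_src)
# symbols now holds "Greeter.java::Greeter", "Greeter.java::Greeter.hello" and
# "Greeter.java::Greeter.greet"; because hello() is registered before greet()'s
# body is traversed, the O(1) symbol_lookup records greet() in hello's called_by.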
@@ -0,0 +1,628 @@
"""
JavaScript parsing strategy using tree-sitter.
"""

import logging
from typing import Dict, List, Tuple, Optional, Set

import tree_sitter
from tree_sitter_javascript import language

from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo

logger = logging.getLogger(__name__)


class JavaScriptParsingStrategy(ParsingStrategy):
    """JavaScript-specific parsing strategy using tree-sitter."""

    def __init__(self):
        self.js_language = tree_sitter.Language(language())

    def get_language_name(self) -> str:
        return "javascript"

    def get_supported_extensions(self) -> List[str]:
        return ['.js', '.jsx', '.mjs', '.cjs']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse JavaScript file using tree-sitter."""
        symbols: Dict[str, SymbolInfo] = {}
        functions: List[str] = []
        classes: List[str] = []
        imports: List[str] = []
        exports: List[str] = []
        symbol_lookup: Dict[str, str] = {}
        pending_calls: List[Tuple[str, str]] = []
        pending_call_set: Set[Tuple[str, str]] = set()
        variable_scopes: List[Dict[str, str]] = [{}]

        parser = tree_sitter.Parser(self.js_language)
        tree = parser.parse(content.encode('utf8'))
        self._traverse_js_node(
            tree.root_node, content, file_path, symbols, functions, classes,
            imports, exports, symbol_lookup, pending_calls, pending_call_set,
            variable_scopes,
        )

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(content.splitlines()),
            symbols={"functions": functions, "classes": classes},
            imports=imports,
            exports=exports
        )

        if pending_calls:
            file_info.pending_calls = pending_calls

        return symbols, file_info

    def _traverse_js_node(
        self,
        node,
        content: str,
        file_path: str,
        symbols: Dict[str, SymbolInfo],
        functions: List[str],
        classes: List[str],
        imports: List[str],
        exports: List[str],
        symbol_lookup: Dict[str, str],
        pending_calls: List[Tuple[str, str]],
        pending_call_set: Set[Tuple[str, str]],
        variable_scopes: List[Dict[str, str]],
        current_function: Optional[str] = None,
        current_class: Optional[str] = None,
    ):
        """Traverse JavaScript AST node and collect symbols and relationships."""
        node_type = node.type

        if node_type == 'function_declaration':
            name = self._get_function_name(node, content)
            if name:
                symbol_id = self._create_symbol_id(file_path, name)
                signature = self._get_js_function_signature(node, content)
                symbols[symbol_id] = SymbolInfo(
                    type="function",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=signature
                )
                symbol_lookup[name] = symbol_id
                functions.append(name)
                function_id = f"{file_path}::{name}"
                variable_scopes.append({})
                for child in node.children:
                    self._traverse_js_node(
                        child, content, file_path, symbols, functions, classes,
                        imports, exports, symbol_lookup, pending_calls,
                        pending_call_set, variable_scopes,
                        current_function=function_id,
                        current_class=current_class,
                    )
                variable_scopes.pop()
                return

        if node_type == 'class_declaration':
            name = self._get_class_name(node, content)
            if name:
                symbol_id = self._create_symbol_id(file_path, name)
                symbols[symbol_id] = SymbolInfo(
                    type="class",
                    file=file_path,
                    line=node.start_point[0] + 1
                )
                symbol_lookup[name] = symbol_id
                classes.append(name)
                for child in node.children:
                    self._traverse_js_node(
                        child, content, file_path, symbols, functions, classes,
                        imports, exports, symbol_lookup, pending_calls,
                        pending_call_set, variable_scopes,
                        current_function=current_function,
                        current_class=name,
                    )
                return

        if node_type == 'method_definition':
            method_name = self._get_method_name(node, content)
            class_name = current_class or self._find_parent_class(node, content)
            if method_name and class_name:
                full_name = f"{class_name}.{method_name}"
                symbol_id = self._create_symbol_id(file_path, full_name)
                signature = self._get_js_function_signature(node, content)
                symbols[symbol_id] = SymbolInfo(
                    type="method",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=signature
                )
                symbol_lookup[full_name] = symbol_id
                symbol_lookup[method_name] = symbol_id
                functions.append(full_name)
                function_id = f"{file_path}::{full_name}"
                variable_scopes.append({})
                for child in node.children:
                    self._traverse_js_node(
                        child, content, file_path, symbols, functions, classes,
                        imports, exports, symbol_lookup, pending_calls,
                        pending_call_set, variable_scopes,
                        current_function=function_id,
                        current_class=class_name,
                    )
                variable_scopes.pop()
                return

        if node_type in ['lexical_declaration', 'variable_declaration']:
            for child in node.children:
                if child.type != 'variable_declarator':
                    self._traverse_js_node(
                        child, content, file_path, symbols, functions, classes,
                        imports, exports, symbol_lookup, pending_calls,
                        pending_call_set, variable_scopes,
                        current_function=current_function,
                        current_class=current_class,
                    )
                    continue

                name_node = child.child_by_field_name('name')
                value_node = child.child_by_field_name('value')
                if not name_node:
                    continue

                name = self._get_node_text(name_node, content)

                if value_node and value_node.type in ['arrow_function', 'function_expression', 'function']:
                    symbol_id = self._create_symbol_id(file_path, name)
                    signature = content[child.start_byte:child.end_byte].split('\n')[0].strip()
                    symbols[symbol_id] = SymbolInfo(
                        type="function",
                        file=file_path,
                        line=child.start_point[0] + 1,
                        signature=signature
                    )
                    symbol_lookup[name] = symbol_id
                    functions.append(name)
                    function_id = f"{file_path}::{name}"
                    variable_scopes.append({})
                    self._traverse_js_node(
                        value_node, content, file_path, symbols, functions, classes,
                        imports, exports, symbol_lookup, pending_calls,
                        pending_call_set, variable_scopes,
                        current_function=function_id,
                        current_class=current_class,
                    )
                    variable_scopes.pop()
                else:
                    inferred = self._infer_expression_type(value_node, content)
                    if inferred:
                        self._set_variable_type(variable_scopes, name, inferred)
                    if value_node:
                        self._traverse_js_node(
                            value_node, content, file_path, symbols, functions, classes,
                            imports, exports, symbol_lookup, pending_calls,
                            pending_call_set, variable_scopes,
                            current_function=current_function,
                            current_class=current_class,
                        )
            return

        if node_type == 'arrow_function':
            variable_scopes.append({})
            for child in node.children:
                self._traverse_js_node(
                    child, content, file_path, symbols, functions, classes,
                    imports, exports, symbol_lookup, pending_calls,
                    pending_call_set, variable_scopes,
                    current_function=current_function,
                    current_class=current_class,
                )
            variable_scopes.pop()
            return

        if node_type == 'call_expression':
            caller = current_function or f"{file_path}:{node.start_point[0] + 1}"
            called = self._resolve_called_function(
                node, content, variable_scopes, current_class
            )
            if caller and called:
                self._register_call(
                    symbols, symbol_lookup, pending_calls, pending_call_set,
                    caller, called
                )
            if caller:
                self._collect_callback_arguments(
                    node, content, symbols, symbol_lookup, pending_calls,
                    pending_call_set, variable_scopes, current_class, caller
                )

        if node_type in ['import_statement', 'require_call']:
            import_text = self._get_node_text(node, content)
            imports.append(import_text)
        elif node_type in ['export_statement', 'export_clause', 'export_default_declaration']:
            exports.append(self._get_node_text(node, content))

        for child in node.children:
            self._traverse_js_node(
                child, content, file_path, symbols, functions, classes,
                imports, exports, symbol_lookup, pending_calls,
                pending_call_set, variable_scopes,
                current_function=current_function,
                current_class=current_class,
            )

    def _collect_callback_arguments(
        self,
        call_node,
        content: str,
        symbols: Dict[str, SymbolInfo],
        symbol_lookup: Dict[str, str],
        pending_calls: List[Tuple[str, str]],
        pending_call_set: Set[Tuple[str, str]],
        variable_scopes: List[Dict[str, str]],
        current_class: Optional[str],
        caller: str
    ) -> None:
        """Capture identifier callbacks passed as call expression arguments."""
        arguments_node = call_node.child_by_field_name('arguments')
        if not arguments_node:
            return

        for argument in arguments_node.children:
            if not getattr(argument, "is_named", False):
                continue
            callback_name = self._resolve_argument_reference(
                argument, content, variable_scopes, current_class
            )
            if not callback_name:
                continue
            self._register_call(
                symbols, symbol_lookup, pending_calls, pending_call_set,
                caller, callback_name
            )

    def _resolve_argument_reference(
        self,
        node,
        content: str,
        variable_scopes: List[Dict[str, str]],
        current_class: Optional[str]
    ) -> Optional[str]:
        """Resolve a potential callback reference used as an argument."""
        node_type = node.type

        if node_type == 'identifier':
            return self._get_node_text(node, content)

        if node_type == 'member_expression':
            property_node = node.child_by_field_name('property')
            if property_node is None:
                for child in node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            property_name = self._get_node_text(property_node, content)
            qualifier_node = node.child_by_field_name('object')
            qualifier = None
            if qualifier_node is not None:
                qualifier = self._resolve_member_qualifier(
                    qualifier_node, content, variable_scopes, current_class
                )
            if not qualifier:
                for child in node.children:
                    if child is property_node:
                        continue
                    qualifier = self._resolve_member_qualifier(
                        child, content, variable_scopes, current_class
                    )
                    if qualifier:
                        break
            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        # Function-valued arguments carry no resolvable name
        if node_type in ['call_expression', 'arrow_function', 'function', 'function_expression']:
            return None

        return None

    def _get_function_name(self, node, content: str) -> Optional[str]:
        """Extract function name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return self._get_node_text(child, content)
        return None

    def _get_class_name(self, node, content: str) -> Optional[str]:
        """Extract class name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return self._get_node_text(child, content)
        return None

    def _get_method_name(self, node, content: str) -> Optional[str]:
        """Extract method name from tree-sitter node."""
        for child in node.children:
            if child.type == 'property_identifier':
                return self._get_node_text(child, content)
        return None

    def _find_parent_class(self, node, content: str) -> Optional[str]:
        """Find the parent class of a method."""
        parent = node.parent
        while parent:
            if parent.type == 'class_declaration':
                return self._get_class_name(parent, content)
            parent = parent.parent
        return None

    def _get_js_function_signature(self, node, content: str) -> str:
        """Extract JavaScript function signature."""
        return content[node.start_byte:node.end_byte].split('\n')[0].strip()

    def _get_node_text(self, node, content: str) -> str:
        return content[node.start_byte:node.end_byte]

    def _set_variable_type(self, variable_scopes: List[Dict[str, str]], name: str, value: str) -> None:
        if not variable_scopes:
            return
        variable_scopes[-1][name] = value

    def _lookup_variable_type(self, variable_scopes: List[Dict[str, str]], name: str) -> Optional[str]:
        for scope in reversed(variable_scopes):
            if name in scope:
                return scope[name]
        return None

    def _infer_expression_type(self, node, content: str) -> Optional[str]:
        """Infer the class/type from a simple expression like `new ClassName()`."""
        if node is None:
            return None

        if node.type == 'new_expression':
            constructor_node = node.child_by_field_name('constructor')
            if constructor_node is None:
                # Fallback: first identifier or member expression child
                for child in node.children:
                    if child.type in ['identifier', 'member_expression']:
                        constructor_node = child
                        break

            if constructor_node:
                if constructor_node.type == 'identifier':
                    return self._get_node_text(constructor_node, content)
                if constructor_node.type == 'member_expression':
                    property_node = constructor_node.child_by_field_name('property')
                    if property_node:
                        return self._get_node_text(property_node, content)
                    for child in reversed(constructor_node.children):
                        if child.type in ['identifier', 'property_identifier']:
                            return self._get_node_text(child, content)
        return None

    def _resolve_called_function(
        self,
        node,
        content: str,
        variable_scopes: List[Dict[str, str]],
        current_class: Optional[str]
    ) -> Optional[str]:
        function_node = node.child_by_field_name('function')
        if function_node is None and node.children:
            function_node = node.children[0]
        if function_node is None:
            return None

        if function_node.type == 'identifier':
            return self._get_node_text(function_node, content)

        if function_node.type == 'member_expression':
            property_node = function_node.child_by_field_name('property')
            if property_node is None:
                for child in function_node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            property_name = self._get_node_text(property_node, content)
            object_node = function_node.child_by_field_name('object')
            qualifier = None
            if object_node is not None:
                qualifier = self._resolve_member_qualifier(
                    object_node, content, variable_scopes, current_class
                )
            else:
                for child in function_node.children:
                    if child is property_node:
                        continue
                    qualifier = self._resolve_member_qualifier(
                        child, content, variable_scopes, current_class
                    )
                    if qualifier:
                        break

            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        return None

    def _resolve_member_qualifier(
        self,
        node,
        content: str,
        variable_scopes: List[Dict[str, str]],
        current_class: Optional[str]
    ) -> Optional[str]:
        # Guard: the recursive call below passes child_by_field_name('object'),
        # which can be None for incomplete member expressions.
        if node is None:
            return None

        node_type = node.type
        if node_type == 'this':
            return current_class

        if node_type == 'identifier':
            name = self._get_node_text(node, content)
            var_type = self._lookup_variable_type(variable_scopes, name)
            return var_type or name

        if node_type == 'member_expression':
            property_node = node.child_by_field_name('property')
            if property_node is None:
                for child in node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            qualifier = self._resolve_member_qualifier(
                node.child_by_field_name('object'),
                content,
                variable_scopes,
                current_class
            )
            property_name = self._get_node_text(property_node, content)
            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        return None

    def _register_call(
        self,
        symbols: Dict[str, SymbolInfo],
        symbol_lookup: Dict[str, str],
        pending_calls: List[Tuple[str, str]],
        pending_call_set: Set[Tuple[str, str]],
        caller: str,
        called: str
    ) -> None:
        if called in symbol_lookup:
            symbol_info = symbols[symbol_lookup[called]]
            if caller not in symbol_info.called_by:
                symbol_info.called_by.append(caller)
            return

        key = (caller, called)
        if key not in pending_call_set:
            pending_call_set.add(key)
            pending_calls.append(key)
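A sketch of the call-registration flow above; the source string and file name are invented, and only names defined in this hunk are relied on:

js_src = "function helper() { return 1; }\nfunction main() { return helper(); }\n"
strategy = JavaScriptParsingStrategy()
symbols, info = strategy.parse_file("app.js", js_src)
# helper() is already in symbol_lookup when main()'s call is visited, so
# symbols["app.js::helper"].called_by contains "app.js::main". Calls to names
# defined later are deduplicated into info.pending_calls for a later pass.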
@@ -0,0 +1,154 @@
"""
Objective-C parsing strategy using regex patterns.
"""

import re
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo


class ObjectiveCParsingStrategy(ParsingStrategy):
    """Objective-C parsing strategy using regex patterns."""

    def get_language_name(self) -> str:
        return "objective-c"

    def get_supported_extensions(self) -> List[str]:
        return ['.m', '.mm']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Objective-C file using regex patterns."""
        symbols = {}
        functions = []
        classes = []
        imports = []

        lines = content.splitlines()
        current_class = None

        for i, line in enumerate(lines):
            line = line.strip()

            # Import statements
            if line.startswith('#import ') or line.startswith('#include '):
                import_match = re.search(r'#(?:import|include)\s+[<"]([^>"]+)[>"]', line)
                if import_match:
                    imports.append(import_match.group(1))

            # Interface declarations
            elif line.startswith('@interface '):
                interface_match = re.match(r'@interface\s+(\w+)', line)
                if interface_match:
                    class_name = interface_match.group(1)
                    current_class = class_name
                    symbol_id = self._create_symbol_id(file_path, class_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="class",
                        file=file_path,
                        line=i + 1
                    )
                    classes.append(class_name)

            # Implementation declarations
            elif line.startswith('@implementation '):
                impl_match = re.match(r'@implementation\s+(\w+)', line)
                if impl_match:
                    current_class = impl_match.group(1)

            # Method declarations
            elif line.startswith(('- (', '+ (')):
                method_match = re.search(r'[+-]\s*\([^)]+\)\s*(\w+)', line)
                if method_match:
                    method_name = method_match.group(1)
                    full_name = f"{current_class}.{method_name}" if current_class else method_name
                    symbol_id = self._create_symbol_id(file_path, full_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="method",
                        file=file_path,
                        line=i + 1,
                        signature=line
                    )
                    functions.append(full_name)

            # C function declarations
            elif re.match(r'\w+.*\s+\w+\s*\([^)]*\)\s*\{?', line) and not line.startswith(('if', 'for', 'while')):
                func_match = re.search(r'\s(\w+)\s*\([^)]*\)', line)
                if func_match:
                    func_name = func_match.group(1)
                    symbol_id = self._create_symbol_id(file_path, func_name)
                    symbols[symbol_id] = SymbolInfo(
                        type="function",
                        file=file_path,
                        line=i + 1,
                        signature=line
                    )
                    functions.append(func_name)

            # End of class
            elif line == '@end':
                current_class = None

        # Phase 2: Add call relationship analysis
        self._analyze_objc_calls(content, symbols, file_path)

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(lines),
            symbols={"functions": functions, "classes": classes},
            imports=imports
        )

        return symbols, file_info

    def _analyze_objc_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str):
        """Analyze Objective-C method calls for relationships."""
        lines = content.splitlines()
        current_function = None

        for i, line in enumerate(lines):
            original_line = line
            line = line.strip()

            # Track current method context
            if line.startswith('- (') or line.startswith('+ ('):
                func_name = self._extract_objc_method_name(line)
                if func_name:
                    current_function = self._create_symbol_id(file_path, func_name)

            # Find method calls: [obj methodName] or functionName()
            if current_function and ('[' in line and ']' in line or ('(' in line and ')' in line)):
                called_functions = self._extract_objc_called_functions(line)
                for called_func in called_functions:
                    # Find the called function in symbols and add relationship
                    for symbol_id, symbol_info in symbols.items():
                        if called_func in symbol_id.split("::")[-1]:
                            if current_function not in symbol_info.called_by:
                                symbol_info.called_by.append(current_function)

    def _extract_objc_method_name(self, line: str) -> Optional[str]:
        """Extract method name from Objective-C method declaration."""
        try:
            # - (returnType)methodName:(params) or + (returnType)methodName
            match = re.search(r'[+-]\s*\([^)]*\)\s*(\w+)', line)
            if match:
                return match.group(1)
        except Exception:  # narrowed from a bare except
            pass
        return None

    def _extract_objc_called_functions(self, line: str) -> List[str]:
        """Extract method names that are being called in this line."""
        called_functions = []

        # Find patterns like: [obj methodName] or functionName(
        patterns = [
            r'\[\s*\w+\s+(\w+)\s*[\]:]',  # [obj methodName]
            r'(\w+)\s*\(',                # functionName(
        ]

        for pattern in patterns:
            matches = re.findall(pattern, line)
            called_functions.extend(matches)

        return called_functions
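A small sketch of the regex extraction above; the input line is invented:

strategy = ObjectiveCParsingStrategy()
calls = strategy._extract_objc_called_functions("[self updateTitle]; refresh();")
# The bracket pattern captures "updateTitle", the paren pattern captures
# "refresh"; matching is deliberately approximate, which is why
# _analyze_objc_calls filters candidates against known symbols.
assert calls == ["updateTitle", "refresh"]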
@@ -0,0 +1,367 @@
"""
Python parsing strategy using AST - Optimized single-pass version.
"""

import ast
import logging
from typing import Dict, List, Tuple, Optional, Set
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo

logger = logging.getLogger(__name__)


class PythonParsingStrategy(ParsingStrategy):
    """Python-specific parsing strategy using Python's built-in AST - Single Pass Optimized."""

    def get_language_name(self) -> str:
        return "python"

    def get_supported_extensions(self) -> List[str]:
        return ['.py', '.pyw']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Python file using AST with single-pass optimization."""
        symbols = {}
        functions = []
        classes = []
        imports = []

        # Single-pass visitor that handles everything at once; created before
        # the try block so it is always bound when resolve_deferred_calls()
        # runs below, even if ast.parse() raises.
        visitor = SinglePassVisitor(symbols, functions, classes, imports, file_path)
        try:
            tree = ast.parse(content)
            visitor.visit(tree)
        except SyntaxError as e:
            logger.warning(f"Syntax error in Python file {file_path}: {e}")
        except Exception as e:
            logger.warning(f"Error parsing Python file {file_path}: {e}")

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(content.splitlines()),
            symbols={"functions": functions, "classes": classes},
            imports=imports
        )

        pending_calls = visitor.resolve_deferred_calls()
        if pending_calls:
            file_info.pending_calls = pending_calls

        return symbols, file_info


class SinglePassVisitor(ast.NodeVisitor):
    """Single-pass AST visitor that extracts symbols and analyzes calls in one traversal."""

    def __init__(self, symbols: Dict[str, SymbolInfo], functions: List[str],
                 classes: List[str], imports: List[str], file_path: str):
        self.symbols = symbols
        self.functions = functions
        self.classes = classes
        self.imports = imports
        self.file_path = file_path

        # Context tracking for call analysis
        self.current_function_stack = []
        self.current_class = None
        self.variable_type_stack: List[Dict[str, str]] = [{}]

        # Symbol lookup index for O(1) access
        self.symbol_lookup = {}  # name -> symbol_id mapping for fast lookups

        # Track processed nodes to avoid duplicates
        self.processed_nodes: Set[int] = set()
        # Deferred call relationships for forward references
        self.deferred_calls: List[Tuple[str, str]] = []

    def visit_ClassDef(self, node: ast.ClassDef):
        """Visit class definition - extract symbol and analyze in single pass."""
        class_name = node.name
        symbol_id = self._create_symbol_id(self.file_path, class_name)

        # Extract docstring
        docstring = ast.get_docstring(node)

        # Create symbol info
        symbol_info = SymbolInfo(
            type="class",
            file=self.file_path,
            line=node.lineno,
            docstring=docstring
        )

        # Store in symbols and lookup index
        self.symbols[symbol_id] = symbol_info
        self.symbol_lookup[class_name] = symbol_id
        self.classes.append(class_name)

        # Track class context for method processing
        old_class = self.current_class
        self.current_class = class_name

        method_nodes = []
        # First pass: register methods so forward references resolve
        for child in node.body:
            if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._register_method(child, class_name)
                method_nodes.append(child)
            else:
                self.visit(child)

        # Second pass: visit method bodies for call analysis
        for method_node in method_nodes:
            self._visit_registered_method(method_node, class_name)

        # Restore previous class context
        self.current_class = old_class

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit function definition - extract symbol and track context."""
        self._process_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Visit async function definition - extract symbol and track context."""
        self._process_function(node)

    def _process_function(self, node):
        """Process both sync and async function definitions."""
        # Skip if this is a method (already handled by ClassDef)
        if self.current_class:
            return

        # Skip if already processed
        node_id = id(node)
        if node_id in self.processed_nodes:
            return
        self.processed_nodes.add(node_id)

        func_name = node.name
        symbol_id = self._create_symbol_id(self.file_path, func_name)

        # Extract function signature and docstring
        signature = self._extract_function_signature(node)
        docstring = ast.get_docstring(node)

        # Create symbol info
        symbol_info = SymbolInfo(
            type="function",
            file=self.file_path,
            line=node.lineno,
            signature=signature,
            docstring=docstring
        )

        # Store in symbols and lookup index
        self.symbols[symbol_id] = symbol_info
        self.symbol_lookup[func_name] = symbol_id
        self.functions.append(func_name)

        # Track function context for call analysis
        function_id = f"{self.file_path}::{func_name}"
        self.variable_type_stack.append({})
        self.current_function_stack.append(function_id)

        # Visit function body to analyze calls
        self.generic_visit(node)

        # Pop function from stack
        self.current_function_stack.pop()
        self.variable_type_stack.pop()

    def visit_Assign(self, node: ast.Assign):
        """Track simple variable assignments to class instances."""
        class_name = self._infer_class_name(node.value)
        if class_name:
            current_scope = self._current_var_types()
            for target in node.targets:
                if isinstance(target, ast.Name):
                    current_scope[target.id] = class_name
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign):
        """Track annotated assignments that instantiate classes."""
        class_name = self._infer_class_name(node.value)
        if class_name and isinstance(node.target, ast.Name):
            self._current_var_types()[node.target.id] = class_name
        self.generic_visit(node)

    def _current_var_types(self) -> Dict[str, str]:
        return self.variable_type_stack[-1]

    def _infer_class_name(self, value: Optional[ast.AST]) -> Optional[str]:
        if isinstance(value, ast.Call):
            func = value.func
            if isinstance(func, ast.Name):
                return func.id
            if isinstance(func, ast.Attribute):
                return func.attr
        return None

    def _register_method(self, node: ast.FunctionDef, class_name: str):
        """Register a method symbol without visiting its body."""
        method_name = f"{class_name}.{node.name}"
        method_symbol_id = self._create_symbol_id(self.file_path, method_name)

        method_signature = self._extract_function_signature(node)
        method_docstring = ast.get_docstring(node)

        symbol_info = SymbolInfo(
            type="method",
            file=self.file_path,
            line=node.lineno,
            signature=method_signature,
            docstring=method_docstring
        )

        self.symbols[method_symbol_id] = symbol_info
        self.symbol_lookup[method_name] = method_symbol_id
        self.symbol_lookup[node.name] = method_symbol_id  # Also index by short method name
        self.functions.append(method_name)

    def _visit_registered_method(self, node: ast.FunctionDef, class_name: str):
        """Visit a previously registered method body for call analysis."""
        method_name = f"{class_name}.{node.name}"
        function_id = f"{self.file_path}::{method_name}"
        self.variable_type_stack.append({})
        self.current_function_stack.append(function_id)
        for child in node.body:
            self.visit(child)
        self.current_function_stack.pop()
        self.variable_type_stack.pop()

    def visit_Import(self, node: ast.Import):
        """Handle import statements."""
        for alias in node.names:
            self.imports.append(alias.name)
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Handle from...import statements."""
        if node.module:
            for alias in node.names:
                self.imports.append(f"{node.module}.{alias.name}")
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call):
        """Visit function call and record relationship using O(1) lookup."""
        if not self.current_function_stack:
            self.generic_visit(node)
            return

        try:
            # Get the function name being called
            called_function = None

            if isinstance(node.func, ast.Name):
                # Direct function call: function_name()
                called_function = self._qualify_name(node.func.id)
            elif isinstance(node.func, ast.Attribute):
                # Method call: obj.method() or module.function()
                if not self._is_super_call(node.func):
                    qualifier = self._infer_attribute_qualifier(node.func.value)
                    if qualifier:
                        called_function = f"{qualifier}.{node.func.attr}"
                    else:
                        called_function = node.func.attr

            if called_function:
                caller_function = self.current_function_stack[-1]
                if not self._register_call_relationship(caller_function, called_function):
                    self.deferred_calls.append((caller_function, called_function))
        except Exception:
            # Silently handle parsing errors for complex call patterns
            pass

        # Continue visiting child nodes
        self.generic_visit(node)

    def _register_call_relationship(self, caller_function: str, called_function: str) -> bool:
        """Attempt to resolve a call relationship immediately."""
        try:
            if called_function in self.symbol_lookup:
                symbol_id = self.symbol_lookup[called_function]
                symbol_info = self.symbols[symbol_id]
                if symbol_info.type in ["function", "method"]:
                    if caller_function not in symbol_info.called_by:
                        symbol_info.called_by.append(caller_function)
                    return True

            for name, symbol_id in self.symbol_lookup.items():
                if name.endswith(f".{called_function}"):
                    symbol_info = self.symbols[symbol_id]
                    if symbol_info.type in ["function", "method"]:
                        if caller_function not in symbol_info.called_by:
                            symbol_info.called_by.append(caller_function)
                        return True
        except Exception:
            return False

        return False

    def _qualify_name(self, name: str) -> str:
        """Map bare identifiers to fully qualified symbol names."""
        if name in self.symbol_lookup:
            return name
        if name and name[0].isupper():
            return f"{name}.__init__"
        return name

    def _infer_attribute_qualifier(self, value: ast.AST) -> Optional[str]:
        """Infer class name for attribute-based calls."""
        if isinstance(value, ast.Name):
            return self._current_var_types().get(value.id)
        if isinstance(value, ast.Call):
            return self._infer_class_name(value)
        if isinstance(value, ast.Attribute):
            if isinstance(value.value, ast.Name):
                inferred = self._current_var_types().get(value.value.id)
                if inferred:
                    return inferred
            return value.attr
        return None

    def resolve_deferred_calls(self) -> List[Tuple[str, str]]:
        """Resolve stored call relationships once all symbols are known."""
        if not self.deferred_calls:
            return []
        current = list(self.deferred_calls)
        unresolved: List[Tuple[str, str]] = []
        self.deferred_calls.clear()
        for caller, called in current:
            if not self._register_call_relationship(caller, called):
                unresolved.append((caller, called))
        self.deferred_calls = unresolved
        return unresolved

    @staticmethod
    def _is_super_call(attr_node: ast.Attribute) -> bool:
        """Detect super().method(...) patterns."""
        value = attr_node.value
        if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
            return value.func.id == "super"
        return False

    def _create_symbol_id(self, file_path: str, symbol_name: str) -> str:
        """Create a unique symbol ID."""
        return f"{file_path}::{symbol_name}"

    def _extract_function_signature(self, node: ast.FunctionDef) -> str:
        """Extract function signature from AST node."""
        # Build basic signature
        args = []

        # Regular arguments
        for arg in node.args.args:
            args.append(arg.arg)

        # Varargs (*args)
        if node.args.vararg:
            args.append(f"*{node.args.vararg.arg}")

        # Keyword arguments (**kwargs)
        if node.args.kwarg:
            args.append(f"**{node.args.kwarg.arg}")

        signature = f"def {node.name}({', '.join(args)}):"
        return signature
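A sketch of the forward-reference handling above: calls to names not yet registered are deferred, then resolve_deferred_calls() re-runs registration once all symbols exist. The sample module is invented:

py_src = "def caller():\n    return helper()\n\ndef helper():\n    return 42\n"
strategy = PythonParsingStrategy()
symbols, info = strategy.parse_file("mod.py", py_src)
# When caller() is visited, helper() is not yet in symbol_lookup, so the pair
# is deferred; the resolution pass inside parse_file then records
# "mod.py::caller" in symbols["mod.py::helper"].called_by.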
@@ -0,0 +1,201 @@
"""
Strategy factory for creating appropriate parsing strategies.
"""

import threading
from typing import Dict, List
from .base_strategy import ParsingStrategy
from .python_strategy import PythonParsingStrategy
from .javascript_strategy import JavaScriptParsingStrategy
from .typescript_strategy import TypeScriptParsingStrategy
from .java_strategy import JavaParsingStrategy
from .go_strategy import GoParsingStrategy
from .objective_c_strategy import ObjectiveCParsingStrategy
from .zig_strategy import ZigParsingStrategy
from .fallback_strategy import FallbackParsingStrategy


class StrategyFactory:
    """Factory for creating appropriate parsing strategies."""

    def __init__(self):
        # Initialize all strategies with thread safety
        self._strategies: Dict[str, ParsingStrategy] = {}
        self._initialized = False
        self._lock = threading.RLock()
        self._initialize_strategies()

        # File type mappings for fallback parser
        self._file_type_mappings = {
            # Web and markup
            '.html': 'html', '.htm': 'html',
            '.css': 'css', '.scss': 'css', '.sass': 'css',
            '.less': 'css', '.stylus': 'css', '.styl': 'css',
            '.md': 'markdown', '.mdx': 'markdown',
            '.json': 'json', '.jsonc': 'json',
            '.xml': 'xml',
            '.yml': 'yaml', '.yaml': 'yaml',

            # Frontend frameworks
            '.vue': 'vue',
            '.svelte': 'svelte',
            '.astro': 'astro',

            # Template engines
            '.hbs': 'handlebars', '.handlebars': 'handlebars',
            '.ejs': 'ejs',
            '.pug': 'pug',

            # Database and SQL
            '.sql': 'sql', '.ddl': 'sql', '.dml': 'sql',
            '.mysql': 'sql', '.postgresql': 'sql', '.psql': 'sql',
            '.sqlite': 'sql', '.mssql': 'sql', '.oracle': 'sql',
            '.ora': 'sql', '.db2': 'sql',
            '.proc': 'sql', '.procedure': 'sql',
            '.func': 'sql', '.function': 'sql',
            '.view': 'sql', '.trigger': 'sql', '.index': 'sql',
            '.migration': 'sql', '.seed': 'sql', '.fixture': 'sql',
            '.schema': 'sql',
            '.cql': 'sql', '.cypher': 'sql', '.sparql': 'sql',
            '.gql': 'graphql',
            '.liquibase': 'sql', '.flyway': 'sql',

            # Config and text files
            '.txt': 'text',
            '.ini': 'config', '.cfg': 'config', '.conf': 'config',
            '.toml': 'config',
            '.properties': 'config',
            '.env': 'config',
            '.gitignore': 'config',
            '.dockerignore': 'config',
            '.editorconfig': 'config',

            # Other programming languages (will use fallback)
            '.c': 'c', '.cpp': 'cpp', '.h': 'h', '.hpp': 'hpp',
            '.cxx': 'cpp', '.cc': 'cpp', '.hxx': 'hpp', '.hh': 'hpp',
            '.cs': 'csharp',
            '.rb': 'ruby',
            '.php': 'php',
            '.swift': 'swift',
            '.kt': 'kotlin', '.kts': 'kotlin',
            '.rs': 'rust',
            '.scala': 'scala',
            '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell',
            '.ps1': 'powershell',
            '.bat': 'batch', '.cmd': 'batch',
            '.r': 'r', '.R': 'r',
            '.pl': 'perl', '.pm': 'perl',
            '.lua': 'lua',
            '.dart': 'dart',
            '.hs': 'haskell',
            '.ml': 'ocaml', '.mli': 'ocaml',
            '.fs': 'fsharp', '.fsx': 'fsharp',
            '.clj': 'clojure', '.cljs': 'clojure',
            '.vim': 'vim',
        }

    def _initialize_strategies(self):
        """Initialize all parsing strategies with thread safety."""
        with self._lock:
            if self._initialized:
                return

            try:
                # Python
                python_strategy = PythonParsingStrategy()
                for ext in python_strategy.get_supported_extensions():
                    self._strategies[ext] = python_strategy

                # JavaScript
                js_strategy = JavaScriptParsingStrategy()
                for ext in js_strategy.get_supported_extensions():
                    self._strategies[ext] = js_strategy

                # TypeScript
                ts_strategy = TypeScriptParsingStrategy()
                for ext in ts_strategy.get_supported_extensions():
                    self._strategies[ext] = ts_strategy

                # Java
                java_strategy = JavaParsingStrategy()
                for ext in java_strategy.get_supported_extensions():
                    self._strategies[ext] = java_strategy

                # Go
                go_strategy = GoParsingStrategy()
                for ext in go_strategy.get_supported_extensions():
                    self._strategies[ext] = go_strategy

                # Objective-C
                objc_strategy = ObjectiveCParsingStrategy()
                for ext in objc_strategy.get_supported_extensions():
                    self._strategies[ext] = objc_strategy

                # Zig
                zig_strategy = ZigParsingStrategy()
                for ext in zig_strategy.get_supported_extensions():
                    self._strategies[ext] = zig_strategy

                self._initialized = True

            except Exception as e:
                # Reset state on failure to allow retry
                self._strategies.clear()
                self._initialized = False
                raise e

    def get_strategy(self, file_extension: str) -> ParsingStrategy:
        """
        Get appropriate strategy for file extension.

        Args:
            file_extension: File extension (e.g., '.py', '.js')

        Returns:
            Appropriate parsing strategy
        """
        with self._lock:
            # Ensure initialization is complete
            if not self._initialized:
                self._initialize_strategies()

            # Check for specialized strategies first
            if file_extension in self._strategies:
                return self._strategies[file_extension]

            # Use fallback strategy with appropriate language name
            language_name = self._file_type_mappings.get(file_extension, 'unknown')
            return FallbackParsingStrategy(language_name)

    def get_all_supported_extensions(self) -> List[str]:
        """Get all supported extensions across strategies."""
        specialized = list(self._strategies.keys())
        fallback = list(self._file_type_mappings.keys())
        return specialized + fallback

    def get_specialized_extensions(self) -> List[str]:
        """Get extensions that have specialized parsers."""
        return list(self._strategies.keys())

    def get_fallback_extensions(self) -> List[str]:
        """Get extensions that use fallback parsing."""
        return list(self._file_type_mappings.keys())

    def get_strategy_info(self) -> Dict[str, List[str]]:
        """Get information about available strategies."""
        info = {}

        # Group extensions by strategy type
        for ext, strategy in self._strategies.items():
            strategy_name = strategy.get_language_name()
            if strategy_name not in info:
                info[strategy_name] = []
            info[strategy_name].append(ext)

        # Add fallback info
        fallback_languages = set(self._file_type_mappings.values())
        for lang in fallback_languages:
            extensions = [ext for ext, mapped_lang in self._file_type_mappings.items() if mapped_lang == lang]
            info[f"fallback_{lang}"] = extensions

        return info
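A hedged sketch of how the factory is meant to be used; FallbackParsingStrategy's internals are outside this hunk, so the fallback comment is an assumption based on the constructor call above:

factory = StrategyFactory()
py_strategy = factory.get_strategy('.py')    # specialized PythonParsingStrategy
sql_strategy = factory.get_strategy('.sql')  # FallbackParsingStrategy('sql')
print(sorted(factory.get_specialized_extensions()))
# get_strategy() re-enters _initialize_strategies() under the RLock, so a
# failed initialization can be retried on the next lookup.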
@@ -0,0 +1,487 @@
|
||||
"""
|
||||
TypeScript parsing strategy using tree-sitter - Optimized single-pass version.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Optional, Set
|
||||
from .base_strategy import ParsingStrategy
|
||||
from ..models import SymbolInfo, FileInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import tree_sitter
|
||||
from tree_sitter_typescript import language_typescript
|
||||
|
||||
|
||||
class TypeScriptParsingStrategy(ParsingStrategy):
|
||||
"""TypeScript-specific parsing strategy using tree-sitter - Single Pass Optimized."""
|
||||
|
||||
def __init__(self):
|
||||
self.ts_language = tree_sitter.Language(language_typescript())
|
||||
|
||||
def get_language_name(self) -> str:
|
||||
return "typescript"
|
||||
|
||||
def get_supported_extensions(self) -> List[str]:
|
||||
return ['.ts', '.tsx']
|
||||
|
||||
def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
|
||||
"""Parse TypeScript file using tree-sitter with single-pass optimization."""
|
||||
symbols = {}
|
||||
functions = []
|
||||
classes = []
|
||||
imports = []
|
||||
exports = []
|
||||
|
||||
# Symbol lookup index for O(1) access
|
||||
symbol_lookup = {} # name -> symbol_id mapping
|
||||
pending_calls: List[Tuple[str, str]] = []
|
||||
pending_call_set: Set[Tuple[str, str]] = set()
|
||||
variable_scopes: List[Dict[str, str]] = [{}]
|
||||
|
||||
parser = tree_sitter.Parser(self.ts_language)
|
||||
tree = parser.parse(content.encode('utf8'))
|
||||
|
||||
# Single-pass traversal that handles everything
|
||||
context = TraversalContext(
|
||||
content=content,
|
||||
file_path=file_path,
|
||||
symbols=symbols,
|
||||
functions=functions,
|
||||
classes=classes,
|
||||
imports=imports,
|
||||
exports=exports,
|
||||
symbol_lookup=symbol_lookup,
|
||||
pending_calls=pending_calls,
|
||||
pending_call_set=pending_call_set,
|
||||
variable_scopes=variable_scopes,
|
||||
)
|
||||
|
||||
self._traverse_node_single_pass(tree.root_node, context)
|
||||
|
||||
file_info = FileInfo(
|
||||
language=self.get_language_name(),
|
||||
line_count=len(content.splitlines()),
|
||||
symbols={"functions": functions, "classes": classes},
|
||||
imports=imports,
|
||||
exports=exports
|
||||
)
|
||||
|
||||
if context.pending_calls:
|
||||
file_info.pending_calls = context.pending_calls
|
||||
|
||||
return symbols, file_info
|
||||
|
||||
    def _traverse_node_single_pass(self, node, context: 'TraversalContext',
                                   current_function: Optional[str] = None,
                                   current_class: Optional[str] = None):
        """Single-pass traversal that extracts symbols and analyzes calls."""
        node_type = node.type

        # Handle function declarations
        if node_type == 'function_declaration':
            name = self._get_function_name(node, context.content)
            if name:
                symbol_id = self._create_symbol_id(context.file_path, name)
                signature = self._get_ts_function_signature(node, context.content)
                symbol_info = SymbolInfo(
                    type="function",
                    file=context.file_path,
                    line=node.start_point[0] + 1,
                    signature=signature
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[name] = symbol_id
                context.functions.append(name)

                # Traverse function body with updated context
                func_context = f"{context.file_path}::{name}"
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_function=func_context,
                                                    current_class=current_class)
                return

        # Handle class declarations
        elif node_type == 'class_declaration':
            name = self._get_class_name(node, context.content)
            if name:
                symbol_id = self._create_symbol_id(context.file_path, name)
                symbol_info = SymbolInfo(
                    type="class",
                    file=context.file_path,
                    line=node.start_point[0] + 1
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[name] = symbol_id
                context.classes.append(name)

                # Traverse class body with updated context
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_function=current_function,
                                                    current_class=name)
                return

        # Handle interface declarations
        elif node_type == 'interface_declaration':
            name = self._get_interface_name(node, context.content)
            if name:
                symbol_id = self._create_symbol_id(context.file_path, name)
                symbol_info = SymbolInfo(
                    type="interface",
                    file=context.file_path,
                    line=node.start_point[0] + 1
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[name] = symbol_id
                context.classes.append(name)  # Group interfaces with classes

                # Traverse interface body with updated context
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_function=current_function,
                                                    current_class=name)
                return

        # Handle method definitions
        elif node_type == 'method_definition':
            method_name = self._get_method_name(node, context.content)
            if method_name and current_class:
                full_name = f"{current_class}.{method_name}"
                symbol_id = self._create_symbol_id(context.file_path, full_name)
                signature = self._get_ts_function_signature(node, context.content)
                symbol_info = SymbolInfo(
                    type="method",
                    file=context.file_path,
                    line=node.start_point[0] + 1,
                    signature=signature
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[full_name] = symbol_id
                context.symbol_lookup[method_name] = symbol_id  # Also index by method name alone
                context.functions.append(full_name)

                # Traverse method body with updated context
                method_context = f"{context.file_path}::{full_name}"
                for child in node.children:
                    self._traverse_node_single_pass(child, context, current_function=method_context,
                                                    current_class=current_class)
                return

        # Handle variable declarations that define callable exports
        elif node_type in ['lexical_declaration', 'variable_statement']:
            handled = False
            for child in node.children:
                if child.type != 'variable_declarator':
                    continue
                name_node = child.child_by_field_name('name')
                value_node = child.child_by_field_name('value')
                if not name_node or not value_node:
                    continue

                if current_function is not None:
                    continue

                value_type = value_node.type
                if value_type not in [
                    'arrow_function',
                    'function',
                    'function_expression',
                    'call_expression',
                    'new_expression',
                    'identifier',
                    'member_expression',
                ]:
                    continue

                name = context.content[name_node.start_byte:name_node.end_byte]
                symbol_id = self._create_symbol_id(context.file_path, name)
                signature = context.content[child.start_byte:child.end_byte].split('\n')[0].strip()
                symbol_info = SymbolInfo(
                    type="function",
                    file=context.file_path,
                    line=child.start_point[0] + 1,
                    signature=signature
                )
                context.symbols[symbol_id] = symbol_info
                context.symbol_lookup[name] = symbol_id
                context.functions.append(name)
                handled = True

                if value_type in ['arrow_function', 'function', 'function_expression']:
                    func_context = f"{context.file_path}::{name}"
                    context.variable_scopes.append({})
                    self._traverse_node_single_pass(
                        value_node,
                        context,
                        current_function=func_context,
                        current_class=current_class
                    )
                    context.variable_scopes.pop()

            if handled:
                return

        # Handle function calls
        elif node_type == 'call_expression':
            caller = current_function or f"{context.file_path}:{node.start_point[0] + 1}"
            called_function = self._resolve_called_function(node, context, current_class)
            if caller and called_function:
                self._register_call(context, caller, called_function)
            if caller:
                self._collect_callback_arguments(node, context, caller, current_class, current_function)

        # Handle import declarations
        elif node.type == 'import_statement':
            import_text = context.content[node.start_byte:node.end_byte]
            context.imports.append(import_text)

        # Handle export declarations
        elif node.type in ['export_statement', 'export_default_declaration']:
            export_text = context.content[node.start_byte:node.end_byte]
            context.exports.append(export_text)

        # Continue traversing children for other node types
        for child in node.children:
            self._traverse_node_single_pass(child, context, current_function=current_function,
                                            current_class=current_class)

    def _register_call(self, context: 'TraversalContext', caller: str, called: str) -> None:
        if called in context.symbol_lookup:
            symbol_id = context.symbol_lookup[called]
            symbol_info = context.symbols[symbol_id]
            if caller not in symbol_info.called_by:
                symbol_info.called_by.append(caller)
            return

        key = (caller, called)
        if key not in context.pending_call_set:
            context.pending_call_set.add(key)
            context.pending_calls.append(key)
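
    # Editor's note: a hedged sketch, not from this diff. The cross-file step that
    # consumes pending_calls lives elsewhere; over a single context it could look
    # like this, draining pairs whose callee has since been indexed.
    def _resolve_pending_calls_sketch(self, context: 'TraversalContext') -> None:
        unresolved = []
        for caller, called in context.pending_calls:
            symbol_id = context.symbol_lookup.get(called)
            if symbol_id is None:
                unresolved.append((caller, called))  # callee still unknown
                continue
            symbol_info = context.symbols[symbol_id]
            if caller not in symbol_info.called_by:
                symbol_info.called_by.append(caller)
        context.pending_calls[:] = unresolved
        context.pending_call_set.intersection_update(unresolved)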

    def _collect_callback_arguments(
        self,
        node,
        context: 'TraversalContext',
        caller: str,
        current_class: Optional[str],
        current_function: Optional[str]
    ) -> None:
        arguments_node = node.child_by_field_name('arguments')
        if not arguments_node:
            return

        for argument in arguments_node.children:
            if not getattr(argument, "is_named", False):
                continue
            callback_name = self._resolve_argument_reference(argument, context, current_class)
            if callback_name:
                call_site = caller
                if current_function is None:
                    call_site = f"{context.file_path}:{argument.start_point[0] + 1}"
                self._register_call(context, call_site, callback_name)

    def _resolve_argument_reference(
        self,
        node,
        context: 'TraversalContext',
        current_class: Optional[str]
    ) -> Optional[str]:
        node_type = node.type

        if node_type == 'identifier':
            return context.content[node.start_byte:node.end_byte]

        if node_type == 'member_expression':
            property_node = node.child_by_field_name('property')
            if property_node is None:
                for child in node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            property_name = context.content[property_node.start_byte:property_node.end_byte]
            qualifier_node = node.child_by_field_name('object')
            qualifier = self._resolve_member_qualifier(
                qualifier_node,
                context,
                current_class
            )
            if not qualifier:
                for child in node.children:
                    if child is property_node:
                        continue
                    qualifier = self._resolve_member_qualifier(
                        child,
                        context,
                        current_class
                    )
                    if qualifier:
                        break
            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        return None

    def _resolve_called_function(
        self,
        node,
        context: 'TraversalContext',
        current_class: Optional[str]
    ) -> Optional[str]:
        function_node = node.child_by_field_name('function')
        if function_node is None and node.children:
            function_node = node.children[0]
        if function_node is None:
            return None

        if function_node.type == 'identifier':
            return context.content[function_node.start_byte:function_node.end_byte]

        if function_node.type == 'member_expression':
            property_node = function_node.child_by_field_name('property')
            if property_node is None:
                for child in function_node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            property_name = context.content[property_node.start_byte:property_node.end_byte]
            qualifier_node = function_node.child_by_field_name('object')
            qualifier = self._resolve_member_qualifier(
                qualifier_node,
                context,
                current_class
            )
            if not qualifier:
                for child in function_node.children:
                    if child is property_node:
                        continue
                    qualifier = self._resolve_member_qualifier(
                        child,
                        context,
                        current_class
                    )
                    if qualifier:
                        break
            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        return None

    def _resolve_member_qualifier(
        self,
        node,
        context: 'TraversalContext',
        current_class: Optional[str]
    ) -> Optional[str]:
        if node is None:
            return None

        node_type = node.type
        if node_type == 'this':
            return current_class

        if node_type == 'identifier':
            return context.content[node.start_byte:node.end_byte]

        if node_type == 'member_expression':
            property_node = node.child_by_field_name('property')
            if property_node is None:
                for child in node.children:
                    if child.type in ['property_identifier', 'identifier']:
                        property_node = child
                        break
            if property_node is None:
                return None

            qualifier = self._resolve_member_qualifier(
                node.child_by_field_name('object'),
                context,
                current_class
            )
            if not qualifier:
                for child in node.children:
                    if child is property_node:
                        continue
                    qualifier = self._resolve_member_qualifier(
                        child,
                        context,
                        current_class
                    )
                    if qualifier:
                        break

            property_name = context.content[property_node.start_byte:property_node.end_byte]
            if qualifier:
                return f"{qualifier}.{property_name}"
            return property_name

        return None

    def _get_function_name(self, node, content: str) -> Optional[str]:
        """Extract function name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_class_name(self, node, content: str) -> Optional[str]:
        """Extract class name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_interface_name(self, node, content: str) -> Optional[str]:
        """Extract interface name from tree-sitter node."""
        for child in node.children:
            if child.type == 'type_identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_method_name(self, node, content: str) -> Optional[str]:
        """Extract method name from tree-sitter node."""
        for child in node.children:
            if child.type == 'property_identifier':
                return content[child.start_byte:child.end_byte]
        return None

    def _get_ts_function_signature(self, node, content: str) -> str:
        """Extract TypeScript function signature."""
        return content[node.start_byte:node.end_byte].split('\n')[0].strip()
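

# Editor's note: an illustrative sketch, not part of the original file. The name
# helpers above slice the source string with tree-sitter byte offsets; those
# offsets index the UTF-8 *bytes*, so for non-ASCII sources the safe form is to
# slice the encoded bytes and decode, as below. Assumes the tree_sitter and
# tree_sitter_typescript packages are installed.
def _demo_byte_slice(source: str) -> str:
    import tree_sitter
    import tree_sitter_typescript
    parser = tree_sitter.Parser(tree_sitter.Language(tree_sitter_typescript.language_typescript()))
    tree = parser.parse(source.encode('utf8'))
    first = tree.root_node.children[0]
    # Slice the encoded bytes, then decode, to stay aligned with byte offsets
    return source.encode('utf8')[first.start_byte:first.end_byte].decode('utf8')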


class TraversalContext:
    """Context object to pass state during single-pass traversal."""

    def __init__(
        self,
        content: str,
        file_path: str,
        symbols: Dict,
        functions: List,
        classes: List,
        imports: List,
        exports: List,
        symbol_lookup: Dict,
        pending_calls: List[Tuple[str, str]],
        pending_call_set: Set[Tuple[str, str]],
        variable_scopes: List[Dict[str, str]],
    ):
        self.content = content
        self.file_path = file_path
        self.symbols = symbols
        self.functions = functions
        self.classes = classes
        self.imports = imports
        self.exports = exports
        self.symbol_lookup = symbol_lookup
        self.pending_calls = pending_calls
        self.pending_call_set = pending_call_set
        self.variable_scopes = variable_scopes
@@ -0,0 +1,99 @@
"""
Zig parsing strategy using tree-sitter.
"""

import logging
from typing import Dict, List, Tuple, Optional
from .base_strategy import ParsingStrategy
from ..models import SymbolInfo, FileInfo

logger = logging.getLogger(__name__)

import tree_sitter
from tree_sitter_zig import language


class ZigParsingStrategy(ParsingStrategy):
    """Zig parsing strategy using tree-sitter."""

    def __init__(self):
        self.zig_language = tree_sitter.Language(language())

    def get_language_name(self) -> str:
        return "zig"

    def get_supported_extensions(self) -> List[str]:
        return ['.zig', '.zon']

    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Zig file using tree-sitter."""
        return self._tree_sitter_parse(file_path, content)

    def _tree_sitter_parse(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
        """Parse Zig file using tree-sitter."""
        symbols = {}
        functions = []
        classes = []
        imports = []

        parser = tree_sitter.Parser(self.zig_language)
        tree = parser.parse(content.encode('utf8'))

        # Phase 1: Extract symbols using tree-sitter
        self._traverse_zig_node(tree.root_node, content, file_path, symbols, functions, classes, imports)

        file_info = FileInfo(
            language=self.get_language_name(),
            line_count=len(content.splitlines()),
            symbols={"functions": functions, "classes": classes},
            imports=imports
        )

        return symbols, file_info

    def _traverse_zig_node(self, node, content: str, file_path: str, symbols: Dict, functions: List, classes: List, imports: List):
        """Traverse Zig AST node and extract symbols."""
        if node.type == 'function_declaration':
            func_name = self._extract_zig_function_name_from_node(node, content)
            if func_name:
                line_number = self._extract_line_number(content, node.start_byte)
                symbol_id = self._create_symbol_id(file_path, func_name)
                symbols[symbol_id] = SymbolInfo(
                    type="function",
                    file=file_path,
                    line=line_number,
                    signature=self._safe_extract_text(content, node.start_byte, node.end_byte)
                )
                functions.append(func_name)

        elif node.type in ['struct_declaration', 'union_declaration', 'enum_declaration']:
            type_name = self._extract_zig_type_name_from_node(node, content)
            if type_name:
                line_number = self._extract_line_number(content, node.start_byte)
                symbol_id = self._create_symbol_id(file_path, type_name)
                symbols[symbol_id] = SymbolInfo(
                    type=node.type.replace('_declaration', ''),
                    file=file_path,
                    line=line_number
                )
                classes.append(type_name)

        # Recurse through children
        for child in node.children:
            self._traverse_zig_node(child, content, file_path, symbols, functions, classes, imports)

    def _extract_zig_function_name_from_node(self, node, content: str) -> Optional[str]:
        """Extract function name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return self._safe_extract_text(content, child.start_byte, child.end_byte)
        return None

    def _extract_zig_type_name_from_node(self, node, content: str) -> Optional[str]:
        """Extract type name from tree-sitter node."""
        for child in node.children:
            if child.type == 'identifier':
                return self._safe_extract_text(content, child.start_byte, child.end_byte)
        return None
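

# Editor's illustration, not part of the original file: a minimal end-to-end run
# of the strategy, assuming tree-sitter-zig is installed and that the grammar
# labels functions as 'function_declaration' (as the traversal above expects).
def _demo_zig_parse() -> None:
    strategy = ZigParsingStrategy()
    source = "pub fn add(a: i32, b: i32) i32 { return a + b; }\n"
    symbols, file_info = strategy.parse_file("demo.zig", source)
    assert "add" in file_info.symbols["functions"]
    assert file_info.line_count == 1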
@@ -0,0 +1,514 @@
"""
Project Settings Management

This module provides functionality for managing project settings and persistent data
for the Code Index MCP server.
"""
import os
import json

import tempfile
import hashlib

from datetime import datetime

from .constants import (
    SETTINGS_DIR, CONFIG_FILE, INDEX_FILE
)
from .search.base import SearchStrategy
from .search.ugrep import UgrepStrategy
from .search.ripgrep import RipgrepStrategy
from .search.ag import AgStrategy
from .search.grep import GrepStrategy
from .search.basic import BasicSearchStrategy


# Prioritized list of search strategies
SEARCH_STRATEGY_CLASSES = [
    UgrepStrategy,
    RipgrepStrategy,
    AgStrategy,
    GrepStrategy,
    BasicSearchStrategy,
]


def _get_available_strategies() -> list[SearchStrategy]:
    """
    Detect and return a list of available search strategy instances,
    ordered by preference.
    """
    available = []
    for strategy_class in SEARCH_STRATEGY_CLASSES:
        try:
            strategy = strategy_class()
            if strategy.is_available():
                available.append(strategy)
        except Exception:
            pass
    return available
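

# Editor's illustration, not part of the original file: listing the detected
# tools. The order mirrors SEARCH_STRATEGY_CLASSES, so index 0 is the preferred
# tool, and BasicSearchStrategy guarantees the list is never empty.
def _demo_available_strategies() -> None:
    names = [s.name for s in _get_available_strategies()]
    print(f"available search tools (preferred first): {names}")
    assert 'basic' in names  # the pure-Python fallback is always available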


class ProjectSettings:
    """Class for managing project settings and index data"""

    def __init__(self, base_path, skip_load=False):
        """Initialize project settings

        Args:
            base_path (str): Base path of the project
            skip_load (bool): Whether to skip loading files
        """
        self.base_path = base_path
        self.skip_load = skip_load
        self.available_strategies: list[SearchStrategy] = []
        self.refresh_available_strategies()

        # Ensure the base path of the temporary directory exists
        try:
            # Get system temporary directory
            system_temp = tempfile.gettempdir()

            # Check if the system temporary directory exists and is writable
            if not os.path.exists(system_temp):
                # Try using project directory as fallback if available
                if base_path and os.path.exists(base_path):
                    system_temp = base_path
                else:
                    # Use user's home directory as last resort
                    system_temp = os.path.expanduser("~")

            if not os.access(system_temp, os.W_OK):
                # Try using project directory as fallback if available
                if base_path and os.path.exists(base_path) and os.access(base_path, os.W_OK):
                    system_temp = base_path
                else:
                    # Use user's home directory as last resort
                    system_temp = os.path.expanduser("~")

            # Create code_indexer directory
            temp_base_dir = os.path.join(system_temp, SETTINGS_DIR)

            if not os.path.exists(temp_base_dir):
                os.makedirs(temp_base_dir, exist_ok=True)
        except Exception:
            # If unable to create temporary directory, use .code_indexer in project directory if available
            if base_path and os.path.exists(base_path):
                temp_base_dir = os.path.join(base_path, ".code_indexer")
            else:
                # Use home directory as last resort
                temp_base_dir = os.path.join(os.path.expanduser("~"), ".code_indexer")

            if not os.path.exists(temp_base_dir):
                os.makedirs(temp_base_dir, exist_ok=True)

        # Use system temporary directory to store index data
        try:
            if base_path:
                # Use hash of project path as unique identifier
                path_hash = hashlib.md5(base_path.encode()).hexdigest()
                self.settings_path = os.path.join(temp_base_dir, path_hash)
            else:
                # If no base path provided, use a default directory
                self.settings_path = os.path.join(temp_base_dir, "default")

            self.ensure_settings_dir()
        except Exception:
            # If error occurs, use .code_indexer in project or home directory as fallback
            if base_path and os.path.exists(base_path):
                fallback_dir = os.path.join(base_path, ".code_indexer",
                                            hashlib.md5(base_path.encode()).hexdigest())
            else:
                fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
                                            "default" if not base_path else hashlib.md5(base_path.encode()).hexdigest())

            self.settings_path = fallback_dir
            if not os.path.exists(fallback_dir):
                os.makedirs(fallback_dir, exist_ok=True)

    def ensure_settings_dir(self):
        """Ensure settings directory exists"""
        try:
            if not os.path.exists(self.settings_path):
                # Create directory structure
                os.makedirs(self.settings_path, exist_ok=True)

            # Check if directory is writable
            if not os.access(self.settings_path, os.W_OK):
                # If directory is not writable, use .code_indexer in project or home directory as fallback
                if self.base_path and os.path.exists(self.base_path) and os.access(self.base_path, os.W_OK):
                    fallback_dir = os.path.join(self.base_path, ".code_indexer",
                                                os.path.basename(self.settings_path))
                else:
                    fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
                                                os.path.basename(self.settings_path))

                self.settings_path = fallback_dir
                if not os.path.exists(fallback_dir):
                    os.makedirs(fallback_dir, exist_ok=True)
        except Exception:
            # If unable to create settings directory, use .code_indexer in project or home directory
            if self.base_path and os.path.exists(self.base_path):
                fallback_dir = os.path.join(self.base_path, ".code_indexer",
                                            hashlib.md5(self.base_path.encode()).hexdigest())
            else:
                fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
                                            "default" if not self.base_path else hashlib.md5(self.base_path.encode()).hexdigest())

            self.settings_path = fallback_dir
            if not os.path.exists(fallback_dir):
                os.makedirs(fallback_dir, exist_ok=True)

    def get_config_path(self):
        """Get the path to the configuration file"""
        try:
            path = os.path.join(self.settings_path, CONFIG_FILE)
            # Ensure directory exists
            os.makedirs(os.path.dirname(path), exist_ok=True)
            return path
        except Exception:
            # If error occurs, use file in project or home directory as fallback
            if self.base_path and os.path.exists(self.base_path):
                return os.path.join(self.base_path, CONFIG_FILE)
            else:
                return os.path.join(os.path.expanduser("~"), CONFIG_FILE)

    def _get_timestamp(self):
        """Get current timestamp"""
        return datetime.now().isoformat()

    def save_config(self, config):
        """Save configuration data

        Args:
            config (dict): Configuration data
        """
        try:
            config_path = self.get_config_path()
            # Add timestamp
            config['last_updated'] = self._get_timestamp()

            # Ensure directory exists
            os.makedirs(os.path.dirname(config_path), exist_ok=True)

            with open(config_path, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2, ensure_ascii=False)

            return config
        except Exception:
            return config

    def load_config(self):
        """Load configuration data

        Returns:
            dict: Configuration data, or empty dict if file doesn't exist
        """
        # If skip_load is set, return empty dict directly
        if self.skip_load:
            return {}

        try:
            config_path = self.get_config_path()
            if os.path.exists(config_path):
                try:
                    with open(config_path, 'r', encoding='utf-8') as f:
                        config = json.load(f)
                    return config
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # If file is corrupted, return empty dict
                    return {}
            return {}
        except Exception:
            return {}

    def save_index(self, index_data):
        """Save code index in JSON format

        Args:
            index_data: Index data as dictionary or JSON string
        """
        try:
            index_path = self.get_index_path()

            # Ensure directory exists
            dir_path = os.path.dirname(index_path)
            if not os.path.exists(dir_path):
                os.makedirs(dir_path, exist_ok=True)

            # Check if directory is writable
            if not os.access(dir_path, os.W_OK):
                # Use project or home directory as fallback
                if self.base_path and os.path.exists(self.base_path):
                    index_path = os.path.join(self.base_path, INDEX_FILE)
                else:
                    index_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)

            # Convert to JSON string if it's an object with to_json method
            if hasattr(index_data, 'to_json'):
                json_data = index_data.to_json()
            elif isinstance(index_data, str):
                json_data = index_data
            else:
                # Assume it's a dictionary and convert to JSON
                json_data = json.dumps(index_data, indent=2, default=str)

            with open(index_path, 'w', encoding='utf-8') as f:
                f.write(json_data)

        except Exception:
            # Try saving to project or home directory
            try:
                if self.base_path and os.path.exists(self.base_path):
                    fallback_path = os.path.join(self.base_path, INDEX_FILE)
                else:
                    fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)

                # Convert to JSON string if it's an object with to_json method
                if hasattr(index_data, 'to_json'):
                    json_data = index_data.to_json()
                elif isinstance(index_data, str):
                    json_data = index_data
                else:
                    json_data = json.dumps(index_data, indent=2, default=str)

                with open(fallback_path, 'w', encoding='utf-8') as f:
                    f.write(json_data)
            except Exception:
                pass

    def load_index(self):
        """Load code index from JSON format

        Returns:
            dict: Index data, or None if file doesn't exist or has errors
        """
        # If skip_load is set, return None directly
        if self.skip_load:
            return None

        try:
            index_path = self.get_index_path()

            if os.path.exists(index_path):
                try:
                    with open(index_path, 'r', encoding='utf-8') as f:
                        index_data = json.load(f)
                    return index_data
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # If file is corrupted, return None
                    return None
                except Exception:
                    return None
            else:
                # Try loading from project or home directory
                if self.base_path and os.path.exists(self.base_path):
                    fallback_path = os.path.join(self.base_path, INDEX_FILE)
                else:
                    fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE)
                if os.path.exists(fallback_path):
                    try:
                        with open(fallback_path, 'r', encoding='utf-8') as f:
                            index_data = json.load(f)
                        return index_data
                    except Exception:
                        pass
                return None
        except Exception:
            return None

    def cleanup_legacy_files(self) -> None:
        """Clean up any legacy index files found."""
        try:
            legacy_files = [
                os.path.join(self.settings_path, "file_index.pickle"),
                os.path.join(self.settings_path, "content_cache.pickle"),
                os.path.join(self.settings_path, INDEX_FILE)  # Legacy JSON
            ]

            for legacy_file in legacy_files:
                if os.path.exists(legacy_file):
                    try:
                        os.remove(legacy_file)
                    except Exception:
                        pass
        except Exception:
            pass

    def clear(self):
        """Clear config and index files"""
        try:
            if os.path.exists(self.settings_path):
                # Check if directory is writable
                if not os.access(self.settings_path, os.W_OK):
                    return

                # Delete specific files only (config.json and index.json)
                files_to_delete = [CONFIG_FILE, INDEX_FILE]

                for filename in files_to_delete:
                    file_path = os.path.join(self.settings_path, filename)
                    try:
                        if os.path.isfile(file_path):
                            os.unlink(file_path)
                    except Exception:
                        pass
        except Exception:
            pass

    def get_stats(self):
        """Get statistics for the settings directory

        Returns:
            dict: Dictionary containing file sizes and update times
        """
        try:
            stats = {
                'settings_path': self.settings_path,
                'exists': os.path.exists(self.settings_path),
                'is_directory': os.path.isdir(self.settings_path) if os.path.exists(self.settings_path) else False,
                'writable': os.access(self.settings_path, os.W_OK) if os.path.exists(self.settings_path) else False,
                'files': {},
                'temp_dir': tempfile.gettempdir(),
                'base_path': self.base_path
            }

            if stats['exists'] and stats['is_directory']:
                try:
                    # Get all files in the directory
                    all_files = os.listdir(self.settings_path)
                    stats['all_files'] = all_files

                    # Get details for specific files
                    for filename in [CONFIG_FILE, INDEX_FILE]:
                        file_path = os.path.join(self.settings_path, filename)
                        if os.path.exists(file_path):
                            try:
                                file_stats = os.stat(file_path)
                                stats['files'][filename] = {
                                    'path': file_path,
                                    'size_bytes': file_stats.st_size,
                                    'last_modified': datetime.fromtimestamp(file_stats.st_mtime).isoformat(),
                                    'readable': os.access(file_path, os.R_OK),
                                    'writable': os.access(file_path, os.W_OK)
                                }
                            except Exception as e:
                                stats['files'][filename] = {
                                    'path': file_path,
                                    'error': str(e)
                                }
                except Exception as e:
                    stats['list_error'] = str(e)

            # Check fallback path
            if self.base_path and os.path.exists(self.base_path):
                fallback_dir = os.path.join(self.base_path, ".code_indexer")
            else:
                fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer")
            stats['fallback_path'] = fallback_dir
            stats['fallback_exists'] = os.path.exists(fallback_dir)
            stats['fallback_is_directory'] = os.path.isdir(fallback_dir) if os.path.exists(fallback_dir) else False

            return stats
        except Exception as e:
            return {
                'error': str(e),
                'settings_path': self.settings_path,
                'temp_dir': tempfile.gettempdir(),
                'base_path': self.base_path
            }

    def get_search_tools_config(self):
        """Get the configuration of available search tools.

        Returns:
            dict: A dictionary containing the list of available tool names.
        """
        return {
            "available_tools": [s.name for s in self.available_strategies],
            "preferred_tool": self.get_preferred_search_tool().name if self.available_strategies else None
        }

    def get_preferred_search_tool(self) -> SearchStrategy | None:
        """Get the preferred search tool based on availability and priority.

        Returns:
            SearchStrategy: An instance of the preferred search strategy, or None.
        """
        if not self.available_strategies:
            self.refresh_available_strategies()

        return self.available_strategies[0] if self.available_strategies else None

    def refresh_available_strategies(self):
        """
        Force a refresh of the available search tools list.
        """
        self.available_strategies = _get_available_strategies()

    def get_file_watcher_config(self) -> dict:
        """
        Get file watcher specific configuration.

        Returns:
            dict: File watcher configuration with defaults
        """
        config = self.load_config()
        default_config = {
            "enabled": True,
            "debounce_seconds": 6.0,
            "additional_exclude_patterns": [],
            "monitored_extensions": [],  # Empty = use all supported extensions
            "exclude_patterns": [
                ".git", ".svn", ".hg",
                "node_modules", "__pycache__", ".venv", "venv",
                ".DS_Store", "Thumbs.db",
                "dist", "build", "target", ".idea", ".vscode",
                ".pytest_cache", ".coverage", ".tox",
                "bin", "obj"
            ]
        }

        # Merge with loaded config
        file_watcher_config = config.get("file_watcher", {})
        for key, default_value in default_config.items():
            if key not in file_watcher_config:
                file_watcher_config[key] = default_value

        return file_watcher_config
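
    # Editor's illustration, not part of the original file: the merge above only
    # fills in missing keys, so stored values win while newly added defaults
    # appear automatically. A self-contained sketch of the same merge:
    def _demo_file_watcher_merge_sketch(self) -> dict:
        stored = {"debounce_seconds": 2.0}   # hypothetical user-set value
        defaults = {"enabled": True, "debounce_seconds": 6.0}
        merged = dict(stored)
        for key, default_value in defaults.items():
            if key not in merged:
                merged[key] = default_value
        # merged == {"debounce_seconds": 2.0, "enabled": True}
        return merged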

    def update_file_watcher_config(self, updates: dict) -> None:
        """
        Update file watcher configuration.

        Args:
            updates: Dictionary of configuration updates
        """
        config = self.load_config()
        if "file_watcher" not in config:
            config["file_watcher"] = self.get_file_watcher_config()

        config["file_watcher"].update(updates)
        self.save_config(config)
@@ -0,0 +1 @@
"""Search strategies package."""
145
reference/code-index-mcp-master/src/code_index_mcp/search/ag.py
Normal file
@@ -0,0 +1,145 @@
"""
Search Strategy for The Silver Searcher (ag)
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple

from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern


class AgStrategy(SearchStrategy):
    """Search strategy using 'The Silver Searcher' (ag) command-line tool."""

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'ag'

    def is_available(self) -> bool:
        """Check if 'ag' command is available on the system."""
        return shutil.which('ag') is not None

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using The Silver Searcher (ag).

        Args:
            pattern: The search pattern
            base_path: Directory to search in
            case_sensitive: Whether search is case sensitive
            context_lines: Number of context lines to show
            file_pattern: File pattern to filter
            fuzzy: Enable word boundary matching (not true fuzzy search)
            regex: Enable regex pattern matching
        """
        # ag prints line numbers and groups by file by default.
        # --noheading keeps the output format consistent with the other tools.
        cmd = ['ag', '--noheading']

        if not case_sensitive:
            cmd.append('--ignore-case')

        # Prepare search pattern
        search_pattern = pattern

        if regex:
            # Use regex mode - check for safety first
            if not is_safe_regex_pattern(pattern):
                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
            # Don't add --literal, use regex mode
        elif fuzzy:
            # Use word boundary pattern for partial matching
            search_pattern = create_word_boundary_pattern(pattern)
        else:
            # Use literal string search
            cmd.append('--literal')

        if context_lines > 0:
            cmd.extend(['--before', str(context_lines)])
            cmd.extend(['--after', str(context_lines)])

        if file_pattern:
            # Convert glob pattern to regex pattern for ag's -G parameter;
            # ag's -G expects regex, not glob patterns
            regex_pattern = file_pattern
            if '*' in file_pattern and not file_pattern.startswith('^') and not file_pattern.endswith('$'):
                # Convert common glob patterns to regex
                if file_pattern.startswith('*.'):
                    # Pattern like "*.py" -> "\.py$"
                    extension = file_pattern[2:]  # Remove "*."
                    regex_pattern = f'\\.{extension}$'
                elif file_pattern.endswith('*'):
                    # Pattern like "test_*" -> "^test_.*"
                    prefix = file_pattern[:-1]  # Remove "*"
                    regex_pattern = f'^{prefix}.*'
                elif '*' in file_pattern:
                    # Pattern like "test_*.py" -> "^test_.*\.py$"
                    # First escape dots, then replace * with .*
                    regex_pattern = file_pattern.replace('.', '\\.')
                    regex_pattern = regex_pattern.replace('*', '.*')
                    if not regex_pattern.startswith('^'):
                        regex_pattern = '^' + regex_pattern
                    if not regex_pattern.endswith('$'):
                        regex_pattern = regex_pattern + '$'

            cmd.extend(['-G', regex_pattern])

        processed_patterns = set()
        exclude_dirs = getattr(self, 'exclude_dirs', [])
        exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])

        for directory in exclude_dirs:
            normalized = directory.strip()
            if not normalized or normalized in processed_patterns:
                continue
            cmd.extend(['--ignore', normalized])
            processed_patterns.add(normalized)

        # Note: a distinct loop variable avoids shadowing the `pattern` parameter
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
            if not normalized or normalized in processed_patterns:
                continue
            if normalized.startswith('!'):
                normalized = normalized[1:]
            cmd.extend(['--ignore', normalized])
            processed_patterns.add(normalized)

        # Add -- to treat pattern as a literal argument, preventing injection
        cmd.append('--')
        cmd.append(search_pattern)
        cmd.append('.')  # Use current directory since we set cwd=base_path

        try:
            # ag exits with 1 if no matches are found, which is not an error.
            # It exits with 0 on success (match found). Other codes are errors.
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                check=False,  # Do not raise CalledProcessError on non-zero exit
                cwd=base_path  # Set working directory to project base path for proper pattern resolution
            )
            if process.returncode > 1:
                raise RuntimeError(f"ag failed with exit code {process.returncode}: {process.stderr}")

            return parse_search_output(process.stdout, base_path)

        except FileNotFoundError:
            raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.")
        except Exception as e:
            # Re-raise other potential exceptions like permission errors
            raise RuntimeError(f"An error occurred while running ag: {e}")
@@ -0,0 +1,234 @@
"""
Search Strategies for Code Indexer

This module defines the abstract base class for search strategies and contains
concrete implementations for different search tools like ugrep, ripgrep, etc.
"""
import os
import re
import shutil
import subprocess
import sys
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING

from ..indexing.qualified_names import normalize_file_path

if TYPE_CHECKING:  # pragma: no cover
    from ..utils.file_filter import FileFilter


def parse_search_output(
    output: str,
    base_path: str
) -> Dict[str, List[Tuple[int, str]]]:
    """
    Parse the output of command-line search tools (grep, ag, rg).

    Args:
        output: The raw output from the command-line tool.
        base_path: The base path of the project to make file paths relative.

    Returns:
        A dictionary where keys are file paths and values are lists of (line_number, line_content) tuples.
    """
    results = {}
    # Normalize base_path to ensure consistent path separation
    normalized_base_path = os.path.normpath(base_path)

    for line in output.strip().split('\n'):
        if not line.strip():
            continue
        try:
            # Try to parse as a matched line first (format: path:linenum:content)
            parts = line.split(':', 2)

            # Check if this might be a context line (format: path-linenum-content)
            # Context lines use '-' as separator in grep/ag output
            if len(parts) < 3 and '-' in line:
                # Try to parse as context line
                # Match pattern: path-linenum-content or path-linenum-\tcontent
                match = re.match(r'^(.*?)-(\d+)[-\t](.*)$', line)
                if match:
                    file_path_abs = match.group(1)
                    line_number_str = match.group(2)
                    content = match.group(3)
                else:
                    # If regex doesn't match, skip this line
                    continue
            elif sys.platform == "win32" and len(parts) >= 3 and len(parts[0]) == 1 and parts[1].startswith('\\'):
                # Handle Windows paths with drive letter (e.g., C:\path\file.txt)
                file_path_abs = f"{parts[0]}:{parts[1]}"
                line_number_str = parts[2].split(':', 1)[0]
                content = parts[2].split(':', 1)[1] if ':' in parts[2] else parts[2]
            elif len(parts) >= 3:
                # Standard format: path:linenum:content
                file_path_abs = parts[0]
                line_number_str = parts[1]
                content = parts[2]
            else:
                # Line doesn't match any expected format
                continue

            line_number = int(line_number_str)

            # If the path is already relative (doesn't start with /), keep it as is.
            # Otherwise, make it relative to the base_path.
            if os.path.isabs(file_path_abs):
                relative_path = os.path.relpath(file_path_abs, normalized_base_path)
            else:
                # Path is already relative, use it as is
                relative_path = file_path_abs

            # Normalize path separators for consistency
            relative_path = normalize_file_path(relative_path)

            if relative_path not in results:
                results[relative_path] = []
            results[relative_path].append((line_number, content))
        except (ValueError, IndexError):
            # Silently ignore lines that don't match the expected format.
            # This can happen with summary lines or other tool-specific output.
            pass

    return results
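

# Editor's illustration, not part of the original file: feeding the parser two
# grep-style lines shows the grouped, 1-indexed result shape (this assumes
# normalize_file_path leaves forward-slash relative paths unchanged).
def _demo_parse_search_output() -> None:
    raw = "src/app.py:3:def main():\nsrc/app.py:9:    main()\n"
    parsed = parse_search_output(raw, "/project")
    assert parsed == {"src/app.py": [(3, "def main():"), (9, "    main()")]}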


def create_word_boundary_pattern(pattern: str) -> str:
    """
    Create word boundary patterns for partial matching.
    This is NOT true fuzzy search, but allows matching words at boundaries.

    Args:
        pattern: Original search pattern

    Returns:
        Word boundary pattern for regex matching
    """
    # Escape any regex special characters to make them literal
    escaped = re.escape(pattern)

    # Create a word boundary pattern that matches:
    # 1. Word at start of a word boundary (e.g., "test" in "testing")
    # 2. Word at end of a word boundary (e.g., "test" in "mytest")
    # 3. Whole word (e.g., "test" as a standalone word)
    if len(pattern) >= 3:  # Only for patterns of reasonable length
        # This pattern allows partial matches at word boundaries
        boundary_pattern = f"\\b{escaped}|{escaped}\\b"
    else:
        # For short patterns, require full word boundaries to avoid too many matches
        boundary_pattern = f"\\b{escaped}\\b"

    return boundary_pattern
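

# Editor's illustration, not part of the original file: a minimal check of the
# boundary pattern for a query of length >= 3.
def _demo_create_word_boundary_pattern() -> None:
    pat = create_word_boundary_pattern("test")   # r"\btest|test\b"
    matches = [s for s in ("testing", "mytest", "test", "attestation")
               if re.search(pat, s)]
    assert matches == ["testing", "mytest", "test"]  # no boundary match inside "attestation"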


def is_safe_regex_pattern(pattern: str) -> bool:
    """
    Check if a pattern appears to be a safe regex pattern.

    Args:
        pattern: The search pattern to check

    Returns:
        True if the pattern looks like a safe regex, False otherwise
    """
    # Strong indicators of regex intent
    strong_regex_indicators = ['|', '(', ')', '[', ']', '^', '$']

    # Weaker indicators that need context
    weak_regex_indicators = ['.', '*', '+', '?']

    # Check for strong regex indicators
    has_strong_regex = any(char in pattern for char in strong_regex_indicators)

    # Check for weak indicators with context
    has_weak_regex = any(char in pattern for char in weak_regex_indicators)

    # If the pattern has strong indicators, it is likely a regex
    if has_strong_regex:
        # Still check for dangerous constructs
        dangerous_patterns = [
            r'(.+)+',     # Nested quantifiers
            r'(.*)*',     # Nested stars
            r'(.{0,})+',  # Potential ReDoS patterns
        ]

        has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns)
        return not has_dangerous_patterns

    # If there are only weak indicators, more context is needed
    if has_weak_regex:
        # Patterns like ".*", ".+", "file.*py" look like regex,
        # but "file.txt", "test.py" look like literal filenames
        regex_like_patterns = [
            r'\.\*',        # .*
            r'\.\+',        # .+
            r'\.\w*\*',     # .something*
            r'\*\.',        # *.
            r'\w+\.\*\w*',  # word.*word
        ]

        return any(re.search(regex_pattern, pattern) for regex_pattern in regex_like_patterns)

    return False
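

# Editor's illustration, not part of the original file: how the heuristic
# classifies a few representative inputs.
def _demo_is_safe_regex_pattern() -> None:
    assert is_safe_regex_pattern("foo|bar")       # strong indicator: '|'
    assert is_safe_regex_pattern("file.*py")      # weak indicators in a regex-like shape
    assert not is_safe_regex_pattern("file.txt")  # reads as a literal filename
    assert not is_safe_regex_pattern("x(.+)+y")   # nested quantifier is rejected as unsafe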


class SearchStrategy(ABC):
    """
    Abstract base class for a search strategy.

    Each strategy is responsible for searching code using a specific tool or method.
    """

    def configure_excludes(self, file_filter: Optional['FileFilter']) -> None:
        """Configure shared exclusion settings for the strategy."""
        self.file_filter = file_filter
        if file_filter:
            self.exclude_dirs = sorted(set(file_filter.exclude_dirs))
            self.exclude_file_patterns = sorted(set(file_filter.exclude_files))
        else:
            self.exclude_dirs = []
            self.exclude_file_patterns = []

    @property
    @abstractmethod
    def name(self) -> str:
        """The name of the search tool (e.g., 'ugrep', 'ripgrep')."""
        pass

    @abstractmethod
    def is_available(self) -> bool:
        """
        Check if the search tool for this strategy is available on the system.

        Returns:
            True if the tool is available, False otherwise.
        """
        pass

    @abstractmethod
    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using the specific strategy.

        Args:
            pattern: The search pattern.
            base_path: The root directory to search in.
            case_sensitive: Whether the search is case-sensitive.
            context_lines: Number of context lines to show around each match.
            file_pattern: Glob pattern to filter files (e.g., "*.py").
            fuzzy: Whether to enable fuzzy/partial matching.
            regex: Whether to enable regex pattern matching.

        Returns:
            A dictionary mapping filenames to lists of (line_number, line_content) tuples.
        """
        pass
@@ -0,0 +1,116 @@
"""
Basic, pure-Python search strategy.
"""
import fnmatch
import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple

from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern


class BasicSearchStrategy(SearchStrategy):
    """
    A basic, pure-Python search strategy.

    This strategy iterates through files and lines manually. It's a fallback
    for when no advanced command-line search tools are available.
    It does not support context lines.
    """

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'basic'

    def is_available(self) -> bool:
        """This basic strategy is always available."""
        return True

    def _matches_pattern(self, filename: str, pattern: str) -> bool:
        """Check if filename matches the glob pattern."""
        if not pattern:
            return True

        # Handle simple cases efficiently
        if pattern.startswith('*') and not any(c in pattern[1:] for c in '*?[]{}'):
            return filename.endswith(pattern[1:])

        # Use fnmatch for more complex patterns
        return fnmatch.fnmatch(filename, pattern)

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a basic, line-by-line search.

        Note: This implementation does not support context_lines.

        Args:
            pattern: The search pattern
            base_path: Directory to search in
            case_sensitive: Whether search is case sensitive
            context_lines: Number of context lines (not supported)
            file_pattern: File pattern to filter
            fuzzy: Enable word boundary matching
            regex: Enable regex pattern matching
        """
        results: Dict[str, List[Tuple[int, str]]] = {}

        flags = 0 if case_sensitive else re.IGNORECASE

        try:
            if regex:
                # Use regex mode - check for safety first
                if not is_safe_regex_pattern(pattern):
                    raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
                search_regex = re.compile(pattern, flags)
            elif fuzzy:
                # Use word boundary pattern for partial matching
                search_pattern = create_word_boundary_pattern(pattern)
                search_regex = re.compile(search_pattern, flags)
            else:
                # Use literal string search
                search_regex = re.compile(re.escape(pattern), flags)
        except re.error as e:
            raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}")

        file_filter = getattr(self, 'file_filter', None)
        base = Path(base_path)

        for root, dirs, files in os.walk(base_path):
            if file_filter:
                dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)]

            for file in files:
                if file_pattern and not self._matches_pattern(file, file_pattern):
                    continue

                file_path = Path(root) / file

                if file_filter and not file_filter.should_process_path(file_path, base):
                    continue

                rel_path = os.path.relpath(file_path, base_path)

                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        for line_num, line in enumerate(f, 1):
                            if search_regex.search(line):
                                content = line.rstrip('\n')
                                if rel_path not in results:
                                    results[rel_path] = []
                                results[rel_path].append((line_num, content))
                except (UnicodeDecodeError, PermissionError, OSError):
                    continue
                except Exception:
                    continue

        return results
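

# Editor's illustration, not part of the original file: a minimal end-to-end run
# of the fallback strategy against a throwaway directory.
def _demo_basic_search() -> None:
    import tempfile
    with tempfile.TemporaryDirectory() as tmp:
        with open(os.path.join(tmp, "demo.py"), "w", encoding="utf-8") as f:
            f.write("def handler():\n    return 42\n")
        results = BasicSearchStrategy().search("handler", tmp, file_pattern="*.py")
        assert results == {"demo.py": [(1, "def handler():")]}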
@@ -0,0 +1,131 @@
"""
Search Strategy for standard grep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple

from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern


class GrepStrategy(SearchStrategy):
    """
    Search strategy using the standard 'grep' command-line tool.

    This is intended as a fallback for when more advanced tools like
    ugrep, ripgrep, or ag are not available.
    """

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'grep'

    def is_available(self) -> bool:
        """Check if 'grep' command is available on the system."""
        return shutil.which('grep') is not None

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using standard grep.

        Args:
            pattern: The search pattern
            base_path: Directory to search in
            case_sensitive: Whether search is case sensitive
            context_lines: Number of context lines to show
            file_pattern: File pattern to filter
            fuzzy: Enable word boundary matching
            regex: Enable regex pattern matching
        """
        # -r: recursive, -n: line number
        cmd = ['grep', '-r', '-n']

        # Prepare search pattern
        search_pattern = pattern

        if regex:
            # Use regex mode - check for safety first
            if not is_safe_regex_pattern(pattern):
                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
            cmd.append('-E')  # Extended Regular Expressions
        elif fuzzy:
            # Use word boundary pattern for partial matching
            search_pattern = create_word_boundary_pattern(pattern)
            cmd.append('-E')  # Extended Regular Expressions
        else:
            # Auto-detect if pattern looks like a safe regex
            if is_safe_regex_pattern(pattern):
                # Pattern contains regex chars, use extended regex mode
                cmd.append('-E')
            else:
                # Use literal string search
                cmd.append('-F')

        if not case_sensitive:
            cmd.append('-i')

        if context_lines > 0:
            cmd.extend(['-A', str(context_lines)])
            cmd.extend(['-B', str(context_lines)])

        if file_pattern:
            # Note: grep's --include uses glob patterns, not regex
            cmd.append(f'--include={file_pattern}')

        exclude_dirs = getattr(self, 'exclude_dirs', [])
        exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])

        processed_dirs = set()
        for directory in exclude_dirs:
            normalized = directory.strip()
            if not normalized or normalized in processed_dirs:
                continue
            cmd.append(f'--exclude-dir={normalized}')
            processed_dirs.add(normalized)

        processed_files = set()
        # Note: a distinct loop variable avoids shadowing the `pattern` parameter
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
            if not normalized or normalized in processed_files:
                continue
            if normalized.startswith('!'):
                normalized = normalized[1:]
            cmd.append(f'--exclude={normalized}')
            processed_files.add(normalized)

        # Add -- to treat pattern as a literal argument, preventing injection
        cmd.append('--')
        cmd.append(search_pattern)
        cmd.append('.')  # Use current directory since we set cwd=base_path

        try:
            # grep exits with 1 if no matches are found, which is not an error.
            # It exits with 0 on success (match found). >1 for errors.
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                check=False,
                cwd=base_path  # Set working directory to project base path for proper pattern resolution
            )

            if process.returncode > 1:
                raise RuntimeError(f"grep failed with exit code {process.returncode}: {process.stderr}")

            return parse_search_output(process.stdout, base_path)

        except FileNotFoundError:
            raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.")
        except Exception as e:
            raise RuntimeError(f"An error occurred while running grep: {e}")
@@ -0,0 +1,121 @@
"""
Search Strategy for ripgrep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple

from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern


class RipgrepStrategy(SearchStrategy):
    """Search strategy using the 'ripgrep' (rg) command-line tool."""

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'ripgrep'

    def is_available(self) -> bool:
        """Check if 'rg' command is available on the system."""
        return shutil.which('rg') is not None

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using ripgrep.

        Args:
            pattern: The search pattern
            base_path: Directory to search in
            case_sensitive: Whether search is case sensitive
            context_lines: Number of context lines to show
            file_pattern: File pattern to filter
            fuzzy: Enable word boundary matching (not true fuzzy search)
            regex: Enable regex pattern matching
        """
        cmd = ['rg', '--line-number', '--no-heading', '--color=never', '--no-ignore']

        if not case_sensitive:
            cmd.append('--ignore-case')

        # Prepare search pattern
        search_pattern = pattern

        if regex:
            # Use regex mode - check for safety first
            if not is_safe_regex_pattern(pattern):
                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
            # Don't add --fixed-strings, use regex mode
        elif fuzzy:
            # Use word boundary pattern for partial matching
            search_pattern = create_word_boundary_pattern(pattern)
        else:
            # Use literal string search
            cmd.append('--fixed-strings')

        if context_lines > 0:
            cmd.extend(['--context', str(context_lines)])

        if file_pattern:
            cmd.extend(['--glob', file_pattern])

        exclude_dirs = getattr(self, 'exclude_dirs', [])
        exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])

        processed_patterns = set()

        for directory in exclude_dirs:
            normalized = directory.strip()
            if not normalized or normalized in processed_patterns:
                continue
            cmd.extend(['--glob', f'!**/{normalized}/**'])
            processed_patterns.add(normalized)

        # Note: a distinct loop variable avoids shadowing the `pattern` parameter
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
            if not normalized or normalized in processed_patterns:
                continue
            if normalized.startswith('!'):
                glob_pattern = normalized
            elif any(ch in normalized for ch in '*?[') or '/' in normalized:
                glob_pattern = f'!{normalized}'
            else:
                glob_pattern = f'!**/{normalized}'
            cmd.extend(['--glob', glob_pattern])
            processed_patterns.add(normalized)

        # Add -- to treat pattern as a literal argument, preventing injection
        cmd.append('--')
        cmd.append(search_pattern)
        cmd.append('.')  # Use current directory since we set cwd=base_path

        try:
            # ripgrep exits with 1 if no matches are found, which is not an error.
            # It exits with 2 for actual errors.
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                check=False,  # Do not raise CalledProcessError on non-zero exit
                cwd=base_path  # Set working directory to project base path for proper glob resolution
            )
            if process.returncode > 1:
                raise RuntimeError(f"ripgrep failed with exit code {process.returncode}: {process.stderr}")

            return parse_search_output(process.stdout, base_path)

        except FileNotFoundError:
            raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.")
        except Exception as e:
            # Re-raise other potential exceptions like permission errors
            raise RuntimeError(f"An error occurred while running ripgrep: {e}")
|
||||
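A minimal usage sketch of the strategy above; the import path is hypothetical, since the package layout is only partially visible in this diff:

from search_strategies.ripgrep import RipgrepStrategy  # hypothetical import path

strategy = RipgrepStrategy()
if strategy.is_available():
    results = strategy.search(
        pattern='def main',
        base_path='/path/to/project',
        context_lines=1,
        file_pattern='*.py',
    )
    # Results map each file path to its (line_number, line_text) matches
    for file_path, matches in results.items():
        for line_no, text in matches:
            print(f'{file_path}:{line_no}: {text}')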
@@ -0,0 +1,121 @@
"""
Search Strategy for ugrep
"""
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple

from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern


class UgrepStrategy(SearchStrategy):
    """Search strategy using the 'ugrep' (ug) command-line tool."""

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'ugrep'

    def is_available(self) -> bool:
        """Check if the 'ug' command is available on the system."""
        return shutil.which('ug') is not None

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using the 'ug' command-line tool.

        Args:
            pattern: The search pattern
            base_path: Directory to search in
            case_sensitive: Whether the search is case-sensitive
            context_lines: Number of context lines to show
            file_pattern: File pattern to filter
            fuzzy: Enable true fuzzy search (ugrep native support)
            regex: Enable regex pattern matching
        """
        if not self.is_available():
            return {"error": "ugrep (ug) command not found."}

        cmd = ['ug', '-r', '--line-number', '--no-heading']

        if fuzzy:
            # ugrep has native fuzzy search support
            cmd.append('--fuzzy')
        elif regex:
            # Use regex mode - check for safety first
            if not is_safe_regex_pattern(pattern):
                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
            # Don't add --fixed-strings, use regex mode
        else:
            # Use literal string search
            cmd.append('--fixed-strings')

        if not case_sensitive:
            cmd.append('--ignore-case')

        if context_lines > 0:
            cmd.extend(['-A', str(context_lines), '-B', str(context_lines)])

        if file_pattern:
            cmd.extend(['--include', file_pattern])

        processed_patterns = set()
        exclude_dirs = getattr(self, 'exclude_dirs', [])
        exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])

        for directory in exclude_dirs:
            normalized = directory.strip()
            if not normalized or normalized in processed_patterns:
                continue
            cmd.extend(['--ignore', f'**/{normalized}/**'])
            processed_patterns.add(normalized)

        # A distinct loop variable keeps the 'pattern' argument from being shadowed
        # before it is appended to the command below.
        for exclude_pattern in exclude_file_patterns:
            normalized = exclude_pattern.strip()
            if not normalized or normalized in processed_patterns:
                continue
            if normalized.startswith('!'):
                ignore_pattern = normalized[1:]
            elif any(ch in normalized for ch in '*?[') or '/' in normalized:
                ignore_pattern = normalized
            else:
                ignore_pattern = f'**/{normalized}'
            cmd.extend(['--ignore', ignore_pattern])
            processed_patterns.add(normalized)

        # Add '--' to treat the pattern as a literal argument, preventing option injection
        cmd.append('--')
        cmd.append(pattern)
        cmd.append('.')  # Use current directory since we set cwd=base_path

        try:
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='ignore',  # Ignore decoding errors for binary-like content
                check=False,  # Do not raise an exception on non-zero exit codes
                cwd=base_path  # Set working directory to project base path for proper pattern resolution
            )

            # ugrep exits with 1 if no matches are found, which is not an error for us.
            # It exits with 2 for actual errors.
            if process.returncode > 1:
                error_output = process.stderr.strip()
                return {"error": f"ugrep execution failed with code {process.returncode}", "details": error_output}

            return parse_search_output(process.stdout, base_path)

        except FileNotFoundError:
            return {"error": "ugrep (ug) command not found. Please ensure it's installed and in your PATH."}
        except Exception as e:
            return {"error": f"An unexpected error occurred during search: {str(e)}"}
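The exclusion loops in the ripgrep and ugrep strategies apply the same three-way rule; restated here as a standalone helper (a sketch mirroring the ripgrep variant, not part of the diff):

def to_exclusion_glob(entry: str) -> str:
    # Negated entries pass through; glob-ish or path-ish entries are excluded
    # as written; bare names are excluded wherever they appear in the tree.
    entry = entry.strip()
    if entry.startswith('!'):
        return entry
    if any(ch in entry for ch in '*?[') or '/' in entry:
        return f'!{entry}'
    return f'!**/{entry}'

assert to_exclusion_glob('*.min.js') == '!*.min.js'
assert to_exclusion_glob('build/out.log') == '!build/out.log'
assert to_exclusion_glob('secrets.env') == '!**/secrets.env'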
386
reference/code-index-mcp-master/src/code_index_mcp/server.py
Normal file
@@ -0,0 +1,386 @@
"""
Code Index MCP Server

This MCP server allows LLMs to index, search, and analyze code from a project directory.
It provides tools for file discovery, content retrieval, and code analysis.

This version uses a service-oriented architecture where MCP decorators delegate
to domain-specific services for business logic.
"""

# Standard library imports
import argparse
import inspect
import sys
import logging
from contextlib import asynccontextmanager
from dataclasses import dataclass
from typing import AsyncIterator, Dict, Any, List, Optional
from urllib.parse import unquote

# Third-party imports
from mcp.server.fastmcp import FastMCP, Context

# Local imports
from .project_settings import ProjectSettings
from .services import (
    SearchService, FileService, SettingsService, FileWatcherService
)
from .services.settings_service import manage_temp_directory
from .services.file_discovery_service import FileDiscoveryService
from .services.project_management_service import ProjectManagementService
from .services.index_management_service import IndexManagementService
from .services.code_intelligence_service import CodeIntelligenceService
from .services.system_management_service import SystemManagementService
from .utils import handle_mcp_tool_errors


# Setup logging without writing to files
def setup_indexing_performance_logging():
    """Set up logging (stderr only); remove any file-based logging."""

    root_logger = logging.getLogger()
    root_logger.handlers.clear()

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # stderr for errors only
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(formatter)
    stderr_handler.setLevel(logging.ERROR)

    root_logger.addHandler(stderr_handler)
    root_logger.setLevel(logging.DEBUG)

# Initialize logging (no file handlers)
setup_indexing_performance_logging()
logger = logging.getLogger(__name__)

@dataclass
class CodeIndexerContext:
    """Context for the Code Indexer MCP server."""
    base_path: str
    settings: ProjectSettings
    file_count: int = 0
    file_watcher_service: Optional[FileWatcherService] = None


@dataclass
class _CLIConfig:
    """Holds CLI configuration for bootstrap operations."""
    project_path: str | None = None


class _BootstrapRequestContext:
    """Minimal request context to reuse business services during bootstrap."""

    def __init__(self, lifespan_context: CodeIndexerContext):
        self.lifespan_context = lifespan_context
        self.session = None
        self.meta = None


_CLI_CONFIG = _CLIConfig()

@asynccontextmanager
async def indexer_lifespan(_server: FastMCP) -> AsyncIterator[CodeIndexerContext]:
    """Manage the lifecycle of the Code Indexer MCP server."""
    # Don't set a default path; the user must explicitly set the project path
    base_path = ""  # Empty string to indicate no path is set

    # Initialize the settings manager with skip_load=True to skip loading files
    settings = ProjectSettings(base_path, skip_load=True)

    # Initialize context - the file watcher will be initialized later when the project path is set
    context = CodeIndexerContext(
        base_path=base_path,
        settings=settings,
        file_watcher_service=None
    )

    try:
        # Bootstrap the project path when provided via CLI.
        if _CLI_CONFIG.project_path:
            bootstrap_ctx = Context(
                request_context=_BootstrapRequestContext(context),
                fastmcp=mcp
            )
            try:
                message = ProjectManagementService(bootstrap_ctx).initialize_project(
                    _CLI_CONFIG.project_path
                )
                logger.info("Project initialized from CLI flag: %s", message)
            except Exception as exc:  # pylint: disable=broad-except
                logger.error("Failed to initialize project from CLI flag: %s", exc)
                raise RuntimeError(
                    f"Failed to initialize project path '{_CLI_CONFIG.project_path}'"
                ) from exc

        # Provide context to the server
        yield context
    finally:
        # Stop the file watcher if it was started
        if context.file_watcher_service:
            context.file_watcher_service.stop_monitoring()

# Create the MCP server with lifespan manager
mcp = FastMCP("CodeIndexer", lifespan=indexer_lifespan, dependencies=["pathlib"])

# ----- RESOURCES -----

@mcp.resource("files://{file_path}")
def get_file_content(file_path: str) -> str:
    """Get the content of a specific file."""
    decoded_path = unquote(file_path)
    ctx = mcp.get_context()
    return FileService(ctx).get_file_content(decoded_path)

# ----- TOOLS -----

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def set_project_path(path: str, ctx: Context) -> str:
    """Set the base project path for indexing."""
    return ProjectManagementService(ctx).initialize_project(path)

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def search_code_advanced(
    pattern: str,
    ctx: Context,
    case_sensitive: bool = True,
    context_lines: int = 0,
    file_pattern: Optional[str] = None,
    fuzzy: bool = False,
    regex: Optional[bool] = None,
    start_index: int = 0,
    max_results: Optional[int] = 10
) -> Dict[str, Any]:
    """
    Search for a code pattern in the project using an advanced, fast tool with pagination support.

    This tool automatically selects the best available command-line search tool
    (like ugrep, ripgrep, ag, or grep) for maximum performance.

    Args:
        pattern: The search pattern. Can be literal text or regex (see the regex parameter).
        case_sensitive: Whether the search should be case-sensitive.
        context_lines: Number of lines to show before and after the match.
        file_pattern: A glob pattern to filter files to search in
                      (e.g., "*.py", "*.js", "test_*.py").
                      All search tools now handle glob patterns consistently:
                      - ugrep: Uses glob patterns (*.py, *.{js,ts})
                      - ripgrep: Uses glob patterns (*.py, *.{js,ts})
                      - ag (Silver Searcher): Automatically converts globs to regex patterns
                      - grep: Basic glob pattern matching
                      All common glob patterns like "*.py", "test_*.js", "src/*.ts" are supported.
        fuzzy: If True, enables fuzzy/partial matching; behavior varies by search tool:
               - ugrep: Native fuzzy search with the --fuzzy flag (true edit-distance fuzzy search)
               - ripgrep, ag, grep, basic: Word boundary pattern matching (not true fuzzy search)
               IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary
               matching, which allows partial matches at word boundaries.
               For exact literal matches, set fuzzy=False (default and recommended).
        regex: Controls regex pattern matching behavior:
               - If True, enables regex pattern matching
               - If False, forces literal string search
               - If None (default), automatically detects regex patterns and enables regex for patterns like "ERROR|WARN"
               The pattern is always validated for safety to prevent ReDoS attacks.
        start_index: Zero-based offset into the flattened match list. Use to fetch subsequent pages.
        max_results: Maximum number of matches to return (default 10). Pass None to retrieve all matches.

    Returns:
        A dictionary containing:
        - results: List of matches with file, line, and text keys.
        - pagination: Metadata with total_matches, returned, start_index, end_index, has_more,
          and optionally max_results.
        If an error occurs, an error message is returned instead.
    """
    return SearchService(ctx).search_code(
        pattern=pattern,
        case_sensitive=case_sensitive,
        context_lines=context_lines,
        file_pattern=file_pattern,
        fuzzy=fuzzy,
        regex=regex,
        start_index=start_index,
        max_results=max_results
    )

@mcp.tool()
@handle_mcp_tool_errors(return_type='list')
def find_files(pattern: str, ctx: Context) -> List[str]:
    """
    Find files matching a glob pattern using the pre-built file index.

    Use when:
    - Looking for files by pattern (e.g., "*.py", "test_*.js")
    - Searching by filename only (e.g., "README.md" finds all README files)
    - Checking if specific files exist in the project
    - Getting file lists for further analysis

    Pattern matching:
    - Supports both full-path and filename-only matching
    - Uses standard glob patterns (*, ?, [])
    - Fast lookup using the in-memory file index
    - Uses forward slashes consistently across all platforms

    Args:
        pattern: Glob pattern to match files (e.g., "*.py", "test_*.js", "README.md")

    Returns:
        List of file paths matching the pattern
    """
    return FileDiscoveryService(ctx).find_files(pattern)

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_file_summary(file_path: str, ctx: Context) -> Dict[str, Any]:
    """
    Get a summary of a specific file, including:
    - Line count
    - Function/class definitions (for supported languages)
    - Import statements
    - Basic complexity metrics
    """
    return CodeIntelligenceService(ctx).analyze_file(file_path)

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def refresh_index(ctx: Context) -> str:
    """
    Manually refresh the project index when files have been added/removed/moved.

    Use when:
    - The file watcher is disabled or unavailable
    - After large-scale operations (git checkout, merge, pull) that change many files
    - When you want an immediate index rebuild without waiting for the file watcher debounce
    - When find_files results seem incomplete or outdated
    - For troubleshooting suspected index synchronization issues

    Important notes for LLMs:
    - Always available as a backup when the file watcher is not working
    - Performs full project re-indexing for complete accuracy
    - Use when you suspect the index is stale after file system changes
    - **Call this after programmatic file modifications if the file watcher seems unresponsive**
    - Complements the automatic file watcher system

    Returns:
        Success message with total file count
    """
    return IndexManagementService(ctx).rebuild_index()

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def build_deep_index(ctx: Context) -> str:
    """
    Build the deep index (full symbol extraction) for the current project.

    This performs a complete re-index and loads it into memory.
    """
    return IndexManagementService(ctx).rebuild_deep_index()

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_settings_info(ctx: Context) -> Dict[str, Any]:
    """Get information about the project settings."""
    return SettingsService(ctx).get_settings_info()

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def create_temp_directory() -> Dict[str, Any]:
    """Create the temporary directory used for storing index data."""
    return manage_temp_directory('create')

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def check_temp_directory() -> Dict[str, Any]:
    """Check the temporary directory used for storing index data."""
    return manage_temp_directory('check')

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def clear_settings(ctx: Context) -> str:
    """Clear all settings and cached data."""
    return SettingsService(ctx).clear_all_settings()

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def refresh_search_tools(ctx: Context) -> str:
    """
    Manually re-detect the available command-line search tools on the system.
    This is useful if you have installed a new tool (like ripgrep) after starting the server.
    """
    return SearchService(ctx).refresh_search_tools()

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
def get_file_watcher_status(ctx: Context) -> Dict[str, Any]:
    """Get file watcher service status and statistics."""
    return SystemManagementService(ctx).get_file_watcher_status()

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def configure_file_watcher(
    ctx: Context,
    enabled: Optional[bool] = None,
    debounce_seconds: Optional[float] = None,
    additional_exclude_patterns: Optional[list] = None
) -> str:
    """Configure file watcher service settings."""
    return SystemManagementService(ctx).configure_file_watcher(enabled, debounce_seconds, additional_exclude_patterns)

# ----- PROMPTS -----
# Removed: analyze_code, code_search, set_project prompts

def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments for the MCP server."""
    parser = argparse.ArgumentParser(description="Code Index MCP server")
    parser.add_argument(
        "--project-path",
        dest="project_path",
        help="Set the project path on startup (equivalent to calling set_project_path)."
    )
    parser.add_argument(
        "--transport",
        choices=["stdio", "sse", "streamable-http"],
        default="stdio",
        help="Transport protocol to use (default: stdio)."
    )
    parser.add_argument(
        "--mount-path",
        dest="mount_path",
        default=None,
        help="Mount path when using SSE transport."
    )
    return parser.parse_args(argv)


def main(argv: list[str] | None = None):
    """Main function to run the MCP server."""
    args = _parse_args(argv)

    # Store CLI configuration for lifespan bootstrap.
    _CLI_CONFIG.project_path = args.project_path

    run_kwargs = {"transport": args.transport}
    if args.transport == "sse" and args.mount_path:
        run_signature = inspect.signature(mcp.run)
        if "mount_path" in run_signature.parameters:
            run_kwargs["mount_path"] = args.mount_path
        else:
            logger.warning(
                "Ignoring --mount-path because this FastMCP version "
                "does not accept the parameter."
            )

    try:
        mcp.run(**run_kwargs)
    except RuntimeError as exc:
        logger.error("MCP server terminated with error: %s", exc)
        raise SystemExit(1) from exc
    except Exception as exc:  # pylint: disable=broad-except
        logger.error("Unexpected MCP server error: %s", exc)
        raise


if __name__ == '__main__':
    main()
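A sketch of paging through search_code_advanced from a client. call_tool stands in for whatever MCP client API is in use; the response keys follow the tool docstring above:

def fetch_all_matches(call_tool, pattern: str, page_size: int = 50):
    # call_tool(name, arguments) -> dict is a hypothetical MCP client call.
    start = 0
    while True:
        resp = call_tool('search_code_advanced', {
            'pattern': pattern,
            'start_index': start,
            'max_results': page_size,
        })
        yield from resp['results']
        if not resp['pagination']['has_more']:
            break
        start += resp['pagination']['returned']  # advance by the count actually returned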
@@ -0,0 +1,48 @@
"""
Service layer for the Code Index MCP server.

This package contains domain-specific services that handle the business logic
for different areas of functionality:

- SearchService: Code search operations and search tool management
- FileService: File operations, content retrieval, and analysis
- SettingsService: Settings management and directory operations

Each service follows a consistent pattern:
- Constructor accepts an MCP Context parameter
- Methods correspond to MCP entry points
- Clear domain boundaries with no cross-service dependencies
- Shared utilities accessed through the utils module
- Meaningful exceptions raised for error conditions
"""

# New Three-Layer Architecture Services
from .base_service import BaseService
from .project_management_service import ProjectManagementService
from .index_management_service import IndexManagementService
from .file_discovery_service import FileDiscoveryService
from .code_intelligence_service import CodeIntelligenceService
from .system_management_service import SystemManagementService
from .search_service import SearchService  # Already follows clean architecture
from .settings_service import SettingsService

# Simple Services
from .file_service import FileService  # Simple file reading for resources
from .file_watcher_service import FileWatcherService  # Low-level service, still needed

__all__ = [
    # New Architecture
    'BaseService',
    'ProjectManagementService',
    'IndexManagementService',
    'FileDiscoveryService',
    'CodeIntelligenceService',
    'SystemManagementService',
    'SearchService',
    'SettingsService',

    # Simple Services
    'FileService',  # Simple file reading for resources
    'FileWatcherService'  # Keep as low-level service
]
@@ -0,0 +1,140 @@
"""
Base service class providing common functionality for all services.

This module defines the base service pattern that all domain services inherit from,
ensuring consistent behavior and shared functionality across the service layer.
"""

from abc import ABC
from typing import Optional
from mcp.server.fastmcp import Context

from ..utils import ContextHelper, ValidationHelper


class BaseService(ABC):
    """
    Base class for all MCP services.

    This class provides common functionality that all services need:
    - Context management through ContextHelper
    - Common validation patterns
    - Shared error checking methods

    All domain services should inherit from this class to ensure
    consistent behavior and access to shared utilities.
    """

    def __init__(self, ctx: Context):
        """
        Initialize the base service.

        Args:
            ctx: The MCP Context object containing request and lifespan context
        """
        self.ctx = ctx
        self.helper = ContextHelper(ctx)

    def _validate_project_setup(self) -> Optional[str]:
        """
        Validate that the project is properly set up.

        This method checks if the base path is set and valid, which is
        required for most operations.

        Returns:
            Error message if the project is not set up properly, None if valid
        """
        return self.helper.get_base_path_error()

    def _require_project_setup(self) -> None:
        """
        Ensure the project is set up, raising an exception if not.

        This is a convenience method for operations that absolutely
        require a valid project setup.

        Raises:
            ValueError: If the project is not properly set up
        """
        error = self._validate_project_setup()
        if error:
            raise ValueError(error)

    def _validate_file_path(self, file_path: str) -> Optional[str]:
        """
        Validate a file path for security and accessibility.

        Args:
            file_path: The file path to validate

        Returns:
            Error message if validation fails, None if valid
        """
        return ValidationHelper.validate_file_path(file_path, self.helper.base_path)

    def _require_valid_file_path(self, file_path: str) -> None:
        """
        Ensure the file path is valid, raising an exception if not.

        Args:
            file_path: The file path to validate

        Raises:
            ValueError: If the file path is invalid
        """
        error = self._validate_file_path(file_path)
        if error:
            raise ValueError(error)

    @property
    def base_path(self) -> str:
        """
        Convenient access to the base project path.

        Returns:
            The base project path
        """
        return self.helper.base_path

    @property
    def settings(self):
        """
        Convenient access to the project settings.

        Returns:
            The ProjectSettings instance
        """
        return self.helper.settings

    @property
    def file_count(self) -> int:
        """
        Convenient access to the current file count.

        Returns:
            The number of indexed files
        """
        return self.helper.file_count

    @property
    def index_provider(self):
        """
        Convenient access to the unified index provider.

        Returns:
            The current IIndexProvider instance, or None if not available
        """
        if self.helper.index_manager:
            return self.helper.index_manager.get_provider()
        return None

    @property
    def index_manager(self):
        """
        Convenient access to the index manager.

        Returns:
            The index manager instance, or None if not available
        """
        return self.helper.index_manager
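A minimal sketch of a domain service built on BaseService; LineCountService is hypothetical and only illustrates the validate-then-act pattern the docstrings describe:

import os

class LineCountService(BaseService):
    """Hypothetical example service: count the lines in a project file."""

    def count_lines(self, file_path: str) -> int:
        # Fail fast with meaningful exceptions, as the service pattern requires
        self._require_project_setup()
        self._require_valid_file_path(file_path)
        full_path = os.path.join(self.base_path, file_path)
        with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
            return sum(1 for _ in f)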
@@ -0,0 +1,104 @@
"""
Code Intelligence Service - Business logic for code analysis and understanding.

This service handles the business logic for analyzing code files using the new
JSON-based indexing system optimized for LLM consumption.
"""

import logging
import os
from typing import Dict, Any

from .base_service import BaseService
from ..tools.filesystem import FileSystemTool
from ..indexing import get_index_manager

logger = logging.getLogger(__name__)


class CodeIntelligenceService(BaseService):
    """
    Business service for code analysis and intelligence using JSON indexing.

    This service provides comprehensive code analysis using the optimized
    JSON-based indexing system for fast, LLM-friendly responses.
    """

    def __init__(self, ctx):
        super().__init__(ctx)
        self._filesystem_tool = FileSystemTool()

    def analyze_file(self, file_path: str) -> Dict[str, Any]:
        """
        Analyze a file and return comprehensive intelligence.

        This is the main business method that orchestrates the file analysis
        workflow, choosing the best analysis strategy and providing rich
        insights about the code.

        Args:
            file_path: Path to the file to analyze (relative to project root)

        Returns:
            Dictionary with comprehensive file analysis

        Raises:
            ValueError: If the file path is invalid or analysis fails
        """
        # Business validation
        self._validate_analysis_request(file_path)

        # Use the global index manager
        index_manager = get_index_manager()

        # Debug logging
        logger.info(f"Getting file summary for: {file_path}")
        logger.info(f"Index manager state - Project path: {index_manager.project_path}")
        logger.info(f"Index manager state - Has builder: {index_manager.index_builder is not None}")
        if index_manager.index_builder:
            logger.info(f"Index manager state - Has index: {index_manager.index_builder.in_memory_index is not None}")

        # Get the file summary from the JSON index
        summary = index_manager.get_file_summary(file_path)
        logger.info(f"Summary result: {summary is not None}")

        # If the deep index isn't available yet, return a helpful hint instead of an error
        if not summary:
            return {
                "status": "needs_deep_index",
                "message": "Deep index not available. Please run build_deep_index before calling get_file_summary.",
                "file_path": file_path
            }

        return summary

    def _validate_analysis_request(self, file_path: str) -> None:
        """
        Validate the file analysis request according to business rules.

        Args:
            file_path: File path to validate

        Raises:
            ValueError: If validation fails
        """
        # Business rule: the project must be set up OR auto-initialization must be possible
        if self.base_path:
            # Standard validation if the project is set up in context
            self._require_valid_file_path(file_path)
            full_path = os.path.join(self.base_path, file_path)
            if not os.path.exists(full_path):
                raise ValueError(f"File does not exist: {file_path}")
        else:
            # Allow proceeding if auto-initialization might work;
            # the index manager will handle project discovery
            logger.info("Project not set in context, relying on index auto-initialization")

            # Basic file path validation only
            if not file_path or '..' in file_path:
                raise ValueError(f"Invalid file path: {file_path}")
@@ -0,0 +1,78 @@
"""
File Discovery Service - Business logic for intelligent file discovery.

This service handles the business logic for finding files using the new
JSON-based indexing system optimized for LLM consumption.
"""

from typing import Dict, Any, List, Optional
from dataclasses import dataclass

from .base_service import BaseService
from ..indexing import get_shallow_index_manager


@dataclass
class FileDiscoveryResult:
    """Business result for file discovery operations."""
    files: List[str]
    total_count: int
    pattern_used: str
    search_strategy: str
    metadata: Dict[str, Any]


class FileDiscoveryService(BaseService):
    """
    Business service for intelligent file discovery using JSON indexing.

    This service provides fast file discovery using the optimized JSON
    indexing system for efficient LLM-oriented responses.
    """

    def __init__(self, ctx):
        super().__init__(ctx)
        self._index_manager = get_shallow_index_manager()

    def find_files(self, pattern: str, max_results: Optional[int] = None) -> List[str]:
        """
        Find files matching the given pattern using JSON indexing.

        Args:
            pattern: Glob pattern to search for (e.g., "*.py", "test_*.js")
            max_results: Maximum number of results to return (None for no limit)

        Returns:
            List of file paths matching the pattern

        Raises:
            ValueError: If the pattern is invalid or the project is not set up
        """
        # Business validation
        self._validate_discovery_request(pattern)

        # Get files from the JSON index
        files = self._index_manager.find_files(pattern)

        # Apply the max_results limit if specified
        if max_results and len(files) > max_results:
            files = files[:max_results]

        return files

    def _validate_discovery_request(self, pattern: str) -> None:
        """
        Validate the file discovery request according to business rules.

        Args:
            pattern: Pattern to validate

        Raises:
            ValueError: If validation fails
        """
        # Ensure the project is set up
        self._require_project_setup()

        # Validate the pattern
        if not pattern or not pattern.strip():
            raise ValueError("Search pattern cannot be empty")
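The find_files tool docstring in server.py promises both full-path and filename-only glob matching; a sketch of that behavior (the real shallow index manager may implement it differently):

import fnmatch
from typing import List

def match_files(indexed_paths: List[str], pattern: str) -> List[str]:
    # A path matches if the whole path or just its basename satisfies the glob.
    return [
        p for p in indexed_paths
        if fnmatch.fnmatch(p, pattern) or fnmatch.fnmatch(p.rsplit('/', 1)[-1], pattern)
    ]

assert match_files(['src/app.py', 'docs/README.md'], 'README.md') == ['docs/README.md']
assert match_files(['src/app.py', 'docs/README.md'], '*.py') == ['src/app.py']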
@@ -0,0 +1,62 @@
"""
File Service - Simple file reading service for MCP resources.

This service provides simple file content reading functionality for MCP resources.
Complex file analysis has been moved to CodeIntelligenceService.

Usage:
- get_file_content() - used by the files://{file_path} resource
"""

import os
from .base_service import BaseService


class FileService(BaseService):
    """
    Simple service for file content reading.

    This service handles basic file reading operations for MCP resources.
    Complex analysis functionality has been moved to CodeIntelligenceService.
    """

    def get_file_content(self, file_path: str) -> str:
        """
        Get file content for an MCP resource.

        Args:
            file_path: Path to the file (relative to project root)

        Returns:
            File content as a string

        Raises:
            ValueError: If the project is not set up or the path is invalid
            FileNotFoundError: If the file is not found or not readable
        """
        self._require_project_setup()
        self._require_valid_file_path(file_path)

        # Build the full path
        full_path = os.path.join(self.base_path, file_path)

        try:
            # Try UTF-8 first (most common)
            with open(full_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Try other encodings if UTF-8 fails
            encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']
            for encoding in encodings:
                try:
                    with open(full_path, 'r', encoding=encoding) as f:
                        return f.read()
                except UnicodeDecodeError:
                    continue

            raise ValueError(
                f"Could not decode file {file_path}. The file may have an "
                f"unsupported encoding."
            ) from None
        except (FileNotFoundError, PermissionError, OSError) as e:
            raise FileNotFoundError(f"Error reading file: {e}") from e
@@ -0,0 +1,418 @@
"""
File Watcher Service for automatic index rebuilds.

This module provides file system monitoring capabilities that automatically
trigger index rebuilds when relevant files are modified, created, or deleted.
It uses the watchdog library for cross-platform file system event monitoring.
"""
# pylint: disable=missing-function-docstring  # Fallback stub methods don't need docstrings

import logging
import os
import traceback
from threading import Timer
from typing import Optional, Callable, List
from pathlib import Path

try:
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler, FileSystemEvent
    WATCHDOG_AVAILABLE = True
except ImportError:
    # Fallback classes for when watchdog is not available
    class Observer:
        """Fallback Observer class when the watchdog library is not available."""
        def __init__(self):
            pass
        def schedule(self, *args, **kwargs):
            pass
        def start(self):
            pass
        def stop(self):
            pass
        def join(self, *args, **kwargs):
            pass
        def is_alive(self):
            return False

    class FileSystemEventHandler:
        """Fallback FileSystemEventHandler class when the watchdog library is not available."""
        def __init__(self):
            pass

    class FileSystemEvent:
        """Fallback FileSystemEvent class when the watchdog library is not available."""
        def __init__(self):
            self.is_directory = False
            self.src_path = ""
            self.event_type = ""

    WATCHDOG_AVAILABLE = False

from .base_service import BaseService
from ..constants import SUPPORTED_EXTENSIONS


class FileWatcherService(BaseService):
    """
    Service for monitoring file system changes and triggering index rebuilds.

    This service uses the watchdog library to monitor file system events and
    automatically triggers background index rebuilds when relevant files change.
    It includes intelligent debouncing to batch rapid changes and filtering
    to only monitor relevant file types.
    """
    MAX_RESTART_ATTEMPTS = 3

    def __init__(self, ctx):
        """
        Initialize the file watcher service.

        Args:
            ctx: The MCP Context object
        """
        super().__init__(ctx)
        self.logger = logging.getLogger(__name__)
        self.observer: Optional[Observer] = None
        self.event_handler: Optional[DebounceEventHandler] = None
        self.is_monitoring = False
        self.restart_attempts = 0
        self.rebuild_callback: Optional[Callable] = None

        # Check if watchdog is available
        if not WATCHDOG_AVAILABLE:
            self.logger.warning("Watchdog library not available - file watcher disabled")

    def start_monitoring(self, rebuild_callback: Callable) -> bool:
        """
        Start file system monitoring.

        Args:
            rebuild_callback: Function to call when a rebuild is needed

        Returns:
            True if monitoring started successfully, False otherwise
        """
        if not WATCHDOG_AVAILABLE:
            self.logger.warning("Cannot start file watcher - watchdog library not available")
            return False

        if self.is_monitoring:
            self.logger.debug("File watcher already monitoring")
            return True

        # Validate project setup
        error = self._validate_project_setup()
        if error:
            self.logger.error("Cannot start file watcher: %s", error)
            return False

        self.rebuild_callback = rebuild_callback

        # Get debounce seconds from config
        config = self.settings.get_file_watcher_config()
        debounce_seconds = config.get('debounce_seconds', 6.0)

        try:
            self.observer = Observer()
            self.event_handler = DebounceEventHandler(
                debounce_seconds=debounce_seconds,
                rebuild_callback=self.rebuild_callback,
                base_path=Path(self.base_path),
                logger=self.logger
            )

            # Log detailed Observer setup
            watch_path = str(self.base_path)
            self.logger.debug("Scheduling Observer for path: %s", watch_path)

            self.observer.schedule(
                self.event_handler,
                watch_path,
                recursive=True
            )

            # Log Observer start
            self.logger.debug("Starting Observer...")
            self.observer.start()
            self.is_monitoring = True
            self.restart_attempts = 0

            # Log Observer thread info
            if hasattr(self.observer, '_thread'):
                self.logger.debug("Observer thread: %s", self.observer._thread)

            # Verify the observer is actually running
            if self.observer.is_alive():
                self.logger.info(
                    "File watcher started successfully",
                    extra={
                        "debounce_seconds": debounce_seconds,
                        "monitored_path": str(self.base_path),
                        "supported_extensions": len(SUPPORTED_EXTENSIONS)
                    }
                )

                # Diagnostic checks to verify the Observer works
                self.logger.debug("Observer thread is alive: %s", self.observer.is_alive())
                self.logger.debug("Monitored path exists: %s", os.path.exists(str(self.base_path)))
                self.logger.debug("Event handler is set: %s", self.event_handler is not None)

                # Log the current directory for comparison
                current_dir = os.getcwd()
                self.logger.debug("Current working directory: %s", current_dir)
                self.logger.debug("Are paths same: %s", os.path.normpath(current_dir) == os.path.normpath(str(self.base_path)))

                return True
            else:
                self.logger.error("File watcher failed to start - Observer not alive")
                return False

        except Exception as e:
            self.logger.warning("Failed to start file watcher: %s", e)
            self.logger.info("Falling back to reactive index refresh")
            return False

    def stop_monitoring(self) -> None:
        """
        Stop file system monitoring and clean up all resources.

        This method ensures complete cleanup of:
        - Observer thread
        - Event handler
        - Debounce timers
        - Monitoring state
        """
        if not self.observer and not self.is_monitoring:
            # Already stopped or never started
            return

        self.logger.info("Stopping file watcher monitoring...")

        try:
            # Step 1: Stop the observer first
            if self.observer:
                self.logger.debug("Stopping observer...")
                self.observer.stop()

            # Step 2: Cancel any active debounce timer
            if self.event_handler and self.event_handler.debounce_timer:
                self.logger.debug("Cancelling debounce timer...")
                self.event_handler.debounce_timer.cancel()

            # Step 3: Wait for the observer thread to finish (with timeout)
            if self.observer:
                self.logger.debug("Waiting for observer thread to finish...")
                self.observer.join(timeout=5.0)

                # Step 4: Check if the thread actually finished
                if self.observer.is_alive():
                    self.logger.warning("Observer thread did not stop within timeout")
                else:
                    self.logger.debug("Observer thread stopped successfully")

            # Step 5: Clear all references
            self.observer = None
            self.event_handler = None
            self.rebuild_callback = None
            self.is_monitoring = False

            self.logger.info("File watcher stopped and cleaned up successfully")

        except Exception as e:
            self.logger.error("Error stopping file watcher: %s", e)

            # Force cleanup even if there were errors
            self.observer = None
            self.event_handler = None
            self.rebuild_callback = None
            self.is_monitoring = False

    def is_active(self) -> bool:
        """
        Check if the file watcher is actively monitoring.

        Returns:
            True if actively monitoring, False otherwise
        """
        return bool(self.is_monitoring and
                    self.observer and
                    self.observer.is_alive())

    def restart_observer(self) -> bool:
        """
        Attempt to restart the file system observer.

        Returns:
            True if the restart succeeded, False otherwise
        """
        if self.restart_attempts >= self.MAX_RESTART_ATTEMPTS:
            self.logger.error("Max restart attempts reached, file watcher disabled")
            return False

        self.logger.info("Attempting to restart file watcher (attempt %d)",
                         self.restart_attempts + 1)
        self.restart_attempts += 1

        # Stop the current observer if running
        if self.observer:
            try:
                self.observer.stop()
                self.observer.join(timeout=2.0)
            except Exception as e:
                self.logger.warning("Error stopping observer during restart: %s", e)

        # Start a new observer
        try:
            self.observer = Observer()
            self.observer.schedule(
                self.event_handler,
                str(self.base_path),
                recursive=True
            )
            self.observer.start()
            self.is_monitoring = True

            self.logger.info("File watcher restarted successfully")
            return True

        except Exception as e:
            self.logger.error("Failed to restart file watcher: %s", e)
            return False

    def get_status(self) -> dict:
        """
        Get current file watcher status information.

        Returns:
            Dictionary containing status information
        """
        # Get the current debounce seconds from config
        config = self.settings.get_file_watcher_config()
        debounce_seconds = config.get('debounce_seconds', 6.0)

        return {
            "available": WATCHDOG_AVAILABLE,
            "active": self.is_active(),
            "monitoring": self.is_monitoring,
            "restart_attempts": self.restart_attempts,
            "debounce_seconds": debounce_seconds,
            "base_path": self.base_path if self.base_path else None,
            "observer_alive": self.observer.is_alive() if self.observer else False
        }


class DebounceEventHandler(FileSystemEventHandler):
    """
    File system event handler with debouncing capability.

    This handler filters file system events to only relevant files and
    implements a debounce mechanism to batch rapid changes into single
    rebuild operations.
    """

    def __init__(self, debounce_seconds: float, rebuild_callback: Callable,
                 base_path: Path, logger: logging.Logger, additional_excludes: Optional[List[str]] = None):
        """
        Initialize the debounce event handler.

        Args:
            debounce_seconds: Number of seconds to wait before triggering a rebuild
            rebuild_callback: Function to call when a rebuild is needed
            base_path: Base project path for filtering
            logger: Logger instance for debug messages
            additional_excludes: Additional patterns to exclude
        """
        from ..utils import FileFilter

        super().__init__()
        self.debounce_seconds = debounce_seconds
        self.rebuild_callback = rebuild_callback
        self.base_path = base_path
        self.debounce_timer: Optional[Timer] = None
        self.logger = logger

        # Use centralized file filtering
        self.file_filter = FileFilter(additional_excludes)

    def on_any_event(self, event: FileSystemEvent) -> None:
        """
        Handle any file system event.

        Args:
            event: The file system event
        """
        # Check if the event should be processed
        should_process = self.should_process_event(event)

        if should_process:
            self.logger.info("File changed: %s - %s", event.event_type, event.src_path)
            self.reset_debounce_timer()
        else:
            # Only log at debug level for filtered events
            self.logger.debug("Filtered: %s - %s", event.event_type, event.src_path)

    def should_process_event(self, event: FileSystemEvent) -> bool:
        """
        Determine if an event should trigger an index rebuild using centralized filtering.

        Args:
            event: The file system event to evaluate

        Returns:
            True if the event should trigger a rebuild, False otherwise
        """
        # Skip directory events
        if event.is_directory:
            self.logger.debug("Skipping directory event: %s", event.src_path)
            return False

        # Select the path to check: dest_path for moves, src_path for others
        if event.event_type == 'moved':
            if not hasattr(event, 'dest_path'):
                return False
            target_path = event.dest_path
        else:
            target_path = event.src_path

        # Use centralized filtering logic
        try:
            path = Path(target_path)
            should_process = self.file_filter.should_process_path(path, self.base_path)

            # Skip temporary files using centralized logic
            if not should_process or self.file_filter.is_temporary_file(path):
                return False

            return True
        except Exception:
            return False

    def reset_debounce_timer(self) -> None:
        """Reset the debounce timer, canceling any existing timer."""
        if self.debounce_timer:
            self.debounce_timer.cancel()

        self.debounce_timer = Timer(
            self.debounce_seconds,
            self.trigger_rebuild
        )
        self.debounce_timer.start()

    def trigger_rebuild(self) -> None:
        """Trigger an index rebuild after the debounce period."""
        self.logger.info("File changes detected, triggering rebuild")

        if self.rebuild_callback:
            try:
                self.rebuild_callback()
            except Exception as e:
                self.logger.error("Rebuild callback failed: %s", e)
                traceback_msg = traceback.format_exc()
                self.logger.error("Traceback: %s", traceback_msg)
        else:
            self.logger.warning("No rebuild callback configured")
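A wiring sketch for the watcher above; ctx is assumed to be a valid MCP Context, and the callback a cheap index-refresh trigger:

watcher = FileWatcherService(ctx)  # ctx: a valid MCP Context (assumed)

def on_changes():
    # Rapid edits within the debounce window coalesce into one callback.
    print('index rebuild triggered')

if watcher.start_monitoring(on_changes):
    try:
        pass  # server runs here
    finally:
        watcher.stop_monitoring()  # cancels the timer and joins the observer thread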
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
Index Management Service - Business logic for index lifecycle management.
|
||||
|
||||
This service handles the business logic for index rebuilding, status monitoring,
|
||||
and index-related operations using the new JSON-based indexing system.
|
||||
"""
|
||||
import time
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
|
||||
from typing import Dict, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .base_service import BaseService
|
||||
from ..indexing import get_index_manager, get_shallow_index_manager, DeepIndexManager
|
||||
|
||||
|
||||
@dataclass
|
||||
class IndexRebuildResult:
|
||||
"""Business result for index rebuild operations."""
|
||||
file_count: int
|
||||
rebuild_time: float
|
||||
status: str
|
||||
message: str
|
||||
|
||||
|
||||
class IndexManagementService(BaseService):
|
||||
"""
|
||||
Business service for index lifecycle management.
|
||||
|
||||
This service orchestrates index management workflows using the new
|
||||
JSON-based indexing system for optimal LLM performance.
|
||||
"""
|
||||
|
||||
def __init__(self, ctx):
|
||||
super().__init__(ctx)
|
||||
# Deep manager (symbols/files, legacy JSON index manager)
|
||||
self._index_manager = get_index_manager()
|
||||
# Shallow manager (file-list only) for default workflows
|
||||
self._shallow_manager = get_shallow_index_manager()
|
||||
# Optional wrapper for explicit deep builds
|
||||
        self._deep_wrapper = DeepIndexManager()

    def rebuild_index(self) -> str:
        """
        Rebuild the project index (DEFAULT: shallow file list).

        For deep/symbol rebuilds, use build_deep_index() tool instead.

        Returns:
            Success message with rebuild information

        Raises:
            ValueError/RuntimeError: If project not set up or rebuild fails
        """
        # Business validation
        self._validate_rebuild_request()

        # Shallow rebuild only (fast path)
        if not self._shallow_manager.set_project_path(self.base_path):
            raise RuntimeError("Failed to set project path (shallow) in index manager")
        if not self._shallow_manager.build_index():
            raise RuntimeError("Failed to rebuild shallow index")

        try:
            count = len(self._shallow_manager.get_file_list())
        except Exception:
            count = 0
        return f"Shallow index re-built with {count} files."

    def get_rebuild_status(self) -> Dict[str, Any]:
        """
        Get current index rebuild status information.

        Returns:
            Dictionary with rebuild status and metadata
        """
        # Check if project is set up
        if not self.base_path:
            return {
                'status': 'not_initialized',
                'message': 'Project not initialized',
                'is_rebuilding': False
            }

        # Get index stats from the new JSON system
        stats = self._index_manager.get_index_stats()

        return {
            'status': 'ready' if stats.get('status') == 'loaded' else 'needs_rebuild',
            'index_available': stats.get('status') == 'loaded',
            'is_rebuilding': False,
            'project_path': self.base_path,
            'file_count': stats.get('indexed_files', 0),
            'total_symbols': stats.get('total_symbols', 0),
            'symbol_types': stats.get('symbol_types', {}),
            'languages': stats.get('languages', [])
        }

    def _validate_rebuild_request(self) -> None:
        """
        Validate the index rebuild request according to business rules.

        Raises:
            ValueError: If validation fails
        """
        # Business rule: Project must be set up
        self._require_project_setup()

    def _execute_rebuild_workflow(self) -> IndexRebuildResult:
        """
        Execute the core index rebuild business workflow.

        Returns:
            IndexRebuildResult with rebuild data
        """
        start_time = time.time()

        # Set project path in index manager
        if not self._index_manager.set_project_path(self.base_path):
            raise RuntimeError("Failed to set project path in index manager")

        # Rebuild the index
        if not self._index_manager.refresh_index():
            raise RuntimeError("Failed to rebuild index")

        # Get stats for result
        stats = self._index_manager.get_index_stats()
        file_count = stats.get('indexed_files', 0)

        rebuild_time = time.time() - start_time

        return IndexRebuildResult(
            file_count=file_count,
            rebuild_time=rebuild_time,
            status='success',
            message=f"Index rebuilt successfully with {file_count} files"
        )

    def _format_rebuild_result(self, result: IndexRebuildResult) -> str:
        """
        Format the rebuild result according to business requirements.

        Args:
            result: Rebuild result data

        Returns:
            Formatted result string for MCP response
        """
        return f"Project re-indexed. Found {result.file_count} files."

    def build_shallow_index(self) -> str:
        """
        Build and persist the shallow index (file list only).

        Returns:
            Success message including file count if available.

        Raises:
            ValueError/RuntimeError: On validation or build failure
        """
        # Ensure project is set up
        self._require_project_setup()

        # Initialize manager with current base path
        if not self._shallow_manager.set_project_path(self.base_path):
            raise RuntimeError("Failed to set project path in index manager")

        # Build shallow index
        if not self._shallow_manager.build_index():
            raise RuntimeError("Failed to build shallow index")

        # Try to report count
        count = 0
        try:
            shallow_path = getattr(self._shallow_manager, 'index_path', None)
            if shallow_path and os.path.exists(shallow_path):
                with open(shallow_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if isinstance(data, list):
                    count = len(data)
        except Exception as e:  # noqa: BLE001 - safe fallback to zero
            logger.debug(f"Unable to read shallow index count: {e}")

        return f"Shallow index built{f' with {count} files' if count else ''}."

    def rebuild_deep_index(self) -> str:
        """Rebuild the deep index using the original workflow."""
        # Business validation
        self._validate_rebuild_request()

        # Deep rebuild via existing workflow
        result = self._execute_rebuild_workflow()
        return self._format_rebuild_result(result)
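The methods above split indexing into a fast shallow path (file list only) and a slower deep path (full symbol extraction). A minimal usage sketch, assuming an IndexManagementService instance `service` for an already-initialized project (the variable name is illustrative, not part of this diff):

    # Fast path: refresh only the file list.
    print(service.rebuild_index())           # "Shallow index re-built with N files."

    # Slow path: full re-index via the original workflow, only when needed.
    status = service.get_rebuild_status()
    if status['status'] == 'needs_rebuild':
        print(service.rebuild_deep_index())  # "Project re-indexed. Found N files."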
@@ -0,0 +1,375 @@
"""
Project Management Service - Business logic for project lifecycle management.

This service handles the business logic for project initialization, configuration,
and lifecycle management using the new JSON-based indexing system.
"""
import logging
from typing import Dict, Any
from dataclasses import dataclass
from contextlib import contextmanager

from .base_service import BaseService
from ..utils.response_formatter import ResponseFormatter
from ..constants import SUPPORTED_EXTENSIONS
from ..indexing import get_index_manager, get_shallow_index_manager

logger = logging.getLogger(__name__)


@dataclass
class ProjectInitializationResult:
    """Business result for project initialization operations."""
    project_path: str
    file_count: int
    index_source: str  # 'loaded_existing' or 'built_new'
    search_capabilities: str
    monitoring_status: str
    message: str


class ProjectManagementService(BaseService):
    """
    Business service for project lifecycle management.

    This service orchestrates project initialization workflows by composing
    technical tools to achieve business goals like setting up projects,
    managing configurations, and coordinating system components.
    """

    def __init__(self, ctx):
        super().__init__(ctx)
        # Deep index manager (legacy full index)
        self._index_manager = get_index_manager()
        # Shallow index manager (default for initialization)
        self._shallow_manager = get_shallow_index_manager()
        from ..tools.config import ProjectConfigTool
        self._config_tool = ProjectConfigTool()
        # Import FileWatcherTool locally to avoid circular import
        from ..tools.monitoring import FileWatcherTool
        self._watcher_tool = FileWatcherTool(ctx)

    @contextmanager
    def _noop_operation(self, *_args, **_kwargs):
        yield

    def initialize_project(self, path: str) -> str:
        """
        Initialize a project with comprehensive business logic.

        This is the main business method that orchestrates the project
        initialization workflow, handling validation, cleanup, setup,
        and coordination of all project components.

        Args:
            path: Project directory path to initialize

        Returns:
            Success message with project information

        Raises:
            ValueError: If path is invalid or initialization fails
        """
        # Business validation
        self._validate_initialization_request(path)

        # Business workflow: Execute initialization
        result = self._execute_initialization_workflow(path)

        # Business result formatting
        return self._format_initialization_result(result)

    def _validate_initialization_request(self, path: str) -> None:
        """
        Validate the project initialization request according to business rules.

        Args:
            path: Project path to validate

        Raises:
            ValueError: If validation fails
        """
        # Business rule: Path must be valid
        error = self._config_tool.validate_project_path(path)
        if error:
            raise ValueError(error)

    def _execute_initialization_workflow(self, path: str) -> ProjectInitializationResult:
        """
        Execute the core project initialization business workflow.

        Args:
            path: Project path to initialize

        Returns:
            ProjectInitializationResult with initialization data
        """
        # Business step 1: Initialize config tool
        self._config_tool.initialize_settings(path)

        # Normalize path for consistent processing
        normalized_path = self._config_tool.normalize_project_path(path)

        # Business step 2: Clean up existing project state
        self._cleanup_existing_project()

        # Business step 3: Initialize shallow index by default (fast path)
        index_result = self._initialize_shallow_index_manager(normalized_path)

        # Business step 3.1: Store index manager in context for other services
        self.helper.update_index_manager(self._index_manager)

        # Business step 4: Set up file monitoring
        monitoring_result = self._setup_file_monitoring(normalized_path)

        # Business step 5: Update system state
        self._update_project_state(normalized_path, index_result['file_count'])

        # Business step 6: Get search capabilities info
        search_info = self._get_search_capabilities_info()

        return ProjectInitializationResult(
            project_path=normalized_path,
            file_count=index_result['file_count'],
            index_source=index_result['source'],
            search_capabilities=search_info,
            monitoring_status=monitoring_result,
            message=f"Project initialized: {normalized_path}"
        )

    def _cleanup_existing_project(self) -> None:
        """Business logic to clean up existing project state."""
        with self._noop_operation():
            # Stop existing file monitoring
            self._watcher_tool.stop_existing_watcher()

            # Clear existing index cache
            self.helper.clear_index_cache()

    def _initialize_shallow_index_manager(self, project_path: str) -> Dict[str, Any]:
        """
        Business logic to initialize the shallow index manager by default.

        Args:
            project_path: Project path

        Returns:
            Dictionary with initialization results
        """
        # Set project path in shallow manager
        if not self._shallow_manager.set_project_path(project_path):
            raise RuntimeError(f"Failed to set project path (shallow): {project_path}")

        # Update context
        self.helper.update_base_path(project_path)

        # Try to load existing shallow index or build a new one
        if self._shallow_manager.load_index():
            source = "loaded_existing"
        else:
            if not self._shallow_manager.build_index():
                raise RuntimeError("Failed to build shallow index")
            source = "built_new"

        # Determine file count from shallow list
        try:
            files = self._shallow_manager.get_file_list()
            file_count = len(files)
        except Exception:  # noqa: BLE001 - safe fallback
            file_count = 0

        return {
            'file_count': file_count,
            'source': source,
            'total_symbols': 0,
            'languages': []
        }

    def _is_valid_existing_index(self, index_data: Dict[str, Any]) -> bool:
        """
        Business rule to determine if an existing index is valid and usable.

        Args:
            index_data: Index data to validate

        Returns:
            True if index is valid and usable, False otherwise
        """
        if not index_data or not isinstance(index_data, dict):
            return False

        # Business rule: Must have new format metadata
        if 'index_metadata' not in index_data:
            return False

        # Business rule: Must be a compatible version. Compare the numeric major
        # version; a plain string comparison would mis-order versions like '10.0'.
        version = index_data.get('index_metadata', {}).get('version', '')
        try:
            major_version = int(str(version).split('.', 1)[0])
        except (TypeError, ValueError):
            return False
        return major_version >= 3

    def _load_existing_index(self, index_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Business logic to load and use an existing index.

        Args:
            index_data: Existing index data

        Returns:
            Dictionary with loading results
        """
        # Note: Legacy index loading is now handled by UnifiedIndexManager.
        # This method is kept for backward compatibility, but the functionality moved.

        # Extract file count from metadata
        file_count = index_data.get('project_metadata', {}).get('total_files', 0)

        return {
            'file_count': file_count,
            'source': 'loaded_existing'
        }

    def _setup_file_monitoring(self, project_path: str) -> str:
        """
        Business logic to set up file monitoring for the project.

        Args:
            project_path: Project path to monitor

        Returns:
            String describing monitoring setup result
        """
        try:
            # Create rebuild callback that uses the JSON index manager
            def rebuild_callback():
                logger.info("File watcher triggered rebuild callback")
                try:
                    logger.debug(f"Starting shallow index rebuild for: {project_path}")
                    # Business logic: File changed, rebuild using SHALLOW index manager
                    try:
                        if not self._shallow_manager.set_project_path(project_path):
                            logger.warning("Shallow manager set_project_path failed")
                            return False
                        if self._shallow_manager.build_index():
                            files = self._shallow_manager.get_file_list()
                            logger.info(f"File watcher shallow rebuild completed successfully - {len(files)} files")
                            return True
                        else:
                            logger.warning("File watcher shallow rebuild failed")
                            return False
                    except Exception as e:
                        import traceback
                        logger.error(f"File watcher shallow rebuild failed: {e}")
                        logger.error(f"Traceback: {traceback.format_exc()}")
                        return False
                except Exception as e:
                    import traceback
                    logger.error(f"File watcher rebuild failed: {e}")
                    logger.error(f"Traceback: {traceback.format_exc()}")
                    return False

            # Start monitoring using watcher tool
            success = self._watcher_tool.start_monitoring(project_path, rebuild_callback)

            if success:
                # Store watcher in context for later access
                self._watcher_tool.store_in_context()
                # No logging
                return "monitoring_active"
            else:
                self._watcher_tool.record_error("Failed to start file monitoring")
                return "monitoring_failed"

        except Exception as e:
            error_msg = f"File monitoring setup failed: {e}"
            self._watcher_tool.record_error(error_msg)
            return "monitoring_error"

    def _update_project_state(self, project_path: str, file_count: int) -> None:
        """Business logic to update system state after project initialization."""
        # Update context with file count
        self.helper.update_file_count(file_count)
        # No logging

    def _get_search_capabilities_info(self) -> str:
        """Business logic to get search capabilities information."""
        search_info = self._config_tool.get_search_tool_info()

        if search_info['available']:
            return f"Advanced search enabled ({search_info['name']})"
        else:
            return "Basic search available"

    def _format_initialization_result(self, result: ProjectInitializationResult) -> str:
        """
        Format the initialization result according to business requirements.

        Args:
            result: Initialization result data

        Returns:
            Formatted result string for MCP response
        """
        if result.index_source == 'unified_manager':
            message = (f"Project path set to: {result.project_path}. "
                       f"Initialized unified index with {result.file_count} files. "
                       f"{result.search_capabilities}.")
        elif result.index_source == 'failed':
            message = (f"Project path set to: {result.project_path}. "
                       f"Index initialization failed. Some features may be limited. "
                       f"{result.search_capabilities}.")
        else:
            message = (f"Project path set to: {result.project_path}. "
                       f"Indexed {result.file_count} files. "
                       f"{result.search_capabilities}.")

        if result.monitoring_status != "monitoring_active":
            message += " (File monitoring unavailable - use manual refresh)"

        return message

    def get_project_config(self) -> str:
        """
        Get the current project configuration for the MCP resource.

        Returns:
            JSON formatted configuration string
        """
        # Check if project is configured
        if not self.helper.base_path:
            config_data = {
                "status": "not_configured",
                "message": ("Project path not set. Please use set_project_path "
                            "to set a project directory first."),
                "supported_extensions": SUPPORTED_EXTENSIONS
            }
            return ResponseFormatter.config_response(config_data)

        # Get settings stats
        settings_stats = self.helper.settings.get_stats() if self.helper.settings else {}

        config_data = {
            "base_path": self.helper.base_path,
            "supported_extensions": SUPPORTED_EXTENSIONS,
            "file_count": self.helper.file_count,
            "settings_directory": self.helper.settings.settings_path if self.helper.settings else "",
            "settings_stats": settings_stats
        }

        return ResponseFormatter.config_response(config_data)

    # Removed: get_project_structure; the project structure resource is deprecated
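For orientation, a hedged sketch of how a server-side tool handler might drive the initialization workflow above; `ctx` stands for whatever request context the MCP server supplies and is assumed here, not defined in this diff:

    service = ProjectManagementService(ctx)
    try:
        message = service.initialize_project('/path/to/project')
        # -> "Project path set to: /path/to/project. Indexed N files. ..."
    except ValueError as err:
        message = f"Initialization failed: {err}"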
@@ -0,0 +1,269 @@
"""
Search service for the Code Index MCP server.

This service handles code search operations, search tool management,
and search strategy selection.
"""

from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from .base_service import BaseService
from ..utils import FileFilter, ResponseFormatter, ValidationHelper
from ..search.base import is_safe_regex_pattern


class SearchService(BaseService):
    """Service for managing code search operations."""

    def __init__(self, ctx):
        super().__init__(ctx)
        self.file_filter = self._create_file_filter()

    def search_code(  # pylint: disable=too-many-arguments, too-many-locals
        self,
        pattern: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False,
        regex: Optional[bool] = None,
        start_index: int = 0,
        max_results: Optional[int] = 10
    ) -> Dict[str, Any]:
        """Search for code patterns in the project."""
        self._require_project_setup()

        if regex is None:
            regex = is_safe_regex_pattern(pattern)

        error = ValidationHelper.validate_search_pattern(pattern, regex)
        if error:
            raise ValueError(error)

        if file_pattern:
            error = ValidationHelper.validate_glob_pattern(file_pattern)
            if error:
                raise ValueError(f"Invalid file pattern: {error}")

        pagination_error = ValidationHelper.validate_pagination(start_index, max_results)
        if pagination_error:
            raise ValueError(pagination_error)

        if not self.settings:
            raise ValueError("Settings not available")

        strategy = self.settings.get_preferred_search_tool()
        if not strategy:
            raise ValueError("No search strategies available")

        self._configure_strategy(strategy)

        try:
            results = strategy.search(
                pattern=pattern,
                base_path=self.base_path,
                case_sensitive=case_sensitive,
                context_lines=context_lines,
                file_pattern=file_pattern,
                fuzzy=fuzzy,
                regex=regex
            )
            filtered = self._filter_results(results)
            formatted_results, pagination = self._paginate_results(
                filtered,
                start_index=start_index,
                max_results=max_results
            )
            return ResponseFormatter.search_results_response(
                formatted_results,
                pagination
            )
        except Exception as exc:
            raise ValueError(f"Search failed using '{strategy.name}': {exc}") from exc

    def refresh_search_tools(self) -> str:
        """Refresh the available search tools."""
        if not self.settings:
            raise ValueError("Settings not available")

        self.settings.refresh_available_strategies()
        config = self.settings.get_search_tools_config()

        available = config['available_tools']
        preferred = config['preferred_tool']
        return f"Search tools refreshed. Available: {available}. Preferred: {preferred}."

    def get_search_capabilities(self) -> Dict[str, Any]:
        """Get information about search capabilities and available tools."""
        if not self.settings:
            return {"error": "Settings not available"}

        config = self.settings.get_search_tools_config()

        capabilities = {
            "available_tools": config.get('available_tools', []),
            "preferred_tool": config.get('preferred_tool', 'basic'),
            "supports_regex": True,
            "supports_fuzzy": True,
            "supports_case_sensitivity": True,
            "supports_context_lines": True,
            "supports_file_patterns": True
        }

        return capabilities

    def _configure_strategy(self, strategy) -> None:
        """Apply shared exclusion configuration to the strategy if supported."""
        configure = getattr(strategy, 'configure_excludes', None)
        if not configure:
            return

        try:
            configure(self.file_filter)
        except Exception:  # pragma: no cover - defensive fallback
            pass

    def _create_file_filter(self) -> FileFilter:
        """Build a shared file filter drawing from project settings."""
        additional_dirs: List[str] = []
        additional_file_patterns: List[str] = []

        settings = self.settings
        if settings:
            try:
                config = settings.get_file_watcher_config()
            except Exception:  # pragma: no cover - fallback if config fails
                config = {}

            for key in ('exclude_patterns', 'additional_exclude_patterns'):
                patterns = config.get(key) or []
                for pattern in patterns:
                    if not isinstance(pattern, str):
                        continue
                    normalized = pattern.strip()
                    if not normalized:
                        continue
                    additional_dirs.append(normalized)
                    additional_file_patterns.append(normalized)

        file_filter = FileFilter(additional_dirs or None)

        if additional_file_patterns:
            file_filter.exclude_files.update(additional_file_patterns)

        return file_filter

    def _filter_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Filter out matches that reside under excluded paths."""
        if not isinstance(results, dict) or not results:
            return results

        if 'error' in results or not self.file_filter or not self.base_path:
            return results

        base_path = Path(self.base_path)
        filtered: Dict[str, Any] = {}

        for rel_path, matches in results.items():
            if not isinstance(rel_path, str):
                continue

            normalized = Path(rel_path.replace('\\', '/'))
            try:
                absolute = (base_path / normalized).resolve()
            except Exception:  # pragma: no cover - invalid path safety
                continue

            try:
                if self.file_filter.should_process_path(absolute, base_path):
                    filtered[rel_path] = matches
            except Exception:  # pragma: no cover - defensive fallback
                continue

        return filtered

    def _paginate_results(
        self,
        results: Dict[str, Any],
        start_index: int,
        max_results: Optional[int]
    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
        """Apply pagination to search results and format them for responses."""
        total_matches = 0
        for matches in results.values():
            if isinstance(matches, (list, tuple)):
                total_matches += len(matches)

        effective_start = min(max(start_index, 0), total_matches)

        if total_matches == 0 or effective_start >= total_matches:
            pagination = self._build_pagination_metadata(
                total_matches=total_matches,
                returned=0,
                start_index=effective_start,
                max_results=max_results
            )
            return [], pagination

        collected: List[Dict[str, Any]] = []
        current_index = 0

        sorted_items = sorted(
            (
                (path, matches)
                for path, matches in results.items()
                if isinstance(path, str) and isinstance(matches, (list, tuple))
            ),
            key=lambda item: item[0]
        )

        for path, matches in sorted_items:
            sorted_matches = sorted(
                (match for match in matches if isinstance(match, (list, tuple)) and len(match) >= 2),
                key=lambda pair: pair[0]
            )

            for line_number, content, *_ in sorted_matches:
                if current_index >= effective_start:
                    if max_results is None or len(collected) < max_results:
                        collected.append({
                            "file": path,
                            "line": line_number,
                            "text": content
                        })
                    else:
                        break
                current_index += 1
            if max_results is not None and len(collected) >= max_results:
                break

        pagination = self._build_pagination_metadata(
            total_matches=total_matches,
            returned=len(collected),
            start_index=effective_start,
            max_results=max_results
        )
        return collected, pagination

    @staticmethod
    def _build_pagination_metadata(
        total_matches: int,
        returned: int,
        start_index: int,
        max_results: Optional[int]
    ) -> Dict[str, Any]:
        """Construct pagination metadata for search responses."""
        end_index = start_index + returned
        metadata: Dict[str, Any] = {
            "total_matches": total_matches,
            "returned": returned,
            "start_index": start_index,
            "has_more": end_index < total_matches
        }

        if max_results is not None:
            metadata["max_results"] = max_results

        metadata["end_index"] = end_index
        return metadata
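To make the pagination contract above concrete, a small worked example with illustrative numbers: out of 25 total matches, a request starting at index 20 with max_results=10 can return only the 5 remaining matches, so has_more is False and end_index is 25:

    meta = SearchService._build_pagination_metadata(
        total_matches=25, returned=5, start_index=20, max_results=10)
    assert meta == {'total_matches': 25, 'returned': 5, 'start_index': 20,
                    'has_more': False, 'max_results': 10, 'end_index': 25}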
@@ -0,0 +1,191 @@
"""
Settings management service for the Code Index MCP server.

This service handles settings information, statistics,
temporary directory management, and settings cleanup operations.
"""

import os
import tempfile
from typing import Dict, Any

from .base_service import BaseService
from ..utils import ResponseFormatter
from ..constants import SETTINGS_DIR
from ..project_settings import ProjectSettings
from ..indexing import get_index_manager


def manage_temp_directory(action: str) -> Dict[str, Any]:
    """
    Manage temporary directory operations.

    This is a standalone function that doesn't require project context.
    Handles the logic for the create_temp_directory and check_temp_directory MCP tools.

    Args:
        action: The action to perform ('create' or 'check')

    Returns:
        Dictionary with directory information and operation results

    Raises:
        ValueError: If action is invalid or operation fails
    """
    if action not in ['create', 'check']:
        raise ValueError(f"Invalid action: {action}. Must be 'create' or 'check'")

    # Try to get the actual temp directory from the index manager, falling
    # back to the default location
    try:
        index_manager = get_index_manager()
        temp_dir = index_manager.temp_dir if index_manager.temp_dir else os.path.join(
            tempfile.gettempdir(), SETTINGS_DIR)
    except Exception:  # avoid a bare except; fall back to the default location
        temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)

    if action == 'create':
        existed_before = os.path.exists(temp_dir)

        try:
            # Use ProjectSettings to handle directory creation consistently
            ProjectSettings("", skip_load=True)

            result = ResponseFormatter.directory_info_response(
                temp_directory=temp_dir,
                exists=os.path.exists(temp_dir),
                is_directory=os.path.isdir(temp_dir)
            )
            result["existed_before"] = existed_before
            result["created"] = not existed_before

            return result

        except (OSError, IOError, ValueError) as e:
            return ResponseFormatter.directory_info_response(
                temp_directory=temp_dir,
                exists=False,
                error=str(e)
            )

    else:  # action == 'check'
        result = ResponseFormatter.directory_info_response(
            temp_directory=temp_dir,
            exists=os.path.exists(temp_dir),
            is_directory=os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
        )
        result["temp_root"] = tempfile.gettempdir()

        # If the directory exists, list its contents
        if result["exists"] and result["is_directory"]:
            try:
                contents = os.listdir(temp_dir)
                result["contents"] = contents
                result["subdirectories"] = []

                # Check each subdirectory
                for item in contents:
                    item_path = os.path.join(temp_dir, item)
                    if os.path.isdir(item_path):
                        subdir_info = {
                            "name": item,
                            "path": item_path,
                            "contents": os.listdir(item_path) if os.path.exists(item_path) else []
                        }
                        result["subdirectories"].append(subdir_info)

            except (OSError, PermissionError) as e:
                result["error"] = str(e)

        return result


class SettingsService(BaseService):
    """
    Service for managing settings and directory operations.

    This service handles:
    - Settings information and statistics
    - Temporary directory management
    - Settings cleanup operations
    - Configuration data access
    """

    def get_settings_info(self) -> Dict[str, Any]:
        """
        Get comprehensive settings information.

        Handles the logic for the get_settings_info MCP tool.

        Returns:
            Dictionary with settings directory, config, stats, and status information
        """
        temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)

        # Get the actual index directory from the index manager
        index_manager = get_index_manager()
        actual_temp_dir = index_manager.temp_dir if index_manager.temp_dir else temp_dir

        # Check if base_path is set
        if not self.base_path:
            return ResponseFormatter.settings_info_response(
                settings_directory="",
                temp_directory=actual_temp_dir,
                temp_directory_exists=os.path.exists(actual_temp_dir),
                config={},
                stats={},
                exists=False,
                status="not_configured",
                message="Project path not set. Please use set_project_path to set a "
                        "project directory first."
            )

        # Get config and stats
        config = self.settings.load_config() if self.settings else {}
        stats = self.settings.get_stats() if self.settings else {}
        settings_directory = actual_temp_dir
        exists = os.path.exists(settings_directory) if settings_directory else False

        return ResponseFormatter.settings_info_response(
            settings_directory=settings_directory,
            temp_directory=actual_temp_dir,
            temp_directory_exists=os.path.exists(actual_temp_dir),
            config=config,
            stats=stats,
            exists=exists
        )

    def clear_all_settings(self) -> str:
        """
        Clear all settings and cached data.

        Handles the logic for the clear_settings MCP tool.

        Returns:
            Success message confirming settings were cleared
        """
        if self.settings:
            self.settings.clear()

        return "Project settings, index, and cache have been cleared."

    def get_settings_stats(self) -> str:
        """
        Get settings statistics as a JSON string.

        Handles the logic for the settings://stats MCP resource.

        Returns:
            JSON formatted settings statistics
        """
        if not self.settings:
            stats_data = {"error": "Settings not available"}
        else:
            stats_data = self.settings.get_stats()

        return ResponseFormatter.stats_response(stats_data)
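A short sketch of driving the standalone helper above from a tool handler; any response keys beyond those set explicitly in this file come from ResponseFormatter.directory_info_response, which lives outside this diff:

    info = manage_temp_directory('check')
    if not info.get('exists'):
        info = manage_temp_directory('create')
        print(info.get('created'))  # True on first creation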
@@ -0,0 +1,407 @@
"""
System Management Service - Business logic for system configuration and monitoring.

This service handles the business logic for system management operations including
file watcher status, configuration management, and system health monitoring.
It composes technical tools to achieve business goals.
"""

from typing import Dict, Any, Optional
from dataclasses import dataclass

from .index_management_service import IndexManagementService
from .base_service import BaseService
# FileWatcherTool will be imported locally to avoid circular import
from ..tools.config import ProjectConfigTool, SettingsTool


@dataclass
class FileWatcherStatus:
    """Business result for file watcher status operations."""
    available: bool
    active: bool
    status: str
    message: Optional[str]
    error_info: Optional[Dict[str, Any]]
    configuration: Dict[str, Any]
    rebuild_status: Dict[str, Any]
    recommendations: list[str]


class SystemManagementService(BaseService):
    """
    Business service for system configuration and monitoring.

    This service orchestrates system management workflows by composing
    technical tools to achieve business goals like monitoring file watchers,
    managing configurations, and providing system health insights.
    """

    def __init__(self, ctx):
        super().__init__(ctx)
        # Import FileWatcherTool locally to avoid circular import
        from ..tools.monitoring import FileWatcherTool
        self._watcher_tool = FileWatcherTool(ctx)
        self._config_tool = ProjectConfigTool()
        self._settings_tool = SettingsTool()

    def get_file_watcher_status(self) -> Dict[str, Any]:
        """
        Get comprehensive file watcher status with business intelligence.

        This is the main business method that orchestrates the file watcher
        status workflow, analyzing system state, providing recommendations,
        and formatting comprehensive status information.

        Returns:
            Dictionary with comprehensive file watcher status
        """
        # Business workflow: Analyze system state
        status_result = self._analyze_file_watcher_state()

        # Business result formatting
        return self._format_status_result(status_result)

    def configure_file_watcher(self, enabled: Optional[bool] = None,
                               debounce_seconds: Optional[float] = None,
                               additional_exclude_patterns: Optional[list] = None) -> str:
        """
        Configure file watcher settings with business validation.

        Args:
            enabled: Whether to enable file watcher
            debounce_seconds: Debounce time in seconds
            additional_exclude_patterns: Additional patterns to exclude

        Returns:
            Success message with configuration details

        Raises:
            ValueError: If configuration is invalid
        """
        # Business validation
        self._validate_configuration_request(enabled, debounce_seconds, additional_exclude_patterns)

        # Business workflow: Apply configuration
        result = self._apply_file_watcher_configuration(enabled, debounce_seconds, additional_exclude_patterns)

        return result

    def _analyze_file_watcher_state(self) -> FileWatcherStatus:
        """
        Business logic to analyze comprehensive file watcher state.

        Returns:
            FileWatcherStatus with complete analysis
        """
        # Business step 1: Check for error conditions
        error_info = self._check_for_watcher_errors()
        if error_info:
            return self._create_error_status(error_info)

        # Business step 2: Check initialization state
        watcher_service = self._watcher_tool.get_from_context()
        if not watcher_service:
            return self._create_not_initialized_status()

        # Business step 3: Get active status
        return self._create_active_status(watcher_service)

    def _check_for_watcher_errors(self) -> Optional[Dict[str, Any]]:
        """
        Business logic to check for file watcher error conditions.

        Returns:
            Error information dictionary or None if no errors
        """
        # Check context for recorded errors
        if hasattr(self.ctx.request_context.lifespan_context, 'file_watcher_error'):
            return self.ctx.request_context.lifespan_context.file_watcher_error

        return None

    def _create_error_status(self, error_info: Dict[str, Any]) -> FileWatcherStatus:
        """
        Business logic to create error status with recommendations.

        Args:
            error_info: Error information from context

        Returns:
            FileWatcherStatus for error condition
        """
        # Get configuration if available
        configuration = self._get_file_watcher_configuration()

        # Get rebuild status
        rebuild_status = self._get_rebuild_status()

        # Business logic: Generate error-specific recommendations
        recommendations = [
            "Use refresh_index tool for manual updates",
            "File watcher auto-refresh is disabled due to errors",
            "Consider restarting the project or checking system permissions"
        ]

        return FileWatcherStatus(
            available=True,
            active=False,
            status="error",
            message=error_info.get('message', 'File watcher error occurred'),
            error_info=error_info,
            configuration=configuration,
            rebuild_status=rebuild_status,
            recommendations=recommendations
        )

    def _create_not_initialized_status(self) -> FileWatcherStatus:
        """
        Business logic to create not-initialized status.

        Returns:
            FileWatcherStatus for not-initialized condition
        """
        # Get basic configuration
        configuration = self._get_file_watcher_configuration()

        # Get rebuild status
        rebuild_status = self._get_rebuild_status()

        # Business logic: Generate initialization recommendations
        recommendations = [
            "Use set_project_path tool to initialize file watcher",
            "File monitoring will be enabled after project initialization"
        ]

        return FileWatcherStatus(
            available=True,
            active=False,
            status="not_initialized",
            message="File watcher service not initialized. Set project path to enable auto-refresh.",
            error_info=None,
            configuration=configuration,
            rebuild_status=rebuild_status,
            recommendations=recommendations
        )

    def _create_active_status(self, watcher_service) -> FileWatcherStatus:
        """
        Business logic to create active status with comprehensive information.

        Args:
            watcher_service: Active file watcher service

        Returns:
            FileWatcherStatus for active condition
        """
        # Get detailed status from watcher service
        watcher_status = watcher_service.get_status()

        # Get configuration
        configuration = self._get_file_watcher_configuration()

        # Get rebuild status
        rebuild_status = self._get_rebuild_status()

        # Business logic: Generate status-specific recommendations
        recommendations = self._generate_active_recommendations(watcher_status)

        return FileWatcherStatus(
            available=watcher_status.get('available', True),
            active=watcher_status.get('active', False),
            status=watcher_status.get('status', 'active'),
            message=watcher_status.get('message'),
            error_info=None,
            configuration=configuration,
            rebuild_status=rebuild_status,
            recommendations=recommendations
        )

    def _get_file_watcher_configuration(self) -> Dict[str, Any]:
        """
        Business logic to get file watcher configuration safely.

        Returns:
            Configuration dictionary
        """
        try:
            # Try to get from project settings
            if (hasattr(self.ctx.request_context.lifespan_context, 'settings') and
                    self.ctx.request_context.lifespan_context.settings):
                return self.ctx.request_context.lifespan_context.settings.get_file_watcher_config()

            # Fallback to default configuration
            return {
                'enabled': True,
                'debounce_seconds': 6.0,
                'additional_exclude_patterns': [],
                'note': 'Default configuration - project not fully initialized'
            }

        except Exception as e:
            return {
                'error': f'Could not load configuration: {e}',
                'enabled': True,
                'debounce_seconds': 6.0
            }

    def _get_rebuild_status(self) -> Dict[str, Any]:
        """
        Business logic to get index rebuild status safely.

        Returns:
            Rebuild status dictionary
        """
        try:
            index_service = IndexManagementService(self.ctx)
            return index_service.get_rebuild_status()

        except Exception as e:
            return {
                'status': 'unknown',
                'error': f'Could not get rebuild status: {e}'
            }

    def _generate_active_recommendations(self, watcher_status: Dict[str, Any]) -> list[str]:
        """
        Business logic to generate recommendations for an active file watcher.

        Args:
            watcher_status: Current watcher status

        Returns:
            List of recommendations
        """
        recommendations = []

        if watcher_status.get('active', False):
            recommendations.append("File watcher is active - automatic index updates enabled")
            recommendations.append("Files will be re-indexed automatically when changed")
        else:
            recommendations.append("File watcher is available but not active")
            recommendations.append("Use refresh_index for manual updates")

        # Add performance recommendations
        restart_attempts = watcher_status.get('restart_attempts', 0)
        if restart_attempts > 0:
            recommendations.append(f"File watcher has restarted {restart_attempts} times - monitor for stability")

        return recommendations

    def _validate_configuration_request(self, enabled: Optional[bool],
                                        debounce_seconds: Optional[float],
                                        additional_exclude_patterns: Optional[list]) -> None:
        """
        Business validation for file watcher configuration.

        Args:
            enabled: Enable flag
            debounce_seconds: Debounce time
            additional_exclude_patterns: Exclude patterns

        Raises:
            ValueError: If validation fails
        """
        # Business rule: Enabled flag must be boolean if provided
        if enabled is not None and not isinstance(enabled, bool):
            raise ValueError("Enabled flag must be a boolean value")

        # Business rule: Debounce seconds must be reasonable
        if debounce_seconds is not None:
            if debounce_seconds < 0.1:
                raise ValueError("Debounce seconds must be at least 0.1")
            if debounce_seconds > 300:  # 5 minutes
                raise ValueError("Debounce seconds cannot exceed 300 (5 minutes)")

        # Business rule: Exclude patterns must be valid
        if additional_exclude_patterns is not None:
            if not isinstance(additional_exclude_patterns, list):
                raise ValueError("Additional exclude patterns must be a list")

            for pattern in additional_exclude_patterns:
                if not isinstance(pattern, str):
                    raise ValueError("All exclude patterns must be strings")
                if not pattern.strip():
                    raise ValueError("Exclude patterns cannot be empty")

    def _apply_file_watcher_configuration(self, enabled: Optional[bool],
                                          debounce_seconds: Optional[float],
                                          additional_exclude_patterns: Optional[list]) -> str:
        """
        Business logic to apply file watcher configuration.

        Args:
            enabled: Enable flag
            debounce_seconds: Debounce time
            additional_exclude_patterns: Exclude patterns

        Returns:
            Success message

        Raises:
            ValueError: If configuration cannot be applied
        """
        # Business rule: Settings must be available
        if (not hasattr(self.ctx.request_context.lifespan_context, 'settings') or
                not self.ctx.request_context.lifespan_context.settings):
            raise ValueError("Settings not available - project path not set")

        settings = self.ctx.request_context.lifespan_context.settings

        # Build updates dictionary
        updates = {}
        if enabled is not None:
            updates["enabled"] = enabled
        if debounce_seconds is not None:
            updates["debounce_seconds"] = debounce_seconds
        if additional_exclude_patterns is not None:
            updates["additional_exclude_patterns"] = additional_exclude_patterns

        if not updates:
            return "No configuration changes specified"

        # Apply configuration
        settings.update_file_watcher_config(updates)

        # Business logic: Generate informative result message
        changes_summary = []
        if 'enabled' in updates:
            changes_summary.append(f"enabled={updates['enabled']}")
        if 'debounce_seconds' in updates:
            changes_summary.append(f"debounce={updates['debounce_seconds']}s")
        if 'additional_exclude_patterns' in updates:
            pattern_count = len(updates['additional_exclude_patterns'])
            changes_summary.append(f"exclude_patterns={pattern_count}")

        changes_str = ", ".join(changes_summary)

        return (f"File watcher configuration updated: {changes_str}. "
                f"Restart may be required for changes to take effect.")

    def _format_status_result(self, status_result: FileWatcherStatus) -> Dict[str, Any]:
        """
        Format the status result according to business requirements.

        Args:
            status_result: Status analysis result

        Returns:
            Formatted result dictionary for MCP response
        """
        result = {
            'available': status_result.available,
            'active': status_result.active,
            'status': status_result.status,
            'configuration': status_result.configuration,
            'rebuild_status': status_result.rebuild_status,
            'recommendations': status_result.recommendations
        }

        # Add optional fields
        if status_result.message:
            result['message'] = status_result.message

        if status_result.error_info:
            result['error'] = status_result.error_info
            result['manual_refresh_required'] = True

        return result
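The validation above bounds the debounce window to 0.1-300 seconds. A sketch, assuming a SystemManagementService instance `svc` (the name is illustrative):

    print(svc.configure_file_watcher(enabled=True, debounce_seconds=2.5))
    # -> "File watcher configuration updated: enabled=True, debounce=2.5s. ..."

    try:
        svc.configure_file_watcher(debounce_seconds=0.01)
    except ValueError as err:
        print(err)  # "Debounce seconds must be at least 0.1"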
@@ -0,0 +1,19 @@
"""
Tool Layer - Technical components for the Code Index MCP server.

This package contains pure technical components that provide specific
capabilities without business logic. These tools are composed by the
business layer to achieve business goals.
"""

from .filesystem import FileMatchingTool, FileSystemTool
from .config import ProjectConfigTool, SettingsTool
from .monitoring import FileWatcherTool

__all__ = [
    'FileMatchingTool',
    'FileSystemTool',
    'ProjectConfigTool',
    'SettingsTool',
    'FileWatcherTool'
]
@@ -0,0 +1,8 @@
"""
Configuration Tools - Technical components for configuration management.
"""

from .project_config_tool import ProjectConfigTool
from .settings_tool import SettingsTool

__all__ = ['ProjectConfigTool', 'SettingsTool']
@@ -0,0 +1,308 @@
"""
Project Configuration Tool - Pure technical component for project configuration operations.

This tool handles low-level project configuration operations without any business logic.
"""

import os
from typing import Dict, Any, Optional
from pathlib import Path

from ...project_settings import ProjectSettings


class ProjectConfigTool:
    """
    Pure technical component for project configuration operations.

    This tool provides low-level configuration management capabilities
    without any business logic or decision making.
    """

    def __init__(self):
        self._settings: Optional[ProjectSettings] = None
        self._project_path: Optional[str] = None

    def initialize_settings(self, project_path: str) -> ProjectSettings:
        """
        Initialize project settings for the given path.

        Args:
            project_path: Absolute path to the project directory

        Returns:
            ProjectSettings instance

        Raises:
            ValueError: If project path is invalid
        """
        if not Path(project_path).exists():
            raise ValueError(f"Project path does not exist: {project_path}")

        if not Path(project_path).is_dir():
            raise ValueError(f"Project path is not a directory: {project_path}")

        self._project_path = project_path
        self._settings = ProjectSettings(project_path, skip_load=False)

        return self._settings

    def load_existing_index(self) -> Optional[Dict[str, Any]]:
        """
        Load existing index data if available.

        Returns:
            Index data dictionary or None if not available

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized. Call initialize_settings() first.")

        try:
            return self._settings.load_index()
        except Exception:
            return None

    def save_project_config(self, config_data: Dict[str, Any]) -> None:
        """
        Save project configuration data.

        Args:
            config_data: Configuration data to save

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        self._settings.save_config(config_data)

    def save_index_data(self, index_data: Dict[str, Any]) -> None:
        """
        Save index data to persistent storage.

        Args:
            index_data: Index data to save

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        self._settings.save_index(index_data)

    def check_index_version(self) -> bool:
        """
        Check if the JSON index is the latest version.

        Returns:
            True if JSON index exists and is recent, False if it needs a rebuild

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        # Check if JSON index exists and is fresh
        from ...indexing import get_index_manager
        index_manager = get_index_manager()

        # Set project path if available
        if self._settings.base_path:
            index_manager.set_project_path(self._settings.base_path)
            stats = index_manager.get_index_stats()
            return stats.get('status') == 'loaded'

        return False

    def cleanup_legacy_files(self) -> None:
        """
        Clean up legacy index files.

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        self._settings.cleanup_legacy_files()

    def get_search_tool_info(self) -> Dict[str, Any]:
        """
        Get information about available search tools.

        Returns:
            Dictionary with search tool information

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        search_tool = self._settings.get_preferred_search_tool()
        return {
            'available': search_tool is not None,
            'name': search_tool.name if search_tool else None,
            'description': "Advanced search enabled" if search_tool else "Basic search available"
        }

    def get_file_watcher_config(self) -> Dict[str, Any]:
        """
        Get file watcher configuration.

        Returns:
            File watcher configuration dictionary

        Raises:
            RuntimeError: If settings not initialized
        """
        if not self._settings:
            raise RuntimeError("Settings not initialized")

        return self._settings.get_file_watcher_config()

    def create_default_config(self, project_path: str) -> Dict[str, Any]:
        """
        Create default project configuration.

        Args:
            project_path: Project path for the configuration

        Returns:
            Default configuration dictionary
        """
        from ...utils import FileFilter

        file_filter = FileFilter()
        return {
            "base_path": project_path,
            "supported_extensions": list(file_filter.supported_extensions),
            "last_indexed": None,
            "file_watcher": self.get_file_watcher_config() if self._settings else {}
        }

    def validate_project_path(self, path: str) -> Optional[str]:
        """
        Validate a project path.

        Args:
            path: Path to validate

        Returns:
            Error message if invalid, None if valid
        """
        if not path or not path.strip():
            return "Project path cannot be empty"

        try:
            norm_path = os.path.normpath(path)
            abs_path = os.path.abspath(norm_path)
        except (OSError, ValueError) as e:
            return f"Invalid path format: {str(e)}"

        if not os.path.exists(abs_path):
            return f"Path does not exist: {abs_path}"

        if not os.path.isdir(abs_path):
            return f"Path is not a directory: {abs_path}"

        return None

    def normalize_project_path(self, path: str) -> str:
        """
        Normalize and get the absolute project path.

        Args:
            path: Path to normalize

        Returns:
            Normalized absolute path
        """
        norm_path = os.path.normpath(path)
        return os.path.abspath(norm_path)

    def get_settings_path(self) -> Optional[str]:
        """
        Get the settings directory path.

        Returns:
            Settings directory path or None if not initialized
        """
        return self._settings.settings_path if self._settings else None

    def get_project_path(self) -> Optional[str]:
        """
        Get the current project path.

        Returns:
            Project path or None if not set
        """
        return self._project_path

    def get_basic_project_structure(self, project_path: str) -> Dict[str, Any]:
        """
        Get basic project directory structure.

        Args:
            project_path: Path to analyze

        Returns:
            Basic directory structure dictionary
        """
        from ...utils import FileFilter

        file_filter = FileFilter()

        def build_tree(path: str, max_depth: int = 3, current_depth: int = 0) -> Dict[str, Any]:
            """Build directory tree with limited depth using centralized filtering."""
            if current_depth >= max_depth:
                return {"type": "directory", "truncated": True}

            try:
                items = []
                path_obj = Path(path)

                for item in sorted(path_obj.iterdir()):
                    if item.is_dir():
                        # Use centralized directory filtering
                        if not file_filter.should_exclude_directory(item.name):
                            items.append({
                                "name": item.name,
                                "type": "directory",
                                "children": build_tree(str(item), max_depth, current_depth + 1)
                            })
                    else:
                        # Use centralized file filtering
                        if not file_filter.should_exclude_file(item):
                            items.append({
                                "name": item.name,
                                "type": "file",
                                "size": item.stat().st_size if item.exists() else 0
                            })

                return {"type": "directory", "children": items}

            except (OSError, PermissionError):
                return {"type": "directory", "error": "Access denied"}

        try:
            root_name = Path(project_path).name
            structure = {
                "name": root_name,
                "path": project_path,
                "type": "directory",
                "children": build_tree(project_path)["children"]
            }
            return structure

        except Exception as e:
            return {
                "error": f"Failed to build project structure: {e}",
                "path": project_path
            }
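The config tool above follows a validate-then-normalize convention; a hedged sketch of the intended call order (the path is a placeholder):

    tool = ProjectConfigTool()
    error = tool.validate_project_path('/path/to/project')
    if error:
        raise ValueError(error)
    project_path = tool.normalize_project_path('/path/to/project')
    settings = tool.initialize_settings(project_path)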
@@ -0,0 +1,100 @@
"""
Settings Tool - Pure technical component for settings operations.

This tool handles low-level settings operations without any business logic.
"""

import os
import tempfile
from typing import Dict, Any

from ...constants import SETTINGS_DIR


class SettingsTool:
    """
    Pure technical component for settings operations.

    This tool provides low-level settings management capabilities
    without any business logic or decision making.
    """

    def __init__(self):
        pass

    def get_temp_directory_path(self) -> str:
        """
        Get the path to the temporary directory for settings.

        Returns:
            Path to the temporary settings directory
        """
        return os.path.join(tempfile.gettempdir(), SETTINGS_DIR)

    def create_temp_directory(self) -> Dict[str, Any]:
        """
        Create the temporary directory for settings.

        Returns:
            Dictionary with creation results
        """
        temp_dir = self.get_temp_directory_path()
        existed_before = os.path.exists(temp_dir)

        try:
            os.makedirs(temp_dir, exist_ok=True)

            return {
                "temp_directory": temp_dir,
                "exists": os.path.exists(temp_dir),
                "is_directory": os.path.isdir(temp_dir),
                "existed_before": existed_before,
                "created": not existed_before
            }

        except (OSError, IOError) as e:
            return {
                "temp_directory": temp_dir,
                "exists": False,
                "error": str(e)
            }

    def check_temp_directory(self) -> Dict[str, Any]:
        """
        Check the status of the temporary directory.

        Returns:
            Dictionary with directory status information
        """
        temp_dir = self.get_temp_directory_path()

        result = {
            "temp_directory": temp_dir,
            "temp_root": tempfile.gettempdir(),
            "exists": os.path.exists(temp_dir),
            "is_directory": os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
        }

        # If the directory exists, list its contents
        if result["exists"] and result["is_directory"]:
            try:
                contents = os.listdir(temp_dir)
                result["contents"] = contents
                result["subdirectories"] = []

                # Check each subdirectory
                for item in contents:
                    item_path = os.path.join(temp_dir, item)
                    if os.path.isdir(item_path):
                        subdir_info = {
                            "name": item,
                            "path": item_path,
                            "contents": os.listdir(item_path) if os.path.exists(item_path) else []
                        }
                        result["subdirectories"].append(subdir_info)

            except (OSError, PermissionError) as e:
                result["error"] = str(e)

        return result
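For illustration, a minimal usage sketch of SettingsTool (not part of the diff; assumes SETTINGS_DIR is the directory name defined in the package constants):

# Hypothetical usage of the tool above.
tool = SettingsTool()
result = tool.create_temp_directory()
if "error" in result:
    print(f"Could not create {result['temp_directory']}: {result['error']}")
else:
    status = tool.check_temp_directory()
    print(status["temp_directory"], status["exists"], status.get("contents", []))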
@@ -0,0 +1,8 @@
"""
Filesystem Tools - Technical components for file system operations.
"""

from .file_matching_tool import FileMatchingTool
from .file_system_tool import FileSystemTool

__all__ = ['FileMatchingTool', 'FileSystemTool']
@@ -0,0 +1,215 @@
"""
File Matching Tool - Pure technical component for pattern matching operations.

This tool handles file pattern matching without any business logic.
It provides technical capabilities for finding files based on various patterns.
"""

import fnmatch
from typing import List
from pathlib import Path

# FileInfo defined locally for file matching operations
from dataclasses import dataclass


# frozen=True makes instances hashable, which match_multiple_patterns relies on
# for its set-based de-duplication (a plain @dataclass with __eq__ is unhashable).
@dataclass(frozen=True)
class FileInfo:
    """File information structure."""
    relative_path: str
    language: str


class FileMatchingTool:
    """
    Pure technical component for file pattern matching.

    This tool provides low-level pattern matching capabilities without
    any business logic. It can match files using glob patterns, regex,
    or other matching strategies.
    """

    def __init__(self):
        pass

    def match_glob_pattern(self, files: List[FileInfo], pattern: str) -> List[FileInfo]:
        """
        Match files using glob pattern.

        Args:
            files: List of FileInfo objects to search through
            pattern: Glob pattern (e.g., "*.py", "test_*.js", "src/**/*.ts")

        Returns:
            List of FileInfo objects that match the pattern
        """
        if not pattern:
            return files

        matched_files = []

        for file_info in files:
            # Try matching against full path
            if fnmatch.fnmatch(file_info.relative_path, pattern):
                matched_files.append(file_info)
                continue

            # Try matching against just the filename
            filename = Path(file_info.relative_path).name
            if fnmatch.fnmatch(filename, pattern):
                matched_files.append(file_info)

        return matched_files

    def match_multiple_patterns(self, files: List[FileInfo], patterns: List[str]) -> List[FileInfo]:
        """
        Match files using multiple glob patterns (OR logic).

        Args:
            files: List of FileInfo objects to search through
            patterns: List of glob patterns

        Returns:
            List of FileInfo objects that match any of the patterns
        """
        if not patterns:
            return files

        matched_files = set()

        for pattern in patterns:
            pattern_matches = self.match_glob_pattern(files, pattern)
            matched_files.update(pattern_matches)

        return list(matched_files)

    def match_by_language(self, files: List[FileInfo], languages: List[str]) -> List[FileInfo]:
        """
        Match files by programming language.

        Args:
            files: List of FileInfo objects to search through
            languages: List of language names (e.g., ["python", "javascript"])

        Returns:
            List of FileInfo objects with matching languages
        """
        if not languages:
            return files

        # Normalize language names for comparison
        normalized_languages = {lang.lower() for lang in languages}

        matched_files = []
        for file_info in files:
            if file_info.language.lower() in normalized_languages:
                matched_files.append(file_info)

        return matched_files

    def match_by_directory(self, files: List[FileInfo], directory_patterns: List[str]) -> List[FileInfo]:
        """
        Match files by directory patterns.

        Args:
            files: List of FileInfo objects to search through
            directory_patterns: List of directory patterns (e.g., ["src/*", "test/**"])

        Returns:
            List of FileInfo objects in matching directories
        """
        if not directory_patterns:
            return files

        matched_files = []

        for file_info in files:
            file_dir = str(Path(file_info.relative_path).parent)

            for dir_pattern in directory_patterns:
                if fnmatch.fnmatch(file_dir, dir_pattern):
                    matched_files.append(file_info)
                    break

        return matched_files

    def exclude_patterns(self, files: List[FileInfo], exclude_patterns: List[str]) -> List[FileInfo]:
        """
        Exclude files matching the given patterns.

        Args:
            files: List of FileInfo objects to filter
            exclude_patterns: List of patterns to exclude

        Returns:
            List of FileInfo objects that don't match any exclude pattern
        """
        if not exclude_patterns:
            return files

        filtered_files = []

        for file_info in files:
            should_exclude = False

            for exclude_pattern in exclude_patterns:
                if (fnmatch.fnmatch(file_info.relative_path, exclude_pattern) or
                        fnmatch.fnmatch(Path(file_info.relative_path).name, exclude_pattern)):
                    should_exclude = True
                    break

            if not should_exclude:
                filtered_files.append(file_info)

        return filtered_files

    def sort_by_relevance(self, files: List[FileInfo], pattern: str) -> List[FileInfo]:
        """
        Sort files by relevance to the search pattern.

        Args:
            files: List of FileInfo objects to sort
            pattern: Original search pattern for relevance scoring

        Returns:
            List of FileInfo objects sorted by relevance (most relevant first)
        """
        def relevance_score(file_info: FileInfo) -> int:
            """Calculate relevance score for a file."""
            score = 0
            filename = Path(file_info.relative_path).name

            # Exact filename match gets highest score
            if filename == pattern:
                score += 100

            # Filename starts with pattern
            elif filename.startswith(pattern.replace('*', '')):
                score += 50

            # Pattern appears in filename
            elif pattern.replace('*', '') in filename:
                score += 25

            # Shorter paths are generally more relevant
            path_depth = len(Path(file_info.relative_path).parts)
            score += max(0, 10 - path_depth)

            return score

        return sorted(files, key=relevance_score, reverse=True)

    def limit_results(self, files: List[FileInfo], max_results: int) -> List[FileInfo]:
        """
        Limit the number of results returned.

        Args:
            files: List of FileInfo objects
            max_results: Maximum number of results to return

        Returns:
            List of FileInfo objects limited to max_results
        """
        if max_results <= 0:
            return files

        return files[:max_results]
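For illustration, a minimal matching pipeline built from the methods above (not part of the diff; the file list is hypothetical). The set-based de-duplication in match_multiple_patterns is what requires FileInfo to be hashable, hence frozen=True:

# Hypothetical pipeline: match, exclude, rank, trim.
files = [
    FileInfo(relative_path="src/user_service.py", language="python"),
    FileInfo(relative_path="test/test_user_service.py", language="python"),
    FileInfo(relative_path="src/index.ts", language="typescript"),
]
matcher = FileMatchingTool()
candidates = matcher.match_multiple_patterns(files, ["*.py", "src/**"])
candidates = matcher.exclude_patterns(candidates, ["test_*"])   # drops the test file
ranked = matcher.sort_by_relevance(candidates, "user_service*")
for info in matcher.limit_results(ranked, 10):
    print(info.relative_path)   # user_service.py ranks first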
@@ -0,0 +1,234 @@
"""
File System Tool - Pure technical component for file system operations.

This tool handles low-level file system operations without any business logic.
"""

import os
from typing import Dict, Any
from pathlib import Path


class FileSystemTool:
    """
    Pure technical component for file system operations.

    This tool provides low-level file system capabilities without
    any business logic or decision making.
    """

    def __init__(self):
        pass

    def get_file_stats(self, file_path: str) -> Dict[str, Any]:
        """
        Get basic file system statistics for a file.

        Args:
            file_path: Absolute path to the file

        Returns:
            Dictionary with file statistics

        Raises:
            FileNotFoundError: If file doesn't exist
            OSError: If file cannot be accessed
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            stat_info = os.stat(file_path)
            path_obj = Path(file_path)

            return {
                'size_bytes': stat_info.st_size,
                'modified_time': stat_info.st_mtime,
                'created_time': stat_info.st_ctime,
                'is_file': path_obj.is_file(),
                'is_directory': path_obj.is_dir(),
                'extension': path_obj.suffix,
                'name': path_obj.name,
                'parent': str(path_obj.parent)
            }

        except OSError as e:
            raise OSError(f"Cannot access file {file_path}: {e}") from e

    def read_file_content(self, file_path: str) -> str:
        """
        Read file content with intelligent encoding detection.

        Args:
            file_path: Absolute path to the file

        Returns:
            File content as string

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file cannot be decoded
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Try UTF-8 first (most common)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            pass

        # Try other common encodings. Note that latin-1 decodes any byte
        # sequence, so the fallbacks after it (and the ValueError below) are
        # effectively unreachable; they are kept as a defensive safety net.
        encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue

        raise ValueError(f"Could not decode file {file_path} with any supported encoding")

    def count_lines(self, file_path: str) -> int:
        """
        Count the number of lines in a file.

        Args:
            file_path: Absolute path to the file

        Returns:
            Number of lines in the file

        Raises:
            FileNotFoundError: If file doesn't exist
        """
        try:
            content = self.read_file_content(file_path)
            return len(content.splitlines())
        except Exception:
            # If we can't read the file, return 0
            return 0

    def detect_language_from_extension(self, file_path: str) -> str:
        """
        Detect programming language from file extension.

        Args:
            file_path: Path to the file

        Returns:
            Language name or 'unknown'
        """
        extension = Path(file_path).suffix.lower()

        lang_map = {
            '.py': 'python',
            '.js': 'javascript',
            '.jsx': 'javascript',
            '.ts': 'typescript',
            '.tsx': 'typescript',
            '.java': 'java',
            '.cpp': 'cpp',
            '.cxx': 'cpp',
            '.cc': 'cpp',
            '.c': 'c',
            '.h': 'c',
            '.hpp': 'cpp',
            '.hxx': 'cpp',
            '.cs': 'csharp',
            '.go': 'go',
            '.rs': 'rust',
            '.php': 'php',
            '.rb': 'ruby',
            '.swift': 'swift',
            '.kt': 'kotlin',
            '.scala': 'scala',
            '.m': 'objc',
            '.mm': 'objc',
            '.html': 'html',
            '.htm': 'html',
            '.css': 'css',
            '.scss': 'scss',
            '.sass': 'sass',
            '.less': 'less',
            '.json': 'json',
            '.xml': 'xml',
            '.yaml': 'yaml',
            '.yml': 'yaml',
            '.md': 'markdown',
            '.txt': 'text',
            '.sh': 'shell',
            '.bash': 'shell',
            '.zsh': 'shell',
            '.fish': 'shell',
            '.ps1': 'powershell',
            '.bat': 'batch',
            '.cmd': 'batch'
        }

        return lang_map.get(extension, 'unknown')

    def is_text_file(self, file_path: str) -> bool:
        """
        Check if a file is likely a text file.

        Args:
            file_path: Path to the file

        Returns:
            True if file appears to be text, False otherwise
        """
        try:
            # Try to read a small portion of the file
            with open(file_path, 'rb') as f:
                chunk = f.read(1024)

            # Check for null bytes (common in binary files)
            if b'\x00' in chunk:
                return False

            # Try to decode as UTF-8
            try:
                chunk.decode('utf-8')
                return True
            except UnicodeDecodeError:
                # Try other encodings (latin-1 accepts any byte sequence, so
                # in practice this fallback always succeeds for non-null data)
                for encoding in ['latin-1', 'cp1252']:
                    try:
                        chunk.decode(encoding)
                        return True
                    except UnicodeDecodeError:
                        continue

            return False

        except Exception:
            return False

    def get_file_size_category(self, file_path: str) -> str:
        """
        Categorize file size for analysis purposes.

        Args:
            file_path: Path to the file

        Returns:
            Size category: 'tiny', 'small', 'medium', 'large', or 'very_large'
            ('unknown' if the size cannot be read)
        """
        try:
            size = os.path.getsize(file_path)

            if size < 1024:  # < 1KB
                return 'tiny'
            elif size < 10 * 1024:  # < 10KB
                return 'small'
            elif size < 100 * 1024:  # < 100KB
                return 'medium'
            elif size < 1024 * 1024:  # < 1MB
                return 'large'
            else:
                return 'very_large'

        except Exception:
            return 'unknown'
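For illustration, a minimal usage sketch of FileSystemTool (not part of the diff; the path is hypothetical):

# Hypothetical usage of the tool above.
fs = FileSystemTool()
path = "/home/dev/myproject/src/main.py"
if fs.is_text_file(path):
    stats = fs.get_file_stats(path)
    print(f"{stats['name']}: {fs.count_lines(path)} lines, "
          f"{fs.get_file_size_category(path)}, "
          f"language={fs.detect_language_from_extension(path)}")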
@@ -0,0 +1,7 @@
"""
Monitoring Tools - Technical components for file monitoring operations.
"""

from .file_watcher_tool import FileWatcherTool

__all__ = ['FileWatcherTool']
@@ -0,0 +1,134 @@
"""
File Watcher Tool - Pure technical component for file monitoring operations.

This tool handles low-level file watching operations without any business logic.
"""

import time
from typing import Optional, Callable
from ...utils import ContextHelper
from ...services.file_watcher_service import FileWatcherService


class FileWatcherTool:
    """
    Pure technical component for file monitoring operations.

    This tool provides low-level file watching capabilities without
    any business logic or decision making.
    """

    def __init__(self, ctx):
        self._ctx = ctx
        self._file_watcher_service: Optional[FileWatcherService] = None

    def create_watcher(self) -> FileWatcherService:
        """
        Create a new file watcher service instance.

        Returns:
            FileWatcherService instance
        """
        self._file_watcher_service = FileWatcherService(self._ctx)
        return self._file_watcher_service

    def start_monitoring(self, project_path: str, rebuild_callback: Callable) -> bool:
        """
        Start file monitoring for the given project path.

        Args:
            project_path: Path to monitor
            rebuild_callback: Callback function for rebuild events

        Returns:
            True if monitoring started successfully, False otherwise
        """
        if not self._file_watcher_service:
            self._file_watcher_service = self.create_watcher()

        # Note: a mismatch between the configured base path and the requested
        # project_path is currently tolerated; this check is a deliberate no-op.
        helper = ContextHelper(self._ctx)
        if helper.base_path and helper.base_path != project_path:
            pass

        return self._file_watcher_service.start_monitoring(rebuild_callback)

    def stop_monitoring(self) -> None:
        """Stop file monitoring if active."""
        if self._file_watcher_service:
            self._file_watcher_service.stop_monitoring()

    def is_monitoring_active(self) -> bool:
        """
        Check if file monitoring is currently active.

        Returns:
            True if monitoring is active, False otherwise
        """
        return (self._file_watcher_service is not None and
                self._file_watcher_service.is_active())

    def get_monitoring_status(self) -> dict:
        """
        Get current monitoring status.

        Returns:
            Dictionary with monitoring status information
        """
        if not self._file_watcher_service:
            return {
                'active': False,
                'available': True,
                'status': 'not_initialized'
            }

        return self._file_watcher_service.get_status()

    def store_in_context(self) -> None:
        """Store the file watcher service in the MCP context."""
        if (self._file_watcher_service and
                hasattr(self._ctx.request_context.lifespan_context, '__dict__')):
            self._ctx.request_context.lifespan_context.file_watcher_service = self._file_watcher_service

    def get_from_context(self) -> Optional[FileWatcherService]:
        """
        Get existing file watcher service from context.

        Returns:
            FileWatcherService instance or None if not found
        """
        if hasattr(self._ctx.request_context.lifespan_context, 'file_watcher_service'):
            return self._ctx.request_context.lifespan_context.file_watcher_service
        return None

    def stop_existing_watcher(self) -> None:
        """Stop any existing file watcher from context."""
        existing_watcher = self.get_from_context()
        if existing_watcher:
            existing_watcher.stop_monitoring()
            # Clear reference
            if hasattr(self._ctx.request_context.lifespan_context, '__dict__'):
                self._ctx.request_context.lifespan_context.file_watcher_service = None

    def record_error(self, error_message: str) -> None:
        """
        Record file watcher error in context for status reporting.

        Args:
            error_message: Error message to record
        """
        error_info = {
            'status': 'failed',
            'message': f'{error_message}. Auto-refresh disabled. Please use manual refresh.',
            'timestamp': time.time(),
            'manual_refresh_required': True
        }

        # Store error in context for status reporting
        if hasattr(self._ctx.request_context.lifespan_context, '__dict__'):
            self._ctx.request_context.lifespan_context.file_watcher_error = error_info
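For illustration, a sketch of the intended watcher lifecycle using only the methods defined above (not part of the diff; `ctx` stands for an MCP Context supplied by the server runtime, and `rebuild_index` is a hypothetical callback):

# Hypothetical lifecycle of the tool above.
def rebuild_index():
    print("filesystem changed - rebuilding index")

watcher = FileWatcherTool(ctx)
watcher.stop_existing_watcher()                  # replace any watcher left in the context
if watcher.start_monitoring("/home/dev/myproject", rebuild_index):
    watcher.store_in_context()                   # make it discoverable by later requests
else:
    watcher.record_error("file watcher failed to start")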
@@ -0,0 +1,31 @@
"""
Utility modules for the Code Index MCP server.

This package contains shared utilities used across services:
- error_handler: Decorator-based error handling for MCP entry points
- context_helper: Context access utilities and helpers
- validation: Common validation logic
- response_formatter: Response formatting utilities
- file_filter: Centralized file and directory filtering
"""

from .error_handler import (
    handle_mcp_errors,
    handle_mcp_resource_errors,
    handle_mcp_tool_errors,
    MCPToolError,
)
from .context_helper import ContextHelper
from .validation import ValidationHelper
from .response_formatter import ResponseFormatter
from .file_filter import FileFilter

__all__ = [
    'handle_mcp_errors',
    'handle_mcp_resource_errors',
    'handle_mcp_tool_errors',
    'MCPToolError',
    'ContextHelper',
    'ValidationHelper',
    'ResponseFormatter',
    'FileFilter'
]
@@ -0,0 +1,169 @@
"""
Context access utilities and helpers.

This module provides convenient access to MCP Context data and common
operations that services need to perform with the context.
"""

import os
from typing import Optional
from mcp.server.fastmcp import Context

from ..project_settings import ProjectSettings


class ContextHelper:
    """
    Helper class for convenient access to MCP Context data.

    This class wraps the MCP Context object and provides convenient properties
    and methods for accessing commonly needed data like base_path, settings, etc.
    """

    def __init__(self, ctx: Context):
        """
        Initialize the context helper.

        Args:
            ctx: The MCP Context object
        """
        self.ctx = ctx

    @property
    def base_path(self) -> str:
        """
        Get the base project path from the context.

        Returns:
            The base project path, or empty string if not set
        """
        try:
            return self.ctx.request_context.lifespan_context.base_path
        except AttributeError:
            return ""

    @property
    def settings(self) -> Optional[ProjectSettings]:
        """
        Get the project settings from the context.

        Returns:
            The ProjectSettings instance, or None if not available
        """
        try:
            return self.ctx.request_context.lifespan_context.settings
        except AttributeError:
            return None

    @property
    def file_count(self) -> int:
        """
        Get the current file count from the context.

        Returns:
            The number of indexed files, or 0 if not available
        """
        try:
            return self.ctx.request_context.lifespan_context.file_count
        except AttributeError:
            return 0

    @property
    def index_manager(self):
        """
        Get the unified index manager from the context.

        Returns:
            The UnifiedIndexManager instance, or None if not available
        """
        try:
            return getattr(self.ctx.request_context.lifespan_context, 'index_manager', None)
        except AttributeError:
            return None

    def validate_base_path(self) -> bool:
        """
        Check if the base path is set and valid.

        Returns:
            True if base path is set and exists, False otherwise
        """
        base_path = self.base_path
        return bool(base_path and os.path.exists(base_path))

    def get_base_path_error(self) -> Optional[str]:
        """
        Get an error message if base path is not properly set.

        Returns:
            Error message string if base path is invalid, None if valid
        """
        if not self.base_path:
            return ("Project path not set. Please use set_project_path to set a "
                    "project directory first.")

        if not os.path.exists(self.base_path):
            return f"Project path does not exist: {self.base_path}"

        if not os.path.isdir(self.base_path):
            return f"Project path is not a directory: {self.base_path}"

        return None

    def update_file_count(self, count: int) -> None:
        """
        Update the file count in the context.

        Args:
            count: The new file count
        """
        try:
            self.ctx.request_context.lifespan_context.file_count = count
        except AttributeError:
            pass  # Context not available or doesn't support this operation

    def update_base_path(self, path: str) -> None:
        """
        Update the base path in the context.

        Args:
            path: The new base path
        """
        try:
            self.ctx.request_context.lifespan_context.base_path = path
        except AttributeError:
            pass  # Context not available or doesn't support this operation

    def update_settings(self, settings: ProjectSettings) -> None:
        """
        Update the settings in the context.

        Args:
            settings: The new ProjectSettings instance
        """
        try:
            self.ctx.request_context.lifespan_context.settings = settings
        except AttributeError:
            pass  # Context not available or doesn't support this operation

    def clear_index_cache(self) -> None:
        """
        Clear the index through the unified index manager.
        """
        try:
            if self.index_manager:
                self.index_manager.clear_index()
        except AttributeError:
            pass

    def update_index_manager(self, index_manager) -> None:
        """
        Update the index manager in the context.

        Args:
            index_manager: The new UnifiedIndexManager instance
        """
        try:
            self.ctx.request_context.lifespan_context.index_manager = index_manager
        except AttributeError:
            pass  # Context not available or doesn't support this operation
@@ -0,0 +1,122 @@
"""
Decorator-based error handling for MCP entry points.

This module provides consistent error handling across all MCP tools, resources, and prompts.
"""

import functools
from typing import Any, Callable


class MCPToolError(RuntimeError):
    """Exception raised when an MCP entry point fails."""

    def __init__(self, message: str):
        super().__init__(message)


def handle_mcp_errors(return_type: str = 'str') -> Callable:
    """
    Decorator to handle exceptions in MCP entry points consistently.

    This decorator catches all exceptions and rethrows them as MCPToolError after
    formatting a consistent error message. FastMCP converts the raised exception
    into a structured error response for the client.

    Args:
        return_type: Label used to format the error message for logging/consistency.
            - 'str'/'list'/others: Prefixes message with "Error: ..."
            - 'dict'/'json': Prefixes message with "Operation failed: ..."

    Returns:
        Decorator function that wraps MCP entry points with error handling

    Example:
        @mcp.tool()
        @handle_mcp_errors(return_type='str')
        def set_project_path(path: str, ctx: Context) -> str:
            from ..services.project_management_service import ProjectManagementService
            return ProjectManagementService(ctx).initialize_project(path)

        @mcp.tool()
        @handle_mcp_errors(return_type='dict')
        def search_code_advanced(pattern: str, ctx: Context, **kwargs) -> Dict[str, Any]:
            return SearchService(ctx).search_code(pattern, **kwargs)
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            try:
                return func(*args, **kwargs)
            except MCPToolError:
                raise
            except Exception as exc:
                error_message = str(exc)
                formatted = _format_error_message(error_message, return_type)
                raise MCPToolError(formatted) from exc

        return wrapper
    return decorator


def handle_mcp_resource_errors(func: Callable) -> Callable:
    """
    Specialized error handler for MCP resources that always return strings.

    This is a convenience decorator specifically for @mcp.resource decorated functions
    which always return string responses.

    Args:
        func: The MCP resource function to wrap

    Returns:
        Wrapped function with error handling

    Example:
        @mcp.resource("config://code-indexer")
        @handle_mcp_resource_errors
        def get_config(ctx: Context) -> str:
            from ..services.project_management_service import ProjectManagementService
            return ProjectManagementService(ctx).get_project_config()
    """
    return handle_mcp_errors(return_type='str')(func)


def handle_mcp_tool_errors(return_type: str = 'str') -> Callable:
    """
    Specialized error handler for MCP tools with flexible return types.

    This is a convenience decorator specifically for @mcp.tool decorated functions
    which may return either strings or dictionaries.

    Args:
        return_type: Label describing the successful payload shape (e.g. 'str', 'dict', 'list').

    Returns:
        Decorator function for MCP tools

    Example:
        @mcp.tool()
        @handle_mcp_tool_errors(return_type='dict')
        def find_files(pattern: str, ctx: Context) -> Dict[str, Any]:
            from ..services.file_discovery_service import FileDiscoveryService
            return FileDiscoveryService(ctx).find_files(pattern)
    """
    return handle_mcp_errors(return_type=return_type)


def _format_error_message(error_message: str, return_type: str) -> str:
    """
    Convert an exception message into a consistent string for MCP errors.

    Args:
        error_message: The raw exception message.
        return_type: The declared return type for the decorated entry point.

    Returns:
        A string representation suitable for raising as MCPToolError.
    """
    if return_type in {'dict', 'json'}:
        return f"Operation failed: {error_message}"
    return f"Error: {error_message}"
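For illustration, a minimal sketch of the decorator's behavior (not part of the diff): any exception escaping the wrapped function is re-raised as MCPToolError with the formatted message.

# Hypothetical demonstration of the error-handling contract above.
@handle_mcp_errors(return_type='dict')
def divide(a: int, b: int) -> dict:
    return {"result": a / b}

try:
    divide(1, 0)
except MCPToolError as err:
    print(err)  # -> Operation failed: division by zero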
@@ -0,0 +1,177 @@
"""
Centralized file filtering logic for the Code Index MCP server.

This module provides unified filtering capabilities used across all components
that need to determine which files and directories should be processed or excluded.
"""

import fnmatch
from pathlib import Path
from typing import List, Optional

from ..constants import FILTER_CONFIG


class FileFilter:
    """Centralized file filtering logic."""

    def __init__(self, additional_excludes: Optional[List[str]] = None):
        """
        Initialize the file filter.

        Args:
            additional_excludes: Additional directory patterns to exclude
        """
        self.exclude_dirs = set(FILTER_CONFIG["exclude_directories"])
        self.exclude_files = set(FILTER_CONFIG["exclude_files"])
        self.supported_extensions = set(FILTER_CONFIG["supported_extensions"])

        # Add user-defined exclusions
        if additional_excludes:
            self.exclude_dirs.update(additional_excludes)

    def should_exclude_directory(self, dir_name: str) -> bool:
        """
        Check if directory should be excluded from processing.

        Args:
            dir_name: Directory name to check

        Returns:
            True if directory should be excluded, False otherwise
        """
        # Skip hidden directories except for specific allowed ones
        if dir_name.startswith('.') and dir_name not in {'.env', '.gitignore'}:
            return True

        # Check against exclude patterns
        return dir_name in self.exclude_dirs

    def should_exclude_file(self, file_path: Path) -> bool:
        """
        Check if file should be excluded from processing.

        Args:
            file_path: Path object for the file to check

        Returns:
            True if file should be excluded, False otherwise
        """
        # Extension check - only process supported file types
        if file_path.suffix.lower() not in self.supported_extensions:
            return True

        # Hidden files (except specific allowed ones)
        if file_path.name.startswith('.') and file_path.name not in {'.gitignore', '.env'}:
            return True

        # Filename pattern check using glob patterns
        for pattern in self.exclude_files:
            if fnmatch.fnmatch(file_path.name, pattern):
                return True

        return False

    def should_process_path(self, path: Path, base_path: Path) -> bool:
        """
        Unified path processing logic to determine if a file should be processed.

        Args:
            path: File path to check
            base_path: Project base path for relative path calculation

        Returns:
            True if file should be processed, False otherwise
        """
        try:
            # Ensure we're working with absolute paths
            if not path.is_absolute():
                path = base_path / path

            # Get relative path from base
            relative_path = path.relative_to(base_path)

            # Check each path component for excluded directories
            for part in relative_path.parts[:-1]:  # Exclude filename
                if self.should_exclude_directory(part):
                    return False

            # Check file itself
            return not self.should_exclude_file(path)

        except (ValueError, OSError):
            # Path not relative to base_path or other path errors
            return False

    def is_supported_file_type(self, file_path: Path) -> bool:
        """
        Check if file type is supported for indexing.

        Args:
            file_path: Path to check

        Returns:
            True if file type is supported, False otherwise
        """
        return file_path.suffix.lower() in self.supported_extensions

    def is_temporary_file(self, file_path: Path) -> bool:
        """
        Check if file appears to be a temporary file.

        Args:
            file_path: Path to check

        Returns:
            True if file appears temporary, False otherwise
        """
        name = file_path.name

        # Common temporary file patterns
        temp_patterns = ['*.tmp', '*.temp', '*.swp', '*.swo', '*~']

        for pattern in temp_patterns:
            if fnmatch.fnmatch(name, pattern):
                return True

        # Files ending in .bak or .orig
        if name.endswith(('.bak', '.orig')):
            return True

        return False

    def filter_file_list(self, files: List[str], base_path: str) -> List[str]:
        """
        Filter a list of file paths, keeping only those that should be processed.

        Args:
            files: List of file paths (absolute or relative)
            base_path: Project base path

        Returns:
            Filtered list of file paths that should be processed
        """
        base = Path(base_path)
        filtered = []

        for file_path_str in files:
            file_path = Path(file_path_str)
            if self.should_process_path(file_path, base):
                filtered.append(file_path_str)

        return filtered

    def get_exclude_summary(self) -> dict:
        """
        Get summary of current exclusion configuration.

        Returns:
            Dictionary with exclusion configuration details
        """
        return {
            "exclude_directories_count": len(self.exclude_dirs),
            "exclude_files_count": len(self.exclude_files),
            "supported_extensions_count": len(self.supported_extensions),
            "exclude_directories": sorted(self.exclude_dirs),
            "exclude_files": sorted(self.exclude_files)
        }
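For illustration, a minimal usage sketch of FileFilter (not part of the diff; whether a given file survives depends on the actual FILTER_CONFIG contents, which are assumed here):

# Hypothetical usage of the filter above.
file_filter = FileFilter(additional_excludes=["build"])
kept = file_filter.filter_file_list(
    ["src/app.py", "build/app.py", ".cache/tmp.py", "notes.tmp"],
    "/home/dev/myproject",
)
# Assuming ".py" is a supported extension and ".tmp" is not, only "src/app.py"
# survives the directory, hidden-path, and extension checks.
print(kept)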
@@ -0,0 +1,372 @@
"""
Response formatting utilities for the MCP server.

This module provides consistent response formatting functions used across
services to ensure uniform response structures and formats.
"""

import json
from typing import Any, Dict, List, Optional, Union

from ..indexing.qualified_names import generate_qualified_name


class ResponseFormatter:
    """
    Helper class for formatting responses consistently across services.

    This class provides static methods for formatting different types of
    responses in a consistent manner.
    """

    @staticmethod
    def _resolve_qualified_names_in_relationships(
        file_path: str,
        relationship_list: List[str],
        duplicate_names: set,
        index_cache: Optional[Dict[str, Any]] = None
    ) -> List[str]:
        """
        Convert simple names to qualified names when duplicates exist.

        Args:
            file_path: Current file path for context
            relationship_list: List of function/class names that may need qualification
            duplicate_names: Set of names that have duplicates in the project
            index_cache: Optional index cache for duplicate detection

        Returns:
            List with qualified names where duplicates exist
        """
        if not relationship_list or not duplicate_names:
            return relationship_list

        qualified_list = []
        for name in relationship_list:
            if name in duplicate_names:
                # Convert to qualified name if this name has duplicates
                if index_cache and 'files' in index_cache:
                    # Try to find the actual file where this name is defined.
                    # For now, we use the current file path as context.
                    qualified_name = generate_qualified_name(file_path, name)
                    qualified_list.append(qualified_name)
                else:
                    # Fallback: keep original name if we can't resolve
                    qualified_list.append(name)
            else:
                # No duplicates, keep original name
                qualified_list.append(name)

        return qualified_list

    @staticmethod
    def _get_duplicate_names_from_index(index_cache: Optional[Dict[str, Any]] = None) -> Dict[str, set]:
        """
        Extract duplicate function and class names from index cache.

        Args:
            index_cache: Optional index cache

        Returns:
            Dictionary with 'functions' and 'classes' sets of duplicate names
        """
        duplicates = {'functions': set(), 'classes': set()}

        if not index_cache:
            return duplicates

        # Duplicate detection functionality removed - was legacy code.
        # Return empty duplicates as this feature is no longer used.

        return duplicates

    @staticmethod
    def success_response(message: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Format a successful operation response.

        Args:
            message: Success message
            data: Optional additional data to include

        Returns:
            Formatted success response dictionary
        """
        response = {"status": "success", "message": message}
        if data:
            response.update(data)
        return response

    @staticmethod
    def error_response(message: str, error_code: Optional[str] = None) -> Dict[str, Any]:
        """
        Format an error response.

        Args:
            message: Error message
            error_code: Optional error code for categorization

        Returns:
            Formatted error response dictionary
        """
        response = {"error": message}
        if error_code:
            response["error_code"] = error_code
        return response

    @staticmethod
    def file_list_response(files: List[str], status_message: str) -> Dict[str, Any]:
        """
        Format a file list response for find_files operations.

        Args:
            files: List of file paths
            status_message: Status message describing the operation result

        Returns:
            Formatted file list response
        """
        return {
            "files": files,
            "status": status_message
        }

    @staticmethod
    def search_results_response(
        results: List[Dict[str, Any]],
        pagination: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Format search results response.

        Args:
            results: List of search result dictionaries
            pagination: Optional pagination metadata to attach to the response

        Returns:
            Formatted search results response
        """
        response = {
            "results": results
        }

        if pagination is not None:
            response["pagination"] = pagination

        return response

    @staticmethod
    def config_response(config_data: Dict[str, Any]) -> str:
        """
        Format configuration data as JSON string.

        Args:
            config_data: Configuration data dictionary

        Returns:
            JSON formatted configuration string
        """
        return json.dumps(config_data, indent=2)

    @staticmethod
    def stats_response(stats_data: Dict[str, Any]) -> str:
        """
        Format statistics data as JSON string.

        Args:
            stats_data: Statistics data dictionary

        Returns:
            JSON formatted statistics string
        """
        return json.dumps(stats_data, indent=2)

    @staticmethod
    def file_summary_response(
        file_path: str,
        line_count: int,
        size_bytes: int,
        extension: str,
        language: str = "unknown",
        functions: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
        classes: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
        imports: Optional[Union[List[str], List[Dict[str, Any]]]] = None,
        language_specific: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
        index_cache: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Format file summary response from index data.

        Args:
            file_path: Path to the file
            line_count: Number of lines in the file
            size_bytes: File size in bytes
            extension: File extension
            language: Programming language detected
            functions: List of function names (strings) or complete function objects (dicts)
            classes: List of class names (strings) or complete class objects (dicts)
            imports: List of import statements (strings) or complete import objects (dicts)
            language_specific: Language-specific analysis data
            error: Error message if analysis failed
            index_cache: Optional index cache for duplicate name resolution

        Returns:
            Formatted file summary response
        """
        # Get duplicate names from index for qualified name resolution
        duplicate_names = ResponseFormatter._get_duplicate_names_from_index(index_cache)

        # Handle backward compatibility for functions
        processed_functions = []
        if functions:
            for func in functions:
                if isinstance(func, str):
                    # Legacy format - convert string to basic object
                    processed_functions.append({"name": func})
                elif isinstance(func, dict):
                    # New format - use complete object and resolve qualified names in relationships
                    processed_func = func.copy()

                    # Resolve qualified names in relationship fields
                    if 'calls' in processed_func and isinstance(processed_func['calls'], list):
                        processed_func['calls'] = ResponseFormatter._resolve_qualified_names_in_relationships(
                            file_path, processed_func['calls'], duplicate_names['functions'], index_cache
                        )

                    if 'called_by' in processed_func and isinstance(processed_func['called_by'], list):
                        processed_func['called_by'] = ResponseFormatter._resolve_qualified_names_in_relationships(
                            file_path, processed_func['called_by'], duplicate_names['functions'], index_cache
                        )

                    processed_functions.append(processed_func)

        # Handle backward compatibility for classes
        processed_classes = []
        if classes:
            for cls in classes:
                if isinstance(cls, str):
                    # Legacy format - convert string to basic object
                    processed_classes.append({"name": cls})
                elif isinstance(cls, dict):
                    # New format - use complete object and resolve qualified names in relationships
                    processed_cls = cls.copy()

                    # Resolve qualified names in relationship fields
                    if 'instantiated_by' in processed_cls and isinstance(processed_cls['instantiated_by'], list):
                        processed_cls['instantiated_by'] = ResponseFormatter._resolve_qualified_names_in_relationships(
                            file_path, processed_cls['instantiated_by'], duplicate_names['functions'], index_cache
                        )

                    processed_classes.append(processed_cls)

        # Handle backward compatibility for imports
        processed_imports = []
        if imports:
            for imp in imports:
                if isinstance(imp, str):
                    # Legacy format - convert string to basic object
                    processed_imports.append({"module": imp, "import_type": "unknown"})
                elif isinstance(imp, dict):
                    # New format - use complete object
                    processed_imports.append(imp)

        response = {
            "file_path": file_path,
            "line_count": line_count,
            "size_bytes": size_bytes,
            "extension": extension,
            "language": language,
            "functions": processed_functions,
            "classes": processed_classes,
            "imports": processed_imports,
            "language_specific": language_specific or {}
        }

        if error:
            response["error"] = error

        return response

    @staticmethod
    def directory_info_response(
        temp_directory: str,
        exists: bool,
        is_directory: bool = False,
        contents: Optional[List[str]] = None,
        subdirectories: Optional[List[Dict[str, Any]]] = None,
        error: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Format directory information response.

        Args:
            temp_directory: Path to the directory
            exists: Whether the directory exists
            is_directory: Whether the path is a directory
            contents: List of directory contents
            subdirectories: List of subdirectory information
            error: Error message if operation failed

        Returns:
            Formatted directory info response
        """
        response = {
            "temp_directory": temp_directory,
            "exists": exists,
            "is_directory": is_directory
        }

        if contents is not None:
            response["contents"] = contents

        if subdirectories is not None:
            response["subdirectories"] = subdirectories

        if error:
            response["error"] = error

        return response

    @staticmethod
    def settings_info_response(
        settings_directory: str,
        temp_directory: str,
        temp_directory_exists: bool,
        config: Dict[str, Any],
        stats: Dict[str, Any],
        exists: bool,
        status: str = "configured",
        message: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Format settings information response.

        Args:
            settings_directory: Path to settings directory
            temp_directory: Path to temp directory
            temp_directory_exists: Whether temp directory exists
            config: Configuration data
            stats: Statistics data
            exists: Whether settings directory exists
            status: Status of the configuration
            message: Optional status message

        Returns:
            Formatted settings info response
        """
        response = {
            "settings_directory": settings_directory,
            "temp_directory": temp_directory,
            "temp_directory_exists": temp_directory_exists,
            "config": config,
            "stats": stats,
            "exists": exists
        }

        if status != "configured":
            response["status"] = status

        if message:
            response["message"] = message

        return response
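For illustration, minimal usage sketches of the simple formatters (not part of the diff; the payload values are hypothetical):

# Hypothetical usage of the formatter above.
ok = ResponseFormatter.success_response("Index rebuilt", {"file_count": 128})
# -> {"status": "success", "message": "Index rebuilt", "file_count": 128}

err = ResponseFormatter.error_response("Project path not set", error_code="NO_PROJECT")
# -> {"error": "Project path not set", "error_code": "NO_PROJECT"}

page = ResponseFormatter.search_results_response(
    [{"file": "src/app.py", "line": 42}],
    pagination={"start": 0, "max_results": 50},
)
# -> {"results": [...], "pagination": {"start": 0, "max_results": 50}}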
@@ -0,0 +1,239 @@
"""
Common validation logic for the MCP server.

This module provides shared validation functions used across services
to ensure consistent validation behavior and reduce code duplication.
"""

import os
import re
import fnmatch
from typing import Optional, List

from ..indexing.qualified_names import normalize_file_path


class ValidationHelper:
    """
    Helper class containing common validation logic.

    This class provides static methods for common validation operations
    that are used across multiple services.
    """

    @staticmethod
    def validate_file_path(file_path: str, base_path: str) -> Optional[str]:
        """
        Validate a file path for security and accessibility.

        This method checks for:
        - Path traversal attempts
        - Absolute path usage (not allowed)
        - Path existence within base directory

        Args:
            file_path: The file path to validate (should be relative)
            base_path: The base project directory path

        Returns:
            Error message if validation fails, None if valid
        """
        if not file_path:
            return "File path cannot be empty"

        if not base_path:
            return "Base path not set"

        # Handle absolute paths (especially Windows paths starting with drive letters)
        if os.path.isabs(file_path) or (len(file_path) > 1 and file_path[1] == ':'):
            return (f"Absolute file paths like '{file_path}' are not allowed. "
                    "Please use paths relative to the project root.")

        # Normalize the file path
        norm_path = os.path.normpath(file_path)

        # Check for path traversal attempts
        if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."):
            return f"Invalid file path: {file_path} (directory traversal not allowed)"

        # Construct the full path and verify it's within the project bounds
        full_path = os.path.join(base_path, norm_path)
        real_full_path = os.path.realpath(full_path)
        real_base_path = os.path.realpath(base_path)

        # Compare on a separator boundary so that sibling directories sharing a
        # prefix (e.g. /project vs /project-backup) are not treated as inside.
        if real_full_path != real_base_path and not real_full_path.startswith(real_base_path + os.sep):
            return "Access denied. File path must be within project directory."

        return None

    @staticmethod
    def validate_directory_path(dir_path: str) -> Optional[str]:
        """
        Validate a directory path for project initialization.

        Args:
            dir_path: The directory path to validate

        Returns:
            Error message if validation fails, None if valid
        """
        if not dir_path:
            return "Directory path cannot be empty"

        # Normalize and get absolute path
        try:
            norm_path = os.path.normpath(dir_path)
            abs_path = os.path.abspath(norm_path)
        except (OSError, ValueError) as e:
            return f"Invalid path format: {str(e)}"

        if not os.path.exists(abs_path):
            return f"Path does not exist: {abs_path}"

        if not os.path.isdir(abs_path):
            return f"Path is not a directory: {abs_path}"

        return None

    @staticmethod
    def validate_glob_pattern(pattern: str) -> Optional[str]:
        """
        Validate a glob pattern for file searching.

        Args:
            pattern: The glob pattern to validate

        Returns:
            Error message if validation fails, None if valid
        """
        if not pattern:
            return "Pattern cannot be empty"

        # Check for potentially dangerous patterns
        if pattern.startswith('/') or pattern.startswith('\\'):
            return "Pattern cannot start with path separator"

        # Test if the pattern is valid by trying to compile it
        try:
            # This will raise an exception if the pattern is malformed
            fnmatch.translate(pattern)
        except (ValueError, TypeError) as e:
            return f"Invalid glob pattern: {str(e)}"

        return None

    @staticmethod
    def validate_search_pattern(pattern: str, regex: bool = False) -> Optional[str]:
        """
        Validate a search pattern for code searching.

        Args:
            pattern: The search pattern to validate
            regex: Whether the pattern is a regex pattern

        Returns:
            Error message if validation fails, None if valid
        """
        if not pattern:
            return "Search pattern cannot be empty"

        if regex:
            # Basic regex validation - check for potentially dangerous patterns
            try:
                re.compile(pattern)
            except re.error as e:
                return (
                    f"Invalid regex pattern: {str(e)}. "
                    "If you intended a literal search, pass regex=False."
                )

            # Check for potentially expensive regex patterns (basic ReDoS protection)
            dangerous_patterns = [
                r'\(\?\=.*\)\+',  # Positive lookahead with quantifier
                r'\(\?\!.*\)\+',  # Negative lookahead with quantifier
                r'\(\?\<\=.*\)\+',  # Positive lookbehind with quantifier
                r'\(\?\<\!.*\)\+',  # Negative lookbehind with quantifier
            ]

            for dangerous in dangerous_patterns:
                if re.search(dangerous, pattern):
                    return "Potentially dangerous regex pattern detected"

        return None

    @staticmethod
    def validate_pagination(start_index: int, max_results: Optional[int]) -> Optional[str]:
        """
        Validate pagination parameters for search queries.

        Args:
            start_index: The index of the first result to include.
            max_results: The maximum number of results to return.

        Returns:
            Error message if validation fails, None if valid.
        """
        if not isinstance(start_index, int):
            return "start_index must be an integer"

        if start_index < 0:
            return "start_index cannot be negative"

        if max_results is None:
            return None

        if not isinstance(max_results, int):
            return "max_results must be an integer when provided"

        if max_results <= 0:
            return "max_results must be greater than zero when provided"

        return None

    @staticmethod
    def validate_file_extensions(extensions: List[str]) -> Optional[str]:
        """
        Validate a list of file extensions.

        Args:
            extensions: List of file extensions to validate

        Returns:
            Error message if validation fails, None if valid
        """
        if not extensions:
            return "Extensions list cannot be empty"

        for ext in extensions:
            if not isinstance(ext, str):
                return "All extensions must be strings"

            if not ext.startswith('.'):
                return f"Extension '{ext}' must start with a dot"

            if len(ext) < 2:
                return f"Extension '{ext}' is too short"

        return None

    @staticmethod
    def sanitize_file_path(file_path: str) -> str:
        """
        Sanitize a file path by normalizing separators and removing dangerous elements.

        Args:
            file_path: The file path to sanitize

        Returns:
            Sanitized file path
        """
        if not file_path:
            return ""

        # Normalize path separators and structure
        sanitized = normalize_file_path(file_path)

        # Remove any leading slashes to ensure relative path
        sanitized = sanitized.lstrip('/')

        return sanitized
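For illustration, a minimal sketch of the path-validation flow (not part of the diff; the base path is hypothetical):

# Hypothetical usage of the validator above.
base = "/home/dev/myproject"
for candidate in ["src/app.py", "../etc/passwd", "/absolute/path.py"]:
    error = ValidationHelper.validate_file_path(candidate, base)
    print(candidate, "->", error or "ok")
# "src/app.py" passes; the traversal and absolute-path candidates return
# human-readable error strings instead of raising.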
247 reference/code-index-mcp-master/test/README.md Normal file
@@ -0,0 +1,247 @@
# Test Projects for Code Index MCP

This directory contains comprehensive test projects designed to validate and demonstrate the capabilities of the Code Index MCP server. Each project represents a realistic, enterprise-level codebase that showcases different programming languages, frameworks, and architectural patterns.

## Project Structure

```
test/
├── sample-projects/
│   ├── python/
│   │   └── user_management/      # Python user management system
│   ├── java/
│   │   └── user-management/      # Java Spring Boot user management
│   ├── go/
│   │   └── user-management/      # Go Gin user management API
│   ├── javascript/
│   │   └── user-management/      # Node.js Express user management
│   ├── typescript/
│   │   └── user-management/      # TypeScript Express user management
│   └── objective-c/              # Objective-C test files
└── README.md                     # This file
```

## Sample Projects Overview

Each sample project implements a comprehensive user management system with the following core features:

### Common Features Across All Projects
- **User Registration & Authentication**: Secure user registration with password hashing
- **Role-Based Access Control (RBAC)**: Admin, User, and Guest roles with permissions
- **CRUD Operations**: Complete Create, Read, Update, Delete functionality
- **Search & Filtering**: Full-text search and role/status-based filtering
- **Pagination**: Efficient pagination for large datasets
- **Input Validation**: Comprehensive validation and sanitization
- **Error Handling**: Structured error handling with custom error classes
- **Logging**: Structured logging for debugging and monitoring
- **Security**: Password hashing, rate limiting, and security headers
- **Data Export**: User data export functionality
- **Statistics**: User analytics and statistics

### Language-Specific Implementation Details

#### Python Project (`python/user_management/`)
- **Framework**: Flask-based web application
- **Database**: SQLAlchemy ORM with SQLite
- **Authentication**: JWT tokens with BCrypt password hashing
- **Structure**: Clean package structure with models, services, and utilities
- **Features**: CLI interface, comprehensive validation, and export functionality

**Key Files:**
- `models/person.py` - Base Person model
- `models/user.py` - User model with authentication
- `services/user_manager.py` - Business logic layer
- `services/auth_service.py` - Authentication service
- `utils/` - Validation, exceptions, and helper utilities
- `cli.py` - Command-line interface

#### Java Project (`java/user-management/`)
- **Framework**: Spring Boot with Spring Data JPA
- **Database**: H2 in-memory database with JPA
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Maven project with standard Java package structure
- **Features**: REST API, validation annotations, and comprehensive testing

**Key Files:**
- `model/User.java` - JPA entity with validation
- `service/UserService.java` - Business logic service
- `controller/UserController.java` - REST API endpoints
- `util/` - Validation, exceptions, and utilities
- `Application.java` - Spring Boot application entry point

#### Go Project (`go/user-management/`)
- **Framework**: Gin web framework with GORM
- **Database**: SQLite with GORM ORM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Clean Go module structure with internal packages
- **Features**: High-performance API, middleware, and concurrent processing

**Key Files:**
- `internal/models/user.go` - User model with GORM
- `internal/services/user_service.go` - Business logic
- `pkg/api/handlers/user_handler.go` - HTTP handlers
- `pkg/middleware/` - Authentication and validation middleware
- `cmd/server/main.go` - Application entry point

#### JavaScript Project (`javascript/user-management/`)
- **Framework**: Express.js with Mongoose
- **Database**: MongoDB with Mongoose ODM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Modern Node.js project with ES6+ features
- **Features**: Async/await, middleware, and comprehensive error handling

**Key Files:**
- `src/models/User.js` - Mongoose model with validation
- `src/services/UserService.js` - Business logic service
- `src/routes/userRoutes.js` - Express routes
- `src/middleware/` - Authentication and validation middleware
- `src/server.js` - Express application setup

#### TypeScript Project (`typescript/user-management/`)
- **Framework**: Express.js with Mongoose (TypeScript)
- **Database**: MongoDB with Mongoose ODM
- **Authentication**: JWT tokens with BCrypt
- **Structure**: Type-safe Node.js project with comprehensive interfaces
- **Features**: Full type safety, interfaces, and advanced TypeScript features

**Key Files:**
- `src/types/User.ts` - TypeScript interfaces and types
- `src/models/User.ts` - Mongoose model with TypeScript
- `src/services/UserService.ts` - Typed business logic service
- `src/routes/userRoutes.ts` - Typed Express routes
- `src/server.ts` - TypeScript Express application

#### Objective-C Project (`objective-c/`)
- **Framework**: Foundation classes
- **Features**: Classes, properties, methods, protocols
- **Structure**: Traditional .h/.m file structure

**Key Files:**
- `Person.h/.m` - Person class with properties
- `UserManager.h/.m` - User management functionality
- `main.m` - Application entry point

## Testing the Code Index MCP

These projects are designed to test various aspects of the Code Index MCP:

### File Analysis Capabilities
- **Language Detection**: Automatic detection of programming languages
- **Syntax Parsing**: Parsing of different syntax structures
- **Import/Dependency Analysis**: Understanding of module dependencies
- **Code Structure**: Recognition of classes, functions, and interfaces

### Search and Navigation
- **Symbol Search**: Finding functions, classes, and variables
- **Cross-Reference**: Finding usage of symbols across files
- **Fuzzy Search**: Approximate matching for typos and partial queries
- **Pattern Matching**: Regular expression and pattern-based searches

### Code Intelligence
- **Function Signatures**: Understanding of function parameters and return types
- **Variable Types**: Type inference and tracking
- **Scope Analysis**: Understanding of variable and function scope
- **Documentation**: Parsing of comments and documentation

### Performance Testing
- **Large Codebases**: Testing with realistic project sizes
- **Complex Structures**: Nested packages and deep directory structures
- **Multiple File Types**: Mixed file types within projects
- **Concurrent Access**: Multiple simultaneous search operations (see the sketch after this list)
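
To exercise that last item, a test driver can fan several queries out at once. Here is a minimal Go sketch of that pattern; the `search` stub and its tiny corpus are illustrative placeholders, not code from any of the sample projects:

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// search stands in for a real index query; it just counts lines
// of the corpus that contain the query string.
func search(corpus []string, query string) int {
	hits := 0
	for _, line := range corpus {
		if strings.Contains(line, query) {
			hits++
		}
	}
	return hits
}

func main() {
	corpus := []string{"func CreateUser", "func DeleteUser", "type User struct"}
	queries := []string{"User", "Create", "Delete"}

	var wg sync.WaitGroup
	results := make([]int, len(queries))

	// Issue every query concurrently; each goroutine writes only
	// to its own slot, so no further synchronization is needed.
	for i, q := range queries {
		wg.Add(1)
		go func(i int, q string) {
			defer wg.Done()
			results[i] = search(corpus, q)
		}(i, q)
	}
	wg.Wait()

	for i, q := range queries {
		fmt.Printf("%q -> %d hits\n", q, results[i])
	}
}
```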

## Running the Projects

Each project includes comprehensive setup instructions in its respective README.md file. General steps:

1. Navigate to the project directory
2. Install dependencies using the appropriate package manager
3. Set up environment variables (see .env.example files)
4. Run the application using the provided scripts
5. Test the API endpoints using the provided examples

### Quick Start Examples

```bash
# Python project
cd test/sample-projects/python/user_management
pip install -r requirements.txt
python cli.py

# Java project
cd test/sample-projects/java/user-management
mvn spring-boot:run

# Go project
cd test/sample-projects/go/user-management
go run cmd/server/main.go

# JavaScript project
cd test/sample-projects/javascript/user-management
npm install
npm run dev

# TypeScript project
cd test/sample-projects/typescript/user-management
npm install
npm run dev
```

## MCP Server Testing

To test the Code Index MCP server with these projects:

1. **Set Project Path**: Use the `set_project_path` tool to point to a project directory
2. **Index Files**: The server will automatically index all files in the project
3. **Search Testing**: Test various search queries and patterns
4. **Analysis Testing**: Use the analysis tools to examine code structure
5. **Performance Testing**: Measure response times and resource usage

### Example MCP Commands

```bash
# Set project path
set_project_path /path/to/test/sample-projects/python/user_management

# Search for user-related functions
search_code_advanced "def create_user" --file-pattern "*.py"

# Find all authentication-related code
search_code_advanced "auth" --fuzzy true

# Get file summary
get_file_summary models/user.py

# Find TypeScript interfaces
search_code_advanced "interface.*User" --regex true --file-pattern "*.ts"
```

## Contributing

When adding new test projects:

1. Follow the established patterns and structure
2. Implement all core features consistently
3. Include comprehensive documentation
4. Add appropriate test cases
5. Update this README with project details

## Security Considerations

All test projects include:
- Secure password hashing (BCrypt)
- Input validation and sanitization
- Rate limiting and security headers
- JWT token-based authentication
- Environment variable configuration
- Proper error handling without information disclosure

## Future Enhancements

Potential additions to the test suite:
- **Rust Project**: Systems programming language example
- **C++ Project**: Complex C++ codebase with templates
- **C# Project**: .NET Core application
- **PHP Project**: Laravel-based web application
- **Ruby Project**: Rails application
- **Swift Project**: iOS application structure
- **Kotlin Project**: Android/JVM application
@@ -0,0 +1,324 @@
# User Management System (Go)

A comprehensive user management system built in Go for testing Code Index MCP's analysis capabilities.

## Features

- **User Management**: Create, update, delete, and search users
- **REST API**: Full HTTP API with JSON responses
- **Authentication**: BCrypt password hashing and JWT tokens
- **Authorization**: Role-based access control (Admin, User, Guest)
- **Database**: SQLite with GORM ORM
- **Pagination**: Efficient pagination for large datasets
- **Search**: Full-text search across users
- **Export**: JSON export functionality
- **Logging**: Structured logging with middleware
- **CORS**: Cross-origin resource sharing support

## Project Structure

```
user-management/
├── cmd/
│   ├── server/
│   │   └── main.go              # HTTP server entry point
│   └── cli/
│       └── main.go              # CLI application
├── internal/
│   ├── models/
│   │   └── user.go              # User model and types
│   ├── services/
│   │   └── user_service.go      # Business logic
│   └── utils/
│       └── types.go             # Utility types and helpers
├── pkg/
│   └── api/
│       └── user_handler.go      # HTTP handlers
├── go.mod                       # Go module file
├── go.sum                       # Go dependencies
└── README.md                    # This file
```

## Technologies Used

- **Go 1.21**: Modern Go with generics and latest features
- **Gin**: HTTP web framework
- **GORM**: ORM for database operations
- **SQLite**: Embedded database
- **UUID**: Unique identifiers
- **BCrypt**: Password hashing
- **JWT**: JSON Web Tokens (planned)
- **Viper**: Configuration management
- **Cobra**: CLI framework

## Build and Run

### Prerequisites

- Go 1.21 or higher

### Install Dependencies

```bash
go mod tidy
```

### Run HTTP Server

```bash
go run cmd/server/main.go
```

The server will start on `http://localhost:8080`

### Run CLI

```bash
go run cmd/cli/main.go
```

### Build

```bash
# Build server
go build -o bin/server cmd/server/main.go

# Build CLI
go build -o bin/cli cmd/cli/main.go
```

## API Endpoints

### Users

| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/users` | Create a new user |
| `GET` | `/api/v1/users` | Get all users (paginated) |
| `GET` | `/api/v1/users/:id` | Get user by ID |
| `PUT` | `/api/v1/users/:id` | Update user |
| `DELETE` | `/api/v1/users/:id` | Delete user |
| `GET` | `/api/v1/users/search` | Search users |
| `GET` | `/api/v1/users/stats` | Get user statistics |
| `GET` | `/api/v1/users/export` | Export users |

### Authentication

| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/auth/login` | User login |
| `POST` | `/api/v1/auth/logout` | User logout |
| `POST` | `/api/v1/auth/change-password` | Change password |

### Admin

| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/admin/users/:id/reset-password` | Reset user password |
| `POST` | `/api/v1/admin/users/:id/permissions` | Add permission |
| `DELETE` | `/api/v1/admin/users/:id/permissions` | Remove permission |

## Usage Examples

### Create User

```bash
curl -X POST http://localhost:8080/api/v1/users \
  -H "Content-Type: application/json" \
  -d '{
    "username": "johndoe",
    "email": "john@example.com",
    "name": "John Doe",
    "age": 30,
    "password": "password123"
  }'
```

### Get Users

```bash
curl "http://localhost:8080/api/v1/users?page=1&page_size=10"
```

### Search Users

```bash
curl "http://localhost:8080/api/v1/users/search?q=john&page=1&page_size=10"
```

### Login

```bash
curl -X POST http://localhost:8080/api/v1/auth/login \
  -H "Content-Type: application/json" \
  -d '{
    "username": "admin",
    "password": "admin123"
  }'
```

### Get Statistics

```bash
curl http://localhost:8080/api/v1/users/stats
```

## Programmatic Usage

```go
package main

import (
	"fmt"

	"github.com/example/user-management/internal/models"
	"github.com/example/user-management/internal/services"
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"
)

func main() {
	// Initialize database
	db, err := gorm.Open(sqlite.Open("users.db"), &gorm.Config{})
	if err != nil {
		panic(err)
	}

	// Auto migrate
	db.AutoMigrate(&models.User{})

	// Initialize service
	userService := services.NewUserService(db)

	// Create user
	req := &models.UserRequest{
		Username: "alice",
		Email:    "alice@example.com",
		Name:     "Alice Smith",
		Age:      25,
		Password: "password123",
		Role:     models.RoleUser,
	}

	user, err := userService.CreateUser(req)
	if err != nil {
		panic(err)
	}
	fmt.Println("created:", user)

	// Authenticate user
	authUser, err := userService.AuthenticateUser("alice", "password123")
	if err != nil {
		panic(err)
	}
	fmt.Println("authenticated:", authUser)

	// Get statistics
	stats, err := userService.GetUserStats()
	if err != nil {
		panic(err)
	}
	fmt.Printf("stats: %+v\n", stats)
}
```

## Testing Features

This project tests the following Go language features:

### Core Language Features
- **Structs and Methods**: User model with associated methods
- **Interfaces**: Service and handler interfaces
- **Pointers**: Efficient memory management
- **Error Handling**: Comprehensive error handling patterns
- **Packages**: Modular code organization
- **Imports**: Internal and external package imports

### Modern Go Features
- **Generics**: Type-safe collections (Go 1.18+)
- **Modules**: Dependency management with go.mod
- **Context**: Request context handling
- **Channels**: Concurrent programming (in background tasks)
- **Goroutines**: Concurrent execution
- **JSON Tags**: Struct field mapping

### Advanced Features
- **Reflection**: GORM model reflection
- **Build Tags**: Conditional compilation
- **Embedding**: Struct embedding for composition
- **Type Assertions**: Interface type checking
- **Panic/Recover**: Error recovery mechanisms

### Framework Integration
- **Gin**: HTTP router and middleware
- **GORM**: ORM with hooks and associations
- **UUID**: Unique identifier generation
- **BCrypt**: Cryptographic hashing
- **SQLite**: Embedded database

### Design Patterns
- **Repository Pattern**: Data access layer
- **Service Layer**: Business logic separation
- **Dependency Injection**: Service composition
- **Middleware Pattern**: HTTP request processing
- **Factory Pattern**: Service creation

## Dependencies

### Core Dependencies
- **gin-gonic/gin**: Web framework
- **gorm.io/gorm**: ORM
- **gorm.io/driver/sqlite**: SQLite driver
- **google/uuid**: UUID generation
- **golang.org/x/crypto**: Cryptographic functions

### CLI Dependencies
- **spf13/cobra**: CLI framework
- **spf13/viper**: Configuration management

### Development Dependencies
- **testify**: Testing framework
- **mockery**: Mock generation

## Configuration

The application can be configured using environment variables or a configuration file:

```yaml
database:
  driver: sqlite
  database: users.db

server:
  port: 8080
  host: localhost

jwt:
  secret_key: your-secret-key
  expiration_hours: 24
```
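
For reference, a minimal sketch of loading this file into the `Config` struct from `internal/utils` using Viper. The file name `config.yaml` and the search path are assumptions, not taken from the project; note also that keys with underscores (such as `secret_key`) would need `mapstructure` tags on the struct to unmarshal, since the struct currently carries only `json` tags:

```go
package main

import (
	"log"

	"github.com/example/user-management/internal/utils"
	"github.com/spf13/viper"
)

func loadConfig() (*utils.Config, error) {
	viper.SetConfigName("config") // looks for config.yaml (assumed name)
	viper.SetConfigType("yaml")
	viper.AddConfigPath(".")

	if err := viper.ReadInConfig(); err != nil {
		return nil, err
	}

	// Unmarshal matches YAML keys to struct fields case-insensitively.
	var cfg utils.Config
	if err := viper.Unmarshal(&cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

func main() {
	cfg, err := loadConfig()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("server will listen on %s:%d", cfg.Server.Host, cfg.Server.Port)
}
```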

## Development

### Run Tests

```bash
go test ./...
```

### Generate Mocks

```bash
mockery --all
```

### Format Code

```bash
gofmt -w .
```

### Lint Code

```bash
golangci-lint run
```

## License

MIT License - This is a sample project for testing purposes.
@@ -0,0 +1,294 @@
package main

import (
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/example/user-management/internal/models"
	"github.com/example/user-management/internal/services"
	"github.com/example/user-management/pkg/api"
	"github.com/gin-gonic/gin"
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"
)

func main() {
	// Initialize database
	db, err := initDatabase()
	if err != nil {
		log.Fatal("Failed to initialize database:", err)
	}

	// Initialize services
	userService := services.NewUserService(db)

	// Initialize API handlers
	userHandler := api.NewUserHandler(userService)

	// Setup routes
	router := setupRoutes(userHandler)

	// Create sample data
	createSampleData(userService)

	// Start server
	log.Println("Starting server on :8080")
	if err := router.Run(":8080"); err != nil {
		log.Fatal("Failed to start server:", err)
	}
}

func initDatabase() (*gorm.DB, error) {
	db, err := gorm.Open(sqlite.Open("users.db"), &gorm.Config{})
	if err != nil {
		return nil, err
	}

	// Auto migrate
	if err := db.AutoMigrate(&models.User{}); err != nil {
		return nil, err
	}

	return db, nil
}

func setupRoutes(userHandler *api.UserHandler) *gin.Engine {
	router := gin.Default()

	// Middleware
	router.Use(corsMiddleware())
	router.Use(loggingMiddleware())

	// Health check
	router.GET("/health", healthCheck)

	// API routes
	v1 := router.Group("/api/v1")
	{
		users := v1.Group("/users")
		{
			users.POST("", userHandler.CreateUser)
			users.GET("", userHandler.GetUsers)
			users.GET("/:id", userHandler.GetUser)
			users.PUT("/:id", userHandler.UpdateUser)
			users.DELETE("/:id", userHandler.DeleteUser)
			users.GET("/search", userHandler.SearchUsers)
			users.GET("/stats", userHandler.GetUserStats)
			users.GET("/export", userHandler.ExportUsers)
		}

		auth := v1.Group("/auth")
		{
			auth.POST("/login", userHandler.Login)
			auth.POST("/logout", userHandler.Logout)
			auth.POST("/change-password", userHandler.ChangePassword)
		}

		admin := v1.Group("/admin")
		{
			admin.POST("/users/:id/reset-password", userHandler.ResetPassword)
			admin.POST("/users/:id/permissions", userHandler.AddPermission)
			admin.DELETE("/users/:id/permissions", userHandler.RemovePermission)
		}
	}

	return router
}

func healthCheck(c *gin.Context) {
	c.JSON(http.StatusOK, gin.H{
		"status":    "healthy",
		"timestamp": time.Now().UTC(),
		"version":   "1.0.0",
	})
}

func corsMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Header("Access-Control-Allow-Origin", "*")
		c.Header("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
		c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization")

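		// Short-circuit CORS preflight requests with 200 OK.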
		if c.Request.Method == "OPTIONS" {
			c.AbortWithStatus(http.StatusOK)
			return
		}

		c.Next()
	}
}

func loggingMiddleware() gin.HandlerFunc {
	return gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string {
		return fmt.Sprintf("%s - [%s] \"%s %s %s %d %s \"%s\" %s\"\n",
			param.ClientIP,
			param.TimeStamp.Format(time.RFC1123),
			param.Method,
			param.Path,
			param.Request.Proto,
			param.StatusCode,
			param.Latency,
			param.Request.UserAgent(),
			param.ErrorMessage,
		)
	})
}

func createSampleData(userService *services.UserService) {
	// Check if admin user already exists
	if _, err := userService.GetUserByUsername("admin"); err == nil {
		return // Admin user already exists
	}

	// Create admin user
	adminReq := &models.UserRequest{
		Username: "admin",
		Email:    "admin@example.com",
		Name:     "System Administrator",
		Age:      30,
		Password: "admin123",
		Role:     models.RoleAdmin,
	}

	admin, err := userService.CreateUser(adminReq)
	if err != nil {
		log.Printf("Failed to create admin user: %v", err)
		return
	}

	// Add admin permissions
	permissions := []string{
		"user_management",
		"system_admin",
		"user_read",
		"user_write",
		"user_delete",
	}

	for _, perm := range permissions {
		if err := userService.AddPermission(admin.ID, perm); err != nil {
			log.Printf("Failed to add permission %s to admin: %v", perm, err)
		}
	}

	// Create sample users
	sampleUsers := []*models.UserRequest{
		{
			Username: "john_doe",
			Email:    "john@example.com",
			Name:     "John Doe",
			Age:      25,
			Password: "password123",
			Role:     models.RoleUser,
		},
		{
			Username: "jane_smith",
			Email:    "jane@example.com",
			Name:     "Jane Smith",
			Age:      28,
			Password: "password123",
			Role:     models.RoleUser,
		},
		{
			Username: "guest_user",
			Email:    "guest@example.com",
			Name:     "Guest User",
			Age:      22,
			Password: "password123",
			Role:     models.RoleGuest,
		},
	}

	for _, userReq := range sampleUsers {
		if _, err := userService.CreateUser(userReq); err != nil {
			log.Printf("Failed to create user %s: %v", userReq.Username, err)
		}
	}

	log.Println("Sample data created successfully")
}

// Helper functions for demo
func printUserStats(userService *services.UserService) {
	stats, err := userService.GetUserStats()
	if err != nil {
		log.Printf("Failed to get user stats: %v", err)
		return
	}

	log.Printf("User Statistics:")
	log.Printf("  Total: %d", stats.Total)
	log.Printf("  Active: %d", stats.Active)
	log.Printf("  Admin: %d", stats.Admin)
	log.Printf("  User: %d", stats.User)
	log.Printf("  Guest: %d", stats.Guest)
	log.Printf("  With Email: %d", stats.WithEmail)
}

func demonstrateUserOperations(userService *services.UserService) {
	log.Println("\n=== User Management Demo ===")

	// Get all users
	users, total, err := userService.GetAllUsers(1, 10)
	if err != nil {
		log.Printf("Failed to get users: %v", err)
		return
	}

	log.Printf("Found %d users (total: %d):", len(users), total)
	for _, user := range users {
		log.Printf("  - %s (%s) - %s [%s]",
			user.Username, user.Name, user.Role, user.Status)
	}

	// Test authentication
	log.Println("\n=== Authentication Test ===")
	user, err := userService.AuthenticateUser("admin", "admin123")
	if err != nil {
		log.Printf("Authentication failed: %v", err)
	} else {
		log.Printf("Authentication successful for: %s", user.Username)
		log.Printf("Last login: %v", user.LastLogin)
	}

	// Test search
	log.Println("\n=== Search Test ===")
	searchResults, _, err := userService.SearchUsers("john", 1, 10)
	if err != nil {
		log.Printf("Search failed: %v", err)
	} else {
		log.Printf("Search results for 'john': %d users", len(searchResults))
		for _, user := range searchResults {
			log.Printf("  - %s (%s)", user.Username, user.Name)
		}
	}

	// Print stats
	log.Println("\n=== Statistics ===")
	printUserStats(userService)
}

// Run demo if not in server mode
func runDemo() {
	log.Println("Running User Management Demo...")

	// Initialize database
	db, err := initDatabase()
	if err != nil {
		log.Fatal("Failed to initialize database:", err)
	}

	// Initialize services
	userService := services.NewUserService(db)

	// Create sample data
	createSampleData(userService)

	// Demonstrate operations
	demonstrateUserOperations(userService)

	log.Println("\nDemo completed!")
}
@@ -0,0 +1,53 @@
module github.com/example/user-management

go 1.21

require (
	github.com/gin-gonic/gin v1.9.1
	github.com/golang-jwt/jwt/v5 v5.0.0
	github.com/google/uuid v1.3.0
	github.com/spf13/cobra v1.7.0
	github.com/spf13/viper v1.16.0
	golang.org/x/crypto v0.11.0
	gorm.io/driver/sqlite v1.5.2
	gorm.io/gorm v1.25.2
)

require (
	github.com/bytedance/sonic v1.9.1 // indirect
	github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
	github.com/fsnotify/fsnotify v1.6.0 // indirect
	github.com/gabriel-vasile/mimetype v1.4.2 // indirect
	github.com/gin-contrib/sse v0.1.0 // indirect
	github.com/go-playground/locales v0.14.1 // indirect
	github.com/go-playground/universal-translator v0.18.1 // indirect
	github.com/go-playground/validator/v10 v10.14.0 // indirect
	github.com/goccy/go-json v0.10.2 // indirect
	github.com/hashicorp/hcl v1.0.0 // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/jinzhu/inflection v1.0.0 // indirect
	github.com/jinzhu/now v1.1.5 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/klauspost/cpuid/v2 v2.2.4 // indirect
	github.com/leodido/go-urn v1.2.4 // indirect
	github.com/magiconair/properties v1.8.7 // indirect
	github.com/mattn/go-isatty v0.0.19 // indirect
	github.com/mattn/go-sqlite3 v1.14.17 // indirect
	github.com/mitchellh/mapstructure v1.5.0 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/pelletier/go-toml/v2 v2.0.8 // indirect
	github.com/spf13/afero v1.9.5 // indirect
	github.com/spf13/cast v1.5.1 // indirect
	github.com/spf13/jwalterweatherman v1.1.0 // indirect
	github.com/spf13/pflag v1.0.5 // indirect
	github.com/subosito/gotenv v1.4.2 // indirect
	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
	github.com/ugorji/go/codec v1.2.11 // indirect
	golang.org/x/arch v0.3.0 // indirect
	golang.org/x/net v0.10.0 // indirect
	golang.org/x/sys v0.10.0 // indirect
	golang.org/x/text v0.11.0 // indirect
	gopkg.in/ini.v1 v1.67.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)
@@ -0,0 +1,310 @@
package models

import (
	"encoding/json"
	"errors"
	"time"

	"github.com/google/uuid"
	"golang.org/x/crypto/bcrypt"
	"gorm.io/gorm"
)

// UserRole represents the role of a user
type UserRole string

const (
	RoleAdmin UserRole = "admin"
	RoleUser  UserRole = "user"
	RoleGuest UserRole = "guest"
)

// UserStatus represents the status of a user
type UserStatus string

const (
	StatusActive    UserStatus = "active"
	StatusInactive  UserStatus = "inactive"
	StatusSuspended UserStatus = "suspended"
	StatusDeleted   UserStatus = "deleted"
)

// User represents a user in the system
type User struct {
	ID            uuid.UUID      `json:"id" gorm:"type:uuid;primary_key"`
	Username      string         `json:"username" gorm:"uniqueIndex;not null"`
	Email         string         `json:"email" gorm:"uniqueIndex"`
	Name          string         `json:"name" gorm:"not null"`
	Age           int            `json:"age"`
	PasswordHash  string         `json:"-" gorm:"not null"`
	Role          UserRole       `json:"role" gorm:"default:user"`
	Status        UserStatus     `json:"status" gorm:"default:active"`
	LastLogin     *time.Time     `json:"last_login"`
	LoginAttempts int            `json:"login_attempts" gorm:"default:0"`
	CreatedAt     time.Time      `json:"created_at"`
	UpdatedAt     time.Time      `json:"updated_at"`
	DeletedAt     gorm.DeletedAt `json:"-" gorm:"index"`

	// Permissions is a JSON field containing user permissions
	Permissions []string `json:"permissions" gorm:"type:json"`

	// Metadata for additional user information
	Metadata map[string]interface{} `json:"metadata" gorm:"type:json"`
}

// UserRequest represents a request to create or update a user
type UserRequest struct {
	Username string                 `json:"username" binding:"required,min=3,max=20"`
	Email    string                 `json:"email" binding:"omitempty,email"`
	Name     string                 `json:"name" binding:"required,min=1,max=100"`
	Age      int                    `json:"age" binding:"min=0,max=150"`
	Password string                 `json:"password" binding:"required,min=8"`
	Role     UserRole               `json:"role" binding:"omitempty,oneof=admin user guest"`
	Metadata map[string]interface{} `json:"metadata"`
}

// UserResponse represents a user response (without sensitive data)
type UserResponse struct {
	ID          uuid.UUID              `json:"id"`
	Username    string                 `json:"username"`
	Email       string                 `json:"email"`
	Name        string                 `json:"name"`
	Age         int                    `json:"age"`
	Role        UserRole               `json:"role"`
	Status      UserStatus             `json:"status"`
	LastLogin   *time.Time             `json:"last_login"`
	CreatedAt   time.Time              `json:"created_at"`
	UpdatedAt   time.Time              `json:"updated_at"`
	Permissions []string               `json:"permissions"`
	Metadata    map[string]interface{} `json:"metadata"`
}

// BeforeCreate is a GORM hook that runs before creating a user
func (u *User) BeforeCreate(tx *gorm.DB) error {
	if u.ID == uuid.Nil {
		u.ID = uuid.New()
	}

	if u.Permissions == nil {
		u.Permissions = []string{}
	}

	if u.Metadata == nil {
		u.Metadata = make(map[string]interface{})
	}

	return nil
}

// SetPassword hashes and sets the user's password
func (u *User) SetPassword(password string) error {
	if len(password) < 8 {
		return errors.New("password must be at least 8 characters long")
	}

	hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
	if err != nil {
		return err
	}

	u.PasswordHash = string(hash)
	return nil
}

// VerifyPassword checks if the provided password matches the user's password
func (u *User) VerifyPassword(password string) bool {
	err := bcrypt.CompareHashAndPassword([]byte(u.PasswordHash), []byte(password))
	return err == nil
}

// HasPermission checks if the user has a specific permission
func (u *User) HasPermission(permission string) bool {
	for _, p := range u.Permissions {
		if p == permission {
			return true
		}
	}
	return false
}

// AddPermission adds a permission to the user
func (u *User) AddPermission(permission string) {
	if !u.HasPermission(permission) {
		u.Permissions = append(u.Permissions, permission)
	}
}

// RemovePermission removes a permission from the user
func (u *User) RemovePermission(permission string) {
	for i, p := range u.Permissions {
		if p == permission {
			u.Permissions = append(u.Permissions[:i], u.Permissions[i+1:]...)
			break
		}
	}
}

// IsActive checks if the user is active
func (u *User) IsActive() bool {
	return u.Status == StatusActive
}

// IsAdmin checks if the user is an admin
func (u *User) IsAdmin() bool {
	return u.Role == RoleAdmin
}

// IsLocked checks if the user is locked due to too many failed login attempts
func (u *User) IsLocked() bool {
	return u.LoginAttempts >= 5 || u.Status == StatusSuspended
}

// Login records a successful login
func (u *User) Login() error {
	if !u.IsActive() {
		return errors.New("user is not active")
	}

	if u.IsLocked() {
		return errors.New("user is locked")
	}

	now := time.Now()
	u.LastLogin = &now
	u.LoginAttempts = 0

	return nil
}

// FailedLoginAttempt records a failed login attempt
func (u *User) FailedLoginAttempt() {
	u.LoginAttempts++
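	// Five consecutive failures suspend the account (see IsLocked).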
	if u.LoginAttempts >= 5 {
		u.Status = StatusSuspended
	}
}

// ResetLoginAttempts resets the login attempts counter
func (u *User) ResetLoginAttempts() {
	u.LoginAttempts = 0
}

// Activate activates the user account
func (u *User) Activate() {
	u.Status = StatusActive
	u.LoginAttempts = 0
}

// Deactivate deactivates the user account
func (u *User) Deactivate() {
	u.Status = StatusInactive
}

// Suspend suspends the user account
func (u *User) Suspend() {
	u.Status = StatusSuspended
}

// Delete marks the user as deleted
func (u *User) Delete() {
	u.Status = StatusDeleted
}

// ToResponse converts a User to a UserResponse
func (u *User) ToResponse() *UserResponse {
	return &UserResponse{
		ID:          u.ID,
		Username:    u.Username,
		Email:       u.Email,
		Name:        u.Name,
		Age:         u.Age,
		Role:        u.Role,
		Status:      u.Status,
		LastLogin:   u.LastLogin,
		CreatedAt:   u.CreatedAt,
		UpdatedAt:   u.UpdatedAt,
		Permissions: u.Permissions,
		Metadata:    u.Metadata,
	}
}

// FromRequest creates a User from a UserRequest
func (u *User) FromRequest(req *UserRequest) error {
	u.Username = req.Username
	u.Email = req.Email
	u.Name = req.Name
	u.Age = req.Age
	// Only override the role when one was supplied; an empty request
	// value must not wipe out a default role set by the caller.
	if req.Role != "" {
		u.Role = req.Role
	}
	u.Metadata = req.Metadata

	if req.Password != "" {
		return u.SetPassword(req.Password)
	}

	return nil
}

// MarshalJSON customizes JSON marshaling for User
func (u *User) MarshalJSON() ([]byte, error) {
	return json.Marshal(u.ToResponse())
}

// Validate validates the user model
func (u *User) Validate() error {
	if len(u.Username) < 3 || len(u.Username) > 20 {
		return errors.New("username must be between 3 and 20 characters")
	}

	if len(u.Name) == 0 || len(u.Name) > 100 {
		return errors.New("name must be between 1 and 100 characters")
	}

	if u.Age < 0 || u.Age > 150 {
		return errors.New("age must be between 0 and 150")
	}

	if u.Role != RoleAdmin && u.Role != RoleUser && u.Role != RoleGuest {
		return errors.New("invalid role")
	}

	if u.Status != StatusActive && u.Status != StatusInactive &&
		u.Status != StatusSuspended && u.Status != StatusDeleted {
		return errors.New("invalid status")
	}

	return nil
}

// TableName returns the table name for GORM
func (u *User) TableName() string {
	return "users"
}

// GetMetadata gets a metadata value by key
func (u *User) GetMetadata(key string) (interface{}, bool) {
	if u.Metadata == nil {
		return nil, false
	}
	value, exists := u.Metadata[key]
	return value, exists
}

// SetMetadata sets a metadata value
func (u *User) SetMetadata(key string, value interface{}) {
	if u.Metadata == nil {
		u.Metadata = make(map[string]interface{})
	}
	u.Metadata[key] = value
}

// RemoveMetadata removes a metadata key
func (u *User) RemoveMetadata(key string) {
	if u.Metadata != nil {
		delete(u.Metadata, key)
	}
}

// String returns a string representation of the user
func (u *User) String() string {
	return u.Username + " (" + u.Name + ")"
}
@@ -0,0 +1,419 @@
package services

import (
	"encoding/json"
	"errors"
	"fmt"
	"strings"

	"github.com/example/user-management/internal/models"
	"github.com/example/user-management/internal/utils"
	"github.com/google/uuid"
	"gorm.io/gorm"
)

// UserService handles user-related business logic
type UserService struct {
	db *gorm.DB
}

// NewUserService creates a new user service
func NewUserService(db *gorm.DB) *UserService {
	return &UserService{db: db}
}

// CreateUser creates a new user
func (s *UserService) CreateUser(req *models.UserRequest) (*models.User, error) {
	// Check if username already exists
	var existingUser models.User
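	// A nil error from First means a row was found, i.e. the name is taken.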
	if err := s.db.Where("username = ?", req.Username).First(&existingUser).Error; err == nil {
		return nil, errors.New("username already exists")
	}

	// Check if email already exists (if provided)
	if req.Email != "" {
		if err := s.db.Where("email = ?", req.Email).First(&existingUser).Error; err == nil {
			return nil, errors.New("email already exists")
		}
	}

	// Create new user
	user := &models.User{
		Role:   models.RoleUser,
		Status: models.StatusActive,
	}

	if err := user.FromRequest(req); err != nil {
		return nil, fmt.Errorf("failed to create user from request: %w", err)
	}

	if err := user.Validate(); err != nil {
		return nil, fmt.Errorf("user validation failed: %w", err)
	}

	if err := s.db.Create(user).Error; err != nil {
		return nil, fmt.Errorf("failed to create user: %w", err)
	}

	return user, nil
}

// GetUserByID retrieves a user by ID
func (s *UserService) GetUserByID(id uuid.UUID) (*models.User, error) {
	var user models.User
	if err := s.db.First(&user, "id = ?", id).Error; err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, errors.New("user not found")
		}
		return nil, fmt.Errorf("failed to get user: %w", err)
	}
	return &user, nil
}

// GetUserByUsername retrieves a user by username
func (s *UserService) GetUserByUsername(username string) (*models.User, error) {
	var user models.User
	if err := s.db.Where("username = ?", username).First(&user).Error; err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, errors.New("user not found")
		}
		return nil, fmt.Errorf("failed to get user: %w", err)
	}
	return &user, nil
}

// GetUserByEmail retrieves a user by email
func (s *UserService) GetUserByEmail(email string) (*models.User, error) {
	var user models.User
	if err := s.db.Where("email = ?", email).First(&user).Error; err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, errors.New("user not found")
		}
		return nil, fmt.Errorf("failed to get user: %w", err)
	}
	return &user, nil
}

// UpdateUser updates an existing user
func (s *UserService) UpdateUser(id uuid.UUID, updates map[string]interface{}) (*models.User, error) {
	user, err := s.GetUserByID(id)
	if err != nil {
		return nil, err
	}

	// Apply updates
	for key, value := range updates {
		switch key {
		case "name":
			if name, ok := value.(string); ok {
				user.Name = name
			}
		case "age":
			if age, ok := value.(int); ok {
				user.Age = age
			}
		case "email":
			if email, ok := value.(string); ok {
				user.Email = email
			}
		case "role":
			if role, ok := value.(models.UserRole); ok {
				user.Role = role
			}
		case "status":
			if status, ok := value.(models.UserStatus); ok {
				user.Status = status
			}
		case "metadata":
			if metadata, ok := value.(map[string]interface{}); ok {
				user.Metadata = metadata
			}
		}
	}

	if err := user.Validate(); err != nil {
		return nil, fmt.Errorf("user validation failed: %w", err)
	}

	if err := s.db.Save(user).Error; err != nil {
		return nil, fmt.Errorf("failed to update user: %w", err)
	}

	return user, nil
}

// DeleteUser soft deletes a user
func (s *UserService) DeleteUser(id uuid.UUID) error {
	user, err := s.GetUserByID(id)
	if err != nil {
		return err
	}

	user.Delete()

	if err := s.db.Save(user).Error; err != nil {
		return fmt.Errorf("failed to delete user: %w", err)
	}

	return nil
}

// HardDeleteUser permanently deletes a user
func (s *UserService) HardDeleteUser(id uuid.UUID) error {
	if err := s.db.Unscoped().Delete(&models.User{}, id).Error; err != nil {
		return fmt.Errorf("failed to hard delete user: %w", err)
	}
	return nil
}

// GetAllUsers retrieves all users with pagination
func (s *UserService) GetAllUsers(page, pageSize int) ([]*models.User, int64, error) {
	var users []*models.User
	var total int64

	// Count total users
	if err := s.db.Model(&models.User{}).Count(&total).Error; err != nil {
		return nil, 0, fmt.Errorf("failed to count users: %w", err)
	}

	// Get users with pagination
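	// Pages are 1-indexed, so page 1 maps to row offset 0.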
	offset := (page - 1) * pageSize
	if err := s.db.Limit(pageSize).Offset(offset).Find(&users).Error; err != nil {
		return nil, 0, fmt.Errorf("failed to get users: %w", err)
	}

	return users, total, nil
}

// GetActiveUsers retrieves all active users
func (s *UserService) GetActiveUsers() ([]*models.User, error) {
	var users []*models.User
	if err := s.db.Where("status = ?", models.StatusActive).Find(&users).Error; err != nil {
		return nil, fmt.Errorf("failed to get active users: %w", err)
	}
	return users, nil
}

// GetUsersByRole retrieves users by role
func (s *UserService) GetUsersByRole(role models.UserRole) ([]*models.User, error) {
	var users []*models.User
	if err := s.db.Where("role = ?", role).Find(&users).Error; err != nil {
		return nil, fmt.Errorf("failed to get users by role: %w", err)
	}
	return users, nil
}

// SearchUsers searches for users by name, username, or email
func (s *UserService) SearchUsers(query string, page, pageSize int) ([]*models.User, int64, error) {
	var users []*models.User
	var total int64

	searchQuery := "%" + strings.ToLower(query) + "%"

	// Count total matching users
	if err := s.db.Model(&models.User{}).Where(
		"LOWER(name) LIKE ? OR LOWER(username) LIKE ? OR LOWER(email) LIKE ?",
		searchQuery, searchQuery, searchQuery,
	).Count(&total).Error; err != nil {
		return nil, 0, fmt.Errorf("failed to count search results: %w", err)
	}

	// Get matching users with pagination
	offset := (page - 1) * pageSize
	if err := s.db.Where(
		"LOWER(name) LIKE ? OR LOWER(username) LIKE ? OR LOWER(email) LIKE ?",
		searchQuery, searchQuery, searchQuery,
	).Limit(pageSize).Offset(offset).Find(&users).Error; err != nil {
		return nil, 0, fmt.Errorf("failed to search users: %w", err)
	}

	return users, total, nil
}

// GetUserStats returns user statistics
func (s *UserService) GetUserStats() (*utils.UserStats, error) {
	var stats utils.UserStats

	// Total users
	if err := s.db.Model(&models.User{}).Count(&stats.Total).Error; err != nil {
		return nil, fmt.Errorf("failed to count total users: %w", err)
	}

	// Active users
	if err := s.db.Model(&models.User{}).Where("status = ?", models.StatusActive).Count(&stats.Active).Error; err != nil {
		return nil, fmt.Errorf("failed to count active users: %w", err)
	}

	// Admin users
	if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleAdmin).Count(&stats.Admin).Error; err != nil {
		return nil, fmt.Errorf("failed to count admin users: %w", err)
	}

	// Regular users
	if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleUser).Count(&stats.User).Error; err != nil {
		return nil, fmt.Errorf("failed to count regular users: %w", err)
	}

	// Guest users
	if err := s.db.Model(&models.User{}).Where("role = ?", models.RoleGuest).Count(&stats.Guest).Error; err != nil {
		return nil, fmt.Errorf("failed to count guest users: %w", err)
	}

	// Users with email
	if err := s.db.Model(&models.User{}).Where("email != ''").Count(&stats.WithEmail).Error; err != nil {
		return nil, fmt.Errorf("failed to count users with email: %w", err)
	}

	return &stats, nil
}

// AuthenticateUser authenticates a user with username and password
func (s *UserService) AuthenticateUser(username, password string) (*models.User, error) {
	user, err := s.GetUserByUsername(username)
	if err != nil {
		return nil, errors.New("invalid username or password")
	}

	if !user.IsActive() {
		return nil, errors.New("user account is not active")
	}

	if user.IsLocked() {
		return nil, errors.New("user account is locked")
	}

	if !user.VerifyPassword(password) {
		user.FailedLoginAttempt()
		if err := s.db.Save(user).Error; err != nil {
			return nil, fmt.Errorf("failed to update failed login attempt: %w", err)
		}
		return nil, errors.New("invalid username or password")
	}

	// Successful login
	if err := user.Login(); err != nil {
		return nil, fmt.Errorf("login failed: %w", err)
	}

	if err := s.db.Save(user).Error; err != nil {
		return nil, fmt.Errorf("failed to update login info: %w", err)
	}

	return user, nil
}

// ChangePassword changes a user's password
func (s *UserService) ChangePassword(id uuid.UUID, currentPassword, newPassword string) error {
	user, err := s.GetUserByID(id)
	if err != nil {
		return err
	}

	if !user.VerifyPassword(currentPassword) {
		return errors.New("current password is incorrect")
	}

	if err := user.SetPassword(newPassword); err != nil {
		return fmt.Errorf("failed to set new password: %w", err)
	}

	if err := s.db.Save(user).Error; err != nil {
		return fmt.Errorf("failed to update password: %w", err)
	}

	return nil
}

// ResetPassword resets a user's password (admin function)
func (s *UserService) ResetPassword(id uuid.UUID, newPassword string) error {
	user, err := s.GetUserByID(id)
	if err != nil {
		return err
	}

	if err := user.SetPassword(newPassword); err != nil {
		return fmt.Errorf("failed to set new password: %w", err)
	}

	user.ResetLoginAttempts()

	if err := s.db.Save(user).Error; err != nil {
		return fmt.Errorf("failed to update password: %w", err)
	}

	return nil
}

// AddPermission adds a permission to a user
func (s *UserService) AddPermission(id uuid.UUID, permission string) error {
	user, err := s.GetUserByID(id)
	if err != nil {
		return err
	}

	user.AddPermission(permission)

	if err := s.db.Save(user).Error; err != nil {
		return fmt.Errorf("failed to add permission: %w", err)
	}

	return nil
}

// RemovePermission removes a permission from a user
func (s *UserService) RemovePermission(id uuid.UUID, permission string) error {
	user, err := s.GetUserByID(id)
	if err != nil {
		return err
	}

	user.RemovePermission(permission)

	if err := s.db.Save(user).Error; err != nil {
		return fmt.Errorf("failed to remove permission: %w", err)
	}

	return nil
}

// ExportUsers exports users to JSON
func (s *UserService) ExportUsers() ([]byte, error) {
	users, _, err := s.GetAllUsers(1, 1000) // Get all users (limit to 1000 for safety)
	if err != nil {
		return nil, fmt.Errorf("failed to get users for export: %w", err)
	}

	var responses []*models.UserResponse
	for _, user := range users {
		responses = append(responses, user.ToResponse())
	}

	data, err := json.MarshalIndent(responses, "", "  ")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal users: %w", err)
	}

	return data, nil
}

// GetUserActivity returns user activity information
func (s *UserService) GetUserActivity(id uuid.UUID) (*utils.UserActivity, error) {
	user, err := s.GetUserByID(id)
	if err != nil {
		return nil, err
	}

	activity := &utils.UserActivity{
		UserID:        user.ID,
		Username:      user.Username,
		LastLogin:     user.LastLogin,
		LoginAttempts: user.LoginAttempts,
		IsActive:      user.IsActive(),
		IsLocked:      user.IsLocked(),
		CreatedAt:     user.CreatedAt,
		UpdatedAt:     user.UpdatedAt,
	}

	return activity, nil
}
@@ -0,0 +1,250 @@
package utils

import (
	"time"

	"github.com/google/uuid"
)

// UserStats represents user statistics
type UserStats struct {
	Total     int64 `json:"total"`
	Active    int64 `json:"active"`
	Admin     int64 `json:"admin"`
	User      int64 `json:"user"`
	Guest     int64 `json:"guest"`
	WithEmail int64 `json:"with_email"`
}

// UserActivity represents user activity information
type UserActivity struct {
	UserID        uuid.UUID  `json:"user_id"`
	Username      string     `json:"username"`
	LastLogin     *time.Time `json:"last_login"`
	LoginAttempts int        `json:"login_attempts"`
	IsActive      bool       `json:"is_active"`
	IsLocked      bool       `json:"is_locked"`
	CreatedAt     time.Time  `json:"created_at"`
	UpdatedAt     time.Time  `json:"updated_at"`
}

// PaginatedResponse represents a paginated response
type PaginatedResponse struct {
	Data       interface{} `json:"data"`
	Page       int         `json:"page"`
	PageSize   int         `json:"page_size"`
	Total      int64       `json:"total"`
	TotalPages int         `json:"total_pages"`
}

// NewPaginatedResponse creates a new paginated response
func NewPaginatedResponse(data interface{}, page, pageSize int, total int64) *PaginatedResponse {
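	// Ceiling division: e.g. total=25, pageSize=10 yields 3 pages.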
totalPages := int((total + int64(pageSize) - 1) / int64(pageSize))
|
||||
return &PaginatedResponse{
|
||||
Data: data,
|
||||
Page: page,
|
||||
PageSize: pageSize,
|
||||
Total: total,
|
||||
TotalPages: totalPages,
|
||||
}
|
||||
}
|
||||

// APIResponse represents a standard API response
type APIResponse struct {
    Success bool        `json:"success"`
    Message string      `json:"message"`
    Data    interface{} `json:"data,omitempty"`
    Error   string      `json:"error,omitempty"`
}

// NewSuccessResponse creates a new success response
func NewSuccessResponse(message string, data interface{}) *APIResponse {
    return &APIResponse{
        Success: true,
        Message: message,
        Data:    data,
    }
}

// NewErrorResponse creates a new error response
func NewErrorResponse(message string, err error) *APIResponse {
    resp := &APIResponse{
        Success: false,
        Message: message,
    }

    if err != nil {
        resp.Error = err.Error()
    }

    return resp
}

// ValidationError represents a validation error
type ValidationError struct {
    Field   string `json:"field"`
    Message string `json:"message"`
}

// ValidationErrors represents multiple validation errors
type ValidationErrors struct {
    Errors []ValidationError `json:"errors"`
}

// NewValidationErrors creates a new validation errors instance
func NewValidationErrors() *ValidationErrors {
    return &ValidationErrors{
        Errors: make([]ValidationError, 0),
    }
}

// Add adds a validation error
func (ve *ValidationErrors) Add(field, message string) {
    ve.Errors = append(ve.Errors, ValidationError{
        Field:   field,
        Message: message,
    })
}

// HasErrors returns true if there are validation errors
func (ve *ValidationErrors) HasErrors() bool {
    return len(ve.Errors) > 0
}

// Error implements the error interface
func (ve *ValidationErrors) Error() string {
    if len(ve.Errors) == 0 {
        return ""
    }

    if len(ve.Errors) == 1 {
        return ve.Errors[0].Message
    }

    return "multiple validation errors"
}

// DatabaseConfig represents database configuration
type DatabaseConfig struct {
    Driver   string `json:"driver"`
    Host     string `json:"host"`
    Port     int    `json:"port"`
    Database string `json:"database"`
    Username string `json:"username"`
    Password string `json:"password"`
    SSLMode  string `json:"ssl_mode"`
}

// ServerConfig represents server configuration
type ServerConfig struct {
    Port         int    `json:"port"`
    Host         string `json:"host"`
    ReadTimeout  int    `json:"read_timeout"`
    WriteTimeout int    `json:"write_timeout"`
    IdleTimeout  int    `json:"idle_timeout"`
}

// JWTConfig represents JWT configuration
type JWTConfig struct {
    SecretKey        string `json:"secret_key"`
    ExpirationHours  int    `json:"expiration_hours"`
    RefreshHours     int    `json:"refresh_hours"`
    Issuer           string `json:"issuer"`
    SigningAlgorithm string `json:"signing_algorithm"`
}

// Config represents application configuration
type Config struct {
    Database DatabaseConfig `json:"database"`
    Server   ServerConfig   `json:"server"`
    JWT      JWTConfig      `json:"jwt"`
    LogLevel string         `json:"log_level"`
    Debug    bool           `json:"debug"`
}

// SearchParams represents search parameters
type SearchParams struct {
    Query    string `json:"query"`
    Page     int    `json:"page"`
    PageSize int    `json:"page_size"`
    SortBy   string `json:"sort_by"`
    SortDir  string `json:"sort_dir"`
}

// NewSearchParams creates new search parameters with defaults
func NewSearchParams() *SearchParams {
    return &SearchParams{
        Page:     1,
        PageSize: 20,
        SortBy:   "created_at",
        SortDir:  "desc",
    }
}

// Validate validates search parameters
func (sp *SearchParams) Validate() error {
    if sp.Page < 1 {
        sp.Page = 1
    }

    if sp.PageSize < 1 {
        sp.PageSize = 20
    }

    if sp.PageSize > 100 {
        sp.PageSize = 100
    }

    if sp.SortBy == "" {
        sp.SortBy = "created_at"
    }

    if sp.SortDir != "asc" && sp.SortDir != "desc" {
        sp.SortDir = "desc"
    }

    return nil
}

// FilterParams represents filter parameters
type FilterParams struct {
    Role      string    `json:"role"`
    Status    string    `json:"status"`
    AgeMin    int       `json:"age_min"`
    AgeMax    int       `json:"age_max"`
    CreatedAt time.Time `json:"created_at"`
    UpdatedAt time.Time `json:"updated_at"`
}

// AuditLog represents an audit log entry
type AuditLog struct {
    ID        uuid.UUID              `json:"id"`
    UserID    uuid.UUID              `json:"user_id"`
    Action    string                 `json:"action"`
    Resource  string                 `json:"resource"`
    Details   map[string]interface{} `json:"details"`
    IPAddress string                 `json:"ip_address"`
    UserAgent string                 `json:"user_agent"`
    CreatedAt time.Time              `json:"created_at"`
}

// Session represents a user session
type Session struct {
    ID        uuid.UUID `json:"id"`
    UserID    uuid.UUID `json:"user_id"`
    Token     string    `json:"token"`
    ExpiresAt time.Time `json:"expires_at"`
    CreatedAt time.Time `json:"created_at"`
    UpdatedAt time.Time `json:"updated_at"`
}

// IsExpired checks if the session is expired
func (s *Session) IsExpired() bool {
    return time.Now().After(s.ExpiresAt)
}

// ExtendSession extends the session expiration
func (s *Session) ExtendSession(duration time.Duration) {
    s.ExpiresAt = time.Now().Add(duration)
    s.UpdatedAt = time.Now()
}
@@ -0,0 +1,309 @@
package api

import (
    "net/http"
    "strconv"

    "github.com/example/user-management/internal/models"
    "github.com/example/user-management/internal/services"
    "github.com/example/user-management/internal/utils"
    "github.com/gin-gonic/gin"
    "github.com/google/uuid"
)

// UserHandler handles user-related HTTP requests
type UserHandler struct {
    userService *services.UserService
}

// NewUserHandler creates a new user handler
func NewUserHandler(userService *services.UserService) *UserHandler {
    return &UserHandler{
        userService: userService,
    }
}

// CreateUser handles user creation
func (h *UserHandler) CreateUser(c *gin.Context) {
    var req models.UserRequest
    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    user, err := h.userService.CreateUser(&req)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to create user", err))
        return
    }

    c.JSON(http.StatusCreated, utils.NewSuccessResponse("User created successfully", user.ToResponse()))
}

// GetUser handles getting a single user
func (h *UserHandler) GetUser(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    user, err := h.userService.GetUserByID(id)
    if err != nil {
        c.JSON(http.StatusNotFound, utils.NewErrorResponse("User not found", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("User retrieved successfully", user.ToResponse()))
}

// GetUsers handles getting users with pagination
func (h *UserHandler) GetUsers(c *gin.Context) {
    page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
    pageSize, _ := strconv.Atoi(c.DefaultQuery("page_size", "20"))

    if page < 1 {
        page = 1
    }
    if pageSize < 1 || pageSize > 100 {
        pageSize = 20
    }

    users, total, err := h.userService.GetAllUsers(page, pageSize)
    if err != nil {
        c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to get users", err))
        return
    }

    var responses []*models.UserResponse
    for _, user := range users {
        responses = append(responses, user.ToResponse())
    }

    paginatedResponse := utils.NewPaginatedResponse(responses, page, pageSize, total)
    c.JSON(http.StatusOK, utils.NewSuccessResponse("Users retrieved successfully", paginatedResponse))
}

// UpdateUser handles user updates
func (h *UserHandler) UpdateUser(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    var updates map[string]interface{}
    if err := c.ShouldBindJSON(&updates); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    user, err := h.userService.UpdateUser(id, updates)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to update user", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("User updated successfully", user.ToResponse()))
}

// DeleteUser handles user deletion
func (h *UserHandler) DeleteUser(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    if err := h.userService.DeleteUser(id); err != nil {
        c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to delete user", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("User deleted successfully", nil))
}

// SearchUsers handles user search
func (h *UserHandler) SearchUsers(c *gin.Context) {
    query := c.Query("q")
    page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
    pageSize, _ := strconv.Atoi(c.DefaultQuery("page_size", "20"))

    if page < 1 {
        page = 1
    }
    if pageSize < 1 || pageSize > 100 {
        pageSize = 20
    }

    users, total, err := h.userService.SearchUsers(query, page, pageSize)
    if err != nil {
        c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to search users", err))
        return
    }

    var responses []*models.UserResponse
    for _, user := range users {
        responses = append(responses, user.ToResponse())
    }

    paginatedResponse := utils.NewPaginatedResponse(responses, page, pageSize, total)
    c.JSON(http.StatusOK, utils.NewSuccessResponse("Search completed successfully", paginatedResponse))
}

// GetUserStats handles getting user statistics
func (h *UserHandler) GetUserStats(c *gin.Context) {
    stats, err := h.userService.GetUserStats()
    if err != nil {
        c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to get user statistics", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Statistics retrieved successfully", stats))
}

// ExportUsers handles user export
func (h *UserHandler) ExportUsers(c *gin.Context) {
    data, err := h.userService.ExportUsers()
    if err != nil {
        c.JSON(http.StatusInternalServerError, utils.NewErrorResponse("Failed to export users", err))
        return
    }

    c.Header("Content-Type", "application/json")
    c.Header("Content-Disposition", "attachment; filename=users.json")
    c.Data(http.StatusOK, "application/json", data)
}

// Login handles user authentication
func (h *UserHandler) Login(c *gin.Context) {
    var req struct {
        Username string `json:"username" binding:"required"`
        Password string `json:"password" binding:"required"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    user, err := h.userService.AuthenticateUser(req.Username, req.Password)
    if err != nil {
        c.JSON(http.StatusUnauthorized, utils.NewErrorResponse("Authentication failed", err))
        return
    }

    // In a real application, you would generate a JWT token here
    response := map[string]interface{}{
        "user":    user.ToResponse(),
        "token":   "dummy-jwt-token", // This would be a real JWT token
        "expires": "2024-12-31T23:59:59Z",
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Login successful", response))
}

// Logout handles user logout
func (h *UserHandler) Logout(c *gin.Context) {
    // In a real application, you would invalidate the JWT token here
    c.JSON(http.StatusOK, utils.NewSuccessResponse("Logout successful", nil))
}

// ChangePassword handles password change
func (h *UserHandler) ChangePassword(c *gin.Context) {
    var req struct {
        UserID          uuid.UUID `json:"user_id" binding:"required"`
        CurrentPassword string    `json:"current_password" binding:"required"`
        NewPassword     string    `json:"new_password" binding:"required,min=8"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    if err := h.userService.ChangePassword(req.UserID, req.CurrentPassword, req.NewPassword); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to change password", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Password changed successfully", nil))
}

// ResetPassword handles password reset (admin only)
func (h *UserHandler) ResetPassword(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    var req struct {
        NewPassword string `json:"new_password" binding:"required,min=8"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    if err := h.userService.ResetPassword(id, req.NewPassword); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to reset password", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Password reset successfully", nil))
}

// AddPermission handles adding permission to user
func (h *UserHandler) AddPermission(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    var req struct {
        Permission string `json:"permission" binding:"required"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid request", err))
        return
    }

    if err := h.userService.AddPermission(id, req.Permission); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to add permission", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Permission added successfully", nil))
}

// RemovePermission handles removing permission from user
func (h *UserHandler) RemovePermission(c *gin.Context) {
    idStr := c.Param("id")
    id, err := uuid.Parse(idStr)
    if err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Invalid user ID", err))
        return
    }

    permission := c.Query("permission")
    if permission == "" {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Permission parameter is required", nil))
        return
    }

    if err := h.userService.RemovePermission(id, permission); err != nil {
        c.JSON(http.StatusBadRequest, utils.NewErrorResponse("Failed to remove permission", err))
        return
    }

    c.JSON(http.StatusOK, utils.NewSuccessResponse("Permission removed successfully", nil))
}
@@ -0,0 +1,183 @@
# User Management System (Java)

A comprehensive user management system built in Java for testing Code Index MCP's analysis capabilities.

## Features

- **User Management**: Create, update, delete, and search users
- **Authentication**: BCrypt password hashing and verification
- **Authorization**: Role-based access control (Admin, User, Guest)
- **Data Validation**: Input validation and sanitization
- **Export/Import**: JSON and CSV export capabilities
- **Persistence**: File-based storage with JSON serialization
- **Logging**: SLF4J logging with Logback

## Project Structure

```
src/main/java/com/example/usermanagement/
├── models/
│   ├── Person.java                  # Base person model
│   ├── User.java                    # User model with auth features
│   ├── UserRole.java                # User role enumeration
│   └── UserStatus.java              # User status enumeration
├── services/
│   └── UserManager.java             # User management service
├── utils/
│   ├── ValidationUtils.java         # Validation utilities
│   ├── UserNotFoundException.java   # Custom exception
│   └── DuplicateUserException.java  # Custom exception
└── Main.java                        # Main demo application
```

## Technologies Used

- **Java 11**: Modern Java features and APIs
- **Jackson**: JSON processing and serialization
- **BCrypt**: Secure password hashing
- **Apache Commons**: Utility libraries (Lang3, CSV)
- **SLF4J + Logback**: Logging framework
- **Maven**: Build and dependency management
- **JUnit 5**: Testing framework

## Build and Run

### Prerequisites

- Java 11 or higher
- Maven 3.6+

### Build

```bash
mvn clean compile
```

### Run

```bash
mvn exec:java -Dexec.mainClass="com.example.usermanagement.Main"
```

### Test

```bash
mvn test
```

### Package

```bash
mvn package
```

## Usage

### Creating Users

```java
UserManager userManager = new UserManager();

// Create a basic user
User user = userManager.createUser("John Doe", 30, "john_doe", "john@example.com");
user.setPassword("SecurePass123!");

// Create an admin user
User admin = userManager.createUser("Jane Smith", 35, "jane_admin",
        "jane@example.com", UserRole.ADMIN);
admin.setPassword("AdminPass123!");
admin.addPermission("user_management");
```

### User Authentication

```java
// Verify password
boolean isValid = user.verifyPassword("SecurePass123!");

// Login
if (user.login()) {
    System.out.println("Login successful!");
    System.out.println("Last login: " + user.getLastLogin());
}
```

### User Management

```java
// Search users
List<User> results = userManager.searchUsers("john");

// Filter users
List<User> activeUsers = userManager.getActiveUsers();
List<User> adminUsers = userManager.getUsersByRole(UserRole.ADMIN);
List<User> olderUsers = userManager.getUsersOlderThan(25);

// Update user
Map<String, Object> updates = Map.of("age", 31, "email", "newemail@example.com");
userManager.updateUser("john_doe", updates);

// Export users
String jsonData = userManager.exportUsers("json");
String csvData = userManager.exportUsers("csv");
```

## Testing Features

This project tests the following Java language features:

### Core Language Features
- **Classes and Inheritance**: Person and User class hierarchy
- **Enums**: UserRole and UserStatus with methods
- **Interfaces**: Custom exceptions and validation
- **Generics**: Collections with type safety
- **Annotations**: Jackson JSON annotations
- **Exception Handling**: Custom exceptions and try-catch blocks (see the sketch below)
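
As a quick illustration of the custom-exception flow, here is a minimal sketch; it relies only on the `UserManager`, `UserNotFoundException`, and `DuplicateUserException` APIs shown elsewhere in this README, and the `ExceptionDemo` wrapper class exists purely for illustration:

```java
import com.example.usermanagement.services.UserManager;
import com.example.usermanagement.utils.DuplicateUserException;
import com.example.usermanagement.utils.UserNotFoundException;

public class ExceptionDemo {
    public static void main(String[] args) {
        UserManager userManager = new UserManager();

        try {
            // Creating the same username twice should trigger DuplicateUserException
            userManager.createUser("John Doe", 30, "john_doe", "john@example.com");
            userManager.createUser("John Doe", 30, "john_doe", "john@example.com");
        } catch (DuplicateUserException e) {
            System.out.println("Duplicate rejected: " + e.getMessage());
        }

        try {
            // Looking up an unknown username should trigger UserNotFoundException
            userManager.getUser("no_such_user");
        } catch (UserNotFoundException e) {
            System.out.println("Lookup failed: " + e.getMessage());
        }
    }
}
```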

### Modern Java Features
- **Streams API**: Filtering, mapping, and collecting (a short sketch follows this list)
- **Lambda Expressions**: Functional programming
- **Method References**: Stream operations
- **Optional**: Null-safe operations
- **Time API**: LocalDateTime usage
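
For a feel of how these features combine against the `UserManager` API, here is a minimal sketch; it assumes only the accessors demonstrated above (`getAllUsers()`, `getUsername()`, `getAge()`), and the `StreamsDemo` wrapper is illustrative:

```java
import com.example.usermanagement.models.User;
import com.example.usermanagement.services.UserManager;

import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

public class StreamsDemo {
    public static void main(String[] args) {
        UserManager userManager = new UserManager();

        // Streams + lambdas: collect the sorted usernames of users older than 25
        List<String> names = userManager.getAllUsers().stream()
                .filter(user -> user.getAge() > 25)
                .map(User::getUsername)   // method reference
                .sorted()
                .collect(Collectors.toList());

        // Optional: null-safe handling of "the oldest user", which may not exist
        Optional<User> oldest = userManager.getAllUsers().stream()
                .max((a, b) -> Integer.compare(a.getAge(), b.getAge()));
        oldest.ifPresent(u -> System.out.println("Oldest: " + u.getUsername()));

        System.out.println("Filtered names: " + names);
    }
}
```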

### Advanced Features
- **Concurrent Collections**: ConcurrentHashMap
- **Reflection**: Jackson serialization
- **File I/O**: NIO.2 Path and Files
- **Logging**: SLF4J with parameterized messages
- **Validation**: Input validation and sanitization

### Framework Integration
- **Maven**: Build lifecycle and dependency management
- **Jackson**: JSON serialization/deserialization
- **BCrypt**: Password hashing
- **Apache Commons**: Utility libraries
- **SLF4J**: Structured logging

### Design Patterns
- **Builder Pattern**: Object construction
- **Factory Pattern**: User creation (see the sketch after this list)
- **Repository Pattern**: Data access
- **Service Layer**: Business logic separation
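
The factory-style creation path is the easiest to see in isolation. A minimal sketch, assuming the `createUser` overloads and role helpers shown earlier in this README (the `FactoryDemo` class is illustrative only):

```java
import com.example.usermanagement.models.User;
import com.example.usermanagement.models.UserRole;
import com.example.usermanagement.services.UserManager;
import com.example.usermanagement.utils.DuplicateUserException;

public class FactoryDemo {
    public static void main(String[] args) throws DuplicateUserException {
        UserManager userManager = new UserManager();

        // The service acts as a factory: callers never invoke `new User(...)` directly,
        // so creation rules (validation, duplicate checks) live in one place.
        User member = userManager.createUser("Bob Smith", 25, "bob_user", "bob@example.com");
        User admin = userManager.createUser("Alice Johnson", 30, "alice_admin",
                "alice@example.com", UserRole.ADMIN);

        System.out.println(member.getRole());                        // default role: USER
        System.out.println(admin.isAdmin());                         // true
        System.out.println(admin.getRole().canActOn(UserRole.USER)); // true
    }
}
```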

## Dependencies

### Core Dependencies
- **Jackson Databind**: JSON processing
- **Jackson JSR310**: Java 8 time support
- **BCrypt**: Password hashing
- **Apache Commons Lang3**: Utilities
- **Apache Commons CSV**: CSV processing

### Logging
- **SLF4J API**: Logging facade
- **Logback Classic**: Logging implementation

### Testing
- **JUnit 5**: Testing framework
- **Mockito**: Mocking framework

## License

MIT License - This is a sample project for testing purposes.
@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
         http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>user-management</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>

    <name>User Management System</name>
    <description>A sample user management system for testing Code Index MCP</description>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>5.9.2</junit.version>
        <jackson.version>2.15.2</jackson.version>
        <slf4j.version>2.0.7</slf4j.version>
        <logback.version>1.4.7</logback.version>
    </properties>

    <dependencies>
        <!-- Jackson for JSON processing -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <dependency>
            <groupId>com.fasterxml.jackson.datatype</groupId>
            <artifactId>jackson-datatype-jsr310</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- Logging -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
        </dependency>

        <!-- Apache Commons -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.9.0</version>
        </dependency>

        <!-- BCrypt for password hashing -->
        <dependency>
            <groupId>org.mindrot</groupId>
            <artifactId>jbcrypt</artifactId>
            <version>0.4</version>
        </dependency>

        <!-- Test Dependencies -->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>5.3.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>11</source>
                    <target>11</target>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>3.1.2</version>
            </plugin>

            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <mainClass>com.example.usermanagement.Main</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
@@ -0,0 +1,220 @@
package com.example.usermanagement;

import com.example.usermanagement.models.User;
import com.example.usermanagement.models.UserRole;
import com.example.usermanagement.services.UserManager;
import com.example.usermanagement.utils.UserNotFoundException;
import com.example.usermanagement.utils.DuplicateUserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Main class demonstrating the User Management System.
 */
public class Main {

    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    public static void main(String[] args) {
        System.out.println("=".repeat(50));
        System.out.println("User Management System Demo (Java)");
        System.out.println("=".repeat(50));

        // Create user manager
        UserManager userManager = new UserManager();

        // Create sample users
        System.out.println("\n1. Creating sample users...");
        createSampleUsers(userManager);

        // Display all users
        System.out.println("\n2. Listing all users...");
        listAllUsers(userManager);

        // Test user retrieval
        System.out.println("\n3. Testing user retrieval...");
        testUserRetrieval(userManager);

        // Test user search
        System.out.println("\n4. Testing user search...");
        testUserSearch(userManager);

        // Test user filtering
        System.out.println("\n5. Testing user filtering...");
        testUserFiltering(userManager);

        // Test user updates
        System.out.println("\n6. Testing user updates...");
        testUserUpdates(userManager);

        // Test authentication
        System.out.println("\n7. Testing authentication...");
        testAuthentication(userManager);

        // Display statistics
        System.out.println("\n8. User statistics...");
        displayStatistics(userManager);

        // Test export functionality
        System.out.println("\n9. Testing export functionality...");
        testExport(userManager);

        // Test user permissions
        System.out.println("\n10. Testing user permissions...");
        testPermissions(userManager);

        System.out.println("\n" + "=".repeat(50));
        System.out.println("Demo completed successfully!");
        System.out.println("=".repeat(50));
    }

    private static void createSampleUsers(UserManager userManager) {
        try {
            // Create admin user
            User admin = userManager.createUser("Alice Johnson", 30, "alice_admin",
                    "alice@example.com", UserRole.ADMIN);
            admin.setPassword("AdminPass123!");
            admin.addPermission("user_management");
            admin.addPermission("system_admin");

            // Create regular users
            User user1 = userManager.createUser("Bob Smith", 25, "bob_user", "bob@example.com");
            user1.setPassword("UserPass123!");

            User user2 = userManager.createUser("Charlie Brown", 35, "charlie", "charlie@example.com");
            user2.setPassword("CharliePass123!");

            User user3 = userManager.createUser("Diana Prince", 28, "diana", "diana@example.com");
            user3.setPassword("DianaPass123!");

            System.out.println("✓ Created " + userManager.getUserCount() + " users");

        } catch (DuplicateUserException e) {
            System.out.println("✗ Error creating users: " + e.getMessage());
        } catch (Exception e) {
            System.out.println("✗ Unexpected error: " + e.getMessage());
            logger.error("Error creating sample users", e);
        }
    }

    private static void listAllUsers(UserManager userManager) {
        List<User> users = userManager.getAllUsers();

        System.out.println("Found " + users.size() + " users:");
        users.forEach(user ->
                System.out.println(" • " + user.getUsername() + " (" + user.getName() +
                        ") - " + user.getRole().getDisplayName() +
                        " [" + user.getStatus().getDisplayName() + "]")
        );
    }

    private static void testUserRetrieval(UserManager userManager) {
        try {
            User user = userManager.getUser("alice_admin");
            System.out.println("✓ Retrieved user: " + user.getUsername() + " (" + user.getName() + ")");

            User userByEmail = userManager.getUserByEmail("bob@example.com");
            if (userByEmail != null) {
                System.out.println("✓ Found user by email: " + userByEmail.getUsername());
            }

        } catch (UserNotFoundException e) {
            System.out.println("✗ User retrieval failed: " + e.getMessage());
        }
    }

    private static void testUserSearch(UserManager userManager) {
        List<User> searchResults = userManager.searchUsers("alice");
        System.out.println("Search results for 'alice': " + searchResults.size() + " users found");

        searchResults.forEach(user ->
                System.out.println(" • " + user.getUsername() + " (" + user.getName() + ")")
        );
    }

    private static void testUserFiltering(UserManager userManager) {
        List<User> olderUsers = userManager.getUsersOlderThan(30);
        System.out.println("Users older than 30: " + olderUsers.size() + " users");

        olderUsers.forEach(user ->
                System.out.println(" • " + user.getUsername() + " (" + user.getName() + ") - age " + user.getAge())
        );

        List<User> adminUsers = userManager.getUsersByRole(UserRole.ADMIN);
        System.out.println("Admin users: " + adminUsers.size() + " users");
    }

    private static void testUserUpdates(UserManager userManager) {
        try {
            Map<String, Object> updates = Map.of("age", 26);
            User updatedUser = userManager.updateUser("bob_user", updates);
            System.out.println("✓ Updated " + updatedUser.getUsername() + "'s age to " + updatedUser.getAge());

        } catch (UserNotFoundException e) {
            System.out.println("✗ Update failed: " + e.getMessage());
        }
    }

    private static void testAuthentication(UserManager userManager) {
        try {
            User user = userManager.getUser("alice_admin");

            // Test password verification
            boolean isValid = user.verifyPassword("AdminPass123!");
            System.out.println("✓ Password verification: " + (isValid ? "SUCCESS" : "FAILED"));

            // Test login
            boolean loginSuccess = user.login();
            System.out.println("✓ Login attempt: " + (loginSuccess ? "SUCCESS" : "FAILED"));

            if (loginSuccess) {
                System.out.println("✓ Last login: " + user.getLastLogin());
            }

        } catch (UserNotFoundException e) {
            System.out.println("✗ Authentication test failed: " + e.getMessage());
        }
    }

    private static void displayStatistics(UserManager userManager) {
        Map<String, Integer> stats = userManager.getUserStats();

        stats.forEach((key, value) ->
                System.out.println(" " + key.replace("_", " ").toUpperCase() + ": " + value)
        );
    }

    private static void testExport(UserManager userManager) {
        try {
            String jsonExport = userManager.exportUsers("json");
            System.out.println("✓ JSON export: " + jsonExport.length() + " characters");

            String csvExport = userManager.exportUsers("csv");
            System.out.println("✓ CSV export: " + csvExport.split("\n").length + " lines");

        } catch (Exception e) {
            System.out.println("✗ Export failed: " + e.getMessage());
        }
    }

    private static void testPermissions(UserManager userManager) {
        try {
            User admin = userManager.getUser("alice_admin");

            System.out.println("Admin permissions: " + admin.getPermissions());
            System.out.println("Has user_management permission: " + admin.hasPermission("user_management"));
            System.out.println("Is admin: " + admin.isAdmin());

            // Test role privileges
            System.out.println("Admin role can act on USER role: " +
                    admin.getRole().canActOn(UserRole.USER));

        } catch (UserNotFoundException e) {
            System.out.println("✗ Permission test failed: " + e.getMessage());
        }
    }
}
@@ -0,0 +1,284 @@
package com.example.usermanagement.models;

import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.commons.lang3.StringUtils;

import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * Represents a person with basic information.
 * This class serves as the base class for more specific person types.
 */
public class Person {

    @JsonProperty("name")
    private String name;

    @JsonProperty("age")
    private int age;

    @JsonProperty("email")
    private String email;

    @JsonProperty("created_at")
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss")
    private LocalDateTime createdAt;

    @JsonProperty("metadata")
    private Map<String, Object> metadata;

    /**
     * Default constructor for Jackson deserialization.
     */
    public Person() {
        this.createdAt = LocalDateTime.now();
        this.metadata = new HashMap<>();
    }

    /**
     * Constructor with name and age.
     *
     * @param name The person's name
     * @param age The person's age
     * @throws IllegalArgumentException if validation fails
     */
    public Person(String name, int age) {
        this();
        setName(name);
        setAge(age);
    }

    /**
     * Constructor with name, age, and email.
     *
     * @param name The person's name
     * @param age The person's age
     * @param email The person's email address
     * @throws IllegalArgumentException if validation fails
     */
    public Person(String name, int age, String email) {
        this(name, age);
        setEmail(email);
    }

    // Getters and Setters

    public String getName() {
        return name;
    }

    public void setName(String name) {
        if (StringUtils.isBlank(name)) {
            throw new IllegalArgumentException("Name cannot be null or empty");
        }
        if (name.length() > 100) {
            throw new IllegalArgumentException("Name cannot exceed 100 characters");
        }
        this.name = name.trim();
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        if (age < 0) {
            throw new IllegalArgumentException("Age cannot be negative");
        }
        if (age > 150) {
            throw new IllegalArgumentException("Age cannot exceed 150");
        }
        this.age = age;
    }

    public String getEmail() {
        return email;
    }

    public void setEmail(String email) {
        if (StringUtils.isNotBlank(email) && !isValidEmail(email)) {
            throw new IllegalArgumentException("Invalid email format");
        }
        this.email = StringUtils.isBlank(email) ? null : email.trim();
    }

    public LocalDateTime getCreatedAt() {
        return createdAt;
    }

    public void setCreatedAt(LocalDateTime createdAt) {
        this.createdAt = createdAt;
    }

    public Map<String, Object> getMetadata() {
        return new HashMap<>(metadata);
    }

    public void setMetadata(Map<String, Object> metadata) {
        this.metadata = metadata == null ? new HashMap<>() : new HashMap<>(metadata);
    }

    // Business methods

    /**
     * Returns a greeting message for the person.
     *
     * @return A personalized greeting
     */
    public String greet() {
        return String.format("Hello, I'm %s and I'm %d years old.", name, age);
    }

    /**
     * Checks if the person has an email address.
     *
     * @return true if email is present and not empty
     */
    public boolean hasEmail() {
        return StringUtils.isNotBlank(email);
    }

    /**
     * Updates the person's email address.
     *
     * @param newEmail The new email address
     * @throws IllegalArgumentException if email format is invalid
     */
    public void updateEmail(String newEmail) {
        setEmail(newEmail);
    }

    /**
     * Adds metadata to the person.
     *
     * @param key The metadata key
     * @param value The metadata value
     */
    public void addMetadata(String key, Object value) {
        if (StringUtils.isNotBlank(key)) {
            metadata.put(key, value);
        }
    }

    /**
     * Gets metadata value by key.
     *
     * @param key The metadata key
     * @return The metadata value or null if not found
     */
    public Object getMetadata(String key) {
        return metadata.get(key);
    }

    /**
     * Gets metadata value by key with default value.
     *
     * @param key The metadata key
     * @param defaultValue The default value if key is not found
     * @return The metadata value or default value
     */
    public Object getMetadata(String key, Object defaultValue) {
        return metadata.getOrDefault(key, defaultValue);
    }

    /**
     * Removes metadata by key.
     *
     * @param key The metadata key to remove
     * @return The removed value or null if not found
     */
    public Object removeMetadata(String key) {
        return metadata.remove(key);
    }

    /**
     * Clears all metadata.
     */
    public void clearMetadata() {
        metadata.clear();
    }

    /**
     * Validates email format using a simple regex.
     *
     * @param email The email to validate
     * @return true if email format is valid
     */
    private boolean isValidEmail(String email) {
        String emailPattern = "^[A-Za-z0-9+_.-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$";
        return email.matches(emailPattern);
    }

    /**
     * Creates a Person instance from a map of data.
     *
     * @param data The data map
     * @return A new Person instance
     */
    public static Person fromMap(Map<String, Object> data) {
        Person person = new Person();

        if (data.containsKey("name")) {
            person.setName((String) data.get("name"));
        }

        if (data.containsKey("age")) {
            person.setAge((Integer) data.get("age"));
        }

        if (data.containsKey("email")) {
            person.setEmail((String) data.get("email"));
        }

        if (data.containsKey("metadata")) {
            @SuppressWarnings("unchecked")
            Map<String, Object> metadata = (Map<String, Object>) data.get("metadata");
            person.setMetadata(metadata);
        }

        return person;
    }

    /**
     * Converts the person to a map representation.
     *
     * @return A map containing person data
     */
    public Map<String, Object> toMap() {
        Map<String, Object> map = new HashMap<>();
        map.put("name", name);
        map.put("age", age);
        map.put("email", email);
        map.put("created_at", createdAt);
        map.put("metadata", new HashMap<>(metadata));
        return map;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;

        Person person = (Person) obj;
        return age == person.age &&
                Objects.equals(name, person.name) &&
                Objects.equals(email, person.email) &&
                Objects.equals(createdAt, person.createdAt) &&
                Objects.equals(metadata, person.metadata);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, age, email, createdAt, metadata);
    }

    @Override
    public String toString() {
        return String.format("Person{name='%s', age=%d, email='%s', createdAt=%s}",
                name, age, email, createdAt);
    }
}
@@ -0,0 +1,363 @@
package com.example.usermanagement.models;

import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.commons.lang3.StringUtils;
import org.mindrot.jbcrypt.BCrypt;

import java.time.LocalDateTime;
import java.util.HashSet;
import java.util.Set;
import java.util.Objects;

/**
 * User class extending Person with authentication and authorization features.
 */
public class User extends Person {

    @JsonProperty("username")
    private String username;

    @JsonProperty("password_hash")
    private String passwordHash;

    @JsonProperty("role")
    private UserRole role;

    @JsonProperty("status")
    private UserStatus status;

    @JsonProperty("last_login")
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd'T'HH:mm:ss")
    private LocalDateTime lastLogin;

    @JsonProperty("login_attempts")
    private int loginAttempts;

    @JsonProperty("permissions")
    private Set<String> permissions;

    /**
     * Default constructor for Jackson deserialization.
     */
    public User() {
        super();
        this.role = UserRole.USER;
        this.status = UserStatus.ACTIVE;
        this.loginAttempts = 0;
        this.permissions = new HashSet<>();
    }

    /**
     * Constructor with basic information.
     *
     * @param name The user's name
     * @param age The user's age
     * @param username The username
     */
    public User(String name, int age, String username) {
        super(name, age);
        setUsername(username);
        this.role = UserRole.USER;
        this.status = UserStatus.ACTIVE;
        this.loginAttempts = 0;
        this.permissions = new HashSet<>();
    }

    /**
     * Constructor with email.
     *
     * @param name The user's name
     * @param age The user's age
     * @param username The username
     * @param email The email address
     */
    public User(String name, int age, String username, String email) {
        super(name, age, email);
        setUsername(username);
        this.role = UserRole.USER;
        this.status = UserStatus.ACTIVE;
        this.loginAttempts = 0;
        this.permissions = new HashSet<>();
    }

    /**
     * Constructor with role.
     *
     * @param name The user's name
     * @param age The user's age
     * @param username The username
     * @param email The email address
     * @param role The user role
     */
    public User(String name, int age, String username, String email, UserRole role) {
        this(name, age, username, email);
        this.role = role;
    }

    // Getters and Setters

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        if (StringUtils.isBlank(username)) {
            throw new IllegalArgumentException("Username cannot be null or empty");
        }
        if (username.length() < 3 || username.length() > 20) {
            throw new IllegalArgumentException("Username must be between 3 and 20 characters");
        }
        if (!username.matches("^[a-zA-Z0-9_]+$")) {
            throw new IllegalArgumentException("Username can only contain letters, numbers, and underscores");
        }
        this.username = username.trim();
    }

    public String getPasswordHash() {
        return passwordHash;
    }

    public void setPasswordHash(String passwordHash) {
        this.passwordHash = passwordHash;
    }

    public UserRole getRole() {
        return role;
    }

    public void setRole(UserRole role) {
        this.role = role != null ? role : UserRole.USER;
    }

    public UserStatus getStatus() {
        return status;
    }

    public void setStatus(UserStatus status) {
        this.status = status != null ? status : UserStatus.ACTIVE;
    }

    public LocalDateTime getLastLogin() {
        return lastLogin;
    }

    public void setLastLogin(LocalDateTime lastLogin) {
        this.lastLogin = lastLogin;
    }

    public int getLoginAttempts() {
        return loginAttempts;
    }

    public void setLoginAttempts(int loginAttempts) {
        this.loginAttempts = Math.max(0, loginAttempts);
    }

    public Set<String> getPermissions() {
        return new HashSet<>(permissions);
    }

    public void setPermissions(Set<String> permissions) {
        this.permissions = permissions != null ? new HashSet<>(permissions) : new HashSet<>();
    }

    // Authentication methods

    /**
     * Sets the user's password using BCrypt hashing.
     *
     * @param password The plain text password
     * @throws IllegalArgumentException if password is invalid
     */
    public void setPassword(String password) {
        if (StringUtils.isBlank(password)) {
            throw new IllegalArgumentException("Password cannot be null or empty");
        }
        if (password.length() < 8) {
            throw new IllegalArgumentException("Password must be at least 8 characters long");
        }

        // Hash the password with BCrypt
        this.passwordHash = BCrypt.hashpw(password, BCrypt.gensalt());
    }

    /**
     * Verifies a password against the stored hash.
     *
     * @param password The plain text password to verify
     * @return true if password matches
     */
    public boolean verifyPassword(String password) {
        if (StringUtils.isBlank(password) || StringUtils.isBlank(passwordHash)) {
            return false;
        }

        try {
            return BCrypt.checkpw(password, passwordHash);
        } catch (IllegalArgumentException e) {
            return false;
        }
    }

    // Permission methods

    /**
     * Adds a permission to the user.
     *
     * @param permission The permission to add
     */
    public void addPermission(String permission) {
        if (StringUtils.isNotBlank(permission)) {
            permissions.add(permission.trim());
        }
    }

    /**
     * Removes a permission from the user.
     *
     * @param permission The permission to remove
     */
    public void removePermission(String permission) {
        permissions.remove(permission);
    }

    /**
     * Checks if the user has a specific permission.
     *
     * @param permission The permission to check
     * @return true if user has the permission
     */
    public boolean hasPermission(String permission) {
        return permissions.contains(permission);
    }

    /**
     * Clears all permissions.
     */
    public void clearPermissions() {
        permissions.clear();
    }

    // Status and role methods

    /**
     * Checks if the user is an admin.
     *
     * @return true if user is admin
     */
    public boolean isAdmin() {
        return role == UserRole.ADMIN;
    }

    /**
     * Checks if the user is active.
     *
     * @return true if user is active
     */
    public boolean isActive() {
        return status == UserStatus.ACTIVE;
    }

    /**
     * Checks if the user is locked due to too many failed login attempts.
     *
     * @return true if user is locked
     */
    public boolean isLocked() {
        return status == UserStatus.SUSPENDED || loginAttempts >= 5;
    }

    // Login methods

    /**
     * Records a successful login.
     *
     * @return true if login was successful
     */
    public boolean login() {
        if (!isActive() || isLocked()) {
            return false;
        }

        this.lastLogin = LocalDateTime.now();
        this.loginAttempts = 0;
        return true;
    }

    /**
     * Records a failed login attempt.
     */
    public void failedLoginAttempt() {
        this.loginAttempts++;
        if (this.loginAttempts >= 5) {
            this.status = UserStatus.SUSPENDED;
        }
    }

    /**
     * Resets login attempts.
     */
    public void resetLoginAttempts() {
        this.loginAttempts = 0;
    }

    // Status change methods

    /**
     * Activates the user account.
     */
    public void activate() {
        this.status = UserStatus.ACTIVE;
        this.loginAttempts = 0;
    }

    /**
     * Deactivates the user account.
     */
    public void deactivate() {
        this.status = UserStatus.INACTIVE;
    }

    /**
     * Suspends the user account.
     */
    public void suspend() {
        this.status = UserStatus.SUSPENDED;
    }

    /**
     * Marks the user as deleted.
     */
    public void delete() {
        this.status = UserStatus.DELETED;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;
        if (!super.equals(obj)) return false;

        User user = (User) obj;
        return loginAttempts == user.loginAttempts &&
                Objects.equals(username, user.username) &&
                Objects.equals(passwordHash, user.passwordHash) &&
                role == user.role &&
                status == user.status &&
                Objects.equals(lastLogin, user.lastLogin) &&
                Objects.equals(permissions, user.permissions);
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), username, passwordHash, role, status,
                lastLogin, loginAttempts, permissions);
    }

    @Override
    public String toString() {
        return String.format("User{username='%s', name='%s', role=%s, status=%s, lastLogin=%s}",
                username, getName(), role, status, lastLogin);
    }
}
@@ -0,0 +1,134 @@
package com.example.usermanagement.models;

import com.fasterxml.jackson.annotation.JsonValue;

/**
 * Enumeration for user roles in the system.
 */
public enum UserRole {

    /**
     * Administrator role with full system access.
     */
    ADMIN("admin", "Administrator", "Full system access"),

    /**
     * Regular user role with standard permissions.
     */
    USER("user", "User", "Standard user permissions"),

    /**
     * Guest role with limited permissions.
     */
    GUEST("guest", "Guest", "Limited guest permissions");

    private final String code;
    private final String displayName;
    private final String description;

    /**
     * Constructor for UserRole enum.
     *
     * @param code The role code
     * @param displayName The display name
     * @param description The role description
     */
    UserRole(String code, String displayName, String description) {
        this.code = code;
        this.displayName = displayName;
        this.description = description;
    }

    /**
     * Gets the role code.
     *
     * @return The role code
     */
    @JsonValue
    public String getCode() {
        return code;
    }

    /**
     * Gets the display name.
     *
     * @return The display name
     */
    public String getDisplayName() {
        return displayName;
    }

    /**
     * Gets the role description.
     *
     * @return The role description
     */
    public String getDescription() {
        return description;
    }

    /**
     * Finds a UserRole by its code.
     *
     * @param code The role code to search for
     * @return The UserRole or null if not found
     */
    public static UserRole fromCode(String code) {
        if (code == null) {
            return null;
        }

        for (UserRole role : values()) {
            if (role.code.equalsIgnoreCase(code)) {
                return role;
            }
        }
        return null;
    }

    /**
     * Checks if this role has higher privilege than another role.
     *
     * @param other The other role to compare with
     * @return true if this role has higher privilege
     */
    public boolean hasHigherPrivilegeThan(UserRole other) {
        // Constants are declared from highest to lowest privilege,
        // so a smaller ordinal means more privilege.
        return this.ordinal() < other.ordinal();
    }

    /**
     * Checks if this role has lower privilege than another role.
     *
     * @param other The other role to compare with
     * @return true if this role has lower privilege
     */
    public boolean hasLowerPrivilegeThan(UserRole other) {
        return this.ordinal() > other.ordinal();
    }

    /**
     * Checks if this role can perform actions on another role.
     *
     * @param targetRole The target role
     * @return true if this role can act on the target role
     */
    public boolean canActOn(UserRole targetRole) {
        // Admin can act on all roles
        if (this == ADMIN) {
            return true;
        }

        // Users can only act on guests
        if (this == USER) {
            return targetRole == GUEST;
        }

        // Guests cannot act on anyone
        return false;
    }

    @Override
    public String toString() {
        return displayName;
    }
}
Some files were not shown because too many files have changed in this diff.