chore: move ccw-skill-hub to standalone repository

Migrated ccw-skill-hub to D:/ccw-skill-hub as independent git project.
Removed nested git repos (ccw/frontend/ccw-skill-hub, skill-hub-repo, skill-hub-temp).
This commit is contained in:
catlog22
2026-02-24 11:57:26 +08:00
parent 6f0bbe84ea
commit 61e313a0c1
35 changed files with 3189 additions and 362 deletions

View File

@@ -0,0 +1,186 @@
# Command: semantic-scan
> LLM-based semantic analysis via CLI. Supplements toolchain findings with issues that static tools cannot detect: business logic flaws, architectural problems, complex security patterns.
## When to Use
- Phase 3 of Scanner, Standard mode, Step B
- Runs AFTER toolchain-scan completes (needs its output to avoid duplication)
- Quick mode does NOT use this command
**Trigger conditions**:
- SCAN-* task in Phase 3 with `quickMode === false`
- toolchain-scan has completed (`toolchain-findings.json` exists, possibly containing an empty array)
## Strategy
### Delegation Mode
**Mode**: CLI Fan-out (single gemini agent, analysis only)
### Tool Fallback Chain
```
gemini (primary) -> qwen (fallback) -> codex (fallback)
```
## Execution Steps
### Step 1: Prepare Context
Build the CLI prompt with target files and a summary of toolchain findings to avoid duplication.
```javascript
// Load the toolchain-scan results so the LLM can be told what is already known.
// A missing or unparseable file is treated the same as "nothing found".
let toolFindings = []
try {
  const rawFindings = Read(`${sessionFolder}/scan/toolchain-findings.json`)
  toolFindings = JSON.parse(rawFindings)
} catch { /* no toolchain findings */ }

// Dedup summary: one compact "file:line [source] title" line per finding,
// limited to the first 50 findings to keep the prompt small.
const describeFinding = f =>
  `${f.location?.file}:${f.location?.line} [${f.source}] ${f.title}`
const toolSummary = toolFindings.length > 0
  ? toolFindings.slice(0, 50).map(f => describeFinding(f)).join('\n')
  : '(no toolchain findings)'

// CLI context: list files explicitly when there are at most 20 of them,
// otherwise fall back to a source-file glob under the target directory.
const fileList = targetFiles.slice(0, 100)
const targetPattern = fileList.length > 20
  ? `${target}/**/*.{ts,tsx,js,jsx,py,go,java,rs}`
  : fileList.join(' ')

// Focus-area text for each dimension key that may be requested
const DIM_FOCUS = {
  sec: 'Security: business logic vulnerabilities, privilege escalation, sensitive data flow, auth bypass, injection beyond simple patterns',
  cor: 'Correctness: logic errors, unhandled exception paths, state management bugs, race conditions, incorrect algorithm implementation',
  perf: 'Performance: algorithm complexity (O(n^2)+), N+1 queries, unnecessary sync operations, memory leaks, missing caching opportunities',
  maint: 'Maintainability: architectural coupling, abstraction leaks, project convention violations, dead code paths, excessive complexity'
}

// Numbered list of the focus areas for the requested dimensions; unknown keys are skipped
const focusLines = []
for (const dim of dimensions) {
  const desc = DIM_FOCUS[dim]
  if (desc) focusLines.push(`${focusLines.length + 1}. ${desc}`)
}
const focusAreas = focusLines.join('\n')
```
### Step 2: Execute CLI Scan
```javascript
const maxPerDimension = 5
const minSeverity = 'medium'
const cliPrompt = `PURPOSE: Supplement toolchain scan with semantic analysis that static tools cannot detect. Find logic errors, architectural issues, and complex vulnerability patterns.
TASK:
${focusAreas}
MODE: analysis
CONTEXT: @${targetPattern}
Toolchain already detected these issues (DO NOT repeat them):
${toolSummary}
EXPECTED: Respond with ONLY a JSON array (no markdown, no explanation). Each element:
{"dimension":"security|correctness|performance|maintainability","category":"<sub-category>","severity":"critical|high|medium","title":"<concise title>","description":"<detailed explanation>","location":{"file":"<path>","line":<number>,"end_line":<number>,"code_snippet":"<relevant code>"},"source":"llm","suggested_fix":"<how to fix>","effort":"low|medium|high","confidence":"high|medium|low"}
CONSTRAINTS: Max ${maxPerDimension} findings per dimension | Only ${minSeverity} severity and above | Do not duplicate toolchain findings | Focus on issues tools CANNOT detect | Return raw JSON array only`

// Quote a value for a POSIX shell. The prompt embeds file paths and tool
// messages, so it may contain `$`, backticks or backslashes; inside single
// quotes the shell expands none of them. An embedded single quote is written
// as '\'' (close quote, escaped quote, reopen quote).
const shellQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`

// Tools are tried in order; only the primary tool gets the review rule template.
const cliAttempts = [
  { tool: 'gemini', extraArgs: ' --rule analysis-review-code-quality' },
  { tool: 'qwen', extraArgs: '' },
  { tool: 'codex', extraArgs: '' }
]

const quotedPrompt = shellQuote(cliPrompt)
let cliOutput = null
let cliTool = 'gemini'
for (const { tool, extraArgs } of cliAttempts) {
  cliTool = tool
  try {
    cliOutput = Bash(
      `ccw cli -p ${quotedPrompt} --tool ${tool} --mode analysis${extraArgs}`,
      { timeout: 300000 }
    )
    break
  } catch {
    // This tool failed (error or timeout): fall through to the next one.
    // If every tool fails, cliOutput stays null and cliTool names the last tool tried.
    cliOutput = null
  }
}
```
### Step 3: Parse & Validate Output
```javascript
/**
 * Pull a JSON array out of CLI output that may carry surrounding text.
 * Tries each '[' in turn as the array start (always ending at the last ']'),
 * so a bracketed prefix before the real payload does not defeat the parse.
 * @param {string} text - raw CLI output
 * @returns {Array|null} the first span that parses as an array, or null
 */
function extractJsonArray(text) {
  const end = text.lastIndexOf(']')
  for (let start = text.indexOf('['); start !== -1 && start < end; start = text.indexOf('[', start + 1)) {
    try {
      const value = JSON.parse(text.slice(start, end + 1))
      if (Array.isArray(value)) return value
    } catch { /* not valid JSON from this offset; try the next '[' */ }
  }
  return null
}

const VALID_DIMENSIONS = ['security', 'correctness', 'performance', 'maintainability']
// Only severities at or above the 'medium' minimum are accepted
const VALID_SEVERITIES = ['critical', 'high', 'medium']
const VALID_LEVELS = ['low', 'medium', 'high']

// A finding is usable when it is an object with a title, a file location,
// and a recognised dimension and severity. The leading `f &&` keeps a stray
// null/primitive element from throwing and discarding the whole batch.
const isUsableFinding = f =>
  Boolean(f && f.title && f.location?.file) &&
  VALID_DIMENSIONS.includes(f.dimension) &&
  VALID_SEVERITIES.includes(f.severity)

let semanticFindings = []
if (cliOutput) {
  const parsed = extractJsonArray(String(cliOutput))
  if (parsed) {
    // Rebuild each finding field by field so only schema fields survive,
    // with defaults for anything optional the model left out.
    semanticFindings = parsed.filter(isUsableFinding).map(f => ({
      dimension: f.dimension,
      category: f.category || 'general',
      severity: f.severity,
      title: f.title,
      description: f.description || f.title,
      location: {
        file: f.location.file,
        line: f.location.line || 1,
        end_line: f.location.end_line || f.location.line || 1,
        code_snippet: f.location.code_snippet || ''
      },
      source: 'llm',
      tool_rule: null,
      suggested_fix: f.suggested_fix || '',
      effort: VALID_LEVELS.includes(f.effort) ? f.effort : 'medium',
      confidence: VALID_LEVELS.includes(f.confidence) ? f.confidence : 'medium'
    }))
  }
  // No parseable array: continue with empty findings
}

// Enforce per-dimension limits (keeps the first maxPerDimension of each dimension, in order)
const dimCounts = {}
semanticFindings = semanticFindings.filter(f => {
  dimCounts[f.dimension] = (dimCounts[f.dimension] || 0) + 1
  return dimCounts[f.dimension] <= maxPerDimension
})

// Write output (an empty array when the CLI failed or returned nothing usable)
Write(`${sessionFolder}/scan/semantic-findings.json`,
  JSON.stringify(semanticFindings, null, 2))
```
## Error Handling
| Scenario | Resolution |
|----------|------------|
| gemini CLI fails | Fallback to qwen, then codex |
| All CLI tools fail | Log warning, write empty findings array (toolchain results still valid) |
| CLI output not valid JSON | Attempt regex extraction, else empty findings |
| Findings exceed per-dimension limit | Truncate to max per dimension |
| Invalid dimension/severity in output | Filter out invalid entries |
| CLI timeout (>5 min) | Kill, log warning, return empty findings |

View File

@@ -0,0 +1,187 @@
# Command: toolchain-scan
> Parallel static analysis tool execution. Detects available tools, runs concurrently, normalizes output into standardized findings.
## When to Use
- Phase 3 of Scanner, Standard mode, Step A
- At least one tool detected in Phase 2
- Quick mode does NOT use this command
## Strategy
### Delegation Mode
**Mode**: Direct (Bash parallel execution)
## Execution Steps
### Step 1: Build Tool Commands
```javascript
if (!Object.values(toolchain).some(Boolean)) {
Write(`${sessionFolder}/scan/toolchain-findings.json`, '[]')
return
}
const tmpDir = `${sessionFolder}/scan/tmp`
Bash(`mkdir -p "${tmpDir}"`)
const cmds = []
if (toolchain.tsc)
cmds.push(`(cd "${projectRoot}" && npx tsc --noEmit --pretty false 2>&1 | head -500 > "${tmpDir}/tsc.txt") &`)
if (toolchain.eslint)
cmds.push(`(cd "${projectRoot}" && npx eslint "${target}" --format json --no-error-on-unmatched-pattern 2>/dev/null | head -5000 > "${tmpDir}/eslint.json") &`)
if (toolchain.semgrep)
cmds.push(`(cd "${projectRoot}" && semgrep --config auto --json "${target}" 2>/dev/null | head -5000 > "${tmpDir}/semgrep.json") &`)
if (toolchain.ruff)
cmds.push(`(cd "${projectRoot}" && ruff check "${target}" --output-format json 2>/dev/null | head -5000 > "${tmpDir}/ruff.json") &`)
if (toolchain.mypy)
cmds.push(`(cd "${projectRoot}" && mypy "${target}" --output json 2>/dev/null | head -2000 > "${tmpDir}/mypy.txt") &`)
if (toolchain.npmAudit)
cmds.push(`(cd "${projectRoot}" && npm audit --json 2>/dev/null | head -5000 > "${tmpDir}/audit.json") &`)
```
### Step 2: Parallel Execution
```javascript
// Launch every backgrounded tool command in one shell and block on `wait`
// until all of them exit; the whole batch shares a single 5-minute timeout.
const parallelScript = `${cmds.join('\n')}\nwait`
Bash(parallelScript, { timeout: 300000 })
```
### Step 3: Parse Tool Outputs
Each parser normalizes to: `{ dimension, category, severity, title, description, location:{file,line,end_line,code_snippet}, source, tool_rule, suggested_fix, effort, confidence }`
```javascript
// Accumulates the normalized findings from every tool parser
const findings = []
// --- tsc: file(line,col): error TSxxxx: message ---
if (toolchain.tsc) {
  try {
    const tscText = Read(`${tmpDir}/tsc.txt`)
    // Captures: file, line, level (error|warning), TS code, message
    const diagnosticRe = /^(.+)\((\d+),\d+\):\s+(error|warning)\s+(TS\d+):\s+(.+)$/gm
    for (const [, file, line, level, code, message] of String(tscText).matchAll(diagnosticRe)) {
      findings.push({
        dimension: 'correctness',
        category: 'type-safety',
        severity: level === 'error' ? 'high' : 'medium',
        // Title carries a shortened message; the full text goes in description
        title: `tsc ${code}: ${message.slice(0,80)}`,
        description: message,
        location: { file: file, line: Number(line) },
        source: 'tool:tsc',
        tool_rule: code,
        suggested_fix: '',
        effort: 'low',
        confidence: 'high'
      })
    }
  } catch {}
}
// --- eslint: JSON array of {filePath, messages[{severity,ruleId,message,line}]} ---
if (toolchain.eslint) {
  try {
    const data = JSON.parse(Read(`${tmpDir}/eslint.json`))
    for (const fileResult of data) {
      const messages = fileResult.messages || []
      for (const msg of messages) {
        // Severity 2 is treated as an error (correctness bug); anything else as a code smell
        const isErr = msg.severity === 2
        const text = msg.message || ''
        const ruleLabel = msg.ruleId || '?'
        findings.push({
          dimension: isErr ? 'correctness' : 'maintainability',
          category: isErr ? 'bug' : 'code-smell',
          severity: isErr ? 'high' : 'medium',
          title: `eslint ${ruleLabel}: ${text.slice(0,80)}`,
          description: text,
          location: {
            file: fileResult.filePath,
            line: msg.line || 1,
            end_line: msg.endLine,
            code_snippet: msg.source || ''
          },
          source: 'tool:eslint',
          tool_rule: msg.ruleId || null,
          // A message carrying a `fix` is reported as auto-fixable and low effort
          suggested_fix: msg.fix ? 'Auto-fixable' : '',
          effort: msg.fix ? 'low' : 'medium',
          confidence: 'high'
        })
      }
    }
  } catch {}
}
// --- semgrep: {results[{path,start:{line},end:{line},check_id,extra:{severity,message,fix,lines}}]} ---
if (toolchain.semgrep) {
  try {
    const data = JSON.parse(Read(`${tmpDir}/semgrep.json`))
    // semgrep severity label -> normalized severity
    const smap = { ERROR:'high', WARNING:'medium', INFO:'low' }
    const results = data.results || []
    for (const r of results) {
      const ruleId = r.check_id
      const extra = r.extra
      const mappedSeverity = smap[extra?.severity]
      const headline = extra?.message || ruleId || ''
      findings.push({
        dimension: 'security',
        // Last dot-separated segment of the rule id serves as the category
        category: ruleId?.split('.').pop() || 'generic',
        severity: mappedSeverity || 'medium',
        title: `semgrep: ${headline.slice(0,80)}`,
        description: extra?.message || '',
        location: {
          file: r.path,
          line: r.start?.line || 1,
          end_line: r.end?.line,
          code_snippet: extra?.lines || ''
        },
        source: 'tool:semgrep',
        tool_rule: ruleId || null,
        suggested_fix: extra?.fix || '',
        effort: 'medium',
        // Only results that map to 'high' severity get high confidence
        confidence: mappedSeverity === 'high' ? 'high' : 'medium'
      })
    }
  } catch {}
}
// --- ruff: [{code,message,filename,location:{row},end_location:{row},fix}] ---
if (toolchain.ruff) {
  try {
    const data = JSON.parse(Read(`${tmpDir}/ruff.json`))
    // Category reported for each dimension
    const categoryByDim = { security: 'input-validation', correctness: 'bug', maintainability: 'code-smell' }
    for (const item of data) {
      const code = item.code || ''
      const text = item.message || ''

      // The rule-code prefix picks the dimension: S* -> security,
      // F*/B* -> correctness, everything else -> maintainability.
      let dim
      if (code.startsWith('S')) {
        dim = 'security'
      } else if (code.startsWith('F') || code.startsWith('B')) {
        dim = 'correctness'
      } else {
        dim = 'maintainability'
      }

      // S* and F* codes are reported as high severity; all others as medium
      const isHigh = code.startsWith('S') || code.startsWith('F')

      findings.push({
        dimension: dim,
        category: categoryByDim[dim],
        severity: isHigh ? 'high' : 'medium',
        title: `ruff ${code}: ${text.slice(0,80)}`,
        description: text,
        location: {
          file: item.filename,
          line: item.location?.row || 1,
          end_line: item.end_location?.row
        },
        source: 'tool:ruff',
        tool_rule: code,
        suggested_fix: item.fix?.message || '',
        effort: item.fix ? 'low' : 'medium',
        confidence: 'high'
      })
    }
  } catch {}
}
// --- npm audit (npm >= 7): {vulnerabilities:{<pkg>:{name,severity,via[],fixAvailable}}} ---
// Each `via` entry is either an advisory object ({title,url,severity,...}) or the
// name of another vulnerable package this one depends on. The advisory title lives
// on the `via` objects, not on the vulnerability entry itself.
if (toolchain.npmAudit) {
  try {
    const data = JSON.parse(Read(`${tmpDir}/audit.json`))
    const smap = { critical:'critical', high:'high', moderate:'medium', low:'low', info:'info' }
    for (const [pkg, v] of Object.entries(data.vulnerabilities || {})) {
      const name = v.name || pkg
      const via = Array.isArray(v.via) ? v.via : []
      const advisory = via.find(entry => entry && typeof entry === 'object')
      const viaPackages = via.filter(entry => typeof entry === 'string')

      // Prefer a real advisory title; for purely transitive entries name the
      // packages the vulnerability is inherited from; otherwise fall back to the name.
      let summary = String(v.title || advisory?.title || '')
      if (!summary) {
        summary = viaPackages.length > 0
          ? `Vulnerable via ${viaPackages.join(', ')}`
          : `Vulnerable: ${name}`
      }

      findings.push({
        dimension: 'security', category: 'dependency', severity: smap[v.severity] || 'medium',
        title: `npm audit: ${name} - ${summary.slice(0,80)}`,
        description: summary,
        // npm audit reports packages, not source positions: point at the manifest
        location: { file: 'package.json', line: 1 },
        source: 'tool:npm-audit', tool_rule: null,
        suggested_fix: v.fixAvailable ? 'npm audit fix' : 'Manual resolution',
        effort: v.fixAvailable ? 'low' : 'high', confidence: 'high'
      })
    }
  } catch {}
}
// --- mypy: file:line[:col[:end_line:end_col]]: error: message [code] ---
if (toolchain.mypy) {
  try {
    const out = Read(`${tmpDir}/mypy.txt`)
    // The file part is matched lazily and any extra ":<number>" position fields
    // are skipped, so "a.py:10:5: error: ..." yields file "a.py", line 10 rather
    // than file "a.py:10", line 5. Only error/warning lines match, which means
    // "note:" lines are ignored without a separate check.
    const re = /^(.+?):(\d+)(?::\d+)*:\s+(error|warning):\s+(.+?)(?:\s+\[(\w[\w-]*)\])?$/gm
    let m; while ((m = re.exec(out)) !== null) {
      findings.push({
        dimension: 'correctness', category: 'type-safety',
        severity: m[3] === 'error' ? 'high' : 'medium',
        // m[5] is the optional trailing [error-code]
        title: `mypy${m[5] ? ` [${m[5]}]` : ''}: ${m[4].slice(0,80)}`, description: m[4],
        location: { file: m[1], line: +m[2] },
        source: 'tool:mypy', tool_rule: m[5] || null, suggested_fix: '',
        effort: 'low', confidence: 'high'
      })
    }
  } catch {}
}
```
### Step 4: Write Output
```javascript
// Persist the normalized findings, then remove the per-tool scratch directory
const findingsPath = `${sessionFolder}/scan/toolchain-findings.json`
const findingsJson = JSON.stringify(findings, null, 2)
Write(findingsPath, findingsJson)
Bash(`rm -rf "${tmpDir}"`)
```
## Error Handling
| Scenario | Resolution |
|----------|------------|
| Tool not found at runtime | Skip gracefully, continue with others |
| Tool times out (>5 min) | Killed when the 5-minute timeout on the `Bash` call expires; partial output is used |
| Tool output unparseable | try/catch skips that tool's findings |
| All tools fail | Empty array written, semantic-scan covers all dimensions |