Claude-Code-Workflow/.claude/skills/software-manual/scripts/assemble_docsify.py

#!/usr/bin/env python3
"""
Docsify-Style HTML Manual Assembly Script Template
Generates interactive single-file documentation with hierarchical navigation

Usage:
    1. Copy this script to your manual output directory
    2. Customize MANUAL_META and NAV_STRUCTURE
    3. Run: python assemble_docsify.py
"""

import json
import base64
import re
from pathlib import Path
from typing import Dict, List, Any

# Try to import markdown library
try:
    import markdown
    from markdown.extensions.codehilite import CodeHiliteExtension
    from markdown.extensions.fenced_code import FencedCodeExtension
    from markdown.extensions.tables import TableExtension
    from markdown.extensions.toc import TocExtension
    HAS_MARKDOWN = True
except ImportError:
    HAS_MARKDOWN = False
    print("Warning: markdown library not found. Install with: pip install markdown pygments")


# ============================================================
# CONFIGURATION - Customize these for your project
# ============================================================

# Paths - Update these paths for your environment
BASE_DIR = Path(__file__).parent
SECTIONS_DIR = BASE_DIR / "sections"
SCREENSHOTS_DIR = BASE_DIR / "screenshots"

# Template paths - Point to skill templates directory
SKILL_DIR = Path(__file__).parent.parent  # Adjust based on where script is placed
TEMPLATE_FILE = SKILL_DIR / "templates" / "docsify-shell.html"
CSS_BASE_FILE = SKILL_DIR / "templates" / "css" / "docsify-base.css"

# Manual metadata - Customize for your software
MANUAL_META = {
    "title": "Your Software",
    "subtitle": "使用手册",
    "version": "v1.0.0",
    "timestamp": "2025-01-01",
    "language": "zh-CN",
    "logo_icon": "Y"  # First letter or emoji
}

# Output file
OUTPUT_FILE = BASE_DIR / f"{MANUAL_META['title']}{MANUAL_META['subtitle']}.html"

# Hierarchical navigation structure
# Customize groups and items based on your sections
NAV_STRUCTURE = [
    {
        "type": "group",
        "title": "入门指南",
        "icon": "📚",
        "expanded": True,
        "items": [
            {"id": "overview", "title": "产品概述", "file": "section-overview.md"},
        ]
    },
    {
        "type": "group",
        "title": "使用教程",
        "icon": "🎯",
        "expanded": False,
        "items": [
            {"id": "ui-guide", "title": "UI操作指南", "file": "section-ui-guide.md"},
        ]
    },
    {
        "type": "group",
        "title": "API参考",
        "icon": "🔧",
        "expanded": False,
        "items": [
            {"id": "api-reference", "title": "API文档", "file": "section-api-reference.md"},
        ]
    },
    {
        "type": "group",
        "title": "配置与部署",
        "icon": "⚙️",
        "expanded": False,
        "items": [
            {"id": "configuration", "title": "配置指南", "file": "section-configuration.md"},
        ]
    },
    {
        "type": "group",
        "title": "帮助与支持",
        "icon": "💡",
        "expanded": False,
        "items": [
            {"id": "troubleshooting", "title": "故障排除", "file": "section-troubleshooting.md"},
            {"id": "examples", "title": "代码示例", "file": "section-examples.md"},
        ]
    }
]

# Screenshot ID to filename mapping - Customize for your screenshots
SCREENSHOT_MAPPING = {
    # "截图ID": "filename.png",
}


# ============================================================
# CORE FUNCTIONS - Generally don't need to modify
# ============================================================

# Global cache for embedded images
_embedded_images = {}


def read_file(filepath: Path) -> str:
    """Read file content with UTF-8 encoding"""
    return filepath.read_text(encoding='utf-8')


# ============================================================
# MERMAID VALIDATION
# ============================================================

# Valid Mermaid diagram types
MERMAID_DIAGRAM_TYPES = [
    'graph', 'flowchart', 'sequenceDiagram', 'classDiagram',
    'stateDiagram', 'stateDiagram-v2', 'erDiagram', 'journey',
    'gantt', 'pie', 'quadrantChart', 'requirementDiagram',
    'gitGraph', 'mindmap', 'timeline', 'zenuml', 'sankey-beta',
    'xychart-beta', 'block-beta'
]

# Common Mermaid syntax patterns
MERMAID_PATTERNS = {
    'graph': r'^graph\s+(TB|BT|LR|RL|TD)\s*$',
    'flowchart': r'^flowchart\s+(TB|BT|LR|RL|TD)\s*$',
    'sequenceDiagram': r'^sequenceDiagram\s*$',
    'classDiagram': r'^classDiagram\s*$',
    'stateDiagram': r'^stateDiagram(-v2)?\s*$',
    'erDiagram': r'^erDiagram\s*$',
    'gantt': r'^gantt\s*$',
    'pie': r'^pie\s*(showData|title\s+.*)?\s*$',
    'journey': r'^journey\s*$',
}


class MermaidBlock:
    """Represents a mermaid code block found in markdown"""
    def __init__(self, content: str, file: str, line_num: int, indented: bool = False):
        self.content = content
        self.file = file
        self.line_num = line_num
        self.indented = indented
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.diagram_type: str = None

    def __repr__(self):
        return f"MermaidBlock({self.diagram_type}, {self.file}:{self.line_num})"


def extract_mermaid_blocks(markdown_text: str, filename: str) -> List[MermaidBlock]:
    """Extract all mermaid code blocks from markdown text"""
    blocks = []

    # More flexible pattern - matches opening fence with optional indent,
    # then captures content until closing fence (with any indent)
    pattern = r'^(\s*)(```|~~~)mermaid\s*\n(.*?)\n\s*\2\s*$'

    for match in re.finditer(pattern, markdown_text, re.MULTILINE | re.DOTALL):
        indent = match.group(1)
        content = match.group(3)
        # Calculate line number
        line_num = markdown_text[:match.start()].count('\n') + 1
        indented = len(indent) > 0

        block = MermaidBlock(
            content=content,
            file=filename,
            line_num=line_num,
            indented=indented
        )
        blocks.append(block)

    return blocks


def validate_mermaid_block(block: MermaidBlock) -> bool:
    """Validate a mermaid block and populate errors/warnings"""
    content = block.content.strip()
    lines = content.split('\n')

    if not lines:
        block.errors.append("Empty mermaid block")
        return False

    first_line = lines[0].strip()

    # Detect diagram type
    for dtype in MERMAID_DIAGRAM_TYPES:
        if first_line.startswith(dtype):
            block.diagram_type = dtype
            break

    if not block.diagram_type:
        block.errors.append(f"Unknown diagram type: '{first_line[:30]}...'")
        block.errors.append(f"Valid types: {', '.join(MERMAID_DIAGRAM_TYPES[:8])}...")
        return False

    # Check for balanced brackets/braces
    brackets = {'[': ']', '{': '}', '(': ')'}
    stack = []
    for i, char in enumerate(content):
        if char in brackets:
            stack.append((char, i))
        elif char in brackets.values():
            if not stack:
                block.errors.append(f"Unmatched closing bracket '{char}' at position {i}")
            else:
                open_char, _ = stack.pop()
                if brackets[open_char] != char:
                    block.errors.append(f"Mismatched brackets: '{open_char}' and '{char}'")

    if stack:
        for open_char, pos in stack:
            block.warnings.append(f"Unclosed bracket '{open_char}' at position {pos}")

    # Check for common graph/flowchart issues
    if block.diagram_type in ['graph', 'flowchart']:
        # Check direction specifier
        if not re.match(r'^(graph|flowchart)\s+(TB|BT|LR|RL|TD)', first_line):
            block.warnings.append("Missing or invalid direction (TB/BT/LR/RL/TD)")

        # Check for arrow syntax
        arrow_count = content.count('-->') + content.count('---') + content.count('-.->') + content.count('==>')
        if arrow_count == 0 and len(lines) > 1:
            block.warnings.append("No arrows found - graph may be incomplete")

    # Check for sequenceDiagram issues
    if block.diagram_type == 'sequenceDiagram':
        if '->' not in content and '->>' not in content:
            block.warnings.append("No message arrows found in sequence diagram")

    # Indentation warning
    if block.indented:
        block.warnings.append("Indented code block - may not render in some markdown parsers")

    return len(block.errors) == 0


def validate_all_mermaid(nav_structure: List[Dict], sections_dir: Path) -> Dict[str, Any]:
    """Validate all mermaid blocks in all section files"""
    report = {
        'total_blocks': 0,
        'valid_blocks': 0,
        'error_blocks': 0,
        'warning_blocks': 0,
        'blocks': [],
        'by_file': {},
        'by_type': {}
    }

    for group in nav_structure:
        for item in group.get("items", []):
            section_file = item.get("file")
            if not section_file:
                continue

            filepath = sections_dir / section_file
            if not filepath.exists():
                continue

            content = read_file(filepath)
            blocks = extract_mermaid_blocks(content, section_file)

            file_report = {'blocks': [], 'errors': 0, 'warnings': 0}

            for block in blocks:
                report['total_blocks'] += 1
                is_valid = validate_mermaid_block(block)

                if is_valid:
                    report['valid_blocks'] += 1
                else:
                    report['error_blocks'] += 1
                    file_report['errors'] += 1

                if block.warnings:
                    report['warning_blocks'] += 1
                    file_report['warnings'] += len(block.warnings)

                # Track by diagram type
                if block.diagram_type:
                    if block.diagram_type not in report['by_type']:
                        report['by_type'][block.diagram_type] = 0
                    report['by_type'][block.diagram_type] += 1

                report['blocks'].append(block)
                file_report['blocks'].append(block)

            if blocks:
                report['by_file'][section_file] = file_report

    return report


def print_mermaid_report(report: Dict[str, Any]) -> None:
    """Print mermaid validation report"""
    print("\n" + "=" * 60)
    print("MERMAID DIAGRAM VALIDATION REPORT")
    print("=" * 60)

    print(f"\nSummary:")
    print(f"  Total blocks: {report['total_blocks']}")
    print(f"  Valid: {report['valid_blocks']}")
    print(f"  With errors: {report['error_blocks']}")
    print(f"  With warnings: {report['warning_blocks']}")

    if report['by_type']:
        print(f"\nDiagram Types:")
        for dtype, count in sorted(report['by_type'].items()):
            print(f"  {dtype}: {count}")

    # Print errors and warnings
    has_issues = False
    for block in report['blocks']:
        if block.errors or block.warnings:
            if not has_issues:
                print(f"\nIssues Found:")
                has_issues = True

            print(f"\n  [{block.file}:{block.line_num}] {block.diagram_type or 'unknown'}")
            for error in block.errors:
                print(f"    [ERROR] {error}")
            for warning in block.warnings:
                print(f"    [WARN] {warning}")

    if not has_issues:
        print(f"\n  No issues found!")

    print("=" * 60 + "\n")


def convert_md_to_html(markdown_text: str) -> str:
    """Convert Markdown to HTML with syntax highlighting"""
    if not HAS_MARKDOWN:
        # Fallback: just escape HTML and wrap in pre
        escaped = markdown_text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        return f'<pre>{escaped}</pre>'

    md = markdown.Markdown(
        extensions=[
            FencedCodeExtension(),
            TableExtension(),
            TocExtension(toc_depth=3),
            CodeHiliteExtension(
                css_class='highlight',
                linenums=False,
                guess_lang=True,
                use_pygments=True
            ),
        ],
        output_format='html5'
    )
    html = md.convert(markdown_text)
    md.reset()
    return html


def embed_screenshot_base64(screenshot_id: str) -> str:
    """Embed screenshot as base64, using cache to avoid duplicates"""
    global _embedded_images

    filename = SCREENSHOT_MAPPING.get(screenshot_id)

    if not filename:
        return f'<div class="screenshot-placeholder">📷 {screenshot_id}</div>'

    filepath = SCREENSHOTS_DIR / filename

    if not filepath.exists():
        return f'<div class="screenshot-placeholder">📷 {screenshot_id}</div>'

    # Check cache
    if filename not in _embedded_images:
        try:
            with open(filepath, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode('utf-8')
            ext = filepath.suffix[1:].lower()
            _embedded_images[filename] = f"data:image/{ext};base64,{image_data}"
        except Exception as e:
            return f'<div class="screenshot-placeholder">📷 {screenshot_id} (加载失败)</div>'

    return f'''<figure class="screenshot">
      <img src="{_embedded_images[filename]}" alt="{screenshot_id}" loading="lazy" />
      <figcaption>{screenshot_id}</figcaption>
    </figure>'''


def process_markdown_screenshots(markdown_text: str) -> str:
    """Replace [[screenshot:xxx]] placeholders with embedded images"""
    pattern = r'\[\[screenshot:(.*?)\]\]'

    def replacer(match):
        screenshot_id = match.group(1)
        return embed_screenshot_base64(screenshot_id)

    return re.sub(pattern, replacer, markdown_text)


def generate_sidebar_nav_html(nav_structure: List[Dict]) -> str:
    """Generate hierarchical sidebar navigation HTML"""
    html_parts = []

    for group in nav_structure:
        if group["type"] == "group":
            expanded_class = "expanded" if group.get("expanded", False) else ""
            html_parts.append(f'''
      <div class="nav-group {expanded_class}">
        <div class="nav-group-header">
          <button class="nav-group-toggle" aria-expanded="{str(group.get('expanded', False)).lower()}">
            <svg viewBox="0 0 24 24"><path d="M8.59 16.59L13.17 12 8.59 7.41 10 6l6 6-6 6z" fill="currentColor"/></svg>
          </button>
          <span class="nav-group-title">{group.get('icon', '')} {group['title']}</span>
        </div>
        <div class="nav-group-items">''')

            for item in group.get("items", []):
                html_parts.append(f'''
          <a class="nav-item" href="#/{item['id']}" data-section="{item['id']}">{item['title']}</a>''')

            html_parts.append('''
        </div>
      </div>''')

    return '\n'.join(html_parts)


def generate_sections_html(nav_structure: List[Dict]) -> str:
    """Generate content sections HTML"""
    sections_html = []

    for group in nav_structure:
        for item in group.get("items", []):
            section_id = item["id"]
            section_title = item["title"]
            section_file = item.get("file")

            if not section_file:
                continue

            filepath = SECTIONS_DIR / section_file
            if not filepath.exists():
                print(f"Warning: Section file not found: {filepath}")
                continue

            # Read and convert markdown
            markdown_content = read_file(filepath)
            markdown_content = process_markdown_screenshots(markdown_content)
            html_content = convert_md_to_html(markdown_content)

            sections_html.append(f'''
    <section class="content-section" id="section-{section_id}" data-title="{section_title}">
      {html_content}
    </section>''')

    return '\n'.join(sections_html)


def generate_search_index(nav_structure: List[Dict]) -> str:
    """Generate search index JSON"""
    search_index = {}

    for group in nav_structure:
        for item in group.get("items", []):
            section_id = item["id"]
            section_file = item.get("file")

            if not section_file:
                continue

            filepath = SECTIONS_DIR / section_file
            if filepath.exists():
                content = read_file(filepath)
                clean_content = re.sub(r'[#*`\[\]()]', '', content)
                clean_content = re.sub(r'\s+', ' ', clean_content)[:1500]

                search_index[section_id] = {
                    "title": item["title"],
                    "body": clean_content,
                    "group": group["title"]
                }

    return json.dumps(search_index, ensure_ascii=False, indent=2)


def generate_nav_structure_json(nav_structure: List[Dict]) -> str:
    """Generate navigation structure JSON for client-side"""
    return json.dumps(nav_structure, ensure_ascii=False, indent=2)


def assemble_manual(validate_mermaid: bool = True):
    """Main assembly function

    Args:
        validate_mermaid: Whether to validate mermaid diagrams (default: True)
    """
    global _embedded_images
    _embedded_images = {}

    full_title = f"{MANUAL_META['title']} {MANUAL_META['subtitle']}"
    print(f"Assembling Docsify-style manual: {full_title}")

    # Verify template exists
    if not TEMPLATE_FILE.exists():
        print(f"Error: Template not found at {TEMPLATE_FILE}")
        print("Please update TEMPLATE_FILE path in this script.")
        return None, 0

    if not CSS_BASE_FILE.exists():
        print(f"Error: CSS not found at {CSS_BASE_FILE}")
        print("Please update CSS_BASE_FILE path in this script.")
        return None, 0

    # Validate Mermaid diagrams
    mermaid_report = None
    if validate_mermaid:
        print("\nValidating Mermaid diagrams...")
        mermaid_report = validate_all_mermaid(NAV_STRUCTURE, SECTIONS_DIR)
        print_mermaid_report(mermaid_report)

        # Warn if there are errors (but continue)
        if mermaid_report['error_blocks'] > 0:
            print(f"[WARN] {mermaid_report['error_blocks']} mermaid block(s) have errors!")
            print("       These diagrams may not render correctly.")

    # Read template and CSS
    template_html = read_file(TEMPLATE_FILE)
    css_content = read_file(CSS_BASE_FILE)

    # Generate components
    sidebar_nav_html = generate_sidebar_nav_html(NAV_STRUCTURE)
    sections_html = generate_sections_html(NAV_STRUCTURE)
    search_index_json = generate_search_index(NAV_STRUCTURE)
    nav_structure_json = generate_nav_structure_json(NAV_STRUCTURE)

    # Replace placeholders
    output_html = template_html
    output_html = output_html.replace('{{SOFTWARE_NAME}}', full_title)
    output_html = output_html.replace('{{VERSION}}', MANUAL_META['version'])
    output_html = output_html.replace('{{TIMESTAMP}}', MANUAL_META['timestamp'])
    output_html = output_html.replace('{{LOGO_ICON}}', MANUAL_META['logo_icon'])
    output_html = output_html.replace('{{EMBEDDED_CSS}}', css_content)
    output_html = output_html.replace('{{SIDEBAR_NAV_HTML}}', sidebar_nav_html)
    output_html = output_html.replace('{{SECTIONS_HTML}}', sections_html)
    output_html = output_html.replace('{{SEARCH_INDEX_JSON}}', search_index_json)
    output_html = output_html.replace('{{NAV_STRUCTURE_JSON}}', nav_structure_json)

    # Write output file
    OUTPUT_FILE.write_text(output_html, encoding='utf-8')

    file_size = OUTPUT_FILE.stat().st_size
    file_size_mb = file_size / (1024 * 1024)
    section_count = sum(len(g.get("items", [])) for g in NAV_STRUCTURE)

    print("[OK] Docsify-style manual generated successfully!")
    print(f"  Output: {OUTPUT_FILE}")
    print(f"  Size: {file_size_mb:.2f} MB ({file_size:,} bytes)")
    print(f"  Navigation Groups: {len(NAV_STRUCTURE)}")
    print(f"  Sections: {section_count}")

    return str(OUTPUT_FILE), file_size


if __name__ == "__main__":
    output_path, size = assemble_manual()