sync-pd2-wiki/sync.py

1122 lines
38 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
MediaWiki 最近变更同步工具 - 增强版
支持:
1. 正常全量同步(无参数)
2. 手动指定时间起点:--since 2025-11-28T00:00:00Z
3. 只同步单个页面:--title "页面名称"
4. 单个页面时可选更新全局时间戳:--update-timestamp
5. 获取历史版本并生成diff
6. 同步中文翻译版本
7. 生成双语对比网页
"""
import argparse
import difflib
import json
import os
import re
from datetime import datetime, timedelta, timezone
from pathlib import Path
from urllib.parse import quote

import requests
from dotenv import load_dotenv
# ==================== Configuration ====================
load_dotenv()
# API endpoints; overridable via environment variables
WIKI_API_URL_EN = os.getenv("WIKI_API_URL_EN", "https://wiki.projectdiablo2.com/w/api.php")
WIKI_API_URL_CN = os.getenv("WIKI_API_URL_CN", "https://wiki.projectdiablo2.cn/w/api.php")
OUTPUT_DIR = Path("wiki_sync_output")
OUTPUT_DIR.mkdir(exist_ok=True)
# Global: per-run output directory, created lazily by save_files()
CURRENT_OUTPUT_DIR = None
LAST_TIMESTAMP_FILE = "last_sync_timestamp.txt"
# Separate sessions for the English and Chinese wikis
SESSION_EN = requests.Session()
SESSION_EN.headers.update({
    "User-Agent": "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"
})
SESSION_CN = requests.Session()
SESSION_CN.headers.update({
    "User-Agent": "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"
})
# ================================================
def load_last_timestamp():
    """Return the timestamp recorded by the previous sync, or None if absent."""
    ts_path = Path(LAST_TIMESTAMP_FILE)
    if not ts_path.exists():
        return None
    return ts_path.read_text(encoding="utf-8").strip()
def save_last_timestamp(ts):
    """Persist *ts* as the last successfully synced timestamp."""
    Path(LAST_TIMESTAMP_FILE).write_text(ts, encoding="utf-8")
def get_recent_changes(since):
    """Fetch the newest revid of every page changed since *since* (deduplicated).

    Follows MediaWiki API continuation until exhausted. On any error the
    partial result collected so far is returned.

    Args:
        since: ISO-8601 timestamp string (e.g. "2025-11-28T00:00:00Z").

    Returns:
        dict mapping page title -> (revid, timestamp) of its newest change.
    """
    params = {
        "action": "query",
        "list": "recentchanges",
        "rcprop": "title|ids|timestamp",
        "rctype": "edit|new",
        "rcdir": "newer",
        "rcstart": since,
        "rclimit": 500,
        "format": "json"
    }
    latest = {}
    while True:
        try:
            # timeout prevents an indefinite hang on a stalled connection
            r = SESSION_EN.get(WIKI_API_URL_EN, params=params, timeout=30)
            r.raise_for_status()
            response_data = r.json()
            if "error" in response_data:
                raise Exception(response_data["error"])
            for rc in response_data.get("query", {}).get("recentchanges", []):
                # later batches overwrite earlier ones: keeps the newest revid per title
                latest[rc["title"]] = (rc["revid"], rc["timestamp"])
            if "continue" not in response_data:
                break
            params.update(response_data["continue"])
        except Exception as e:
            print(f"获取最近更改时出错: {e}")
            break
    return latest
def get_old_revid(title, end_time):
    """Return the revid of the last revision at or before *end_time*.

    Used as the `fromrev` side of a compare. Returns None when the page has
    no revision before that time or on any API error.
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "ids|timestamp",
        "rvlimit": 1,
        "rvdir": "older",       # walk backwards in time from rvstart
        "rvstart": end_time,
        "format": "json"
    }
    try:
        # timeout guards against an indefinite hang on a stalled connection
        r = SESSION_EN.get(WIKI_API_URL_EN, params=params, timeout=30).json()
        pages = r["query"]["pages"]
        page = next(iter(pages.values()))
        revisions = page.get("revisions")
        if not revisions:
            print(f" 页面 '{title}' 在指定时间前没有找到修订版本")
            return None
        return revisions[0]["revid"]
    except Exception as e:
        print(f"获取旧版本ID时出错: {e}")
        return None
def get_page_content(wiki_url, session, title, revid=None):
    """Fetch a page's full wikitext, optionally for one specific revision.

    Args:
        wiki_url: API endpoint to query.
        session: requests.Session carrying the User-Agent headers.
        title: page title.
        revid: optional revision id; when given, exactly that revision is
            returned instead of the latest one.

    Returns:
        (content, timestamp, revid) tuple, or (None, None, None) when the
        page/revision does not exist or the request fails.
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "content|timestamp|ids",
        "rvslots": "main",
        "format": "json"
    }
    if revid:
        # pin both ends of the revision range so exactly one revision comes back
        params["rvstartid"] = revid
        params["rvendid"] = revid
    try:
        # timeout prevents an indefinite hang on a stalled connection
        r = session.get(wiki_url, params=params, timeout=30).json()
        pages = r["query"]["pages"]
        page = next(iter(pages.values()))
        if "revisions" not in page:
            return None, None, None
        rev = page["revisions"][0]
        content = rev["slots"]["main"]["*"]
        return content, rev["timestamp"], rev["revid"]
    except Exception as e:
        print(f"获取页面内容时出错: {e}")
        return None, None, None
def generate_text_diff(old_text, new_text):
    """Produce a git-style unified diff between two versions of a page.

    Returns the literal marker string "新创建页面" when there is no old text
    (i.e. the page is brand new).
    """
    if not old_text:
        return "新创建页面"
    diff_iter = difflib.unified_diff(
        old_text.splitlines(keepends=True),
        new_text.splitlines(keepends=True),
        lineterm='\n',
    )
    return ''.join(diff_iter)
def parse_diff_with_line_numbers(diff_text):
    """Parse unified-diff text into entries annotated with old/new line numbers.

    Each entry is a dict with keys 'type' ('hunk' | 'removed' | 'added' |
    'context' | 'other'), 'content' (prefix character stripped), and
    'old_line'/'new_line' (None when not applicable). Hunk entries also carry
    'old_start'/'old_count'/'new_start'/'new_count'. File header lines
    ('---' / '+++') are skipped.

    Returns [] for empty input or the new-page marker ("新创建页面").
    """
    if not diff_text or diff_text.startswith("新创建页面"):
        return []
    # Hunk header, e.g. "@@ -12,4 +13,6 @@"; counts default to 1 when omitted.
    # Compiled once here instead of re-importing `re` and rebuilding the
    # pattern on every iteration as before.
    hunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
    parsed_lines = []
    current_old_line = 0
    current_new_line = 0
    in_hunk = False
    for line in diff_text.splitlines():
        if line.startswith('@@'):
            match = hunk_re.match(line)
            if match:
                old_start = int(match.group(1))
                old_count = int(match.group(2)) if match.group(2) else 1
                new_start = int(match.group(3))
                new_count = int(match.group(4)) if match.group(4) else 1
                current_old_line = old_start
                current_new_line = new_start
                in_hunk = True
                parsed_lines.append({
                    'type': 'hunk',
                    'content': line,
                    'old_start': old_start,
                    'old_count': old_count,
                    'new_start': new_start,
                    'new_count': new_count,
                    'old_line': None,
                    'new_line': None
                })
            else:
                parsed_lines.append({
                    'type': 'other',
                    'content': line,
                    'old_line': None,
                    'new_line': None
                })
        elif line.startswith('---') or line.startswith('+++'):
            # file header lines carry no line-number information
            continue
        elif in_hunk:
            if line.startswith('-'):
                # removed line: exists only on the old side
                parsed_lines.append({
                    'type': 'removed',
                    'content': line[1:],
                    'old_line': current_old_line,
                    'new_line': None
                })
                current_old_line += 1
            elif line.startswith('+'):
                # added line: exists only on the new side
                parsed_lines.append({
                    'type': 'added',
                    'content': line[1:],
                    'old_line': None,
                    'new_line': current_new_line
                })
                current_new_line += 1
            elif line.startswith(' '):
                # context line: present on both sides
                parsed_lines.append({
                    'type': 'context',
                    'content': line[1:],
                    'old_line': current_old_line,
                    'new_line': current_new_line
                })
                current_old_line += 1
                current_new_line += 1
            else:
                # e.g. completely empty lines inside a hunk
                parsed_lines.append({
                    'type': 'other',
                    'content': line,
                    'old_line': None,
                    'new_line': None
                })
        else:
            # lines appearing before the first hunk
            parsed_lines.append({
                'type': 'other',
                'content': line,
                'old_line': None,
                'new_line': None
            })
    return parsed_lines
def search_chinese_page(title):
    """Search the Chinese wiki for the page corresponding to *title*.

    Tries an exact quoted title search first, then falls back to a plain
    search. Returns the best-matching Chinese page title, or None when
    nothing is found or the request fails.
    """
    # First attempt: exact (quoted) match
    params = {
        "action": "query",
        "list": "search",
        "srsearch": f'"{title}"',
        "srwhat": "title",
        "srlimit": 5,
        "format": "json"
    }
    try:
        r = SESSION_CN.get(WIKI_API_URL_CN, params=params, timeout=30).json()
        search_results = r.get("query", {}).get("search", [])
        if search_results:
            # return the first match
            return search_results[0]["title"]
        # Fallback: unquoted fuzzy search. BUG FIX: requests URL-encodes
        # parameter values itself, so we pass the raw title — the previous
        # manual `title.replace(" ", "%20")` made the wiki search for the
        # literal text "%20".
        params["srsearch"] = title
        r = SESSION_CN.get(WIKI_API_URL_CN, params=params, timeout=30).json()
        search_results = r.get("query", {}).get("search", [])
        if search_results:
            return search_results[0]["title"]
    except Exception as e:
        print(f"搜索中文页面时出错: {e}")
    return None
def create_diff_html(title, en_diff, en_old_lines, en_new_lines, cn_content=None):
    """Build the bilingual comparison HTML page (Word-annotation style).

    English changes are shown as annotations to the right of the matching
    Chinese line; net additions get blank placeholder rows on the Chinese
    side. NOTE(review): line numbers from the English diff are applied
    directly to the Chinese text — this assumes both texts line up roughly
    line-for-line; confirm against real pages.
    """
    # Chinese translation split into lines (empty when no translation exists)
    cn_lines = []
    if cn_content:
        cn_lines = cn_content.splitlines()
    # Parse the diff to get per-line change information
    parsed_diff = parse_diff_with_line_numbers(en_diff) if en_diff else []
    # Map Chinese line number -> list of change dicts for that line
    en_changes_by_line = {}
    blank_lines_to_insert = {}  # line number -> added-content strings shown as placeholder rows before it
    if parsed_diff:
        i = 0
        while i < len(parsed_diff):
            # Collect one consecutive run of diff operations
            diff_block = []
            start_index = i
            # Gather consecutive added/removed items; a hunk/header/context
            # entry terminates a non-empty run
            while i < len(parsed_diff):
                item = parsed_diff[i]
                if item['type'] in ['added', 'removed']:
                    diff_block.append(item)
                elif item['type'] in ['hunk', 'header'] or item['type'] == 'context':
                    if diff_block:  # a run has been collected — stop here
                        break
                i += 1
            # Process the collected run
            if diff_block:
                # Net line balance: +1 per addition, -1 per removal
                line_balance = 0
                for item in diff_block:
                    if item['type'] == 'added':
                        line_balance += 1
                    elif item['type'] == 'removed':
                        line_balance -= 1
                # Positive balance: the run grows the text, so blank
                # placeholder rows are needed on the Chinese side
                if line_balance > 0:
                    # Anchor line: the first operation's line number
                    base_line = None
                    for item in diff_block:
                        if item['old_line']:  # prefer the removed line's number
                            base_line = item['old_line']
                            break
                        elif item['new_line'] and base_line is None:
                            base_line = item['new_line']
                    if base_line:
                        # Collect the added content destined for placeholder rows
                        additions_for_blank_lines = []
                        remaining_additions = []
                        for item in diff_block:
                            if item['type'] == 'added':
                                additions_for_blank_lines.append(item['content'])
                        # Remember the placeholder rows to insert before base_line
                        blank_lines_to_insert[base_line] = additions_for_blank_lines
                # Walk the run and classify each operation
                j = 0
                while j < len(diff_block):
                    item = diff_block[j]
                    # A removal immediately followed by an addition is treated
                    # as a same-line replacement
                    if (item['type'] == 'removed' and j + 1 < len(diff_block) and
                            diff_block[j + 1]['type'] == 'added'):
                        next_item = diff_block[j + 1]
                        # Anchor the replacement on the removed line's number
                        target_line = item['old_line']
                        if target_line not in en_changes_by_line:
                            en_changes_by_line[target_line] = []
                        en_changes_by_line[target_line].append({
                            'type': 'replaced',
                            'old_content': item['content'],
                            'new_content': next_item['content']
                        })
                        j += 2  # the paired addition was consumed too
                    # Plain addition (unless it already went to a placeholder row)
                    elif item['type'] == 'added' and item['new_line']:
                        # Skip additions already assigned to placeholder rows
                        if line_balance > 0 and item['content'] in blank_lines_to_insert.get(base_line, []):
                            j += 1
                            continue
                        if item['new_line'] not in en_changes_by_line:
                            en_changes_by_line[item['new_line']] = []
                        en_changes_by_line[item['new_line']].append({
                            'type': 'added',
                            'content': item['content']
                        })
                        j += 1
                    # Plain removal (no paired addition)
                    elif item['type'] == 'removed' and item['old_line']:
                        if item['old_line'] not in en_changes_by_line:
                            en_changes_by_line[item['old_line']] = []
                        en_changes_by_line[item['old_line']].append({
                            'type': 'removed',
                            'content': item['content']
                        })
                        j += 1
                    else:
                        j += 1
            else:
                i += 1
    # Minimal HTML-escaping helper
    def html_escape(text):
        if not text:
            return ""
        return (str(text)
                .replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace('"', "&quot;")
                .replace("'", "&#39;"))
    def generate_inline_diff(old_text, new_text):
        """Generate a GitHub-style inline character-level diff."""
        if not old_text or not new_text:
            return html_escape(new_text or "")
        escaped_old = html_escape(old_text)
        escaped_new = html_escape(new_text)
        # Character-level comparison via difflib
        differ = difflib.SequenceMatcher(None, escaped_old, escaped_new)
        result = []
        for tag, i1, i2, j1, j2 in differ.get_opcodes():
            if tag == 'equal':
                # unchanged segment
                result.append(escaped_new[j1:j2])
            elif tag == 'replace':
                # replaced segment: old shown red, new shown green
                deleted = escaped_old[i1:i2]
                added = escaped_new[j1:j2]
                result.append(f'<span class="diff-char-removed">{deleted}</span>')
                result.append(f'<span class="diff-char-added">{added}</span>')
            elif tag == 'delete':
                # removed segment (red background)
                deleted = escaped_old[i1:i2]
                result.append(f'<span class="diff-char-removed">{deleted}</span>')
            elif tag == 'insert':
                # added segment (green background)
                added = escaped_new[j1:j2]
                result.append(f'<span class="diff-char-added">{added}</span>')
        return ''.join(result)
    def generate_clean_new_content(old_text, new_text):
        """Render only the new content with insertions highlighted; deletions omitted."""
        if not old_text or not new_text:
            return html_escape(new_text or "")
        escaped_old = html_escape(old_text)
        escaped_new = html_escape(new_text)
        # Character-level comparison via difflib
        differ = difflib.SequenceMatcher(None, escaped_old, escaped_new)
        result = []
        for tag, i1, i2, j1, j2 in differ.get_opcodes():
            if tag == 'equal':
                # unchanged segment
                result.append(escaped_new[j1:j2])
            elif tag == 'replace':
                # show only the new side of a replacement
                added = escaped_new[j1:j2]
                result.append(f'<span class="diff-char-added">{added}</span>')
            elif tag == 'delete':
                # deleted text is dropped entirely
                continue
            elif tag == 'insert':
                # added segment (green background)
                added = escaped_new[j1:j2]
                result.append(f'<span class="diff-char-added">{added}</span>')
        return ''.join(result)
    # Document head, CSS and page header
    html = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Wiki Diff: {title}</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background-color: #f5f5f5;
line-height: 1.6;
padding: 20px;
}}
.header {{
background-color: #fff;
padding: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin-bottom: 20px;
border-radius: 8px;
}}
.header h1 {{
color: #333;
font-size: 24px;
margin-bottom: 10px;
}}
.header .meta {{
color: #666;
font-size: 14px;
}}
.content-container {{
max-width: 1200px;
margin: 0 auto;
background-color: #fff;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
overflow: hidden;
}}
.content-header {{
background-color: #e9ecef;
padding: 15px 20px;
font-weight: bold;
color: #495057;
border-bottom: 1px solid #dee2e6;
}}
.diff-content {{
padding: 0;
}}
.line-wrapper {{
display: flex;
border-bottom: 1px solid #f0f0f0;
position: relative;
}}
.line-wrapper:hover {{
background-color: rgba(0, 123, 255, 0.02);
}}
.line-wrapper.has-changes {{
background-color: rgba(255, 193, 7, 0.05);
}}
.main-line {{
display: flex;
flex: 1;
min-height: 24px;
align-items: center;
}}
.line-number {{
width: 60px;
text-align: right;
padding: 8px 12px;
background-color: #f8f9fa;
color: #6c757d;
font-size: 12px;
user-select: none;
flex-shrink: 0;
border-right: 1px solid #e9ecef;
}}
.line-content {{
flex: 1;
padding: 8px 12px;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 13px;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
color: #333;
}}
/* 批注样式 */
.annotation {{
width: 400px;
background-color: #f8f9fa;
border-left: 1px solid #dee2e6;
padding: 10px 14px;
font-size: 14px;
display: none;
}}
.line-wrapper.has-changes .annotation {{
display: block;
}}
.annotation-item {{
margin-bottom: 8px;
padding: 8px 10px;
border-radius: 4px;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 13px;
line-height: 1.5;
}}
.annotation-item:last-child {{
margin-bottom: 0;
}}
.annotation-item.added {{
background-color: #e6ffec;
border-left: 3px solid #28a745;
color: #155724;
}}
.annotation-item.removed {{
background-color: #ffeef0;
border-left: 3px solid #dc3545;
color: #721c24;
text-decoration: line-through;
}}
.annotation-item.replaced {{
margin-bottom: 8px;
}}
.annotation-item.replaced .old-content {{
background-color: #ffeef0;
border-left: 3px solid #dc3545;
color: #721c24;
text-decoration: line-through;
padding: 4px 6px;
border-radius: 3px;
margin-bottom: 4px;
}}
.annotation-item.replaced .new-content {{
background-color: #e6ffec;
border-left: 3px solid #28a745;
color: #155724;
padding: 4px 6px;
border-radius: 3px;
}}
.annotation-header {{
font-size: 11px;
color: #6c757d;
margin-bottom: 6px;
font-weight: bold;
text-transform: uppercase;
letter-spacing: 0.5px;
}}
/* GitHub风格的字符级diff样式 */
.diff-char-added {{
background-color: #acf2bd;
color: #24292f;
border-radius: 2px;
padding: 1px 2px;
}}
.diff-char-removed {{
background-color: #ffd5d5;
color: #24292f;
border-radius: 2px;
padding: 1px 2px;
}}
/* 批注项内的字符级diff样式调整 */
.annotation-item.removed {{
text-decoration: none; /* 移除删除线 */
}}
.annotation-item.replaced .old-content {{
text-decoration: none; /* 移除删除线 */
}}
/* 新页面提示 */
.new-page-notice {{
background-color: #d4edda;
color: #155724;
padding: 15px 20px;
margin: 15px;
border-radius: 4px;
border-left: 4px solid #28a745;
}}
.no-translation {{
background-color: #fff3cd;
color: #856404;
padding: 15px 20px;
margin: 15px;
border-radius: 4px;
border-left: 4px solid #ffc107;
}}
/* 响应式设计 */
@media (max-width: 1024px) {{
.annotation {{
width: 300px;
}}
}}
@media (max-width: 768px) {{
body {{
padding: 10px;
}}
.annotation {{
width: 100%;
display: block !important;
border-left: none;
border-top: 1px solid #dee2e6;
}}
.line-wrapper {{
flex-direction: column;
}}
.main-line {{
border-bottom: none;
}}
}}
/* 高亮效果 */
.line-wrapper.highlight {{
background-color: rgba(255, 235, 59, 0.3) !important;
animation: highlight 2s ease-in-out;
}}
@keyframes highlight {{
0% {{ background-color: rgba(255, 235, 59, 0.6); }}
100% {{ background-color: rgba(255, 235, 59, 0.3); }}
}}
/* 空行样式 */
.line-wrapper.empty-line .line-content {{
min-height: 24px;
color: #999;
font-style: italic;
}}
/* 空白占位行样式 */
.line-wrapper.blank-placeholder {{
background-color: #fafafa;
border-bottom: 1px solid #e9ecef;
display: flex;
}}
.line-wrapper.blank-placeholder .main-line {{
min-height: 24px;
flex: 1;
display: flex;
}}
.line-wrapper.blank-placeholder .line-number {{
color: #dee2e6;
}}
.line-wrapper.blank-placeholder .line-content {{
color: #dee2e6;
font-style: italic;
min-height: 24px;
display: flex;
align-items: center;
}}
/* 空白占位行的批注样式 */
.line-wrapper.blank-placeholder .annotation {{
width: 400px;
background-color: #f8f9fa;
border-left: 1px solid #dee2e6;
padding: 10px 14px;
font-size: 14px;
display: block;
}}
.line-wrapper.blank-placeholder .annotation .annotation-item {{
margin-bottom: 8px;
padding: 8px 10px;
border-radius: 4px;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
font-size: 13px;
line-height: 1.5;
}}
.line-wrapper.blank-placeholder .annotation .annotation-item:last-child {{
margin-bottom: 0;
}}
.line-wrapper.blank-placeholder .annotation .annotation-item.added {{
background-color: #e6ffec;
border-left: 3px solid #28a745;
color: #155724;
}}
.line-wrapper.blank-placeholder .annotation .annotation-header {{
font-size: 10px;
color: #6c757d;
margin-bottom: 4px;
font-weight: bold;
text-transform: uppercase;
letter-spacing: 0.5px;
}}
.line-wrapper.blank-placeholder:hover {{
background-color: #f8f9fa;
}}
.line-wrapper.blank-placeholder:hover .main-line {{
background-color: rgba(0, 123, 255, 0.02);
}}
</style>
</head>
<body>
<div class="header">
<h1>{title}</h1>
<div class="meta">
<span>英文Wiki: wiki.projectdiablo2.com</span>
{f' | 中文Wiki: wiki.projectdiablo2.cn' if cn_content else ''}
</div>
</div>
<div class="content-container">
<div class="content-header">中文翻译(含英文变更批注)</div>
<div class="diff-content">
'''
    # Emit each Chinese line together with its English-change annotations
    if cn_content:
        for i, line in enumerate(cn_lines, 1):
            # Insert any blank placeholder rows scheduled before this line
            if i in blank_lines_to_insert:
                additions_list = blank_lines_to_insert[i]
                for addition_content in additions_list:
                    html += f'<div class="line-wrapper blank-placeholder">'
                    html += '<div class="main-line">'
                    html += '<span class="line-number">&nbsp;</span>'  # no line number shown
                    html += f'<span class="line-content">(新增英文内容占位)</span>'
                    html += '</div>'
                    # Attach the added-content annotation to the placeholder row
                    escaped_addition = html_escape(addition_content)
                    html += '<div class="annotation">'
                    html += f'<div class="annotation-item added">'
                    html += f'<div class="annotation-header">新增</div>'
                    html += f'<div>{escaped_addition}</div>'
                    html += '</div>'
                    html += '</div>'
                    html += '</div>'
            escaped_line = html_escape(line)
            has_changes = i in en_changes_by_line
            changes = en_changes_by_line.get(i, [])
            # blank-line detection (rendered with a placeholder label)
            is_empty = not line.strip()
            html += f'<div class="line-wrapper {"has-changes" if has_changes else ""} {"empty-line" if is_empty else ""}">'
            html += f'<div class="main-line">'
            html += f'<span class="line-number">{i}</span>'
            html += f'<span class="line-content">{escaped_line if not is_empty else "(空行)"}</span>'
            html += '</div>'
            # Annotations: only replacements and removals here — additions
            # were already rendered on placeholder rows above
            if has_changes:
                html += '<div class="annotation">'
                for change in changes:
                    if change['type'] == 'added':
                        # already shown on a placeholder row
                        continue
                    elif change['type'] == 'removed':
                        escaped_change = html_escape(change['content'])
                        html += f'<div class="annotation-item removed">'
                        html += f'<div class="annotation-header">删除</div>'
                        html += f'<div>{escaped_change}</div>'
                        html += '</div>'
                    elif change['type'] == 'replaced':
                        # new content rendered clean (insert highlights only, no deletions)
                        clean_new_content = generate_clean_new_content(change['old_content'], change['new_content'])
                        html += f'<div class="annotation-item replaced">'
                        html += f'<div class="annotation-header">替换</div>'
                        html += f'<div class="old-content">{html_escape(change["old_content"])}</div>'
                        html += f'<div class="new-content">{clean_new_content}</div>'
                        html += '</div>'
                html += '</div>'
            html += '</div>'
    else:
        html += '<div class="no-translation">未找到对应的中文翻译页面</div>'
    # Closing markup plus the click-to-highlight script
    html += '''
</div>
</div>
<script>
// 点击有变更的行时高亮
document.querySelectorAll('.line-wrapper.has-changes').forEach(lineWrapper => {{
lineWrapper.addEventListener('click', () => {{
// 移除所有高亮
document.querySelectorAll('.line-wrapper.highlight').forEach(line => {{
line.classList.remove('highlight');
}});
// 高亮当前行
lineWrapper.classList.add('highlight');
}});
}});
</script>
</body>
</html>'''
    return html
def save_files(title, diff_html, diff_text, full_text, timestamp, note="", revid=None, cn_content=None, old_full_text=None):
    """Write every artifact for one synced page into this run's output dir.

    Artifacts (each written only when its content is present): official
    HTML diff, text diff, latest full text, old revision text, Chinese
    translation, and the bilingual comparison page.

    Side effect: lazily creates and caches the per-run output directory in
    the module-global CURRENT_OUTPUT_DIR.
    """
    global CURRENT_OUTPUT_DIR
    # Create the per-run output directory on first use
    if CURRENT_OUTPUT_DIR is None:
        current_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        CURRENT_OUTPUT_DIR = OUTPUT_DIR / current_time_str
        CURRENT_OUTPUT_DIR.mkdir(exist_ok=True)
        print(f"创建本次执行的输出目录: {CURRENT_OUTPUT_DIR}")
    # Sanitize the title so it is safe as a filename
    safe_title = "".join(c if c.isalnum() or c in " -_." else "_" for c in title)
    time_str = timestamp[:19].replace("-", "").replace(":", "").replace("T", "_")
    base_filename = f"{safe_title}-{time_str}-{revid}" if revid else f"{safe_title}-{time_str}"
    files_to_save = []
    # 1. official MediaWiki diff HTML
    diff_file = CURRENT_OUTPUT_DIR / f"{base_filename}.diff.html"
    if diff_html:
        files_to_save.append((diff_file, diff_html))
    # 2. text-format diff
    text_diff_file = CURRENT_OUTPUT_DIR / f"{base_filename}.diff.txt"
    if diff_text:
        files_to_save.append((text_diff_file, diff_text))
    # 3. latest full content
    full_file = CURRENT_OUTPUT_DIR / f"{base_filename}.full.txt"
    if full_text:
        files_to_save.append((full_file, full_text))
    # 4. old revision content (if any)
    if old_full_text:
        old_full_file = CURRENT_OUTPUT_DIR / f"{base_filename}.old.txt"
        files_to_save.append((old_full_file, old_full_text))
    # 5. Chinese translation (if any)
    if cn_content:
        cn_file = CURRENT_OUTPUT_DIR / f"{base_filename}.cn.txt"
        files_to_save.append((cn_file, cn_content))
    # 6. bilingual comparison HTML page
    if cn_content:
        en_new_lines = full_text.splitlines() if full_text else []
        en_old_lines = old_full_text.splitlines() if old_full_text else []
        comparison_html = create_diff_html(title, diff_text, en_old_lines, en_new_lines, cn_content)
        comparison_file = CURRENT_OUTPUT_DIR / f"{base_filename}.comparison.html"
        files_to_save.append((comparison_file, comparison_html))
        # BUG FIX: the old code printed "已保存" for the comparison page here,
        # before anything was written (and the write loop printed it again).
        # The write loop below is the single source of save reports.
    # Write everything out
    for file_path, content in files_to_save:
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            print(f" → 已保存: {file_path.relative_to(OUTPUT_DIR)}")
        except Exception as e:
            print(f" → 保存文件 {file_path} 时出错: {e}")
def process_single_page(title, since_time, update_timestamp=False):
    """Sync one page: fetch latest + historical revisions, diff, translate, save.

    Returns the latest revision timestamp on success, or None when the page
    is missing or an error occurred. When update_timestamp is True the
    global last-sync timestamp file is advanced to that timestamp.
    """
    print(f"正在单独处理页面:{title}")
    # Fetch the current latest revision of the English page
    try:
        latest_content, latest_ts, latest_revid = get_page_content(WIKI_API_URL_EN, SESSION_EN, title)
        if latest_content is None:
            print("页面不存在或被删除")
            return None
        # Last revision at or before since_time (None for a brand-new page)
        old_revid = get_old_revid(title, since_time)
        diff_html = None
        diff_text = None
        old_content = None
        cn_content = None
        if old_revid:
            # Fetch the historical revision and diff it against the latest
            old_content, old_ts, _ = get_page_content(WIKI_API_URL_EN, SESSION_EN, title, old_revid)
            if old_content is not None:
                diff_text = generate_text_diff(old_content, latest_content)
                print(f" 生成了文本diff ({len(diff_text)} 字符)")
            else:
                print(f" 无法获取历史版本内容")
        else:
            # No prior revision: the page is newly created
            print(" 这是新创建的页面")
        # Look up the corresponding Chinese translation
        print(" 搜索中文翻译...")
        cn_title = search_chinese_page(title)
        if cn_title:
            print(f" 找到中文页面: {cn_title}")
            cn_content, cn_ts, cn_revid = get_page_content(WIKI_API_URL_CN, SESSION_CN, cn_title)
            if cn_content:
                print(f" 获取中文内容成功 ({len(cn_content)} 字符)")
            else:
                print(" 无法获取中文页面内容")
        else:
            print(" 未找到对应的中文翻译页面")
        # Official rendered diff from the compare API (best-effort)
        if old_revid:
            diff_params = {
                "action": "compare",
                "fromrev": old_revid,
                "torev": latest_revid,
                "format": "json"
            }
            try:
                diff_resp = SESSION_EN.get(WIKI_API_URL_EN, params=diff_params).json()
                diff_html = diff_resp.get("compare", {}).get("*", "")
            except Exception as e:
                print(f" 获取官方HTML diff时出错: {e}")
        # Persist every artifact for this page
        save_files(title, diff_html, diff_text, latest_content, latest_ts, "", latest_revid, cn_content, old_content)
        if update_timestamp:
            save_last_timestamp(latest_ts)
            print(f"已更新全局时间戳 → {latest_ts}")
        return latest_ts
    except Exception as e:
        print(f"处理页面 '{title}' 时出错: {e}")
        return None
def process_all_pages_since(since_time):
    """Process every page changed since *since_time* and advance the saved timestamp.

    Delegates each page to process_single_page, then writes the newest page
    timestamp seen back to the global last-sync file.
    """
    print("正在获取最近变更列表...")
    changes = get_recent_changes(since_time)
    if not changes:
        print("没有发现任何变更")
        return
    latest_global_ts = since_time
    # The per-page (revid, timestamp) values are unused here, so iterate
    # titles only instead of unpacking and discarding .items().
    for title in changes:
        print(f"\n处理:{title}")
        # Reuse the single-page pipeline
        page_latest_ts = process_single_page(title, since_time)
        # ISO-8601 "...Z" timestamps compare correctly as plain strings
        if page_latest_ts and page_latest_ts > latest_global_ts:
            latest_global_ts = page_latest_ts
    save_last_timestamp(latest_global_ts)
    print(f"\n全量同步完成!本次最新时间戳已更新为:{latest_global_ts}")
    print(f"文件保存在:{CURRENT_OUTPUT_DIR.resolve() if CURRENT_OUTPUT_DIR else OUTPUT_DIR.resolve()}")
def main():
    """CLI entry point: parse arguments and dispatch single-page or full sync."""
    parser = argparse.ArgumentParser(description="MediaWiki 同步工具 - 增强版支持双语对比")
    parser.add_argument("--since", type=str, help="强制从指定时间开始同步,格式如 2025-11-28T00:00:00Z")
    parser.add_argument("--title", type=str, help="只同步指定的单个页面标题")
    parser.add_argument("--update-timestamp", action="store_true",
                        help="在单页模式下,完成后仍然更新全局 last_sync_timestamp.txt")
    parser.add_argument("--run", action="store_true",
                        help="执行同步操作(必须提供此参数才能真正执行同步)")
    args = parser.parse_args()
    # Safety latch: without --run just show usage and exit
    if not args.run:
        parser.print_help()
        return
    # Determine the effective start time
    if args.since:
        since_time = args.since
        print(f"使用命令行指定的时间起点:{since_time}")
    else:
        since_time = load_last_timestamp()
        if not since_time:
            # No recorded timestamp: default to 24 hours ago (UTC).
            # datetime.utcnow() is deprecated; use an aware UTC time and
            # drop the offset so the "...Z" string format stays identical.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            since_time = (now_utc - timedelta(days=1)).isoformat(timespec='seconds') + "Z"
        print(f"使用上次记录的时间起点:{since_time}")
    # Single-page mode
    if args.title:
        process_single_page(args.title.strip(), since_time, args.update_timestamp)
        return
    # Full-sync mode (reuses the single-page pipeline per title)
    process_all_pages_since(since_time)
# Script entry point
if __name__ == "__main__":
    main()