# sync-pd2-wiki/sync.py

# -*- coding: utf-8 -*-
"""
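MediaWiki recent-changes sync tool (enhanced edition).

Nothing is synced unless --run is passed; without it only the help text is printed.

Supports:
1. Full sync of every page changed since the last recorded timestamp: --run
2. Manually chosen starting point: --since 2025-11-28T00:00:00Z
3. Syncing a single page only: --title "Page name"
4. Optionally updating the global timestamp in single-page mode: --update-timestamp
5. Fetching the historical revision and generating a diff
6. Fetching the matching Chinese translation
7. Generating a bilingual comparison web page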
"""

import os
import argparse
from pathlib import Path
from datetime import datetime
import requests
from dotenv import load_dotenv
import difflib
import json
import re
from urllib.parse import quote

# ==================== Configuration ====================
# load_dotenv() must run before the environment lookups further down.
load_dotenv()

# Root directory for all generated files; created eagerly at import time.
OUTPUT_DIR = Path("wiki_sync_output")
OUTPUT_DIR.mkdir(exist_ok=True)

# Output directory of the current run; None until save_files() creates it.
CURRENT_OUTPUT_DIR = None

# File that stores the timestamp of the previous sync.
LAST_TIMESTAMP_FILE = "last_sync_timestamp.txt"

# API endpoints of the English and Chinese wikis (overridable via environment).
WIKI_API_URL_EN = os.environ.get("WIKI_API_URL_EN", "https://wiki.projectdiablo2.com/w/api.php")
WIKI_API_URL_CN = os.environ.get("WIKI_API_URL_CN", "https://wiki.projectdiablo2.cn/w/api.php")

# One HTTP session per wiki, both announcing the same User-Agent.
SESSION_EN = requests.Session()
SESSION_EN.headers["User-Agent"] = "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"

SESSION_CN = requests.Session()
SESSION_CN.headers["User-Agent"] = "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"
# =======================================================

def load_last_timestamp():
    """Return the stripped contents of the timestamp file, or None if the file does not exist."""
    if os.path.exists(LAST_TIMESTAMP_FILE):
        with open(LAST_TIMESTAMP_FILE, encoding="utf-8") as ts_file:
            stored = ts_file.read()
        return stored.strip()
    return None

def save_last_timestamp(ts):
    """Overwrite the timestamp file with ``ts``, written as UTF-8 text."""
    ts_file = open(LAST_TIMESTAMP_FILE, mode="w", encoding="utf-8")
    with ts_file:
        ts_file.write(ts)

def get_recent_changes(since, timeout=30):
    """Return the newest listed change of every page edited or created since ``since``.

    Queries ``list=recentchanges`` on the English wiki in chronological order
    (``rcdir=newer``) and follows the API's ``continue`` tokens until the
    listing is exhausted. Because entries arrive oldest-first, a later entry
    for the same title overwrites the earlier one, which de-duplicates pages.

    Args:
        since: Start timestamp passed as ``rcstart``
            (e.g. ``2025-11-28T00:00:00Z``).
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled connection would block the sync forever.

    Returns:
        dict mapping page title -> ``(revid, timestamp)``. If a request or the
        API fails, the error is printed and whatever was collected up to that
        point is returned (possibly an empty dict).
    """
    params = {
        "action": "query",
        "list": "recentchanges",
        "rcprop": "title|ids|timestamp",
        "rctype": "edit|new",
        "rcdir": "newer",
        "rcstart": since,
        "rclimit": 500,
        "format": "json"
    }
    latest = {}
    # Deliberately best-effort: any failure ends pagination but keeps the
    # results gathered so far, so the caller can still process them.
    try:
        while True:
            response = SESSION_EN.get(WIKI_API_URL_EN, params=params, timeout=timeout)
            response.raise_for_status()
            response_data = response.json()
            if "error" in response_data:
                raise RuntimeError(response_data["error"])
            for rc in response_data.get("query", {}).get("recentchanges", []):
                latest[rc["title"]] = (rc["revid"], rc["timestamp"])
            if "continue" not in response_data:
                break
            # Merge the continuation tokens into the next request.
            params.update(response_data["continue"])
    except Exception as e:
        print(f"获取最近更改时出错: {e}")
    return latest

def get_old_revid(title, end_time, timeout=30):
    """Return the revid of the last revision of ``title`` made at or before ``end_time``.

    Asks the English wiki for a single revision, enumerating backwards
    (``rvdir=older``) starting from ``end_time``. The result is meant to be
    used as the "from" side of a diff.

    Args:
        title: Page title on the English wiki.
        end_time: Timestamp passed as ``rvstart``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The revision id, or None when the page has no revision in that range
        or when the request fails (the reason is printed in both cases).
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "ids|timestamp",
        "rvlimit": 1,
        "rvdir": "older",
        "rvstart": end_time,
        "format": "json"
    }
    try:
        response = SESSION_EN.get(WIKI_API_URL_EN, params=params, timeout=timeout)
        response.raise_for_status()
        pages = response.json()["query"]["pages"]
        # Only one title was requested, so only the first page entry matters.
        page = next(iter(pages.values()))
        revisions = page.get("revisions")
        if revisions:
            return revisions[0]["revid"]
        print(f"  页面 '{title}' 在指定时间前没有找到修订版本")
        return None
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller treat the
        # page as having no earlier revision.
        print(f"获取旧版本ID时出错: {e}")
        return None

def get_page_content(wiki_url, session, title, revid=None, timeout=30):
    """Fetch the wikitext of a page, either its current or a specific revision.

    Args:
        wiki_url: URL of the wiki's ``api.php`` endpoint.
        session: Object with a ``requests.Session``-compatible ``get`` method.
        title: Page title to fetch.
        revid: When given, fetch exactly this revision (sent as both
            ``rvstartid`` and ``rvendid``); otherwise the request carries no
            revision bounds.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``(content, timestamp, revid)`` of the first revision in the response,
        or ``(None, None, None)`` when the page has no revisions or the
        request fails (the error is printed).
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "content|timestamp|ids",
        "rvslots": "main",
        "format": "json"
    }
    if revid:
        # Pin the revision window to a single revision.
        params["rvstartid"] = revid
        params["rvendid"] = revid

    try:
        response = session.get(wiki_url, params=params, timeout=timeout)
        response.raise_for_status()
        pages = response.json()["query"]["pages"]
        # Only one title was requested, so only the first page entry matters.
        page = next(iter(pages.values()))

        if "revisions" not in page:
            return None, None, None

        rev = page["revisions"][0]
        return rev["slots"]["main"]["*"], rev["timestamp"], rev["revid"]
    except Exception as e:
        # Best-effort fetch: callers check for None instead of catching.
        print(f"获取页面内容时出错: {e}")
        return None, None, None

def generate_text_diff(old_text, new_text):
    """Build a git-style unified diff between two revisions of a page.

    Both texts are split into lines *without* their line endings and every
    diff line is terminated explicitly. Keeping the original endings would
    glue a final line that lacks a trailing newline onto the next diff line
    (producing e.g. ``-old+new``).

    Args:
        old_text: Previous wikitext; empty or None means there is no previous
            version.
        new_text: Current wikitext.

    Returns:
        The marker string ``"新创建页面"`` when ``old_text`` is empty,
        an empty string when the two texts have the same lines, otherwise the
        unified diff with each line ending in ``"\\n"``. A difference that
        consists only of a trailing newline is not reported.
    """
    if not old_text:
        return "新创建页面"

    diff_lines = list(difflib.unified_diff(
        old_text.splitlines(),
        new_text.splitlines(),
        lineterm=''
    ))
    if not diff_lines:
        return ''
    return '\n'.join(diff_lines) + '\n'

# Matches a unified-diff hunk header such as "@@ -12,7 +12,9 @@".
_HUNK_HEADER_RE = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')


def _diff_entry(entry_type, content, old_line=None, new_line=None):
    """Build one parsed-diff record with the keys shared by every entry type."""
    return {
        'type': entry_type,
        'content': content,
        'old_line': old_line,
        'new_line': new_line
    }


def parse_diff_with_line_numbers(diff_text):
    """Parse unified-diff text into records annotated with line numbers.

    Args:
        diff_text: Unified diff text. Empty input, or text starting with the
            new-page marker ``"新创建页面"``, yields an empty list.

    Returns:
        A list of dicts, one per diff line, each with the keys ``type``,
        ``content``, ``old_line`` and ``new_line``:

        * ``hunk``    - an ``@@`` header; additionally carries ``old_start``,
          ``old_count``, ``new_start`` and ``new_count``.
        * ``header``  - a ``---`` / ``+++`` file header.
        * ``removed`` - content without the leading ``-``; ``old_line`` set.
        * ``added``   - content without the leading ``+``; ``new_line`` set.
        * ``context`` - content without the leading space; both numbers set.
        * ``other``   - anything else (kept verbatim, no line numbers).
    """
    if not diff_text or diff_text.startswith("新创建页面"):
        return []

    parsed_lines = []
    current_old_line = 0
    current_new_line = 0
    # Lines the current hunk header still promises on each side. They let us
    # tell a removed "----" or an added "++x" body line apart from a real
    # "---" / "+++" file header.
    old_remaining = 0
    new_remaining = 0
    in_hunk = False

    for line in diff_text.splitlines():
        if line.startswith('@@'):
            match = _HUNK_HEADER_RE.match(line)
            if not match:
                parsed_lines.append(_diff_entry('other', line))
                continue

            old_start = int(match.group(1))
            # An omitted count means exactly one line.
            old_count = int(match.group(2)) if match.group(2) else 1
            new_start = int(match.group(3))
            new_count = int(match.group(4)) if match.group(4) else 1

            current_old_line = old_start
            current_new_line = new_start
            old_remaining = old_count
            new_remaining = new_count
            in_hunk = True

            hunk = _diff_entry('hunk', line)
            hunk.update(
                old_start=old_start,
                old_count=old_count,
                new_start=new_start,
                new_count=new_count,
            )
            parsed_lines.append(hunk)
            continue

        expects_removed = in_hunk and old_remaining > 0
        expects_added = in_hunk and new_remaining > 0
        if (line.startswith('---') and not expects_removed) or \
                (line.startswith('+++') and not expects_added):
            # File header (only when the hunk is not waiting for body lines).
            parsed_lines.append(_diff_entry('header', line))
        elif in_hunk:
            if line.startswith('-'):
                parsed_lines.append(
                    _diff_entry('removed', line[1:], old_line=current_old_line))
                current_old_line += 1
                old_remaining -= 1
            elif line.startswith('+'):
                parsed_lines.append(
                    _diff_entry('added', line[1:], new_line=current_new_line))
                current_new_line += 1
                new_remaining -= 1
            elif line.startswith(' '):
                parsed_lines.append(
                    _diff_entry('context', line[1:],
                                old_line=current_old_line,
                                new_line=current_new_line))
                current_old_line += 1
                current_new_line += 1
                old_remaining -= 1
                new_remaining -= 1
            else:
                # Anything else inside a hunk (e.g. an empty line).
                parsed_lines.append(_diff_entry('other', line))
        else:
            # Text outside of any hunk.
            parsed_lines.append(_diff_entry('other', line))

    return parsed_lines

def search_chinese_page(title, timeout=30):
    """Look up the page on the Chinese wiki that corresponds to ``title``.

    Two title searches are tried in order: the quoted title (exact phrase)
    and, if that finds nothing, the plain title. The plain title is passed
    as-is because the HTTP client URL-encodes query parameters itself;
    pre-encoding spaces as ``%20`` would make the wiki search for a literal
    ``%20``.

    Args:
        title: Page title taken from the English wiki.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The title of the first search hit, or None when nothing matches or
        the request fails (the error is printed).
    """
    base_params = {
        "action": "query",
        "list": "search",
        "srwhat": "title",
        "srlimit": 5,
        "format": "json"
    }

    try:
        for query in (f'"{title}"', title):
            response = SESSION_CN.get(
                WIKI_API_URL_CN,
                params={**base_params, "srsearch": query},
                timeout=timeout,
            )
            response.raise_for_status()
            search_results = response.json().get("query", {}).get("search", [])
            if search_results:
                # The first hit is taken as the translation.
                return search_results[0]["title"]
    except Exception as e:
        # Best-effort: a failed search is treated like "no translation found".
        print(f"搜索中文页面时出错: {e}")

    return None

# Stylesheet of the comparison page. A plain (non-f) string, so CSS braces are
# written literally.
_COMPARISON_PAGE_CSS = '''
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background-color: #f5f5f5;
            line-height: 1.6;
        }

        .header {
            background-color: #fff;
            padding: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            margin-bottom: 20px;
        }

        .header h1 {
            color: #333;
            font-size: 24px;
            margin-bottom: 10px;
        }

        .header .meta {
            color: #666;
            font-size: 14px;
        }

        .container {
            display: flex;
            max-width: 100%;
            margin: 0 auto;
            background-color: #fff;
            min-height: calc(100vh - 100px);
        }

        .column {
            flex: 1;
            overflow: hidden;
            display: flex;
            flex-direction: column;
        }

        .column-header {
            background-color: #e9ecef;
            padding: 12px 20px;
            font-weight: bold;
            color: #495057;
            border-bottom: 1px solid #dee2e6;
        }

        .diff-content {
            flex: 1;
            overflow-y: auto;
            font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
            font-size: 13px;
            line-height: 1.4;
        }

        .line {
            display: flex;
            min-height: 20px;
            position: relative;
        }

        .line-number {
            width: 60px;
            text-align: right;
            padding: 0 10px;
            background-color: #f8f9fa;
            color: #6c757d;
            border-right: 1px solid #dee2e6;
            user-select: none;
            flex-shrink: 0;
        }

        .line.highlight {
            background-color: rgba(255, 235, 59, 0.3) !important;
            animation: highlight 2s ease-in-out;
        }

        @keyframes highlight {
            0% { background-color: rgba(255, 235, 59, 0.8); }
            100% { background-color: rgba(255, 235, 59, 0.3); }
        }

        .line-content {
            flex: 1;
            padding: 0 10px;
            white-space: pre-wrap;
            word-break: break-word;
        }

        /* Diff specific styles */
        .line.diff-added {
            background-color: #e6ffec;
        }

        .line.diff-added .line-content {
            background-color: #cdffd8;
            border-left: 3px solid #28a745;
        }

        .line.diff-removed {
            background-color: #ffeef0;
        }

        .line.diff-removed .line-content {
            background-color: #fdb8c0;
            border-left: 3px solid #dc3545;
            text-decoration: line-through;
        }

        .line.diff-context {
            background-color: #ffffff;
        }

        .line.diff-context .line-content {
            background-color: #ffffff;
        }

        .line.diff-hunk {
            background-color: #f8f9fa;
            color: #6c757d;
            font-style: italic;
        }

        .line.diff-hunk .line-content {
            background-color: #f1f3f4;
        }

        .line.diff-header {
            background-color: #e9ecef;
            color: #495057;
            font-style: italic;
        }

        .line.diff-header .line-content {
            background-color: #e9ecef;
        }

        /* Separator between columns */
        .separator {
            width: 1px;
            background-color: #dee2e6;
            box-shadow: 0 0 5px rgba(0,0,0,0.1);
            position: relative;
            z-index: 10;
        }

        /* Scrollbar styling */
        .diff-content::-webkit-scrollbar {
            width: 8px;
            height: 8px;
        }

        .diff-content::-webkit-scrollbar-track {
            background: #f1f1f1;
        }

        .diff-content::-webkit-scrollbar-thumb {
            background: #888;
            border-radius: 4px;
        }

        .diff-content::-webkit-scrollbar-thumb:hover {
            background: #555;
        }

        /* Responsive design */
        @media (max-width: 768px) {
            .container {
                flex-direction: column;
            }

            .separator {
                width: 100%;
                height: 1px;
            }
        }

        /* Special styling for new page */
        .new-page-notice {
            background-color: #d4edda;
            color: #155724;
            padding: 15px 20px;
            margin-bottom: 20px;
            border-left: 4px solid #28a745;
        }

        .no-translation {
            background-color: #fff3cd;
            color: #856404;
            padding: 15px 20px;
            margin-bottom: 20px;
            border-left: 4px solid #ffc107;
        }

        /* Line linking styles */
        .line[data-cn-line] {
            cursor: pointer;
        }

        .line:hover {
            background-color: rgba(0, 123, 255, 0.05);
        }
    '''

# Markup that closes the English column and opens the Chinese one.
_COMPARISON_PAGE_MIDDLE = '''
            </div>
        </div>

        <div class="separator"></div>

        <div class="column">
            <div class="column-header">中文翻译</div>
            <div class="diff-content" id="cn-content">
'''

# Closing markup plus the page script. This is a plain (non-f) string, so the
# JavaScript braces must be single: doubled braces would reach the browser
# verbatim and "scrollIntoView({{ ... }})" is a syntax error that disables the
# whole script.
_COMPARISON_PAGE_TAIL = '''
            </div>
        </div>
    </div>

    <script>
        // 同步滚动功能
        const enDiff = document.querySelector('#en-diff');
        const cnContent = document.querySelector('#cn-content');
        const cnLines = {};

        // 构建中文行的位置映射
        if (document.getElementById('cn-lines')) {
            document.querySelectorAll('#cn-lines .line').forEach(line => {
                const lineNum = line.querySelector('.line-number').textContent;
                if (lineNum) {
                    cnLines[lineNum] = line.offsetTop;
                }
            });
        }

        // 同步滚动
        if (enDiff && cnContent) {
            enDiff.addEventListener('scroll', () => {
                cnContent.scrollTop = enDiff.scrollTop;
            });

            cnContent.addEventListener('scroll', () => {
                enDiff.scrollTop = cnContent.scrollTop;
            });
        }

        // 点击英文行时，高亮对应的中文行
        document.querySelectorAll('[data-cn-line]').forEach(enLine => {
            enLine.addEventListener('click', () => {
                const cnLineNum = enLine.getAttribute('data-cn-line');
                if (cnLineNum) {
                    const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
                    if (cnLine) {
                        // 移除所有高亮
                        document.querySelectorAll('.line.highlight').forEach(line => {
                            line.classList.remove('highlight');
                        });

                        // 高亮英文行和中文行
                        enLine.classList.add('highlight');
                        cnLine.classList.add('highlight');

                        // 滚动到中文行的位置
                        cnLine.scrollIntoView({ behavior: 'smooth', block: 'center' });
                    }
                }
            });

            // 鼠标悬停时显示预览
            enLine.addEventListener('mouseenter', () => {
                const cnLineNum = enLine.getAttribute('data-cn-line');
                if (cnLineNum) {
                    const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
                    if (cnLine) {
                        enLine.style.backgroundColor = 'rgba(0, 123, 255, 0.1)';
                        cnLine.style.backgroundColor = 'rgba(0, 123, 255, 0.1)';
                    }
                }
            });

            enLine.addEventListener('mouseleave', () => {
                if (!enLine.classList.contains('highlight')) {
                    enLine.style.backgroundColor = '';
                }
                const cnLineNum = enLine.getAttribute('data-cn-line');
                if (cnLineNum) {
                    const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
                    if (cnLine && !cnLine.classList.contains('highlight')) {
                        cnLine.style.backgroundColor = '';
                    }
                }
            });
        });
    </script>
</body>
</html>'''


def _cn_link_attrs(line_no, cn_line_count):
    """Return the (data attribute, tooltip) pair linking an English line to the same-numbered Chinese line."""
    if line_no and line_no <= cn_line_count:
        return f'data-cn-line="{line_no}"', f'中文第{line_no}行'
    return '', ''


def create_diff_html(title, en_diff, en_old_lines, en_new_lines, cn_content=None):
    """Render the bilingual comparison page as one self-contained HTML string.

    The left column shows the English diff (or, when there is no parsable
    diff, the full new English text); the right column shows the Chinese
    text. English lines are linked to Chinese lines purely by position: new
    line number N points at Chinese line N whenever such a line exists.

    All text coming from the wikis (title, diff lines, page lines) is
    HTML-escaped so wikitext such as ``<ref>`` or ``<div>`` is displayed
    literally instead of being interpreted by the browser.

    Args:
        title: Page title, shown in ``<title>`` and the page heading.
        en_diff: Unified diff text, the new-page marker, or None/empty.
        en_old_lines: Lines of the old English text. Not used by the
            rendering; kept so existing callers keep working.
        en_new_lines: Lines of the new English text; shown in full when
            ``en_diff`` yields no parsed entries.
        cn_content: Chinese wikitext, or None when no translation was found.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally because a module named ``html`` is not among the
    # file-level imports.
    from html import escape

    cn_lines = cn_content.splitlines() if cn_content else []
    cn_line_count = len(cn_lines)

    parsed_diff = parse_diff_with_line_numbers(en_diff) if en_diff else []

    safe_title = escape(title)
    cn_wiki_note = ' | 中文Wiki: wiki.projectdiablo2.cn' if cn_content else ''

    # Collect fragments and join once instead of growing a string in a loop.
    parts = [f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Wiki Diff: {safe_title}</title>
    <style>{_COMPARISON_PAGE_CSS}</style>
</head>
<body>
    <div class="header">
        <h1>{safe_title}</h1>
        <div class="meta">
            <span>英文Wiki: wiki.projectdiablo2.com</span>
            {cn_wiki_note}
        </div>
    </div>

    <div class="container">
        <div class="column">
            <div class="column-header">English Diff</div>
            <div class="diff-content" id="en-diff">
''']

    # English column.
    if parsed_diff:
        for item in parsed_diff:
            kind = item['type']
            text = escape(item['content'])
            if kind == 'hunk':
                parts.append(f'<div class="line diff-hunk"><span class="line-content">{text}</span></div>')
            elif kind == 'header':
                parts.append(f'<div class="line diff-header"><span class="line-content">{text}</span></div>')
            elif kind == 'added':
                new_line = item['new_line']
                link_attr, tooltip = _cn_link_attrs(new_line, cn_line_count)
                number = new_line or ""
                parts.append(f'<div class="line diff-added" {link_attr} title="{tooltip}"><span class="line-number">{number}</span><span class="line-content">{text}</span></div>')
            elif kind == 'removed':
                number = item['old_line'] or ""
                parts.append(f'<div class="line diff-removed" title="已删除"><span class="line-number">{number}</span><span class="line-content">{text}</span></div>')
            elif kind == 'context':
                new_line = item['new_line']
                link_attr, tooltip = _cn_link_attrs(new_line, cn_line_count)
                parts.append(f'<div class="line diff-context" {link_attr} title="{tooltip}"><span class="line-number">{new_line}</span><span class="line-content">{text}</span></div>')
            else:
                parts.append(f'<div class="line"><span class="line-content">{text}</span></div>')
    else:
        # New page, or nothing to diff: show the whole new text instead.
        if en_diff and en_diff.startswith("新创建页面"):
            parts.append('<div class="new-page-notice">新创建页面</div>')

        for i, line in enumerate(en_new_lines or [], 1):
            link_attr, tooltip = _cn_link_attrs(i, cn_line_count)
            parts.append(f'<div class="line diff-context" {link_attr} title="{tooltip}"><span class="line-number">{i}</span><span class="line-content">{escape(line)}</span></div>')

    parts.append(_COMPARISON_PAGE_MIDDLE)

    # Chinese column.
    if cn_content:
        parts.append('<div id="cn-lines">')
        for i, line in enumerate(cn_lines, 1):
            parts.append(f'<div class="line diff-context" id="cn-line-{i}"><span class="line-number">{i}</span><span class="line-content">{escape(line)}</span></div>')
        parts.append('</div>')
    else:
        parts.append('<div class="no-translation">未找到对应的中文翻译页面</div>')

    parts.append(_COMPARISON_PAGE_TAIL)

    return ''.join(parts)

def save_files(title, diff_html, diff_text, full_text, timestamp, note="", revid=None, cn_content=None, old_full_text=None):
    """Write every artifact produced for one page into this run's output directory.

    On the first call of a run a directory named after the current local time
    is created under ``OUTPUT_DIR`` and remembered in ``CURRENT_OUTPUT_DIR``.
    Files share the base name ``<safe title>-<timestamp>[-<revid>]``; an
    artifact is written only when its content is non-empty:

    * ``.diff.html``        - ``diff_html``
    * ``.diff.txt``         - ``diff_text``
    * ``.full.txt``         - ``full_text``
    * ``.old.txt``          - ``old_full_text``
    * ``.cn.txt``           - ``cn_content``
    * ``.comparison.html``  - bilingual page, only when ``cn_content`` exists

    Args:
        title: Page title; characters other than alphanumerics, space,
            ``-``, ``_`` and ``.`` are replaced by ``_`` in the file name.
        diff_html: HTML diff as returned by the wiki, or None.
        diff_text: Unified text diff, or None.
        full_text: Latest page text.
        timestamp: Revision timestamp; its first 19 characters are used in
            the file name.
        note: Unused; kept for interface compatibility.
        revid: Revision id appended to the file name when truthy.
        cn_content: Chinese page text, or None.
        old_full_text: Text of the older revision, or None.

    A failed write is reported and does not stop the remaining files; the
    "saved" message is printed only after a file was actually written.
    """
    global CURRENT_OUTPUT_DIR

    # Create the per-run output directory lazily, on the first saved page.
    if CURRENT_OUTPUT_DIR is None:
        current_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        CURRENT_OUTPUT_DIR = OUTPUT_DIR / current_time_str
        # parents=True: still works if OUTPUT_DIR was removed after start-up.
        CURRENT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        print(f"创建本次执行的输出目录: {CURRENT_OUTPUT_DIR}")

    safe_title = "".join(c if c.isalnum() or c in " -_." else "_" for c in title)
    time_str = timestamp[:19].replace("-", "").replace(":", "").replace("T", "_")
    base_filename = f"{safe_title}-{time_str}-{revid}" if revid else f"{safe_title}-{time_str}"

    # (file suffix, content, text appended to the success message)
    artifacts = [
        (".diff.html", diff_html, ""),
        (".diff.txt", diff_text, ""),
        (".full.txt", full_text, ""),
        (".old.txt", old_full_text, ""),
        (".cn.txt", cn_content, ""),
    ]

    if cn_content:
        en_new_lines = full_text.splitlines() if full_text else []
        en_old_lines = old_full_text.splitlines() if old_full_text else []
        comparison_html = create_diff_html(title, diff_text, en_old_lines, en_new_lines, cn_content)
        artifacts.append((".comparison.html", comparison_html, " (双语对比页面)"))

    for suffix, content, label in artifacts:
        if not content:
            continue
        file_path = CURRENT_OUTPUT_DIR / f"{base_filename}{suffix}"
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
        except (OSError, UnicodeError) as e:
            print(f"  → 保存文件 {file_path} 时出错: {e}")
        else:
            print(f"  → 已保存: {file_path.relative_to(OUTPUT_DIR)}{label}")

def _fetch_official_diff_html(from_revid, to_revid, timeout=30):
    """Return the HTML diff produced by the English wiki's ``action=compare``, or None on failure."""
    diff_params = {
        "action": "compare",
        "fromrev": from_revid,
        "torev": to_revid,
        "format": "json"
    }
    try:
        response = SESSION_EN.get(WIKI_API_URL_EN, params=diff_params, timeout=timeout)
        response.raise_for_status()
        return response.json().get("compare", {}).get("*", "")
    except Exception as e:
        # The official diff is optional; report the problem and carry on.
        print(f"  获取官方HTML diff时出错: {e}")
        return None


def process_single_page(title, since_time, update_timestamp=False):
    """Sync one page: fetch it, diff it against its state at ``since_time``, and save the results.

    Steps, in order: fetch the latest English revision; find the last
    revision at or before ``since_time`` and, if there is one, build a text
    diff against it; look up and fetch the Chinese translation; fetch the
    wiki's own HTML diff; hand everything to ``save_files``.

    Args:
        title: Page title on the English wiki.
        since_time: Timestamp that defines the "old" side of the diff.
        update_timestamp: When True, also store the latest revision's
            timestamp as the global last-sync timestamp.

    Returns:
        The timestamp of the latest revision, or None when the page cannot
        be fetched or any step raises (the error is printed).
    """
    print(f"正在单独处理页面：{title}")

    try:
        latest_content, latest_ts, latest_revid = get_page_content(WIKI_API_URL_EN, SESSION_EN, title)
        if latest_content is None:
            print("页面不存在或被删除")
            return None

        old_revid = get_old_revid(title, since_time)

        diff_html = None
        diff_text = None
        old_content = None
        cn_content = None

        if old_revid:
            old_content, _, _ = get_page_content(WIKI_API_URL_EN, SESSION_EN, title, old_revid)

            if old_content is not None:
                diff_text = generate_text_diff(old_content, latest_content)
                print(f"  生成了文本diff ({len(diff_text)} 字符)")
            else:
                print("  无法获取历史版本内容")
        else:
            # No revision at or before since_time: the page is new.
            print("  这是新创建的页面")

        print("  搜索中文翻译...")
        cn_title = search_chinese_page(title)
        if cn_title:
            print(f"  找到中文页面: {cn_title}")
            cn_content, _, _ = get_page_content(WIKI_API_URL_CN, SESSION_CN, cn_title)
            if cn_content:
                print(f"  获取中文内容成功 ({len(cn_content)} 字符)")
            else:
                print("  无法获取中文页面内容")
        else:
            print("  未找到对应的中文翻译页面")

        # The wiki's own HTML diff only exists when there is an old revision.
        if old_revid:
            diff_html = _fetch_official_diff_html(old_revid, latest_revid)

        save_files(title, diff_html, diff_text, latest_content, latest_ts, "", latest_revid, cn_content, old_content)

        if update_timestamp:
            save_last_timestamp(latest_ts)
            print(f"已更新全局时间戳 → {latest_ts}")

        return latest_ts
    except Exception as e:
        # Per-page boundary: one broken page must not abort a full sync.
        print(f"处理页面 '{title}' 时出错: {e}")
        return None

def process_all_pages_since(since_time):
    """Sync every page reported as changed since ``since_time``.

    Each changed title goes through ``process_single_page``. The greatest
    timestamp returned by those calls (compared as strings, starting from
    ``since_time``) is stored as the new last-sync timestamp. Nothing is
    stored when the change list is empty.
    """
    print("正在获取最近变更列表...")
    changes = get_recent_changes(since_time)
    if not changes:
        print("没有发现任何变更")
        return

    newest_ts = since_time
    for page_title in changes:
        print(f"\n处理：{page_title}")
        # Reuse the single-page pipeline for every changed title.
        page_ts = process_single_page(page_title, since_time)
        if not page_ts:
            continue
        if page_ts > newest_ts:
            newest_ts = page_ts

    save_last_timestamp(newest_ts)
    print(f"\n全量同步完成！本次最新时间戳已更新为：{newest_ts}")
    output_root = CURRENT_OUTPUT_DIR if CURRENT_OUTPUT_DIR else OUTPUT_DIR
    print(f"文件保存在：{output_root.resolve()}")

def main():
    """Command-line entry point.

    Parses the arguments and, only when ``--run`` is given, starts a sync;
    otherwise the help text is printed and nothing else happens.

    The starting time is chosen in this order: ``--since``, then the stored
    last-sync timestamp, then "24 hours ago" in UTC. With ``--title`` only
    that page is processed (``--update-timestamp`` controls whether the
    stored timestamp is updated); without it all changed pages are synced.
    """
    # Local import, like the original: these names are only needed here.
    from datetime import timedelta, timezone

    parser = argparse.ArgumentParser(description="MediaWiki 同步工具 - 增强版支持双语对比")
    parser.add_argument("--since", type=str, help="强制从指定时间开始同步，格式如 2025-11-28T00:00:00Z")
    parser.add_argument("--title", type=str, help="只同步指定的单个页面标题")
    parser.add_argument("--update-timestamp", action="store_true",
                        help="在单页模式下，完成后仍然更新全局 last_sync_timestamp.txt")
    parser.add_argument("--run", action="store_true",
                        help="执行同步操作（必须提供此参数才能真正执行同步）")

    args = parser.parse_args()

    # Safety switch: without --run only show the help text.
    if not args.run:
        parser.print_help()
        return

    # Decide which starting time to use.
    if args.since:
        since_time = args.since
        print(f"使用命令行指定的时间起点：{since_time}")
    else:
        since_time = load_last_timestamp()
        if since_time:
            print(f"使用上次记录的时间起点：{since_time}")
        else:
            # No usable record: fall back to one day ago. An aware UTC
            # datetime replaces the deprecated datetime.utcnow().
            one_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
            since_time = one_day_ago.strftime("%Y-%m-%dT%H:%M:%SZ")
            print(f"未找到上次同步记录，默认从 24 小时前开始：{since_time}")

    # Single-page mode.
    if args.title:
        process_single_page(args.title.strip(), since_time, args.update_timestamp)
        return

    # Full mode: every changed page goes through the single-page pipeline.
    process_all_pages_since(since_time)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()