commit 8e93b5b82b28ac637f3324b85aaf99f9617b14c4
Author: wdjwxh <wdjwxh@gmail.com>
Date:   Wed Dec 3 15:54:39 2025 +0800

    init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..906d727
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,164 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak
+venv.bak
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Custom ignore entries (files generated by sync.py)
+last_sync_timestamp.txt
+wiki_sync_output/
\ No newline at end of file
diff --git a/sync.py b/sync.py
new file mode 100644
index 0000000..05fd727
--- /dev/null
+++ b/sync.py
@@ -0,0 +1,398 @@
+# -*- coding: utf-8 -*-
+"""
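+MediaWiki recent-changes sync tool - Crimson final edition.
+
+Every invocation needs --run; without it the script only prints its help text.
+Supported modes:
+1. Regular full sync (no further arguments): --run
+2. Manually chosen starting time: --run --since 2025-11-28T00:00:00Z
+3. Sync a single page only: --run --title "Page name"
+4. In single-page mode, optionally update the global timestamp: --update-timestamp
+5. All diffs are generated through the official action=compare API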
+"""
+
+import os
+import argparse
+from pathlib import Path
+from datetime import datetime
+import requests
+
+# ==================== Configuration ====================
+# api.php endpoint of the wiki that is being mirrored.
+WIKI_API_URL = "https://wiki.projectdiablo2.com/w/api.php"   # <- change this to your own (overseas) wiki
+# Root directory for all generated files; note that it is created as a side
+# effect of importing this module.
+OUTPUT_DIR = Path("wiki_sync_output")
+OUTPUT_DIR.mkdir(exist_ok=True)
+
+# Module-level global holding the output directory of the current run.
+# It stays None until save_files() creates the per-run subdirectory.
+CURRENT_OUTPUT_DIR = None
+
+# Relative path, so the file is resolved against the current working directory.
+LAST_TIMESTAMP_FILE = "last_sync_timestamp.txt"
+
+# One shared HTTP session (with a descriptive User-Agent) for every API call.
+SESSION = requests.Session()
+SESSION.headers.update({
+    "User-Agent": "WikiSyncTool/3.0 (your-email@example.com; MediaWiki Sync Bot)"
+})
+# ================================================
+
+def load_last_timestamp():
+    if not os.path.exists(LAST_TIMESTAMP_FILE):
+        return None
+    with open(LAST_TIMESTAMP_FILE, encoding="utf-8") as f:
+        return f.read().strip()
+
+def save_last_timestamp(ts):
+    with open(LAST_TIMESTAMP_FILE, "w", encoding="utf-8") as f:
+        f.write(ts)
+
+def get_recent_changes(since):
+    """获取自 since 时间后每个页面的最新 revid（自动去重）"""
+    params = {
+        "action": "query",
+        "list": "recentchanges",
+        "rcprop": "title|ids|timestamp",
+        "rctype": "edit|new",
+        "rcdir": "newer",
+        "rcstart": since,
+        "rclimit": 500,
+        "format": "json"
+    }
+    latest = {}
+    while True:
+        try:
+            r = SESSION.get(WIKI_API_URL, params=params)
+            r.raise_for_status()
+            response_data = r.json()
+            if "error" in response_data:
+                raise Exception(response_data["error"])
+            for rc in response_data.get("query", {}).get("recentchanges", []):
+                latest[rc["title"]] = (rc["revid"], rc["timestamp"])
+            if "continue" not in response_data:
+                break
+            params.update(response_data["continue"])
+        except Exception as e:
+            print(f"获取最近更改时出错: {e}")
+            break
+    return latest
+
+def get_old_revid(title, end_time):
+    """获取 ≤ end_time 的最后一次修订的 revid（用于 fromrev）"""
+    params = {
+        "action": "query",
+        "prop": "revisions",
+        "titles": title,
+        "rvprop": "ids|timestamp",
+        "rvlimit": 1,  # 获取2个版本，确保能找到不同的版本
+        "rvdir": "older",
+        "rvstart": end_time,
+        "format": "json"
+    }
+    try:
+        r = SESSION.get(WIKI_API_URL, params=params).json()
+        url = WIKI_API_URL + "?" + "&".join([f"{k}={v}" for k, v in params.items()])
+        print(f"  请求URL: {url}")
+        pages = r["query"]["pages"]
+        page = next(iter(pages.values()))
+        if "revisions" not in page:
+            print(f"  页面 '{title}' 在指定时间前没有找到修订版本")
+            return None
+        
+        revisions = page["revisions"]
+        if len(revisions) >= 1:
+            return revisions[0]["revid"]
+        print(f"  页面 '{title}' 在指定时间前没有找到修订版本")
+        return None
+    except Exception as e:
+        print(f"获取旧版本ID时出错: {e}")
+        return None
+
+def get_official_diff_and_content(title, from_revid, to_revid):
+    # 获取官方 diff（HTML）
+    diff_params = {
+        "action": "compare",
+        "fromrev": from_revid or "",
+        "torev": to_revid,
+        "format": "json"
+    }
+    
+    print(f"  获取diff: fromrev={from_revid}, torev={to_revid}")
+    
+    try:
+        diff_resp = SESSION.get(WIKI_API_URL, params=diff_params).json()
+        print(f"  Diff响应: {list(diff_resp.keys())}")
+        diff_html = diff_resp.get("compare", {}).get("*", "<p>无法获取 diff</p>")
+        print(f"  Diff内容长度: {len(diff_html)} 字符")
+
+        # 获取最新完整内容
+        content_params = {
+            "action": "query",
+            "prop": "revisions",
+            "titles": title,
+            "rvprop": "content|timestamp",
+            "rvslots": "main",
+            "format": "json"
+        }
+        r = SESSION.get(WIKI_API_URL, params=content_params).json()
+        page = next(iter(r["query"]["pages"].values()))
+        if "revisions" not in page:
+            return None, None, None
+        rev = page["revisions"][0]
+        full_text = rev["slots"]["main"]["*"]
+        ts = rev["timestamp"]
+        return diff_html, full_text, ts
+    except Exception as e:
+        print(f"获取diff和内容时出错: {e}")
+        return None, None, None
+
+def save_files(title, diff_html, full_text, timestamp, note="", revid=None):
+    global CURRENT_OUTPUT_DIR
+    
+    # 确保本次执行的输出目录已经创建
+    if CURRENT_OUTPUT_DIR is None:
+        current_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+        CURRENT_OUTPUT_DIR = OUTPUT_DIR / current_time_str
+        CURRENT_OUTPUT_DIR.mkdir(exist_ok=True)
+        print(f"创建本次执行的输出目录: {CURRENT_OUTPUT_DIR}")
+    
+    safe_title = "".join(c if c.isalnum() or c in " -_." else "_" for c in title)
+    time_str = timestamp[:19].replace("-", "").replace(":", "").replace("T", "_")
+    # 简化文件名格式，只包含标题、时间和revid
+    base_filename = f"{safe_title}-{time_str}-{revid}" if revid else f"{safe_title}-{time_str}"
+    
+    diff_file = CURRENT_OUTPUT_DIR / f"{base_filename}.diff.html"
+    full_file = CURRENT_OUTPUT_DIR / f"{base_filename}.full.txt"
+
+    # 美化 HTML diff，使用类似git diff的配色方案
+    # 先处理diff_html，将ins/del标签替换为span标签
+    processed_diff_html = diff_html.replace('<ins class="diffchange', '<span class="diffchange added"').replace('</ins>', '</span>').replace('<del class="diffchange', '<span class="diffchange deleted"').replace('</del>', '</span>')
+    # 再处理diff标记，将data-marker属性替换为实际的span元素
+    processed_diff_html = processed_diff_html.replace('<td class="diff-marker" data-marker="−"></td>', '<td class="diff-marker"><span class="minus-marker">−</span></td>').replace('<td class="diff-marker" data-marker="+"></td>', '<td class="diff-marker"><span class="plus-marker">+</span></td>')
+    
+    html_wrapper = f'''<!DOCTYPE html>
+<html><head><meta charset="utf-8"><title>Diff: {title}</title>
+<style>
+body {{
+  font-family: system-ui, sans-serif;
+  margin: 20px;
+}}
+table.diff {{
+  border-collapse: collapse;
+  font-family: monospace;
+  width: 100%;
+  table-layout: fixed;
+}}
+table.diff td {{
+  padding: 0 5px;
+  vertical-align: top;
+  white-space: pre-wrap;
+  word-break: break-all;
+  font-size: 14px;
+  line-height: 1.4;
+}}
+table.diff col.diff-marker {{
+  width: 20px;
+  text-align: right;
+  background-color: #fafafa;
+}}
+table.diff col.diff-content {{
+  width: auto;
+}}
+table.diff col.diff-addedline,
+table.diff col.diff-deletedline {{
+  width: 50%;
+}}
+.diff-addedline {{
+  background-color: #dfd;
+}}
+.diff-addedline .diffchange {{
+  background-color: #9e9;
+  color: #000;
+}}
+.diff-deletedline {{
+  background-color: #fee8e8;
+}}
+.diff-deletedline .diffchange {{
+  background-color: #faa;
+  color: #000;
+}}
+.diff-context {{
+  background-color: #fafafa;
+}}
+.diff-context td {{
+  color: #777;
+}}
+.diff-marker {{
+  font-weight: bold;
+  text-align: right;
+  padding: 0 4px;
+}}
+.diff-lineno {{
+  background-color: #f0f0f0;
+  text-align: right;
+  padding: 0 4px;
+}}
+.diff-addedline .diff-marker {{
+  color: #080;
+}}
+.diff-deletedline .diff-marker {{
+  color: #800;
+}}
+
+/* 新增的diff标记样式 */
+.plus-marker {{
+  color: #080;
+  font-weight: bold;
+}}
+.minus-marker {{
+  color: #800;
+  font-weight: bold;
+}}
+
+/* 确保变更行有明显的视觉区分 */
+.diff-addedline div,
+.diff-deletedline div {{
+  display: inline-block;
+  width: 100%;
+}}
+
+/* 增加一些额外的视觉提示 */
+.diff-addedline {{
+  border-left: 4px solid #080;
+}}
+.diff-deletedline {{
+  border-left: 4px solid #800;
+}}
+.diff-context {{
+  border-left: 4px solid #ccc;
+}}
+
+/* 替换ins/del标签为span标签的样式 */
+.diffchange.added {{
+  background-color: #9e9;
+  color: #000;
+  font-weight: bold;
+  text-decoration: none;
+}}
+.diffchange.deleted {{
+  background-color: #faa;
+  color: #000;
+  font-weight: bold;
+  text-decoration: line-through;
+}}
+</style></head><body>
+<h2>{title}</h2>
+<p>修改时间: {timestamp}</p>
+{processed_diff_html}
+</body></html>'''
+
+    try:
+        with open(diff_file, "w", encoding="utf-8") as f:
+            f.write(html_wrapper)
+        with open(full_file, "w", encoding="utf-8") as f:
+            f.write(full_text)
+        
+        print(f"  → 已保存: {diff_file.relative_to(OUTPUT_DIR)}")
+        print(f"  → 已保存: {full_file.relative_to(OUTPUT_DIR)}")
+    except Exception as e:
+        print(f"  → 保存文件时出错: {e}")
+
+    print(f"  → 完整路径: {diff_file}")
+    print(f"  → 完整路径: {full_file}")
+
+def process_single_page(title, since_time, update_timestamp=False):
+    """只处理单个页面"""
+    print(f"正在单独处理页面：{title}")
+    
+    # 获取当前最新 revid
+    params = {
+        "action": "query",
+        "prop": "revisions",
+        "titles": title,
+        "rvprop": "ids|timestamp",
+        "rvlimit": 1,
+        "format": "json"
+    }
+    try:
+        r = SESSION.get(WIKI_API_URL, params=params).json()
+        page = next(iter(r["query"]["pages"].values()))
+        if "revisions" not in page:
+            print("页面不存在或被删除")
+            return None
+        latest_revid = page["revisions"][0]["revid"]
+        latest_ts = page["revisions"][0]["timestamp"]
+
+        # 获取旧 revid
+        old_revid = get_old_revid(title, since_time)
+
+        diff_html, full_text, new_ts = get_official_diff_and_content(title, old_revid, latest_revid)
+        if diff_html is not None and full_text is not None:
+            # 移除旧的note标记，使用更简洁的命名方式
+            if not old_revid:
+                diff_html = "<p style='color:green;font-weight:bold'>新创建页面（无历史版本）</p>"
+            save_files(title, diff_html, full_text, new_ts, "", latest_revid)
+        else:
+            print(f"  警告: 未能获取完整的差异或内容数据")
+
+        if update_timestamp:
+            save_last_timestamp(latest_ts)
+            print(f"已更新全局时间戳 → {latest_ts}")
+        
+        return latest_ts
+    except Exception as e:
+        print(f"处理页面 '{title}' 时出错: {e}")
+        return None
+
+def process_all_pages_since(since_time):
+    """处理自指定时间以来的所有页面变更"""
+    print("正在获取最近变更列表...")
+    changes = get_recent_changes(since_time)
+    if not changes:
+        print("没有发现任何变更")
+        return
+
+    latest_global_ts = since_time
+    for title, (latest_revid, ts) in changes.items():
+        print(f"\n处理：{title}")
+        # 复用单页处理逻辑
+        page_latest_ts = process_single_page(title, since_time)
+        
+        if page_latest_ts and page_latest_ts > latest_global_ts:
+            latest_global_ts = page_latest_ts
+
+    save_last_timestamp(latest_global_ts)
+    print(f"\n全量同步完成！本次最新时间戳已更新为：{latest_global_ts}")
+    print(f"文件保存在：{CURRENT_OUTPUT_DIR.resolve() if CURRENT_OUTPUT_DIR else OUTPUT_DIR.resolve()}")
+
+def main():
+    parser = argparse.ArgumentParser(description="MediaWiki 同步工具 - 支持全量/单页/自定义时间")
+    parser.add_argument("--since", type=str, help="强制从指定时间开始同步，格式如 2025-11-28T00:00:00Z")
+    parser.add_argument("--title", type=str, help="只同步指定的单个页面标题")
+    parser.add_argument("--update-timestamp", action="store_true", 
+                        help="在单页模式下，完成后仍然更新全局 last_sync_timestamp.txt")
+    parser.add_argument("--run", action="store_true",
+                        help="执行同步操作（必须提供此参数才能真正执行同步）")
+    
+    args = parser.parse_args()
+
+    # 如果没有提供 --run 参数，则显示帮助信息并退出
+    if not args.run:
+        parser.print_help()
+        return
+
+    # 确定实际使用的 since 时间
+    if args.since:
+        since_time = args.since
+        print(f"使用命令行指定的时间起点：{since_time}")
+    else:
+        since_time = load_last_timestamp()
+        if not since_time:
+            from datetime import timedelta
+            since_time = (datetime.utcnow() - timedelta(days=1)).isoformat(timespec='seconds') + "Z"
+        print(f"使用上次记录的时间起点：{since_time}")
+
+    # 单页面模式
+    if args.title:
+        process_single_page(args.title.strip(), since_time, args.update_timestamp)
+        return
+
+    # 全量模式 - 使用复用的单页处理逻辑
+    process_all_pages_since(since_time)
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()
\ No newline at end of file