init
This commit is contained in:
commit
8e93b5b82b
|
|
@ -0,0 +1,164 @@
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak
|
||||||
|
venv.bak
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# 自定义忽略项
|
||||||
|
last_sync_timestamp.txt
|
||||||
|
wiki_sync_output/
|
||||||
|
|
@ -0,0 +1,398 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
MediaWiki 最近变更同步工具 - 绯红终版
|
||||||
|
支持:
|
||||||
|
1. 正常全量同步(仅需 --run,无其他参数;所有模式都必须带 --run 才会真正执行)
|
||||||
|
2. 手动指定时间起点:--since 2025-11-28T00:00:00Z
|
||||||
|
3. 只同步单个页面:--title "页面名称"
|
||||||
|
4. 单个页面时可选更新全局时间戳:--update-timestamp
|
||||||
|
5. 全部使用官方 action=compare 生成最完美的 diff
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# ==================== Configuration ====================
# MediaWiki API endpoint to sync from; change this to point at your own wiki.
WIKI_API_URL = "https://wiki.projectdiablo2.com/w/api.php"

# File that remembers the timestamp of the last successful sync.
LAST_TIMESTAMP_FILE = "last_sync_timestamp.txt"

# Root folder for all generated files; created at import time if missing.
OUTPUT_DIR = Path("wiki_sync_output")
OUTPUT_DIR.mkdir(exist_ok=True)

# Per-run output folder. Starts as None and is filled in lazily by the code
# that writes the first file of this run.
CURRENT_OUTPUT_DIR = None

# One shared HTTP session so every API call sends the same User-Agent.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "WikiSyncTool/3.0 (your-email@example.com; MediaWiki Sync Bot)"
# =======================================================
|
||||||
|
|
||||||
|
def load_last_timestamp():
    """Return the stored sync timestamp, or None when no timestamp file exists.

    The file content is returned with surrounding whitespace removed, so an
    empty file yields an empty string.
    """
    if os.path.exists(LAST_TIMESTAMP_FILE):
        with open(LAST_TIMESTAMP_FILE, encoding="utf-8") as stored:
            raw = stored.read()
        return raw.strip()
    return None
|
||||||
|
|
||||||
|
def save_last_timestamp(ts):
    """Overwrite the timestamp file with ``ts`` (written verbatim as UTF-8)."""
    with open(LAST_TIMESTAMP_FILE, mode="w", encoding="utf-8") as out:
        out.write(ts)
|
||||||
|
|
||||||
|
def get_recent_changes(since, timeout=30):
    """Collect the newest change of every page edited or created since ``since``.

    Pages through ``list=recentchanges`` (``rctype=edit|new``) in
    chronological order (``rcdir=newer``) and follows the API's ``continue``
    tokens until none is returned. Because entries arrive oldest-first, a
    later entry for a title overwrites the earlier one, so each title ends up
    mapped to its most recent change.

    Args:
        since: Start timestamp, sent to the API as ``rcstart``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        dict mapping page title -> ``(revid, timestamp)``. If a request fails
        or the API reports an error, the problem is printed and the entries
        gathered so far (possibly none) are returned.
    """
    params = {
        "action": "query",
        "list": "recentchanges",
        "rcprop": "title|ids|timestamp",
        "rctype": "edit|new",
        "rcdir": "newer",
        "rcstart": since,
        "rclimit": 500,
        "format": "json",
    }
    latest = {}
    while True:
        try:
            # Without a timeout a stalled connection would block the sync forever.
            resp = SESSION.get(WIKI_API_URL, params=params, timeout=timeout)
            resp.raise_for_status()
            response_data = resp.json()
            if "error" in response_data:
                # API-level failure: report it and stop paging.
                api_error = response_data["error"]
                print(f"获取最近更改时出错: {api_error}")
                break
            for rc in response_data.get("query", {}).get("recentchanges", []):
                latest[rc["title"]] = (rc["revid"], rc["timestamp"])
            if "continue" not in response_data:
                break
            # Merge the continuation tokens so the next request resumes here.
            params.update(response_data["continue"])
        except Exception as e:
            # Deliberately best-effort: keep whatever was collected so far.
            print(f"获取最近更改时出错: {e}")
            break
    return latest
|
||||||
|
|
||||||
|
def get_old_revid(title, end_time, timeout=30):
    """Return the revid of the last revision of ``title`` at or before ``end_time``.

    The result is meant to be used as the ``fromrev`` side of a diff.

    Args:
        title: Page title to look up.
        end_time: Timestamp sent as ``rvstart`` while enumerating revisions
            from newest to oldest (``rvdir=older``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The revision id, or None when the page has no revision in that range
        or when the request fails (the failure is printed, not raised).
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "ids|timestamp",
        "rvlimit": 1,  # only the single newest revision in range is needed
        "rvdir": "older",
        "rvstart": end_time,
        "format": "json",
    }
    try:
        resp = SESSION.get(WIKI_API_URL, params=params, timeout=timeout)
        # Log the URL that was actually requested (properly percent-encoded).
        print(f" 请求URL: {resp.url}")
        resp.raise_for_status()
        pages = resp.json()["query"]["pages"]
        # A single title was requested, so the first page entry is the one we want.
        page = next(iter(pages.values()))
        revisions = page.get("revisions")
        if not revisions:
            print(f" 页面 '{title}' 在指定时间前没有找到修订版本")
            return None
        return revisions[0]["revid"]
    except Exception as e:
        # Best-effort lookup: callers treat None as "no older revision".
        print(f"获取旧版本ID时出错: {e}")
        return None
|
||||||
|
|
||||||
|
def get_official_diff_and_content(title, from_revid, to_revid, timeout=30):
    """Fetch the server-rendered diff between two revisions plus the page text.

    Two API calls are made: ``action=compare`` for the HTML diff, then a
    ``prop=revisions`` query by title for the page content and timestamp.

    Args:
        title: Page title used for the content query.
        from_revid: Older revision id; a falsy value is sent as an empty
            ``fromrev``.
        to_revid: Newer revision id.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``(diff_html, full_text, timestamp)``. ``diff_html`` falls back to a
        placeholder paragraph when the response has no ``compare`` payload.
        ``(None, None, None)`` is returned when the page has no revisions or
        any step fails (the failure is printed, not raised).
    """
    diff_params = {
        "action": "compare",
        "fromrev": from_revid or "",
        "torev": to_revid,
        "format": "json",
    }

    print(f" 获取diff: fromrev={from_revid}, torev={to_revid}")

    try:
        diff_http = SESSION.get(WIKI_API_URL, params=diff_params, timeout=timeout)
        diff_http.raise_for_status()
        diff_resp = diff_http.json()
        print(f" Diff响应: {list(diff_resp.keys())}")
        diff_html = diff_resp.get("compare", {}).get("*", "<p>无法获取 diff</p>")
        print(f" Diff内容长度: {len(diff_html)} 字符")

        # NOTE(review): the content is requested by title with no revision
        # constraint, so presumably it is whatever the API returns by default
        # rather than specifically ``to_revid`` -- confirm this is intended.
        content_params = {
            "action": "query",
            "prop": "revisions",
            "titles": title,
            "rvprop": "content|timestamp",
            "rvslots": "main",
            "format": "json",
        }
        content_http = SESSION.get(WIKI_API_URL, params=content_params, timeout=timeout)
        content_http.raise_for_status()
        page = next(iter(content_http.json()["query"]["pages"].values()))
        if "revisions" not in page:
            return None, None, None
        rev = page["revisions"][0]
        full_text = rev["slots"]["main"]["*"]
        ts = rev["timestamp"]
        return diff_html, full_text, ts
    except Exception as e:
        # Best-effort: callers check for None instead of handling exceptions.
        print(f"获取diff和内容时出错: {e}")
        return None, None, None
|
||||||
|
|
||||||
|
# Stylesheet embedded in every generated diff page (git-diff-like colours).
_DIFF_PAGE_CSS = """\
body {
    font-family: system-ui, sans-serif;
    margin: 20px;
}
table.diff {
    border-collapse: collapse;
    font-family: monospace;
    width: 100%;
    table-layout: fixed;
}
table.diff td {
    padding: 0 5px;
    vertical-align: top;
    white-space: pre-wrap;
    word-break: break-all;
    font-size: 14px;
    line-height: 1.4;
}
table.diff col.diff-marker {
    width: 20px;
    text-align: right;
    background-color: #fafafa;
}
table.diff col.diff-content {
    width: auto;
}
table.diff col.diff-addedline,
table.diff col.diff-deletedline {
    width: 50%;
}
.diff-addedline {
    background-color: #dfd;
}
.diff-addedline .diffchange {
    background-color: #9e9;
    color: #000;
}
.diff-deletedline {
    background-color: #fee8e8;
}
.diff-deletedline .diffchange {
    background-color: #faa;
    color: #000;
}
.diff-context {
    background-color: #fafafa;
}
.diff-context td {
    color: #777;
}
.diff-marker {
    font-weight: bold;
    text-align: right;
    padding: 0 4px;
}
.diff-lineno {
    background-color: #f0f0f0;
    text-align: right;
    padding: 0 4px;
}
.diff-addedline .diff-marker {
    color: #080;
}
.diff-deletedline .diff-marker {
    color: #800;
}

/* 新增的diff标记样式 */
.plus-marker {
    color: #080;
    font-weight: bold;
}
.minus-marker {
    color: #800;
    font-weight: bold;
}

/* 确保变更行有明显的视觉区分 */
.diff-addedline div,
.diff-deletedline div {
    display: inline-block;
    width: 100%;
}

/* 增加一些额外的视觉提示 */
.diff-addedline {
    border-left: 4px solid #080;
}
.diff-deletedline {
    border-left: 4px solid #800;
}
.diff-context {
    border-left: 4px solid #ccc;
}

/* 替换ins/del标签为span标签的样式 */
.diffchange.added {
    background-color: #9e9;
    color: #000;
    font-weight: bold;
    text-decoration: none;
}
.diffchange.deleted {
    background-color: #faa;
    color: #000;
    font-weight: bold;
    text-decoration: line-through;
}"""


def _restyle_diff_markup(diff_html):
    """Rewrite MediaWiki diff markup so the embedded stylesheet can style it."""
    # Turn <ins>/<del> into <span> with an extra marker class. The opening
    # replacement stops *inside* the class attribute so that any further
    # classes (e.g. "diffchange-inline") stay within the same quotes.
    restyled = (
        diff_html
        .replace('<ins class="diffchange', '<span class="diffchange added')
        .replace('</ins>', '</span>')
        .replace('<del class="diffchange', '<span class="diffchange deleted')
        .replace('</del>', '</span>')
    )
    # Replace the data-marker attribute with a visible +/− character.
    restyled = restyled.replace(
        '<td class="diff-marker" data-marker="−"></td>',
        '<td class="diff-marker"><span class="minus-marker">−</span></td>',
    ).replace(
        '<td class="diff-marker" data-marker="+"></td>',
        '<td class="diff-marker"><span class="plus-marker">+</span></td>',
    )
    # Bare <tr> rows outside a <table> are discarded by HTML parsers, which
    # would strip every cell class the stylesheet relies on; wrap them.
    if "<tr" in restyled and "<table" not in restyled:
        restyled = (
            '<table class="diff">\n'
            '<colgroup><col class="diff-marker"><col class="diff-content">'
            '<col class="diff-marker"><col class="diff-content"></colgroup>\n'
            f'{restyled}\n'
            '</table>'
        )
    return restyled


def save_files(title, diff_html, full_text, timestamp, note="", revid=None):
    """Write the styled diff page and the full page text for one page.

    On the first call of a run a timestamped sub-folder of ``OUTPUT_DIR`` is
    created and remembered in the global ``CURRENT_OUTPUT_DIR``; all files of
    the run go there. Two files are written, ``<base>.diff.html`` and
    ``<base>.full.txt``, where ``<base>`` is built from a sanitised title, the
    revision timestamp and, when given, the revision id.

    Args:
        title: Page title; shown (HTML-escaped) in the page and used,
            sanitised, in the file names.
        diff_html: Diff markup to embed in the HTML page.
        full_text: Page text written to the ``.full.txt`` file.
        timestamp: Revision timestamp string; its first 19 characters feed
            the file name.
        note: Unused; kept so existing callers keep working.
        revid: Optional revision id appended to the file name.

    Write errors are printed rather than raised.
    """
    import html

    global CURRENT_OUTPUT_DIR

    # Lazily create this run's output folder.
    if CURRENT_OUTPUT_DIR is None:
        current_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        CURRENT_OUTPUT_DIR = OUTPUT_DIR / current_time_str
        # parents=True so a missing OUTPUT_DIR does not abort the run.
        CURRENT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        print(f"创建本次执行的输出目录: {CURRENT_OUTPUT_DIR}")

    # Keep only characters that are safe in file names.
    safe_title = "".join(c if c.isalnum() or c in " -_." else "_" for c in title)
    time_str = timestamp[:19].replace("-", "").replace(":", "").replace("T", "_")
    # File name: title, time and (when available) revid.
    base_filename = f"{safe_title}-{time_str}-{revid}" if revid else f"{safe_title}-{time_str}"

    diff_file = CURRENT_OUTPUT_DIR / f"{base_filename}.diff.html"
    full_file = CURRENT_OUTPUT_DIR / f"{base_filename}.full.txt"

    processed_diff_html = _restyle_diff_markup(diff_html)
    # Titles come from the wiki and may contain &, < or >; escape them.
    shown_title = html.escape(title)
    shown_timestamp = html.escape(timestamp)

    html_wrapper = f'''<!DOCTYPE html>
<html><head><meta charset="utf-8"><title>Diff: {shown_title}</title>
<style>
{_DIFF_PAGE_CSS}
</style></head><body>
<h2>{shown_title}</h2>
<p>修改时间: {shown_timestamp}</p>
{processed_diff_html}
</body></html>'''

    try:
        with open(diff_file, "w", encoding="utf-8") as f:
            f.write(html_wrapper)
        with open(full_file, "w", encoding="utf-8") as f:
            f.write(full_text)

        print(f" → 已保存: {diff_file.relative_to(OUTPUT_DIR)}")
        print(f" → 已保存: {full_file.relative_to(OUTPUT_DIR)}")
    except Exception as e:
        print(f" → 保存文件时出错: {e}")

        # Show the full target paths to help diagnose the failure.
        print(f" → 完整路径: {diff_file}")
        print(f" → 完整路径: {full_file}")
|
||||||
|
|
||||||
|
def process_single_page(title, since_time, update_timestamp=False):
    """Sync one page: diff its state at ``since_time`` against its latest revision.

    Looks up the page's latest revision, asks ``get_old_revid`` for the
    revision at or before ``since_time``, fetches the diff and page text and
    hands them to ``save_files``. When no older revision exists the diff is
    replaced by a "newly created page" notice.

    Args:
        title: Page title to process.
        since_time: Timestamp marking the old side of the diff.
        update_timestamp: When True, store the page's latest revision
            timestamp as the global sync timestamp after a successful save.

    Returns:
        The latest revision timestamp when the page was saved, otherwise
        None (page missing, diff/content unavailable, or any error). Errors
        are printed, not raised.
    """
    print(f"正在单独处理页面:{title}")

    # Look up the current latest revision of the page.
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "ids|timestamp",
        "rvlimit": 1,
        "format": "json",
    }
    try:
        # A timeout keeps one stalled request from hanging the whole sync.
        resp = SESSION.get(WIKI_API_URL, params=params, timeout=30)
        resp.raise_for_status()
        page = next(iter(resp.json()["query"]["pages"].values()))
        if "revisions" not in page:
            print("页面不存在或被删除")
            return None
        latest_revid = page["revisions"][0]["revid"]
        latest_ts = page["revisions"][0]["timestamp"]

        # Revision to diff from (None when the page did not exist yet).
        old_revid = get_old_revid(title, since_time)

        diff_html, full_text, new_ts = get_official_diff_and_content(title, old_revid, latest_revid)
        if diff_html is None or full_text is None:
            print(" 警告: 未能获取完整的差异或内容数据")
            # Nothing was saved, so do not report or persist a newer
            # timestamp; otherwise this change would be skipped next time.
            return None

        if not old_revid:
            diff_html = "<p style='color:green;font-weight:bold'>新创建页面(无历史版本)</p>"
        save_files(title, diff_html, full_text, new_ts, "", latest_revid)

        if update_timestamp:
            save_last_timestamp(latest_ts)
            print(f"已更新全局时间戳 → {latest_ts}")

        return latest_ts
    except Exception as e:
        # Best-effort per page: report and let the caller carry on.
        print(f"处理页面 '{title}' 时出错: {e}")
        return None
|
||||||
|
|
||||||
|
def process_all_pages_since(since_time):
    """Sync every page that changed since ``since_time``.

    Each changed title is run through ``process_single_page``; the greatest
    timestamp those calls return (never less than ``since_time``) is then
    stored as the new global sync timestamp. Nothing is stored when the
    change list is empty.
    """
    print("正在获取最近变更列表...")
    changes = get_recent_changes(since_time)
    if not changes:
        print("没有发现任何变更")
        return

    newest_ts = since_time
    # Only the titles are needed; the single-page routine re-queries the rest.
    for page_title in changes:
        print(f"\n处理:{page_title}")
        page_ts = process_single_page(page_title, since_time)
        if not page_ts:
            continue
        if page_ts > newest_ts:
            newest_ts = page_ts

    save_last_timestamp(newest_ts)
    print(f"\n全量同步完成!本次最新时间戳已更新为:{newest_ts}")

    # Fall back to the root folder when no file was written in this run.
    if CURRENT_OUTPUT_DIR:
        saved_under = CURRENT_OUTPUT_DIR.resolve()
    else:
        saved_under = OUTPUT_DIR.resolve()
    print(f"文件保存在:{saved_under}")
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point.

    Nothing is synced unless ``--run`` is given; without it the help text is
    printed. The starting time comes from ``--since``, else from the stored
    timestamp, else defaults to 24 hours ago (UTC). With ``--title`` only
    that page is processed; otherwise every page changed since the starting
    time is processed.
    """
    from datetime import timedelta, timezone

    parser = argparse.ArgumentParser(description="MediaWiki 同步工具 - 支持全量/单页/自定义时间")
    parser.add_argument("--since", type=str, help="强制从指定时间开始同步,格式如 2025-11-28T00:00:00Z")
    parser.add_argument("--title", type=str, help="只同步指定的单个页面标题")
    parser.add_argument("--update-timestamp", action="store_true",
                        help="在单页模式下,完成后仍然更新全局 last_sync_timestamp.txt")
    parser.add_argument("--run", action="store_true",
                        help="执行同步操作(必须提供此参数才能真正执行同步)")

    args = parser.parse_args()

    # Safety switch: without --run just show the usage and exit.
    if not args.run:
        parser.print_help()
        return

    # Decide which starting time to use.
    if args.since:
        since_time = args.since
        print(f"使用命令行指定的时间起点:{since_time}")
    else:
        since_time = load_last_timestamp()
        if not since_time:
            # No stored timestamp: start 24 hours ago. An aware UTC datetime
            # replaces the deprecated naive utcnow(); the output format
            # (YYYY-MM-DDTHH:MM:SSZ) is unchanged.
            day_ago = datetime.now(timezone.utc) - timedelta(days=1)
            since_time = day_ago.strftime("%Y-%m-%dT%H:%M:%SZ")
        print(f"使用上次记录的时间起点:{since_time}")

    # Single-page mode.
    if args.title:
        process_single_page(args.title.strip(), since_time, args.update_timestamp)
        return

    # Full mode: every page changed since the starting time.
    process_all_pages_since(since_time)
|
||||||
|
|
||||||
|
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue