sync-pd2-wiki/sync.py

869 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
MediaWiki 最近变更同步工具 - 增强版
支持:
1. 正常全量同步(无参数)
2. 手动指定时间起点:--since 2025-11-28T00:00:00Z
3. 只同步单个页面:--title "页面名称"
4. 单个页面时可选更新全局时间戳:--update-timestamp
5. 获取历史版本并生成diff
6. 同步中文翻译版本
7. 生成双语对比网页
"""
import os
import argparse
from pathlib import Path
from datetime import datetime
import requests
from dotenv import load_dotenv
import difflib
import json
import re
from urllib.parse import quote
# ==================== Configuration ====================
# Read optional endpoint overrides (WIKI_API_URL_EN / WIKI_API_URL_CN) from a .env file.
load_dotenv()
WIKI_API_URL_EN = os.getenv("WIKI_API_URL_EN", "https://wiki.projectdiablo2.com/w/api.php")
WIKI_API_URL_CN = os.getenv("WIKI_API_URL_CN", "https://wiki.projectdiablo2.cn/w/api.php")
# Root directory for all generated artifacts; created eagerly at import time.
OUTPUT_DIR = Path("wiki_sync_output")
OUTPUT_DIR.mkdir(exist_ok=True)
# Per-run output directory (a timestamped subdirectory of OUTPUT_DIR);
# lazily created by save_files() on the first write of this execution.
CURRENT_OUTPUT_DIR = None
# File that persists the timestamp of the last successful sync between runs.
LAST_TIMESTAMP_FILE = "last_sync_timestamp.txt"
# Separate HTTP sessions for the English and Chinese wikis, each carrying a
# descriptive User-Agent as recommended by MediaWiki API etiquette.
SESSION_EN = requests.Session()
SESSION_EN.headers.update({
    "User-Agent": "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"
})
SESSION_CN = requests.Session()
SESSION_CN.headers.update({
    "User-Agent": "WikiSyncTool/4.0 (your-email@example.com; MediaWiki Sync Bot)"
})
# ================================================
def load_last_timestamp(path=None):
    """Return the persisted last-sync timestamp string, or None if absent.

    Args:
        path: Optional override of the timestamp file location; defaults to
            the module-level LAST_TIMESTAMP_FILE. (Generalized so the
            function is usable/testable with an explicit path; callers that
            pass no argument behave exactly as before.)
    """
    if path is None:
        path = LAST_TIMESTAMP_FILE
    if not os.path.exists(path):
        return None
    with open(path, encoding="utf-8") as f:
        return f.read().strip()
def save_last_timestamp(ts, path=None):
    """Persist *ts* (ISO-8601 timestamp string) as the last-sync checkpoint.

    Args:
        ts: Timestamp string to store verbatim.
        path: Optional override of the timestamp file location; defaults to
            the module-level LAST_TIMESTAMP_FILE (backward compatible).
    """
    if path is None:
        path = LAST_TIMESTAMP_FILE
    with open(path, "w", encoding="utf-8") as f:
        f.write(ts)
def get_recent_changes(since):
    """Collect the newest revid for every page changed since *since*.

    Pages edited several times collapse to one entry, because later API
    results overwrite earlier ones in the mapping (automatic de-dup).

    Returns:
        dict mapping page title -> (revid, timestamp).
    """
    query = {
        "action": "query",
        "list": "recentchanges",
        "rcprop": "title|ids|timestamp",
        "rctype": "edit|new",
        "rcdir": "newer",
        "rcstart": since,
        "rclimit": 500,
        "format": "json"
    }
    newest_by_title = {}
    keep_paging = True
    while keep_paging:
        try:
            resp = SESSION_EN.get(WIKI_API_URL_EN, params=query)
            resp.raise_for_status()
            payload = resp.json()
            if "error" in payload:
                raise Exception(payload["error"])
            for change in payload.get("query", {}).get("recentchanges", []):
                newest_by_title[change["title"]] = (change["revid"], change["timestamp"])
            if "continue" not in payload:
                keep_paging = False
            else:
                # Fold the continuation token into the next request.
                query.update(payload["continue"])
        except Exception as e:
            # Best-effort: report and return whatever was gathered so far.
            print(f"获取最近更改时出错: {e}")
            keep_paging = False
    return newest_by_title
def get_old_revid(title, end_time):
    """Return the revid of the last revision at or before *end_time*.

    Used as the "fromrev" side of a compare/diff. Returns None when the
    page has no revision before the cutoff or when the API call fails.
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "ids|timestamp",
        "rvlimit": 1,
        "rvdir": "older",     # walk backwards in time
        "rvstart": end_time,  # start at the cutoff and go older
        "format": "json"
    }
    try:
        r = SESSION_EN.get(WIKI_API_URL_EN, params=params).json()
        pages = r["query"]["pages"]
        page = next(iter(pages.values()))
        # Single exit path for both "no 'revisions' key" and "empty list";
        # the original duplicated the same message in two branches.
        revisions = page.get("revisions") or []
        if revisions:
            return revisions[0]["revid"]
        print(f" 页面 '{title}' 在指定时间前没有找到修订版本")
        return None
    except Exception as e:
        print(f"获取旧版本ID时出错: {e}")
        return None
def get_page_content(wiki_url, session, title, revid=None):
    """Fetch a page's wikitext plus revision metadata.

    Args:
        wiki_url: API endpoint to query.
        session: requests.Session carrying the proper User-Agent.
        title: Page title to fetch.
        revid: When given, fetch exactly that revision instead of the latest.

    Returns:
        (content, timestamp, revid) on success, (None, None, None) otherwise.
    """
    query = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvprop": "content|timestamp|ids",
        "rvslots": "main",
        "format": "json"
    }
    if revid:
        # Pin both ends of the revision range to that single revision.
        query["rvstartid"] = revid
        query["rvendid"] = revid
    try:
        payload = session.get(wiki_url, params=query).json()
        page = next(iter(payload["query"]["pages"].values()))
        if "revisions" not in page:
            return None, None, None
        revision = page["revisions"][0]
        return (
            revision["slots"]["main"]["*"],
            revision["timestamp"],
            revision["revid"],
        )
    except Exception as e:
        print(f"获取页面内容时出错: {e}")
        return None, None, None
def generate_text_diff(old_text, new_text):
    """Produce a git-style unified diff between two page texts.

    Returns the literal marker string "新创建页面" when there is no
    previous text (i.e. the page is newly created).
    """
    if not old_text:
        return "新创建页面"
    diff_iter = difflib.unified_diff(
        old_text.splitlines(keepends=True),
        new_text.splitlines(keepends=True),
        lineterm='\n'
    )
    return ''.join(diff_iter)
def parse_diff_with_line_numbers(diff_text):
    """Parse unified-diff text into records carrying old/new line numbers.

    Each record is a dict with keys:
        type     -- 'hunk', 'added', 'removed', 'context' or 'other'
        content  -- line text with the leading diff marker stripped
        old_line -- line number in the old revision (None if not applicable)
        new_line -- line number in the new revision (None if not applicable)
    Hunk records additionally carry old_start/old_count/new_start/new_count.

    Returns [] for empty input or the "新创建页面" (new page) marker.
    """
    if not diff_text or diff_text.startswith("新创建页面"):
        return []
    # Hunk header, e.g. "@@ -12,5 +12,6 @@"; counts default to 1 when
    # omitted. Compiled once here instead of the original's per-line
    # `import re` + fresh match-setup inside the loop.
    hunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
    parsed_lines = []
    current_old_line = 0
    current_new_line = 0
    in_hunk = False
    for line in diff_text.splitlines():
        if line.startswith('@@'):
            match = hunk_re.match(line)
            if match:
                old_start = int(match.group(1))
                old_count = int(match.group(2)) if match.group(2) else 1
                new_start = int(match.group(3))
                new_count = int(match.group(4)) if match.group(4) else 1
                current_old_line = old_start
                current_new_line = new_start
                in_hunk = True
                parsed_lines.append({
                    'type': 'hunk',
                    'content': line,
                    'old_start': old_start,
                    'old_count': old_count,
                    'new_start': new_start,
                    'new_count': new_count,
                    'old_line': None,
                    'new_line': None
                })
            else:
                parsed_lines.append({
                    'type': 'other',
                    'content': line,
                    'old_line': None,
                    'new_line': None
                })
        elif line.startswith('---') or line.startswith('+++'):
            # File header lines carry no position information; skip them.
            continue
        elif in_hunk:
            if line.startswith('-'):
                # Removed line: exists only in the old revision.
                parsed_lines.append({
                    'type': 'removed',
                    'content': line[1:],  # strip leading '-'
                    'old_line': current_old_line,
                    'new_line': None
                })
                current_old_line += 1
            elif line.startswith('+'):
                # Added line: exists only in the new revision.
                parsed_lines.append({
                    'type': 'added',
                    'content': line[1:],  # strip leading '+'
                    'old_line': None,
                    'new_line': current_new_line
                })
                current_new_line += 1
            elif line.startswith(' '):
                # Context line: present in both revisions.
                parsed_lines.append({
                    'type': 'context',
                    'content': line[1:],  # strip leading ' '
                    'old_line': current_old_line,
                    'new_line': current_new_line
                })
                current_old_line += 1
                current_new_line += 1
            else:
                # e.g. an empty line inside a hunk.
                parsed_lines.append({
                    'type': 'other',
                    'content': line,
                    'old_line': None,
                    'new_line': None
                })
        else:
            # Line outside any hunk.
            parsed_lines.append({
                'type': 'other',
                'content': line,
                'old_line': None,
                'new_line': None
            })
    return parsed_lines
def search_chinese_page(title):
    """Search the Chinese wiki for a page matching *title*.

    First tries an exact (quoted) title-phrase search, then falls back to
    an unquoted fuzzy search. Returns the best-matching title or None.
    """
    # Exact match attempt first: quoting forces a phrase search.
    params = {
        "action": "query",
        "list": "search",
        "srsearch": f'"{title}"',
        "srwhat": "title",
        "srlimit": 5,
        "format": "json"
    }
    try:
        r = SESSION_CN.get(WIKI_API_URL_CN, params=params).json()
        search_results = r.get("query", {}).get("search", [])
        if search_results:
            # Take the top-ranked hit.
            return search_results[0]["title"]
        # Fuzzy fallback. BUGFIX: the original substituted "%20" for
        # spaces, but requests already URL-encodes query parameters, so
        # the API received a literal "%20" string and matched nothing.
        params["srsearch"] = title
        r = SESSION_CN.get(WIKI_API_URL_CN, params=params).json()
        search_results = r.get("query", {}).get("search", [])
        if search_results:
            return search_results[0]["title"]
    except Exception as e:
        print(f"搜索中文页面时出错: {e}")
    return None
def create_diff_html(title, en_diff, en_old_lines, en_new_lines, cn_content=None):
    """Build a bilingual side-by-side diff page as a standalone HTML string.

    Left column: English diff with precise old/new line numbers. Right
    column: Chinese translation. Matching lines are linked through the
    data-cn-line attribute so clicking an English line highlights and
    scrolls to the corresponding Chinese line.

    Args:
        title: Page title (shown in the page header; escaped).
        en_diff: Unified-diff text, the "新创建页面" marker, or None.
        en_old_lines: Old-revision lines (currently unused; kept for API
            compatibility with existing callers).
        en_new_lines: Latest-revision lines, rendered when there is no diff.
        cn_content: Chinese page wikitext, or None when no translation exists.

    Returns:
        Complete HTML document as a string.

    BUGFIX notes vs. the original:
      * The closing <script> section was a plain (non-f) string written
        with f-string-style doubled braces, so the emitted JavaScript
        contained literal "{{" / "}}" and never parsed. Braces fixed.
      * Wiki text and the title were interpolated into HTML unescaped;
        wikitext routinely contains '<', '>' and '&', which broke the
        markup. All dynamic text is now passed through html.escape().
    """
    from html import escape  # stdlib; local import keeps the edit self-contained

    # Prepare the Chinese content lines.
    cn_lines = []
    if cn_content:
        cn_lines = cn_content.splitlines()
    # Parse the diff to obtain per-line number information.
    parsed_diff = parse_diff_with_line_numbers(en_diff) if en_diff else []
    # Document head + styles (real f-string: doubled braces emit single ones).
    html = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Wiki Diff: {escape(title)}</title>
<style>
* {{
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}}
body {{
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
    background-color: #f5f5f5;
    line-height: 1.6;
}}
.header {{
    background-color: #fff;
    padding: 20px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    margin-bottom: 20px;
}}
.header h1 {{
    color: #333;
    font-size: 24px;
    margin-bottom: 10px;
}}
.header .meta {{
    color: #666;
    font-size: 14px;
}}
.container {{
    display: flex;
    max-width: 100%;
    margin: 0 auto;
    background-color: #fff;
    min-height: calc(100vh - 100px);
}}
.column {{
    flex: 1;
    overflow: hidden;
    display: flex;
    flex-direction: column;
}}
.column-header {{
    background-color: #e9ecef;
    padding: 12px 20px;
    font-weight: bold;
    color: #495057;
    border-bottom: 1px solid #dee2e6;
}}
.diff-content {{
    flex: 1;
    overflow-y: auto;
    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
    font-size: 13px;
    line-height: 1.4;
}}
.line {{
    display: flex;
    min-height: 20px;
    position: relative;
}}
.line-number {{
    width: 60px;
    text-align: right;
    padding: 0 10px;
    background-color: #f8f9fa;
    color: #6c757d;
    border-right: 1px solid #dee2e6;
    user-select: none;
    flex-shrink: 0;
}}
.line.highlight {{
    background-color: rgba(255, 235, 59, 0.3) !important;
    animation: highlight 2s ease-in-out;
}}
@keyframes highlight {{
    0% {{ background-color: rgba(255, 235, 59, 0.8); }}
    100% {{ background-color: rgba(255, 235, 59, 0.3); }}
}}
.line-content {{
    flex: 1;
    padding: 0 10px;
    white-space: pre-wrap;
    word-break: break-word;
}}
/* Diff specific styles */
.line.diff-added {{
    background-color: #e6ffec;
}}
.line.diff-added .line-content {{
    background-color: #cdffd8;
    border-left: 3px solid #28a745;
}}
.line.diff-removed {{
    background-color: #ffeef0;
}}
.line.diff-removed .line-content {{
    background-color: #fdb8c0;
    border-left: 3px solid #dc3545;
    text-decoration: line-through;
}}
.line.diff-context {{
    background-color: #ffffff;
}}
.line.diff-context .line-content {{
    background-color: #ffffff;
}}
.line.diff-hunk {{
    background-color: #f8f9fa;
    color: #6c757d;
    font-style: italic;
}}
.line.diff-hunk .line-content {{
    background-color: #f1f3f4;
}}
.line.diff-header {{
    background-color: #e9ecef;
    color: #495057;
    font-style: italic;
}}
.line.diff-header .line-content {{
    background-color: #e9ecef;
}}
/* Separator between columns */
.separator {{
    width: 1px;
    background-color: #dee2e6;
    box-shadow: 0 0 5px rgba(0,0,0,0.1);
    position: relative;
    z-index: 10;
}}
/* Scrollbar styling */
.diff-content::-webkit-scrollbar {{
    width: 8px;
    height: 8px;
}}
.diff-content::-webkit-scrollbar-track {{
    background: #f1f1f1;
}}
.diff-content::-webkit-scrollbar-thumb {{
    background: #888;
    border-radius: 4px;
}}
.diff-content::-webkit-scrollbar-thumb:hover {{
    background: #555;
}}
/* Responsive design */
@media (max-width: 768px) {{
    .container {{
        flex-direction: column;
    }}
    .separator {{
        width: 100%;
        height: 1px;
    }}
}}
/* Special styling for new page */
.new-page-notice {{
    background-color: #d4edda;
    color: #155724;
    padding: 15px 20px;
    margin-bottom: 20px;
    border-left: 4px solid #28a745;
}}
.no-translation {{
    background-color: #fff3cd;
    color: #856404;
    padding: 15px 20px;
    margin-bottom: 20px;
    border-left: 4px solid #ffc107;
}}
/* Line linking styles */
.line[data-cn-line] {{
    cursor: pointer;
}}
.line:hover {{
    background-color: rgba(0, 123, 255, 0.05);
}}
</style>
</head>
<body>
<div class="header">
<h1>{escape(title)}</h1>
<div class="meta">
<span>英文Wiki: wiki.projectdiablo2.com</span>
{f' | 中文Wiki: wiki.projectdiablo2.cn' if cn_content else ''}
</div>
</div>
<div class="container">
<div class="column">
<div class="column-header">English Diff</div>
<div class="diff-content" id="en-diff">
'''
    # Render the English diff pane.
    if parsed_diff:
        for item in parsed_diff:
            text = escape(item["content"])
            new_ln = item.get("new_line")
            if item['type'] == 'hunk':
                html += f'<div class="line diff-hunk"><span class="line-content">{text}</span></div>'
            elif item['type'] == 'header':
                html += f'<div class="line diff-header"><span class="line-content">{text}</span></div>'
            elif item['type'] == 'added':
                linkable = new_ln and cn_lines and new_ln <= len(cn_lines)
                cn_line_attr = f'data-cn-line="{new_ln}"' if linkable else ''
                cn_title = f'中文第{new_ln}' if linkable else ''
                html += f'<div class="line diff-added" {cn_line_attr} title="{cn_title}"><span class="line-number">{new_ln or ""}</span><span class="line-content">{text}</span></div>'
            elif item['type'] == 'removed':
                html += f'<div class="line diff-removed" title="已删除"><span class="line-number">{item["old_line"] or ""}</span><span class="line-content">{text}</span></div>'
            elif item['type'] == 'context':
                linkable = new_ln and cn_lines and new_ln <= len(cn_lines)
                cn_line_attr = f'data-cn-line="{new_ln}"' if linkable else ''
                cn_title = f'中文第{new_ln}' if linkable else ''
                html += f'<div class="line diff-context" {cn_line_attr} title="{cn_title}"><span class="line-number">{new_ln}</span><span class="line-content">{text}</span></div>'
            else:
                html += f'<div class="line"><span class="line-content">{text}</span></div>'
    else:
        # New page, or no diff available: show the full latest content.
        if en_diff and en_diff.startswith("新创建页面"):
            html += '<div class="new-page-notice">新创建页面</div>'
        for i, line in enumerate(en_new_lines or [], 1):
            linkable = cn_lines and i <= len(cn_lines)
            cn_line_attr = f'data-cn-line="{i}"' if linkable else ''
            cn_title = f'中文第{i}' if linkable else ''
            html += f'<div class="line diff-context" {cn_line_attr} title="{cn_title}"><span class="line-number">{i}</span><span class="line-content">{escape(line)}</span></div>'
    html += '''
</div>
</div>
<div class="separator"></div>
<div class="column">
<div class="column-header">中文翻译</div>
<div class="diff-content" id="cn-content">
'''
    # Render the Chinese pane.
    if cn_content:
        html += '<div id="cn-lines">'
        for i, line in enumerate(cn_lines, 1):
            html += f'<div class="line diff-context" id="cn-line-{i}"><span class="line-number">{i}</span><span class="line-content">{escape(line)}</span></div>'
        html += '</div>'
    else:
        html += '<div class="no-translation">未找到对应的中文翻译页面</div>'
    # Plain string (NOT an f-string): braces must be single here. The
    # original doubled them f-string-style, producing broken JavaScript.
    html += '''
</div>
</div>
</div>
<script>
// Synchronized scrolling between the two panes
const enDiff = document.querySelector('#en-diff');
const cnContent = document.querySelector('#cn-content');
const cnLines = {};
// Build a map of Chinese line positions
if (document.getElementById('cn-lines')) {
    document.querySelectorAll('#cn-lines .line').forEach(line => {
        const lineNum = line.querySelector('.line-number').textContent;
        if (lineNum) {
            cnLines[lineNum] = line.offsetTop;
        }
    });
}
// Mirror scroll positions
if (enDiff && cnContent) {
    enDiff.addEventListener('scroll', () => {
        cnContent.scrollTop = enDiff.scrollTop;
    });
    cnContent.addEventListener('scroll', () => {
        enDiff.scrollTop = cnContent.scrollTop;
    });
}
// Clicking an English line highlights the matching Chinese line
document.querySelectorAll('[data-cn-line]').forEach(enLine => {
    enLine.addEventListener('click', () => {
        const cnLineNum = enLine.getAttribute('data-cn-line');
        if (cnLineNum) {
            const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
            if (cnLine) {
                // Clear all existing highlights
                document.querySelectorAll('.line.highlight').forEach(line => {
                    line.classList.remove('highlight');
                });
                // Highlight both the English and the Chinese line
                enLine.classList.add('highlight');
                cnLine.classList.add('highlight');
                // Scroll the Chinese line into view
                cnLine.scrollIntoView({ behavior: 'smooth', block: 'center' });
            }
        }
    });
    // Hover preview
    enLine.addEventListener('mouseenter', () => {
        const cnLineNum = enLine.getAttribute('data-cn-line');
        if (cnLineNum) {
            const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
            if (cnLine) {
                enLine.style.backgroundColor = 'rgba(0, 123, 255, 0.1)';
                cnLine.style.backgroundColor = 'rgba(0, 123, 255, 0.1)';
            }
        }
    });
    enLine.addEventListener('mouseleave', () => {
        if (!enLine.classList.contains('highlight')) {
            enLine.style.backgroundColor = '';
        }
        const cnLineNum = enLine.getAttribute('data-cn-line');
        if (cnLineNum) {
            const cnLine = document.getElementById(`cn-line-${cnLineNum}`);
            if (cnLine && !cnLine.classList.contains('highlight')) {
                cnLine.style.backgroundColor = '';
            }
        }
    });
});
</script>
</body>
</html>'''
    return html
def save_files(title, diff_html, diff_text, full_text, timestamp, note="", revid=None, cn_content=None, old_full_text=None):
    """Write all artifacts for one page into this run's output directory.

    Artifacts (each optional, depending on what is available): official
    HTML diff, text diff, latest full text, previous full text, Chinese
    translation text, and the bilingual comparison page. The per-run
    directory is created lazily on the first call of this process.

    BUGFIX: the original printed "已保存 …(双语对比页面)" when the
    comparison page was merely *queued* — before it was written — and then
    the write loop logged the same file again. The premature print is gone;
    only actually-written files are reported.
    """
    global CURRENT_OUTPUT_DIR
    # Lazily create one timestamped directory per process run.
    if CURRENT_OUTPUT_DIR is None:
        current_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        CURRENT_OUTPUT_DIR = OUTPUT_DIR / current_time_str
        CURRENT_OUTPUT_DIR.mkdir(exist_ok=True)
        print(f"创建本次执行的输出目录: {CURRENT_OUTPUT_DIR}")
    # Sanitize the title so it is safe to use as a filename.
    safe_title = "".join(c if c.isalnum() or c in " -_." else "_" for c in title)
    time_str = timestamp[:19].replace("-", "").replace(":", "").replace("T", "_")
    base_filename = f"{safe_title}-{time_str}-{revid}" if revid else f"{safe_title}-{time_str}"
    files_to_save = []
    # 1. Official MediaWiki diff HTML.
    if diff_html:
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.diff.html", diff_html))
    # 2. Text-format diff.
    if diff_text:
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.diff.txt", diff_text))
    # 3. Latest full content.
    if full_text:
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.full.txt", full_text))
    # 4. Previous revision content, when available.
    if old_full_text:
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.old.txt", old_full_text))
    # 5. Chinese translation and 6. bilingual comparison page, when available.
    if cn_content:
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.cn.txt", cn_content))
        en_new_lines = full_text.splitlines() if full_text else []
        en_old_lines = old_full_text.splitlines() if old_full_text else []
        comparison_html = create_diff_html(title, diff_text, en_old_lines, en_new_lines, cn_content)
        files_to_save.append((CURRENT_OUTPUT_DIR / f"{base_filename}.comparison.html", comparison_html))
    # Write everything out; report each file only after a successful write.
    for file_path, content in files_to_save:
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            print(f" → 已保存: {file_path.relative_to(OUTPUT_DIR)}")
        except Exception as e:
            print(f" → 保存文件 {file_path} 时出错: {e}")
def process_single_page(title, since_time, update_timestamp=False):
    """Sync a single page: fetch latest and base revisions, build diffs,
    locate the Chinese translation, and save all artifacts.

    Args:
        title: English wiki page title.
        since_time: ISO timestamp; the revision at/before this time becomes
            the diff base.
        update_timestamp: When True, persist the page's latest revision
            timestamp as the global sync checkpoint.

    Returns:
        The latest revision timestamp string on success, otherwise None.
    """
    print(f"正在单独处理页面:{title}")
    # Fetch the current latest revision first.
    try:
        latest_content, latest_ts, latest_revid = get_page_content(WIKI_API_URL_EN, SESSION_EN, title)
        if latest_content is None:
            print("页面不存在或被删除")
            return None
        # Find the revision to diff against (last one at/before since_time).
        old_revid = get_old_revid(title, since_time)
        # Initialize result holders.
        diff_html = None
        diff_text = None
        old_content = None
        cn_content = None
        if old_revid:
            # Fetch the historical revision's content.
            old_content, old_ts, _ = get_page_content(WIKI_API_URL_EN, SESSION_EN, title, old_revid)
            if old_content is not None:
                # Build a unified text diff between base and latest.
                diff_text = generate_text_diff(old_content, latest_content)
                print(f" 生成了文本diff ({len(diff_text)} 字符)")
            else:
                print(f" 无法获取历史版本内容")
        else:
            # No earlier revision exists -> newly created page.
            print(" 这是新创建的页面")
        # Look up the corresponding Chinese translation page.
        print(" 搜索中文翻译...")
        cn_title = search_chinese_page(title)
        if cn_title:
            print(f" 找到中文页面: {cn_title}")
            cn_content, cn_ts, cn_revid = get_page_content(WIKI_API_URL_CN, SESSION_CN, cn_title)
            if cn_content:
                print(f" 获取中文内容成功 ({len(cn_content)} 字符)")
            else:
                print(" 无法获取中文页面内容")
        else:
            print(" 未找到对应的中文翻译页面")
        # Optionally fetch the official rendered diff from the API.
        if old_revid:
            diff_params = {
                "action": "compare",
                "fromrev": old_revid,
                "torev": latest_revid,
                "format": "json"
            }
            try:
                diff_resp = SESSION_EN.get(WIKI_API_URL_EN, params=diff_params).json()
                diff_html = diff_resp.get("compare", {}).get("*", "")
            except Exception as e:
                print(f" 获取官方HTML diff时出错: {e}")
        # Persist every artifact for this page.
        save_files(title, diff_html, diff_text, latest_content, latest_ts, "", latest_revid, cn_content, old_content)
        if update_timestamp:
            save_last_timestamp(latest_ts)
            print(f"已更新全局时间戳 → {latest_ts}")
        return latest_ts
    except Exception as e:
        print(f"处理页面 '{title}' 时出错: {e}")
        return None
def process_all_pages_since(since_time):
    """Sync every page changed since *since_time* and advance the checkpoint.

    Delegates each changed page to process_single_page() and persists the
    newest per-page timestamp seen as the new global sync checkpoint.
    """
    print("正在获取最近变更列表...")
    changes = get_recent_changes(since_time)
    if not changes:
        print("没有发现任何变更")
        return
    latest_global_ts = since_time
    # Only the titles are needed here; revids are re-resolved per page.
    for title in changes:
        print(f"\n处理:{title}")
        page_ts = process_single_page(title, since_time)
        if page_ts and page_ts > latest_global_ts:
            latest_global_ts = page_ts
    save_last_timestamp(latest_global_ts)
    print(f"\n全量同步完成!本次最新时间戳已更新为:{latest_global_ts}")
    print(f"文件保存在:{CURRENT_OUTPUT_DIR.resolve() if CURRENT_OUTPUT_DIR else OUTPUT_DIR.resolve()}")
def main():
    """CLI entry point: parse arguments and dispatch to single-page or
    full-sync mode. Requires --run as a safety latch against accidental
    execution."""
    parser = argparse.ArgumentParser(description="MediaWiki 同步工具 - 增强版支持双语对比")
    parser.add_argument("--since", type=str, help="强制从指定时间开始同步,格式如 2025-11-28T00:00:00Z")
    parser.add_argument("--title", type=str, help="只同步指定的单个页面标题")
    parser.add_argument("--update-timestamp", action="store_true",
                        help="在单页模式下,完成后仍然更新全局 last_sync_timestamp.txt")
    parser.add_argument("--run", action="store_true",
                        help="执行同步操作(必须提供此参数才能真正执行同步)")
    args = parser.parse_args()
    # Without --run, just show usage and exit.
    if not args.run:
        parser.print_help()
        return
    # Determine the effective start time.
    if args.since:
        since_time = args.since
        print(f"使用命令行指定的时间起点:{since_time}")
    else:
        since_time = load_last_timestamp()
        if not since_time:
            # No checkpoint yet: default to 24 hours ago. BUGFIX:
            # datetime.utcnow() is deprecated (Python 3.12+); use an aware
            # UTC time and format it explicitly with a trailing "Z"
            # (aware isoformat() would append "+00:00" before the "Z").
            from datetime import timedelta, timezone
            since_time = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
        print(f"使用上次记录的时间起点:{since_time}")
    # Single-page mode.
    if args.title:
        process_single_page(args.title.strip(), since_time, args.update_timestamp)
        return
    # Full-sync mode: reuse the single-page pipeline per changed page.
    process_all_pages_since(since_time)
# Standard script entry guard: only run the CLI when executed directly.
if __name__ == "__main__":
    main()