This commit is contained in:
wdjwxh 2025-12-19 10:07:11 +08:00
parent 3d5eb0e017
commit 0e1294d833
1 changed file with 41 additions and 73 deletions

114
sync.py
View File

@@ -326,89 +326,57 @@ def create_diff_html(title, en_diff, en_old_lines, en_new_lines, cn_content=None
break break
i += 1 i += 1
# 处理连续的diff块 # 处理连续的diff块 - 改进的连续匹配算法
if diff_block: if diff_block:
# 计算行数平衡 # 使用新的匹配算法:连续的减号和加号应该按顺序匹配
line_balance = 0 removed_items = []
added_items = []
# 分离删除和新增项目
for item in diff_block: for item in diff_block:
if item['type'] == 'added': if item['type'] == 'removed':
line_balance += 1 removed_items.append(item)
elif item['type'] == 'removed': elif item['type'] == 'added':
line_balance -= 1 added_items.append(item)
# 如果平衡为正数,需要在中文侧添加空白行
if line_balance > 0:
# 找到基准行号(第一个操作的行号)
base_line = None
for item in diff_block:
if item['old_line']: # 优先使用删除行的行号
base_line = item['old_line']
break
elif item['new_line'] and base_line is None:
base_line = item['new_line']
if base_line:
# 收集需要分配到空白行的新增内容
additions_for_blank_lines = []
remaining_additions = []
for item in diff_block:
if item['type'] == 'added':
additions_for_blank_lines.append(item['content'])
# 记录需要插入的空白行和对应的内容
blank_lines_to_insert[base_line] = additions_for_blank_lines
# 处理具体的diff项
j = 0
while j < len(diff_block):
item = diff_block[j]
# 检查是否是替换操作(删除后紧跟新增)
if (item['type'] == 'removed' and j + 1 < len(diff_block) and
diff_block[j + 1]['type'] == 'added'):
next_item = diff_block[j + 1]
# 这是同一行的替换操作
target_line = item['old_line'] # 使用删除行的行号作为目标行号
# 进行匹配:连续块里的每一个减都应该和后续的加形成匹配替换
match_index = 0
for removed_item in removed_items:
if match_index < len(added_items):
# 匹配成功:形成替换
target_line = removed_item['old_line']
if target_line not in en_changes_by_line: if target_line not in en_changes_by_line:
en_changes_by_line[target_line] = [] en_changes_by_line[target_line] = []
en_changes_by_line[target_line].append({ en_changes_by_line[target_line].append({
'type': 'replaced', 'type': 'replaced',
'old_content': item['content'], 'old_content': removed_item['content'],
'new_content': next_item['content'] 'new_content': added_items[match_index]['content']
}) })
match_index += 1
j += 2 # 跳过下一个项目,因为已经处理了
# 处理普通的添加操作(不包括需要分配到空白行的)
elif item['type'] == 'added' and item['new_line']:
# 如果这个新增内容已经被分配到空白行,就跳过
if line_balance > 0 and item['content'] in blank_lines_to_insert.get(base_line, []):
j += 1
continue
if item['new_line'] not in en_changes_by_line:
en_changes_by_line[item['new_line']] = []
en_changes_by_line[item['new_line']].append({
'type': 'added',
'content': item['content']
})
j += 1
# 处理普通的删除操作(没有对应的新增)
elif item['type'] == 'removed' and item['old_line']:
if item['old_line'] not in en_changes_by_line:
en_changes_by_line[item['old_line']] = []
en_changes_by_line[item['old_line']].append({
'type': 'removed',
'content': item['content']
})
j += 1
else: else:
j += 1 # 没有匹配的加:这是删除
target_line = removed_item['old_line']
if target_line not in en_changes_by_line:
en_changes_by_line[target_line] = []
en_changes_by_line[target_line].append({
'type': 'removed',
'content': removed_item['content']
})
# 处理剩余的加(没有匹配的减):这是新增,应该在左侧空行
if match_index < len(added_items):
# 找到基准行号(使用最后一个删除行的行号,如果没有则使用第一个新增的行号)
base_line = None
if removed_items:
base_line = removed_items[-1]['old_line']
elif added_items:
base_line = added_items[match_index]['new_line'] - len(added_items) + match_index
if base_line:
remaining_additions = added_items[match_index:]
blank_lines_to_insert[base_line + 1] = remaining_additions
# 继续处理剩余项 # 继续处理剩余项
else: else: