From 0e1294d83342810ba8e4e53813b56bc5964d4f41 Mon Sep 17 00:00:00 2001 From: wdjwxh Date: Fri, 19 Dec 2025 10:07:11 +0800 Subject: [PATCH] v2 --- sync.py | 114 ++++++++++++++++++++------------------------------------ 1 file changed, 41 insertions(+), 73 deletions(-) diff --git a/sync.py b/sync.py index 07db0bb..6581271 100644 --- a/sync.py +++ b/sync.py @@ -326,89 +326,57 @@ def create_diff_html(title, en_diff, en_old_lines, en_new_lines, cn_content=None break i += 1 - # 处理连续的diff块 + # 处理连续的diff块 - 改进的连续匹配算法 if diff_block: - # 计算行数平衡 - line_balance = 0 + # 使用新的匹配算法:连续的减号和加号应该按顺序匹配 + removed_items = [] + added_items = [] + + # 分离删除和新增项目 for item in diff_block: - if item['type'] == 'added': - line_balance += 1 - elif item['type'] == 'removed': - line_balance -= 1 - - # 如果平衡为正数,需要在中文侧添加空白行 - if line_balance > 0: - # 找到基准行号(第一个操作的行号) - base_line = None - for item in diff_block: - if item['old_line']: # 优先使用删除行的行号 - base_line = item['old_line'] - break - elif item['new_line'] and base_line is None: - base_line = item['new_line'] - - if base_line: - # 收集需要分配到空白行的新增内容 - additions_for_blank_lines = [] - remaining_additions = [] - - for item in diff_block: - if item['type'] == 'added': - additions_for_blank_lines.append(item['content']) - - # 记录需要插入的空白行和对应的内容 - blank_lines_to_insert[base_line] = additions_for_blank_lines - - # 处理具体的diff项 - j = 0 - while j < len(diff_block): - item = diff_block[j] - - # 检查是否是替换操作(删除后紧跟新增) - if (item['type'] == 'removed' and j + 1 < len(diff_block) and - diff_block[j + 1]['type'] == 'added'): - next_item = diff_block[j + 1] - - # 这是同一行的替换操作 - target_line = item['old_line'] # 使用删除行的行号作为目标行号 + if item['type'] == 'removed': + removed_items.append(item) + elif item['type'] == 'added': + added_items.append(item) + # 进行匹配:连续块里的每一个减都应该和后续的加形成匹配替换 + match_index = 0 + for removed_item in removed_items: + if match_index < len(added_items): + # 匹配成功:形成替换 + target_line = removed_item['old_line'] if target_line not in en_changes_by_line: en_changes_by_line[target_line] = [] en_changes_by_line[target_line].append({ 'type': 'replaced', - 'old_content': item['content'], - 'new_content': next_item['content'] + 'old_content': removed_item['content'], + 'new_content': added_items[match_index]['content'] }) - - j += 2 # 跳过下一个项目,因为已经处理了 - - # 处理普通的添加操作(不包括需要分配到空白行的) - elif item['type'] == 'added' and item['new_line']: - # 如果这个新增内容已经被分配到空白行,就跳过 - if line_balance > 0 and item['content'] in blank_lines_to_insert.get(base_line, []): - j += 1 - continue - - if item['new_line'] not in en_changes_by_line: - en_changes_by_line[item['new_line']] = [] - en_changes_by_line[item['new_line']].append({ - 'type': 'added', - 'content': item['content'] - }) - j += 1 - - # 处理普通的删除操作(没有对应的新增) - elif item['type'] == 'removed' and item['old_line']: - if item['old_line'] not in en_changes_by_line: - en_changes_by_line[item['old_line']] = [] - en_changes_by_line[item['old_line']].append({ - 'type': 'removed', - 'content': item['content'] - }) - j += 1 + match_index += 1 else: - j += 1 + # 没有匹配的加:这是删除 + target_line = removed_item['old_line'] + if target_line not in en_changes_by_line: + en_changes_by_line[target_line] = [] + + en_changes_by_line[target_line].append({ + 'type': 'removed', + 'content': removed_item['content'] + }) + + # 处理剩余的加(没有匹配的减):这是新增,应该在左侧空行 + if match_index < len(added_items): + # 找到基准行号(使用最后一个删除行的行号,如果没有则使用第一个新增的行号) + base_line = None + if removed_items: + base_line = removed_items[-1]['old_line'] + elif added_items: + base_line = added_items[match_index]['new_line'] - len(added_items) + match_index + + if base_line: + remaining_additions = added_items[match_index:] + blank_lines_to_insert[base_line + 1] = remaining_additions # 继续处理剩余项 else: