Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions document_page/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ Trobz

* Ángel García de la Chica Herrera

* `Dixmit <https://www.dixmit.com>`__:

* Enric Tobella

Other credits
~~~~~~~~~~~~~

Expand Down
5 changes: 4 additions & 1 deletion document_page/__manifest__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"website": "https://github.com/OCA/knowledge",
"license": "AGPL-3",
"depends": ["mail", "document_knowledge"],
"depends": ["mail", "document_knowledge", "web_editor"],
"data": [
"security/document_page_security.xml",
"security/ir.model.access.csv",
Expand All @@ -30,6 +30,9 @@
],
"demo": ["demo/document_page.xml"],
"assets": {
"web._assets_primary_variables": [
"document_page/static/src/**/document_page_variables.scss",
],
"web.assets_backend": [
"document_page/static/src/scss/document_page.scss",
],
Expand Down
242 changes: 242 additions & 0 deletions document_page/models/diff_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
# This code has been ported from Odoo 18.0 web_editor module.
# License of this part should remain LGPL-3.0 or later following Odoo's licensing.
# Part of Odoo. See LICENSE file for full copyright and licensing details.

import re
from difflib import SequenceMatcher

# ------------------------------------------------------------
# Patch and comparison functions
# ------------------------------------------------------------


# Separator placed between two operations of a patch.
OPERATION_SEPARATOR = "\n"
# Contents are cut into "lines" on every tag-opening character.
LINE_SEPARATOR = "<"

# Markers found inside one operation: <type>@<indexes>[:<content>]
PATCH_OPERATION_LINE_AT = "@"
PATCH_OPERATION_CONTENT = ":"

# One-character operation types.
PATCH_OPERATION_ADD = "+"
PATCH_OPERATION_REMOVE = "-"
PATCH_OPERATION_REPLACE = "R"

# Maps the difflib opcode tags to the operation types above.
PATCH_OPERATIONS = {
    "insert": PATCH_OPERATION_ADD,
    "delete": PATCH_OPERATION_REMOVE,
    "replace": PATCH_OPERATION_REPLACE,
}

# Attributes stripped from both contents before they are compared.
HTML_ATTRIBUTES_TO_REMOVE = [
    "data-last-history-steps",
]


# Splits a line into "<rest of the tag>" (group 1) and the text after it
# (group 2).
HTML_TAG_ISOLATION_REGEX = r"^([^>]*>)(.*)$"
# Replacement templates wrapping the text part of a line in a marker tag.
ADDITION_COMPARISON_REGEX = r"\1<added>\2</added>"
ADDITION_1ST_REPLACE_COMPARISON_REGEX = r"added>\2</added>"
DELETION_COMPARISON_REGEX = r"\1<removed>\2</removed>"
# Detects a marker tag wrapping nothing at all.
EMPTY_OPERATION_TAG = r"<(added|removed)><\/(added|removed)>"
# Detects opening tags stuck between an </added> and the next <removed>.
SAME_TAG_REPLACE_FIXER = r"<\/added><(?:[^\/>]|(?:><))+><removed>"
# Detects an <added> block directly followed by a <removed> block.
# NOTE(review): the "*" quantifier applies to the negative lookahead, not to
# "[^<]", so each capture group appears to hold a single character only -
# confirm whether multi-character contents were meant to match.
UNNECESSARY_REPLACE_FIXER = (
    r"<added>([^<](?!<\/added>)*)<\/added>"
    r"<removed>([^<](?!<\/removed>)*)<\/removed>"
)


def generate_comparison(new_content, old_content):  # noqa: C901
    """Compare a content to an older content
    and generate a comparison html between both content.

    The patch going from ``new_content`` to ``old_content`` is computed and
    applied on top of ``new_content``: text coming from the patch is wrapped
    in ``<added>`` tags and text of ``new_content`` hit by a remove or
    replace operation is wrapped in ``<removed>`` tags.

    :param string new_content: the current content
    :param string old_content: the old content

    :return: string: the comparison content (``new_content`` without the
        ignored attributes when no difference is found)
    """
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    if new_content == old_content:
        return new_content

    patch = generate_patch(new_content, old_content)
    if not patch:
        # The patch generator ignores line breaks, so two contents that only
        # differ by "\n" give an empty patch. Without this guard the empty
        # operation below would fail on int("").
        return new_content

    comparison = new_content.split(LINE_SEPARATOR)
    patch_operations = patch.split(OPERATION_SEPARATOR)
    # We need to apply operations from the last to the first
    # to preserve the indexes integrity.
    patch_operations.reverse()

    for operation in patch_operations:
        # operation format: <type>@<start>[,<end>][:<line><line>...]
        metadata, *patch_content_line = operation.split(LINE_SEPARATOR)

        metadata_split = metadata.split(PATCH_OPERATION_LINE_AT)
        operation_type = metadata_split[0]
        lines_index_range = metadata_split[1] if len(metadata_split) > 1 else ""
        lines_index_range = lines_index_range.split(PATCH_OPERATION_CONTENT)[0]
        indexes = lines_index_range.split(",")
        start_index = int(indexes[0])
        end_index = int(indexes[1]) if len(indexes) > 1 else start_index

        # If the operation is a replace, we need to flag the changes that
        # will generate ghost opening tags if we don't ignore
        # them.
        # This can happen when:
        #    * A change concerning only html parameters.
        #       <p class="x">a</p> => <p class="y">a</p>
        #    * An addition in a previously empty element opening tag
        #       <p></p> => <p>a</p>
        if operation_type == PATCH_OPERATION_REPLACE:
            for i, line in enumerate(patch_content_line):
                current_index = start_index + i
                if current_index > end_index:
                    break

                current_line = comparison[current_index]
                current_line_tag = current_line.split(">")[0]
                line_tag = line.split(">")[0]
                # endswith() rather than [-1]: a segment can be empty (e.g.
                # the one before the very first "<") and must not raise.
                if current_line.endswith(">") and (
                    current_line_tag == line_tag
                    or current_line_tag.split(" ")[0] == line_tag.split(" ")[0]
                ):
                    comparison[current_index] = "delete_me>"

        # We need to insert lines from the last to the first
        # to preserve the indexes integrity.
        patch_content_line.reverse()

        if operation_type in (PATCH_OPERATION_REMOVE, PATCH_OPERATION_REPLACE):
            for index in range(end_index, start_index - 1, -1):
                deletion_flagged_comparison = re.sub(
                    HTML_TAG_ISOLATION_REGEX,
                    DELETION_COMPARISON_REGEX,
                    comparison[index],
                )
                # Only use this line if it doesn't generate an empty
                # <removed> tag
                if not re.search(EMPTY_OPERATION_TAG, deletion_flagged_comparison):
                    comparison[index] = deletion_flagged_comparison

        if operation_type == PATCH_OPERATION_ADD:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )

                # A line holding no text is inserted as is, so that no empty
                # <added> tag is produced.
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index + 1, addition_flagged_line)
                else:
                    comparison.insert(start_index + 1, line)

        if operation_type == PATCH_OPERATION_REPLACE:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index, addition_flagged_line)
                elif line.split(">")[0] != comparison[start_index].split(">")[0]:
                    # Text-less line: only keep it when its tag differs from
                    # the one currently sitting at the insertion point.
                    comparison.insert(start_index, line)

    final_comparison = LINE_SEPARATOR.join(comparison)
    # We can remove all the opening tags which are located between the end of an
    # added tag and the start of a removed tag, because this should never happen
    # as the added and removed tags should always be near each other.
    # This can happen when the new container tag had a parameter change.
    final_comparison = re.sub(
        SAME_TAG_REPLACE_FIXER, "</added><removed>", final_comparison
    )

    # Remove all the <delete_me> tags
    final_comparison = final_comparison.replace(r"<delete_me>", "")

    # This fixes the issue of unnecessary replace tags.
    # ex: <added>abc</added><removed>abc</removed> -> abc
    # This can occur when the new content is the same as the old content and
    # their container tags are the same but the tags parameters are different
    for match in re.finditer(UNNECESSARY_REPLACE_FIXER, final_comparison):
        if match.group(1) == match.group(2):
            final_comparison = final_comparison.replace(match.group(0), match.group(1))

    return final_comparison


def _format_line_index(start, end):
    """Build the ``@<start>[,<end>]`` part of a patch operation.

    ``end`` is exclusive. An empty range (``start == end``) is anchored on
    the index just before ``start``; a range covering at most one line only
    prints its start index.

    :param start: the start index
    :param end: the end index
    :return: string
    """
    span = end - start
    if span == 0:
        # Nothing is covered: point at the previous index instead.
        start -= 1
    if span > 1:
        return f"{PATCH_OPERATION_LINE_AT}{start},{end - 1}"
    return f"{PATCH_OPERATION_LINE_AT}{start}"


def _patch_generator(new_content, old_content):
    """Yield, one by one, the operations of the patch between two contents.

    Both contents are cut on ``LINE_SEPARATOR`` and compared with
    ``SequenceMatcher``. Every yielded operation is a string shaped as:
        <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string new_content: the new content
    :param string old_content: the old content

    :return: generator of strings: the operations needed to turn the new
             content back into the old content
    """
    # Line breaks are dropped from both contents so they cannot interfere
    # with the operations.
    new_lines = new_content.replace("\n", "").split(LINE_SEPARATOR)
    old_lines = old_content.replace("\n", "").split(LINE_SEPARATOR)

    matcher = SequenceMatcher(None, new_lines, old_lines, False)
    for opcodes in matcher.get_grouped_opcodes(0):
        # Indexes refer to the new content: from the start of the first
        # opcode of the group to the end of the last one.
        header = _format_line_index(opcodes[0][1], opcodes[-1][2])
        restored_lines = []

        # First pass: deletions only contribute their operation type.
        for tag, _i1, _i2, _j1, _j2 in opcodes:
            if tag == "delete":
                header = PATCH_OPERATIONS[tag] + header

        # Second pass: insertions and replacements also carry the old lines.
        for tag, _i1, _i2, j1, j2 in opcodes:
            if tag in ("insert", "replace"):
                header = PATCH_OPERATIONS[tag] + header
                restored_lines.extend(old_lines[j1:j2])

        if not restored_lines:
            yield header
            continue

        payload = LINE_SEPARATOR + LINE_SEPARATOR.join(restored_lines)
        yield header + PATCH_OPERATION_CONTENT + payload


def generate_patch(new_content, old_content):
    """Return the patch turning ``new_content`` back into ``old_content``.

    The attributes listed in ``HTML_ATTRIBUTES_TO_REMOVE`` are stripped from
    both contents first; the operations are then joined with
    ``OPERATION_SEPARATOR``.

    :param string new_content: the new content
    :param string old_content: the old content

    :return: string: the patch (empty when no operation is generated)
    """
    cleaned_new = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    cleaned_old = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    operations = _patch_generator(cleaned_new, cleaned_old)
    return OPERATION_SEPARATOR.join(operations)


def _remove_html_attribute(html_content, attributes_to_remove):
for attribute in attributes_to_remove:
html_content = re.sub(rf' {attribute}="[^"]*"', "", html_content)

return html_content
29 changes: 7 additions & 22 deletions document_page/models/document_page_history.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Copyright (C) 2004-2010 Tiny SPRL (<http://tiny.be>).
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).

import difflib

from odoo import _, api, fields, models
from odoo import fields, models

from .diff_utils import (
generate_comparison,
)


class DocumentPageHistory(models.Model):
Expand All @@ -17,7 +20,7 @@ class DocumentPageHistory(models.Model):
name = fields.Char(index=True)
summary = fields.Char(index=True)
content = fields.Html(sanitize=False)
diff = fields.Html(compute="_compute_diff")
diff = fields.Html(compute="_compute_diff", sanitize_tags=False)

company_id = fields.Many2one(
"res.company",
Expand All @@ -43,28 +46,10 @@ def _compute_diff(self):
)
rec.diff = self._get_diff(prev.id, rec.id)

@api.model
def _get_diff(self, v1, v2):
"""Return the difference between two version of document version."""
text1 = v1 and self.browse(v1).content or ""
text2 = v2 and self.browse(v2).content or ""
# Include line breaks to make it more readable
# TODO: consider using a beautify library directly on the content
text1 = text1.replace("</p><p>", "</p>\r\n<p>")
text2 = text2.replace("</p><p>", "</p>\r\n<p>")
line1 = text1.splitlines(True)
line2 = text2.splitlines(True)
if line1 == line2:
return _("There are no changes in revisions.")
else:
diff = difflib.HtmlDiff()
return diff.make_table(
line1,
line2,
f"Revision-{v1}",
f"Revision-{v2}",
context=True,
)
return generate_comparison(text1, text2)

def name_get(self):
return [(rec.id, "%s #%i" % (rec.page_id.name, rec.id)) for rec in self]
4 changes: 4 additions & 0 deletions document_page/readme/CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ Trobz
* `Sygel <https://www.sygel.es>`_:

* Ángel García de la Chica Herrera

* `Dixmit <https://www.dixmit.com>`__:

* Enric Tobella
4 changes: 4 additions & 0 deletions document_page/static/description/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,10 @@ <h2><a class="toc-backref" href="#toc-entry-7">Contributors</a></h2>
<li>Ángel García de la Chica Herrera</li>
</ul>
</li>
<li><a class="reference external" href="https://www.dixmit.com">Dixmit</a>:<ul>
<li>Enric Tobella</li>
</ul>
</li>
</ul>
</div>
<div class="section" id="other-credits">
Expand Down
Loading