Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce consecutive diffmods to one #2

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 89 additions & 9 deletions lib/htmldiff.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,42 @@ def end_in_new
# Plain Struct describing one diff operation: an action plus start/end
# positions in the old and in the new version.
# NOTE(review): the position units are not visible here — presumably word
# indices into the split inputs; confirm against the code that builds these.
Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)

class DiffBuilder

# Sets up a diff between two versions of a document.
#
# For BC reasons, you can call this constructor with positioned options, but
# named are strongly preferred.
#
# Legacy signature:
#   initialize(old_version, new_version, ignore_whitespace = false, ignore_tags = false)
#
# New signature:
#   initialize(old_version, new_version, ignore_whitespace: false, ignore_tags: false,
#              reduce_consecutive: false, sibling_elements: %w(p li div))
#
# The named form arrives here as a single trailing Hash, which is how the two
# call styles are told apart. The legacy positional form has no slot for
# reduce_consecutive or sibling_elements, so those keep their defaults.
def initialize(old_version, new_version, *mixed)
  @old_version, @new_version = old_version, new_version
  # Tag names consulted by insert_tag when ignore_tags is on.
  @sibling_elements = %w(p li div)

  if mixed.first.is_a?(Hash)
    options = mixed.first
    @ignore_whitespace  = !!options[:ignore_whitespace]
    @ignore_tags        = !!options[:ignore_tags]
    @reduce_consecutive = !!options[:reduce_consecutive]
    @sibling_elements   = options[:sibling_elements] if options[:sibling_elements]
  else
    @ignore_whitespace  = !!mixed[0]
    @ignore_tags        = !!mixed[1]
    @reduce_consecutive = false
  end

  # Must be derived from the normalised flag, not from a raw argument.
  @join_char = @ignore_whitespace ? ' ' : ''
  @content = []
end

# Runs the diff and returns the resulting markup as one String.
#
# Splits both inputs into words, indexes the new words, performs every
# operation (each appends to @content), then joins @content with @join_char.
# When reduce_consecutive was requested, the joined markup is passed through
# ConsecutiveDiffReducer before being returned.
def build
  split_inputs_to_words
  index_new_words
  operations.each { |op| perform_operation(op) }

  diff_output = @content.join(@join_char)
  return diff_output unless @reduce_consecutive

  ConsecutiveDiffReducer.new.call(diff_output)
end

def split_inputs_to_words
Expand Down Expand Up @@ -211,7 +233,10 @@ def insert_tag(tagname, cssclass, words)
@content << wrap_text(non_tags.join(@join_char), tagname, cssclass) unless non_tags.empty?

break if words.empty?
break if @ignore_tags && tagname == "del"
mm = words.first.match(/<\/?(\w+)>/)
next_tagname = mm ? mm[1] : nil
break if @ignore_tags && tagname == "del" && !@sibling_elements.include?(next_tagname)

@content += extract_consecutive_words(words) { |word| tag?(word) }
end
end
Expand Down Expand Up @@ -293,8 +318,63 @@ def convert_html_to_list_of_words(x, use_brackets = false)

end # of class Diff Builder

def diff(a, b, ignore_whitespace = false, ignore_tags = false)
DiffBuilder.new(a, b, ignore_whitespace, ignore_tags).build
# Collapses runs of consecutive <del …>…</del><ins …>…</ins> pairs that are
# separated only by "skippable" text (whitespace by default) into one pair, so
# that a rewritten phrase reads as a single change instead of word-by-word.
#
# The first pair's opening tags (and therefore its attributes) are reused for
# the merged pair; separators between pairs are copied into both sides.
class ConsecutiveDiffReducer
  # One complete pair, captured as a whole so String#split keeps it as a token.
  # The content groups refuse to run across a closing </del> / </ins>, so a
  # match cannot start at an earlier stand-alone <del> and swallow unchanged
  # text on its way to a later pair. /m lets the content span line breaks.
  DIFFMOD_TOKEN = %r{(<del[^>]*>(?:(?!</del>).)*</del><ins[^>]*>(?:(?!</ins>).)*</ins>)}im

  # Same shape, anchored to a whole token and split into
  # (1) opening del tag, (2) deleted text, (3) opening ins tag, (4) inserted text.
  DIFFMOD_PARTS = %r{\A(<del[^>]*>)((?:(?!</del>).)*)</del>(<ins[^>]*>)((?:(?!</ins>).)*)</ins>\z}im

  # skip: Regexp deciding which text between two pairs may be absorbed into the
  #       merged pair. The default is anchored with \A/\z (not ^/$) so that only
  #       tokens consisting entirely of whitespace qualify; a line-anchored
  #       pattern would also accept unchanged text that merely contains a
  #       whitespace-only line.
  def initialize(skip: /\A\s+\z/)
    @skip_regexp = skip
  end

  # Returns +input+ with every run of consecutive pairs reduced to one pair.
  # Text outside such runs is passed through untouched.
  def call(input)
    @output = []
    @buffer = []
    input.split(DIFFMOD_TOKEN).each do |token|
      if token =~ DIFFMOD_PARTS
        @buffer << token
      elsif !@buffer.empty? && token =~ @skip_regexp
        # Only skippable text that follows a pair can belong to a run.
        @buffer << token
      else
        flush_buffer!
        @output << token
      end
    end
    flush_buffer!
    @output.join
  end

  # Appends the reduced form of the pending run to the output and resets it.
  def flush_buffer!
    @output.concat(reduce_buffer)
    @buffer = []
  end

  # Returns the pending run as an Array of String fragments forming one merged
  # pair, or [] when nothing is pending.
  #
  # Raises RuntimeError if the run contains no pair at all.
  def reduce_buffer
    return [] if @buffer.empty?

    run = @buffer.dup
    # Skippable text after the last pair is not *between* two changes, so it is
    # emitted after the merged pair instead of being duplicated inside it.
    trailing = []
    trailing.unshift(run.pop) until run.empty? || run.last =~ DIFFMOD_PARTS
    raise "Token didn't match expression" if run.empty?

    delete_tag = nil
    insert_tag = nil
    deletes = []
    inserts = []
    run.each do |token|
      parts = DIFFMOD_PARTS.match(token)
      if parts
        delete_tag ||= parts[1]
        deletes << parts[2]
        insert_tag ||= parts[3]
        inserts << parts[4]
      else
        # Separator between two pairs: present in both old and new text.
        deletes << token
        inserts << token
      end
    end
    [delete_tag, *deletes, "</del>", insert_tag, *inserts, "</ins>", *trailing]
  end
end # of class ConsecutiveDiffReducer

# Convenience entry point: diffs +a+ against +b+ and returns the built result.
# Everything after the two versions is handed to DiffBuilder.new unchanged, so
# both the legacy positional flags and the named options are accepted here.
def diff(a, b, *options)
  builder = DiffBuilder.new(a, b, *options)
  builder.build
end

end
110 changes: 96 additions & 14 deletions spec/htmldiff_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,106 @@ class TestDiff
expect(diff).to eq("a <a href=\"#c1\"><a href=\"#c2\"></a> <del class=\"diffmod\">b</del><ins class=\"diffmod\">c</ins>")
end

# Uses the legacy positional call form: the third argument is
# ignore_whitespace (false) and the fourth is ignore_tags (true).
it "changes in properties will render both versions of the start tag, but not end tag" do
a = 'a <a href="#c1"></a> b'
b = 'a <a href="#c2"></a> c'
diff = TestDiff.diff(a, b, false, true)
expect(diff).to eq("a <a href=\"#c2\"></a> <del class=\"diffmod\">b</del><ins class=\"diffmod\">c</ins>")
end

# Same legacy positional form (ignore_whitespace false, ignore_tags true), with
# changed text alternating with changed anchor attributes.
it "works when jumping between tags and non tags" do
a = 'a <a href="#c1"></a>b<a href="#c1"> c<a href="#c1">e'
b = 'a <a href="#c2"></a>c<a href="#c3"> d<a href="#c4">e'
diff = TestDiff.diff(a, b, false, true)
expect(diff).to eq("a <a href=\"#c2\"></a><del class=\"diffmod\">b</del><ins class=\"diffmod\">c</ins><a href=\"#c3\"> <del class=\"diffmod\">c</del><ins class=\"diffmod\">d</ins><a href=\"#c4\">e")
end

# Called with no options at all, so every flag takes its default.
# NOTE(review): the closing </b> in `b` does not match the opening <p>; the
# expectation reproduces it verbatim, so it looks deliberate — confirm.
it "example from the library" do
a = '<p>a</p>'
b = '<p>ab</p><p>c</b>'
diff = TestDiff.diff(a, b)
expect(diff).to eq("<p><del class=\"diffmod\">a</del><ins class=\"diffmod\">ab</ins></p><p><ins class=\"diffins\">c</ins></b>")
end

# Covers the ignore_tags option through the named-options call form.
# Every example has a distinct description so a failure report identifies
# exactly one case.
describe "ignore_tags option" do
  describe "changes in properties should render balanced tags" do
    describe "when disabled" do
      it "will render both versions of the start tag, but not end tag" do
        a = 'a <a href="#c1"></a> b'
        b = 'a <a href="#c2"></a> c'
        expected = 'a <a href="#c1"><a href="#c2"></a> <del class="diffmod">b</del><ins class="diffmod">c</ins>'
        diff = TestDiff.diff(a, b, ignore_tags: false)
        expect(diff).to eq(expected)
      end

      it "will render both versions of each start tag when jumping between tags and non tags" do
        a = 'a <a href="#c1"></a>b<a href="#c1"> c<a href="#c1">e'
        b = 'a <a href="#c2"></a>c<a href="#c3"> d<a href="#c4">e'
        expected = 'a <a href="#c1"><a href="#c2"></a><del class="diffmod">b</del><a href="#c1"><ins class="diffmod">c</ins><a href="#c3"> <del class="diffmod">c</del><a href="#c1"><ins class="diffmod">d</ins><a href="#c4">e'
        diff = TestDiff.diff(a, b, ignore_tags: false)
        expect(diff).to eq(expected)
      end
    end

    describe "when enabled" do
      it "will produce valid html" do
        a = 'a <a href="#c1"></a> b'
        b = 'a <a href="#c2"></a> c'
        expected = 'a <a href="#c2"></a> <del class="diffmod">b</del><ins class="diffmod">c</ins>'
        diff = TestDiff.diff(a, b, ignore_tags: true)
        expect(diff).to eq(expected)
      end

      it "will produce valid html when jumping between tags and non tags" do
        a = 'a <a href="#c1"></a>b<a href="#c1"> c<a href="#c1">e'
        b = 'a <a href="#c2"></a>c<a href="#c3"> d<a href="#c4">e'
        expected = 'a <a href="#c2"></a><del class="diffmod">b</del><ins class="diffmod">c</ins><a href="#c3"> <del class="diffmod">c</del><ins class="diffmod">d</ins><a href="#c4">e'
        diff = TestDiff.diff(a, b, ignore_tags: true)
        expect(diff).to eq(expected)
      end
    end
  end

  # Deleting an element that has same-named siblings must still show the
  # deleted element, whether or not tags are being ignored.
  describe "removing tag with similar siblings" do
    describe "when disabled" do
      it "should show deleted paragraph" do
        a = '<p>first</p><p>second</p>'
        b = '<p>first</p>'
        expected = '<p>first</p><p><del class="diffdel">second</del></p>'
        diff = TestDiff.diff(a, b, ignore_tags: false)
        expect(diff).to eq(expected)
      end

      it "should show deleted list-element" do
        a = 'my list <ol><li>item a</li><li>item b</li></ol>'
        b = 'my list <ol><li>item a</li></ol>'
        expected = 'my list <ol><li>item a</li><li><del class="diffdel">item b</del></li></ol>'
        diff = TestDiff.diff(a, b, ignore_tags: false)
        expect(diff).to eq(expected)
      end
    end

    describe "when enabled" do
      it "should show deleted paragraph" do
        a = '<p>first</p><p>second</p>'
        b = '<p>first</p>'
        expected = '<p>first</p><p><del class="diffdel">second</del></p>'
        diff = TestDiff.diff(a, b, ignore_tags: true)
        expect(diff).to eq(expected)
      end

      it "should show deleted list-element" do
        a = 'my list <ol><li>item a</li><li>item b</li></ol>'
        b = 'my list <ol><li>item a</li></ol>'
        expected = 'my list <ol><li>item a</li><li><del class="diffdel">item b</del></li></ol>'
        diff = TestDiff.diff(a, b, ignore_tags: true)
        expect(diff).to eq(expected)
      end
    end
  end
end

# Both examples diff the same pair of sentences; only the reduce_consecutive
# flag differs, and with it whether neighbouring changes are merged.
describe "reduce_consecutive option" do
  it "should diff individual words, when not enabled" do
    before_html = '<p>Han går til samtaler ved en psykiater. Like a boss.</p>'
    after_html = '<p>Han drikker stærk spiritus. Like a boss.</p>'
    word_by_word = '<p>Han <del class="diffmod">går</del><ins class="diffmod">drikker</ins> <del class="diffmod">til</del><ins class="diffmod">stærk</ins> <del class="diffmod">samtaler ved en psykiater.</del><ins class="diffmod">spiritus.</ins> Like a boss.</p>'

    result = TestDiff.diff(before_html, after_html, reduce_consecutive: false)

    expect(result).to eq(word_by_word)
  end

  it "should reduce consecutive matches, when enabled" do
    before_html = '<p>Han går til samtaler ved en psykiater. Like a boss.</p>'
    after_html = '<p>Han drikker stærk spiritus. Like a boss.</p>'
    merged = '<p>Han <del class="diffmod">går til samtaler ved en psykiater.</del><ins class="diffmod">drikker stærk spiritus.</ins> Like a boss.</p>'

    result = TestDiff.diff(before_html, after_html, reduce_consecutive: true)

    expect(result).to eq(merged)
  end
end
end