diff-match-patch-0.1.0/0000755000175000017500000000000013601103003013616 5ustar pravipravidiff-match-patch-0.1.0/LICENSE0000644000175000017500000000207313601103003014625 0ustar pravipraviCopyright (c) 2011, Jorge Kalmbach Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff-match-patch-0.1.0/Rakefile0000644000175000017500000000016313601103003015263 0ustar pravipravirequire 'rake/testtask' Rake::TestTask.new do |t| t.libs << 'test' end desc "Run tests" task :default => :test diff-match-patch-0.1.0/diff_match_patch.gemspec0000644000175000017500000000176313601103003020435 0ustar pravipravi######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: diff_match_patch 0.1.0 ruby lib Gem::Specification.new do |s| s.name = "diff_match_patch".freeze s.version = "0.1.0" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? 
:required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Jorge Kalmbach".freeze] s.date = "2011-11-18" s.description = "Ruby implementation of Google diff_match_patch".freeze s.email = "kalmbach@gmail.com".freeze s.files = ["LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "lib/diff_match_patch.rb".freeze, "lib/patch_obj.rb".freeze, "test/diff_match_patch-test.rb".freeze] s.homepage = "https://github.com/kalmbach/diff_match_patch".freeze s.rubygems_version = "2.7.6.2".freeze s.summary = "Ruby implementation of Google diff_match_patch".freeze end diff-match-patch-0.1.0/lib/0000755000175000017500000000000013601103003014364 5ustar pravipravidiff-match-patch-0.1.0/lib/patch_obj.rb0000644000175000017500000000230513601103003016642 0ustar pravipravirequire 'uri' # Class representing one patch operation. class PatchObj attr_accessor :start1, :start2 attr_accessor :length1, :length2 attr_accessor :diffs def initialize # Initializes with an empty list of diffs. @start1 = nil @start2 = nil @length1 = 0 @length2 = 0 @diffs = [] end # Emulate GNU diff's format # Header: @@ -382,8 +481,9 @@ # Indices are printed as 1-based, not 0-based. def to_s if length1 == 0 coords1 = start1.to_s + ",0" elsif length1 == 1 coords1 = (start1 + 1).to_s else coords1 = (start1 + 1).to_s + "," + length1.to_s end if length2 == 0 coords2 = start2.to_s + ",0" elsif length2 == 1 coords2 = (start2 + 1).to_s else coords2 = (start2 + 1).to_s + "," + length2.to_s end text = '@@ -' + coords1 + ' +' + coords2 + " @@\n" # Encode the body of the patch with %xx notation. text += diffs.map do |op, data| op = case op when :insert; '+' when :delete; '-' when :equal ; ' ' end op + URI.encode(data, /[^0-9A-Za-z_.;!~*'(),\/?:@&=+$\#-]/) + "\n" end.join.gsub('%20', ' ') return text end end diff-match-patch-0.1.0/lib/diff_match_patch.rb0000644000175000017500000015356213601103003020170 0ustar pravipravirequire 'patch_obj' # Class containing the diff, match and patch methods. 
# Also contains the behaviour settings. class DiffMatchPatch attr_accessor :diff_timeout attr_accessor :diff_editCost attr_accessor :match_threshold attr_accessor :match_distance attr_accessor :patch_deleteThreshold attr_accessor :patch_margin attr_reader :match_maxBits def initialize # Inits a diff_match_patch object with default settings. # Redefine these in your program to override the defaults. # Number of seconds to map a diff before giving up (0 for infinity). @diff_timeout = 1 # Cost of an empty edit operation in terms of edit characters. @diff_editCost = 4 # At what point is no match declared (0.0 = perfection, 1.0 = very loose). @match_threshold = 0.5 # How far to search for a match (0 = exact location, 1000+ = broad match). # A match this many characters away from the expected location will add # 1.0 to the score (0.0 is a perfect match). @match_distance = 1000 # When deleting a large block of text (over ~64 characters), how close does # the contents have to match the expected contents. (0.0 = perfection, # 1.0 = very loose). Note that Match_Threshold controls how closely the # end points of a delete need to match. @patch_deleteThreshold = 0.5 # Chunk size for context length. @patch_margin = 4 # The number of bits in an int. # Python has no maximum, thus to disable patch splitting set to 0. # However to avoid long patches in certain pathological cases, use 32. # Multiple short patches (using native ints) are much faster than long ones. @match_maxBits = 32 end # Find the differences between two texts. Simplifies the problem by # stripping any common prefix or suffix off the texts before diffing. def diff_main(text1, text2, checklines=true, deadline=nil) # Set a deadline by which time the diff must be complete. if deadline.nil? && diff_timeout > 0 deadline = Time.now + diff_timeout end # Check for null inputs. if text1.nil? || text2.nil? raise ArgumentError.new('Null inputs. (diff_main)') end # Check for equality (speedup). 
if text1 == text2 return [] if text1.empty? return [[:equal, text1]] end checklines = true if checklines.nil? # Trim off common prefix (speedup). common_length = diff_commonPrefix(text1, text2) if common_length.nonzero? common_prefix = text1[0...common_length] text1 = text1[common_length..-1] text2 = text2[common_length..-1] end # Trim off common suffix (speedup). common_length = diff_commonSuffix(text1, text2) if common_length.nonzero? common_suffix = text1[-common_length..-1] text1 = text1[0...-common_length] text2 = text2[0...-common_length] end # Compute the diff on the middle block. diffs = diff_compute(text1, text2, checklines, deadline) # Restore the prefix and suffix. diffs.unshift([:equal, common_prefix]) unless common_prefix.nil? diffs.push([:equal, common_suffix]) unless common_suffix.nil? diff_cleanupMerge(diffs) diffs end # Find the differences between two texts. Assumes that the texts do not # have any common prefix or suffix. def diff_compute(text1, text2, checklines, deadline) # Just add some text (speedup). return [[:insert, text2]] if text1.empty? # Just delete some text (speedup). return [[:delete, text1]] if text2.empty? shorttext, longtext = [text1, text2].sort_by(&:length) if i = longtext.index(shorttext) # Shorter text is inside the longer text (speedup). diffs = [[:insert, longtext[0...i]], [:equal, shorttext], [:insert, longtext[(i + shorttext.length)..-1]]] # Swap insertions for deletions if diff is reversed. if text1.length > text2.length diffs[0][0] = :delete diffs[2][0] = :delete end return diffs end if shorttext.length == 1 # Single character string. # After the previous speedup, the character can't be an equality. return [[:delete, text1], [:insert, text2]] end # Garbage collect. longtext = nil shorttext = nil # Check to see if the problem can be split in two. if hm = diff_halfMatch(text1, text2) # A half-match was found, sort out the return data. 
text1_a, text1_b, text2_a, text2_b, mid_common = hm # Send both pairs off for separate processing. diffs_a = diff_main(text1_a, text2_a, checklines, deadline) diffs_b = diff_main(text1_b, text2_b, checklines, deadline) # Merge the results. return diffs_a + [[:equal, mid_common]] + diffs_b end if checklines && text1.length > 100 && text2.length > 100 return diff_lineMode(text1, text2, deadline) end return diff_bisect(text1, text2, deadline) end # Do a quick line-level diff on both strings, then rediff the parts for # greater accuracy. # This speedup can produce non-minimal diffs. def diff_lineMode(text1, text2, deadline) # Scan the text on a line-by-line basis first. text1, text2, line_array = diff_linesToChars(text1, text2) diffs = diff_main(text1, text2, false, deadline) # Convert the diff back to original text. diff_charsToLines(diffs, line_array) # Eliminate freak matches (e.g. blank lines) diff_cleanupSemantic(diffs) # Rediff any replacement blocks, this time character-by-character. # Add a dummy entry at the end. diffs.push([:equal, '']) pointer = 0 count_delete = 0 count_insert = 0 text_delete = '' text_insert = '' while pointer < diffs.length case diffs[pointer][0] when :insert count_insert += 1 text_insert += diffs[pointer][1] when :delete count_delete += 1 text_delete += diffs[pointer][1] when :equal # Upon reaching an equality, check for prior redundancies. if count_delete >= 1 && count_insert >= 1 # Delete the offending records and add the merged ones. a = diff_main(text_delete, text_insert, false, deadline) diffs[pointer - count_delete - count_insert, count_delete + count_insert] = [] pointer = pointer - count_delete - count_insert diffs[pointer, 0] = a pointer = pointer + a.length end count_insert = 0 count_delete = 0 text_delete = '' text_insert = '' end pointer += 1 end diffs.pop # Remove the dummy entry at the end. return diffs end # Find the 'middle snake' of a diff, split the problem in two # and return the recursively constructed diff. 
# See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. def diff_bisect(text1, text2, deadline) # Cache the text lengths to prevent multiple calls. text1_length = text1.length text2_length = text2.length max_d = (text1_length + text2_length + 1) / 2 v_offset = max_d v_length = 2 * max_d v1 = Array.new(v_length, -1) v2 = Array.new(v_length, -1) v1[v_offset + 1] = 0 v2[v_offset + 1] = 0 delta = text1_length - text2_length # If the total number of characters is odd, then the front path will # collide with the reverse path. front = (delta % 2 != 0) # Offsets for start and end of k loop. # Prevents mapping of space beyond the grid. k1start = 0 k1end = 0 k2start = 0 k2end = 0 max_d.times do |d| # Bail out if deadline is reached. break if deadline && Time.now >= deadline # Walk the front path one step. (-d + k1start).step(d - k1end, 2) do |k1| k1_offset = v_offset + k1 if k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1] x1 = v1[k1_offset + 1] else x1 = v1[k1_offset - 1] + 1 end y1 = x1 - k1 while x1 < text1_length && y1 < text2_length && text1[x1] == text2[y1] x1 += 1 y1 += 1 end v1[k1_offset] = x1 if x1 > text1_length # Ran off the right of the graph. k1end += 2 elsif y1 > text2_length # Ran off the bottom of the graph. k1start += 2 elsif front k2_offset = v_offset + delta - k1 if k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1 # Mirror x2 onto top-left coordinate system. x2 = text1_length - v2[k2_offset] if x1 >= x2 # Overlap detected. return diff_bisectSplit(text1, text2, x1, y1, deadline) end end end end # Walk the reverse path one step. (-d + k2start).step(d - k2end, 2) do |k2| k2_offset = v_offset + k2 if k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1] x2 = v2[k2_offset + 1] else x2 = v2[k2_offset - 1] + 1 end y2 = x2 - k2 while x2 < text1_length && y2 < text2_length && text1[-x2-1] == text2[-y2-1] x2 += 1 y2 += 1 end v2[k2_offset] = x2 if x2 > text1_length # Ran off the left of the graph. 
k2end += 2 elsif y2 > text2_length # Ran off the top of the graph. k2start += 2 elsif !front k1_offset = v_offset + delta - k2 if k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1 x1 = v1[k1_offset] y1 = v_offset + x1 - k1_offset # Mirror x2 onto top-left coordinate system. x2 = text1_length - x2 if x1 >= x2 # Overlap detected. return diff_bisectSplit(text1, text2, x1, y1, deadline) end end end end end # Diff took too long and hit the deadline or # number of diffs equals number of characters, no commonality at all. [[:delete, text1], [:insert, text2]] end # Given the location of the 'middle snake', split the diff in two parts # and recurse. def diff_bisectSplit(text1, text2, x, y, deadline) text1a = text1[0...x] text2a = text2[0...y] text1b = text1[x..-1] text2b = text2[y..-1] # Compute both diffs serially. diffs = diff_main(text1a, text2a, false, deadline) diffsb = diff_main(text1b, text2b, false, deadline) diffs + diffsb end # Split two texts into an array of strings. Reduce the texts to a string # of hashes where each Unicode character represents one line. def diff_linesToChars(text1, text2) line_array = [''] # e.g. line_array[4] == "Hello\n" line_hash = {} # e.g. line_hash["Hello\n"] == 4 [text1, text2].map do |text| # Split text into an array of strings. Reduce the text to a string of # hashes where each Unicode character represents one line. chars = '' text.each_line do |line| if line_hash[line] chars += line_hash[line].chr(Encoding::UTF_8) else chars += line_array.length.chr(Encoding::UTF_8) line_hash[line] = line_array.length line_array.push(line) end end chars end.push(line_array) end # Rehydrate the text in a diff from a string of line hashes to real lines of text. def diff_charsToLines(diffs, line_array) diffs.each do |diff| diff[1] = diff[1].chars.map{ |c| line_array[c.ord] }.join end end # Determine the common prefix of two strings. def diff_commonPrefix(text1, text2) # Quick check for common null cases. return 0 if text1.empty? 
|| text2.empty? || text1[0] != text2[0] # Binary search. # Performance analysis: http://neil.fraser.name/news/2007/10/09/ pointer_min = 0 pointer_max = [text1.length, text2.length].min pointer_mid = pointer_max pointer_start = 0 while pointer_min < pointer_mid if text1[pointer_start...pointer_mid] == text2[pointer_start...pointer_mid] pointer_min = pointer_mid pointer_start = pointer_min else pointer_max = pointer_mid end pointer_mid = (pointer_max - pointer_min) / 2 + pointer_min end pointer_mid end # Determine the common suffix of two strings. def diff_commonSuffix(text1, text2) # Quick check for common null cases. return 0 if text1.empty? || text2.empty? || text1[-1] != text2[-1] # Binary search. # Performance analysis: http://neil.fraser.name/news/2007/10/09/ pointer_min = 0 pointer_max = [text1.length, text2.length].min pointer_mid = pointer_max pointer_end = 0 while pointer_min < pointer_mid if text1[-pointer_mid..(-pointer_end-1)] == text2[-pointer_mid..(-pointer_end-1)] pointer_min = pointer_mid pointer_end = pointer_min else pointer_max = pointer_mid end pointer_mid = (pointer_max - pointer_min) / 2 + pointer_min end pointer_mid end # Determine if the suffix of one string is the prefix of another. def diff_commonOverlap(text1, text2) # Cache the text lengths to prevent multiple calls. text1_length = text1.length text2_length = text2.length # Eliminate the null case. return 0 if text1_length.zero? || text2_length.zero? # Truncate the longer string. if text1_length > text2_length text1 = text1[-text2_length..-1] else text2 = text2[0...text1_length] end text_length = [text1_length, text2_length].min # Quick check for the whole case. return text_length if text1 == text2 # Start by looking for a single character match # and increase length until no match is found. # Performance analysis: http://neil.fraser.name/news/2010/11/04/ best = 0 length = 1 loop do pattern = text1[(text_length - length)..-1] found = text2.index(pattern) return best if found.nil? 
length += found if found == 0 || text1[(text_length - length)..-1] == text2[0..length] best = length length += 1 end end end # Does a substring of shorttext exist within longtext such that the # substring is at least half the length of longtext? def diff_halfMatchI(longtext, shorttext, i) seed = longtext[i, longtext.length / 4] j = -1 best_common = '' while j = shorttext.index(seed, j + 1) prefix_length = diff_commonPrefix(longtext[i..-1], shorttext[j..-1]) suffix_length = diff_commonSuffix(longtext[0...i], shorttext[0...j]) if best_common.length < suffix_length + prefix_length best_common = shorttext[(j - suffix_length)...j] + shorttext[j...(j + prefix_length)] best_longtext_a = longtext[0...(i - suffix_length)] best_longtext_b = longtext[(i + prefix_length)..-1] best_shorttext_a = shorttext[0...(j - suffix_length)] best_shorttext_b = shorttext[(j + prefix_length)..-1] end end if best_common.length * 2 >= longtext.length [best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common] end end # Do the two texts share a substring which is at least half the length of the # longer text? # This speedup can produce non-minimal diffs. def diff_halfMatch(text1, text2) # Don't risk returning a non-optimal diff if we have unlimited time return nil if diff_timeout <= 0 shorttext, longtext = [text1, text2].sort_by(&:length) if longtext.length < 4 || shorttext.length * 2 < longtext.length return nil # Pointless. end # First check if the second quarter is the seed for a half-match. hm1 = diff_halfMatchI(longtext, shorttext, (longtext.length + 3) / 4) # Check again based on the third quarter. hm2 = diff_halfMatchI(longtext, shorttext, (longtext.length + 1) / 2) if hm1.nil? && hm2.nil? return nil elsif hm2.nil? || hm1.nil? hm = hm2.nil? ? hm1 : hm2 else # Both matched. Select the longest. hm = hm1[4].length > hm2[4].length ? hm1 : hm2 end # A half-match was found, sort out the return data. 
if text1.length > text2.length text1_a, text1_b, text2_a, text2_b, mid_common = hm else text2_a, text2_b, text1_a, text1_b, mid_common = hm end [text1_a, text1_b, text2_a, text2_b, mid_common] end # Reduce the number of edits by eliminating semantically trivial equalities. def diff_cleanupSemantic(diffs) changes = false equalities = [] # Stack of indices where equalities are found. last_equality = nil # Always equal to equalities.last[1] pointer = 0 # Index of current position. # Number of characters that changed prior to the equality. length_insertions1 = 0 length_deletions1 = 0 # Number of characters that changed after the equality. length_insertions2 = 0 length_deletions2 = 0 while pointer < diffs.length if diffs[pointer][0] == :equal # Equality found. equalities.push(pointer) length_insertions1 = length_insertions2 length_deletions1 = length_deletions2 length_insertions2 = 0 length_deletions2 = 0 last_equality = diffs[pointer][1] else # An insertion or deletion. if diffs[pointer][0] == :insert length_insertions2 += diffs[pointer][1].length else length_deletions2 += diffs[pointer][1].length end if last_equality && last_equality.length <= [length_insertions1, length_deletions1].max && last_equality.length <= [length_insertions2, length_deletions2].max # Duplicate record. diffs[equalities.last, 0] = [[:delete, last_equality]] # Change second copy to insert. diffs[equalities.last + 1][0] = :insert # Throw away the equality we just deleted. equalities.pop # Throw away the previous equality (it needs to be reevaluated). equalities.pop pointer = equalities.last || -1 # Reset the counters. length_insertions1 = 0 length_deletions1 = 0 length_insertions2 = 0 length_deletions2 = 0 last_equality = nil changes = true end end pointer += 1 end # Normalize the diff. if changes diff_cleanupMerge(diffs) end diff_cleanupSemanticLossless(diffs) # Find any overlaps between deletions and insertions. 
# e.g: abcxxxxxxdef # -> abcxxxdef # e.g: xxxabcdefxxx # -> defxxxabc # Only extract an overlap if it is as big as the edit ahead or behind it. pointer = 1 while pointer < diffs.length if diffs[pointer - 1][0] == :delete && diffs[pointer][0] == :insert deletion = diffs[pointer - 1][1] insertion = diffs[pointer][1] overlap_length1 = diff_commonOverlap(deletion, insertion) overlap_length2 = diff_commonOverlap(insertion, deletion) if overlap_length1 >= overlap_length2 if overlap_length1 >= deletion.length / 2.0 || overlap_length1 >= insertion.length / 2.0 # Overlap found. Insert an equality and trim the surrounding edits. diffs[pointer, 0] = [[:equal, insertion[0...overlap_length1]]] diffs[pointer -1][0] = :delete diffs[pointer - 1][1] = deletion[0...-overlap_length1] diffs[pointer + 1][0] = :insert diffs[pointer + 1][1] = insertion[overlap_length1..-1] pointer += 1 end else if overlap_length2 >= deletion.length / 2.0 || overlap_length2 >= insertion.length / 2.0 diffs[pointer, 0] = [[:equal, deletion[0...overlap_length2]]] diffs[pointer - 1][0] = :insert diffs[pointer - 1][1] = insertion[0...-overlap_length2] diffs[pointer + 1][0] = :delete diffs[pointer + 1][1] = deletion[overlap_length2..-1] pointer += 1 end end pointer += 1 end pointer += 1 end end # Given two strings, compute a score representing whether the # internal boundary falls on logical boundaries. # Scores range from 5 (best) to 0 (worst). def diff_cleanupSemanticScore(one, two) if one.empty? || two.empty? # Edges are the best. return 5 end # Define some regex patterns for matching boundaries. nonWordCharacter = /[^a-zA-Z0-9]/ whitespace = /\s/ linebreak = /[\r\n]/ lineEnd = /\n\r?\n$/ lineStart = /^\r?\n\r?\n/ # Each port of this function behaves slightly differently due to # subtle differences in each language's definition of things like # 'whitespace'. 
Since this function's purpose is largely cosmetic, # the choice has been made to use each language's native features # rather than force total conformity. score = 0 # One point for non-alphanumeric. if one[-1] =~ nonWordCharacter || two[0] =~ nonWordCharacter score += 1 # Two points for whitespace. if one[-1] =~ whitespace || two[0] =~ whitespace score += 1 # Three points for line breaks. if one[-1] =~ linebreak || two[0] =~ linebreak score += 1 # Four points for blank lines. if one =~ lineEnd || two =~ lineStart score += 1 end end end end score end # Look for single edits surrounded on both sides by equalities # which can be shifted sideways to align the edit to a word boundary. # e.g: The cat came. -> The cat came. def diff_cleanupSemanticLossless(diffs) pointer = 1 # Intentionally ignore the first and last element (don't need checking). while pointer < diffs.length - 1 if diffs[pointer - 1][0] == :equal && diffs[pointer + 1][0] == :equal # This is a single edit surrounded by equalities. equality1 = diffs[pointer - 1][1] edit = diffs[pointer][1] equality2 = diffs[pointer + 1][1] # First, shift the edit as far left as possible. common_offset = diff_commonSuffix(equality1, edit) if common_offset != 0 common_string = edit[-common_offset..-1] equality1 = equality1[0...-common_offset] edit = common_string + edit[0...-common_offset] equality2 = common_string + equality2 end # Second, step character by character right, looking for the best fit. best_equality1 = equality1 best_edit = edit best_equality2 = equality2 best_score = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2) while edit[0] == equality2[0] equality1 += edit[0] edit = edit[1..-1] + equality2[0] equality2 = equality2[1..-1] score = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2) # The >= encourages trailing rather than leading whitespace on edits. 
if score >= best_score best_score = score best_equality1 = equality1 best_edit = edit best_equality2 = equality2 end end if diffs[pointer - 1][1] != best_equality1 # We have an improvement, save it back to the diff. if best_equality1.empty? diffs[pointer - 1, 1] = [] pointer -= 1 else diffs[pointer - 1][1] = best_equality1 end diffs[pointer][1] = best_edit if best_equality2.empty? diffs[pointer + 1, 1] = [] pointer -= 1 else diffs[pointer + 1][1] = best_equality2 end end end pointer += 1 end end # Reduce the number of edits by eliminating operationally trivial equalities. def diff_cleanupEfficiency(diffs) changes = false equalities = [] # Stack of indices where equalities are found. last_equality = '' # Always equal to equalities.last[1] pointer = 0 # Index of current position. pre_ins = false # Is there an insertion operation before the last equality. pre_del = false # Is there a deletion operation before the last equality. post_ins = false # Is there an insertion operation after the last equality. post_del = false # Is there a deletion operation after the last equality. while pointer < diffs.length if diffs[pointer][0] == :equal # Equality found. if diffs[pointer][1].length < diff_editCost && (post_ins || post_del) # Candidate found. equalities.push(pointer) pre_ins = post_ins pre_del = post_del last_equality = diffs[pointer][1] else # Not a candidate, and can never become one. equalities.clear last_equality = '' end post_ins = false post_del = false else # An insertion or deletion. if diffs[pointer][0] == :delete post_del = true else post_ins = true end # Five types to be split: # ABXYCD # AXCD # ABXC # AXCD # ABXC if !last_equality.empty? && ((pre_ins && pre_del && post_ins && post_del) || ((last_equality.length < diff_editCost / 2) && [pre_ins, pre_del, post_ins, post_del].count(true) == 3)) # Duplicate record. diffs[equalities.last, 0] = [[:delete, last_equality]] # Change second copy to insert. 
diffs[equalities.last + 1][0] = :insert equalities.pop # Throw away the equality we just deleted last_equality = '' if pre_ins && pre_del # No changes made which could affect previous entry, keep going. post_ins = true post_del = true equalities.clear else if !equalities.empty? equalities.pop # Throw away the previous equality. pointer = equalities.last || -1 end post_ins = false post_del = false end changes = true end end pointer += 1 end if changes diff_cleanupMerge(diffs) end end # Reorder and merge like edit sections. Merge equalities. # Any edit section can move as long as it doesn't cross an equality. def diff_cleanupMerge(diffs) diffs.push([:equal, '']) # Add a dummy entry at the end. pointer = 0 count_delete = 0 count_insert = 0 text_delete = '' text_insert = '' while pointer < diffs.length case diffs[pointer][0] when :insert count_insert += 1 text_insert += diffs[pointer][1] pointer += 1 when :delete count_delete += 1 text_delete += diffs[pointer][1] pointer += 1 when :equal # Upon reaching an equality, check for prior redundancies. if count_delete + count_insert > 1 if count_delete != 0 && count_insert != 0 # Factor out any common prefixies. common_length = diff_commonPrefix(text_insert, text_delete) if common_length != 0 if (pointer - count_delete - count_insert) > 0 && diffs[pointer - count_delete - count_insert - 1][0] == :equal diffs[pointer - count_delete - count_insert - 1][1] += text_insert[0...common_length] else diffs.unshift([:equal, text_insert[0...common_length]]) pointer += 1 end text_insert = text_insert[common_length..-1] text_delete = text_delete[common_length..-1] end # Factor out any common suffixies. common_length = diff_commonSuffix(text_insert, text_delete) if common_length != 0 diffs[pointer][1] = text_insert[-common_length..-1] + diffs[pointer][1] text_insert = text_insert[0...-common_length] text_delete = text_delete[0...-common_length] end end # Delete the offending records and add the merged ones. if count_delete.zero? 
diffs[pointer - count_delete - count_insert, count_delete + count_insert] = [[:insert, text_insert]] elsif count_insert.zero? diffs[pointer - count_delete - count_insert, count_delete + count_insert] = [[:delete, text_delete]] else diffs[pointer - count_delete - count_insert, count_delete + count_insert] = [[:delete, text_delete], [:insert, text_insert]] end pointer = pointer - count_delete - count_insert + (count_delete.zero? ? 0 : 1) + (count_insert.zero? ? 0 : 1) + 1 elsif pointer != 0 && diffs[pointer - 1][0] == :equal # Merge this equality with the previous one. diffs[pointer - 1][1] += diffs[pointer][1] diffs[pointer, 1] = [] else pointer += 1 end count_insert = 0 count_delete = 0 text_delete = '' text_insert = '' end end if diffs.last[1].empty? diffs.pop # Remove the dummy entry at the end. end # Second pass: look for single edits surrounded on both sides by equalities # which can be shifted sideways to eliminate an equality. # e.g: ABAC -> ABAC changes = false pointer = 1 # Intentionally ignore the first and last element (don't need checking). while pointer < diffs.length - 1 if diffs[pointer - 1][0] == :equal && diffs[pointer + 1][0] == :equal # This is a single edit surrounded by equalities. if diffs[pointer][1][-diffs[pointer - 1][1].length..-1] == diffs[pointer - 1][1] # Shift the edit over the previous equality. diffs[pointer][1] = diffs[pointer - 1][1] + diffs[pointer][1][0...-diffs[pointer - 1][1].length] diffs[pointer + 1][1] = diffs[pointer - 1][1] + diffs[pointer + 1][1] diffs[pointer - 1, 1] = [] changes = true elsif diffs[pointer][1][0...diffs[pointer + 1][1].length] == diffs[pointer + 1][1] # Shift the edit over the next equality. diffs[pointer - 1][1] += diffs[pointer + 1][1] diffs[pointer][1] = diffs[pointer][1][diffs[pointer + 1][1].length..-1] + diffs[pointer + 1][1] diffs[pointer + 1, 1] = [] changes = true end end pointer += 1 end # If shifts were made, the diff needs reordering and another shift sweep. 
if changes diff_cleanupMerge(diffs) end end # loc is a location in text1, compute and return the equivalent location # in text2. e.g. 'The cat' vs 'The big cat', 1->1, 5->8 def diff_xIndex(diffs, loc) chars1 = 0 chars2 = 0 last_chars1 = 0 last_chars2 = 0 x = diffs.index do |diff| if diff[0] != :insert chars1 += diff[1].length end if diff[0] != :delete chars2 += diff[1].length end if chars1 > loc true else last_chars1 = chars1 last_chars2 = chars2 false end end if diffs.length != x && diffs[x][0] == :delete # The location was deleted. last_chars2 else # Add the remaining len(character). last_chars2 + (loc - last_chars1) end end # Convert a diff array into a pretty HTML report. def diff_prettyHtml(diffs) diffs.map do |op, data| text = data.gsub('&', '&').gsub('<', '<').gsub('>', '>').gsub('\n', '¶
# NOTE(review): the indented comment line below is the verbatim tail of the
# method that precedes this chunk (its head lies outside the visible source).
# It is kept as text only, because it cannot be parsed or repaired in isolation.
#   ') case op when :insert "#{text}" when :delete "#{text}" when :equal "#{text}" end end.join end

  # Compute and return the source text (all equalities and deletions).
  #
  # diffs - Array of [op, text] pairs, op being :insert, :delete or :equal.
  #
  # Returns a String.
  def diff_text1(diffs)
    diffs.map { |op, data| op == :insert ? '' : data }.join
  end

  # Compute and return the destination text (all equalities and insertions).
  #
  # diffs - Array of [op, text] pairs.
  #
  # Returns a String.
  def diff_text2(diffs)
    diffs.map { |op, data| op == :delete ? '' : data }.join
  end

  # Compute the Levenshtein distance; the number of inserted, deleted or
  # substituted characters.
  #
  # diffs - Array of [op, text] pairs.
  #
  # Returns an Integer.
  def diff_levenshtein(diffs)
    levenshtein = 0
    insertions = 0
    deletions = 0

    diffs.each do |op, data|
      case op
      when :insert
        insertions += data.length
      when :delete
        deletions += data.length
      when :equal
        # A deletion and an insertion is one substitution.
        levenshtein += [insertions, deletions].max
        insertions = 0
        deletions = 0
      end
    end

    levenshtein + [insertions, deletions].max
  end

  # Percent-encode (%XX, upper-case hex, byte by byte) every character of
  # +text+ that is not in the safe set used by the delta and patch formats.
  # Stands in for URI.encode(text, regexp), which Ruby 3.0 removed.
  def dmp_percent_encode(text)
    text.gsub(/[^0-9A-Za-z_.;!~*'(),\/?:@&=+$\#-]/) do |char|
      char.bytes.map { |byte| '%%%02X' % byte }.join
    end
  end
  private :dmp_percent_encode

  # Reverse of dmp_percent_encode: turn each run of %XX escapes back into the
  # bytes it names, tagged with the encoding of +text+.
  # Stands in for URI.decode, which Ruby 3.0 removed.
  def dmp_percent_decode(text)
    text.gsub(/(?:%[0-9A-Fa-f]{2})+/) do |run|
      [run.delete('%')].pack('H*').force_encoding(text.encoding)
    end
  end
  private :dmp_percent_decode

  # Crush the diff into an encoded string which describes the operations
  # required to transform text1 into text2.
  # E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
  # Operations are tab-separated.  Inserted text is escaped using %xx notation
  # (spaces are left readable).
  #
  # diffs - Array of [op, text] pairs.
  #
  # Returns the delta String.
  def diff_toDelta(diffs)
    tokens = diffs.map do |op, data|
      case op
      when :insert
        '+' + dmp_percent_encode(data)
      when :delete
        '-' + data.length.to_s
      when :equal
        '=' + data.length.to_s
      end
    end
    tokens.join("\t").gsub('%20', ' ')
  end

  # Given the original text1, and an encoded string which describes the
  # operations required to transform text1 into text2, compute the full diff.
  #
  # text1 - The source String the delta was computed against.
  # delta - The String produced by diff_toDelta.
  #
  # Returns an Array of [op, text] pairs.
  # Raises ArgumentError on a malformed token, an invalid or negative count,
  # or when the delta does not consume exactly text1.length characters.
  def diff_fromDelta(text1, delta)
    # Deltas should be composed of a subset of ascii chars, Unicode not
    # required.  The converted copy is discarded on purpose: the call is only
    # here to raise an Encoding error when delta holds non-ASCII characters.
    delta.encode('ascii')
    diffs = []
    pointer = 0  # Cursor in text1
    delta.split("\t").each do |token|
      # Each token begins with a one character parameter which specifies the
      # operation of this token (delete, insert, equality).
      op = token[0]
      param = token[1..-1]
      case op
      when '+'
        diffs.push([:insert, dmp_percent_decode(param.force_encoding(Encoding::UTF_8))])
      when '-', '='
        begin
          n = Integer(param)
        rescue ArgumentError
          raise ArgumentError.new(
            "Invalid number in diff_fromDelta: #{param.inspect}")
        end
        # A bare `raise` here used to surface as a message-less RuntimeError
        # that slipped past callers rescuing ArgumentError.
        if n < 0
          raise ArgumentError.new(
            "Negative number in diff_fromDelta: #{param.inspect}")
        end
        text = text1[pointer...(pointer + n)]
        pointer += n
        diffs.push([op == '=' ? :equal : :delete, text])
      else
        raise ArgumentError.new(
          "Invalid diff operation in diff_fromDelta: #{token.inspect}")
      end
    end

    if pointer != text1.length
      raise ArgumentError.new("Delta length (#{pointer}) does not equal " +
        "source text length #{text1.length}")
    end
    diffs
  end

  # Locate the best instance of 'pattern' in 'text' near 'loc'.
  #
  # text    - The String to search.
  # pattern - The String to search for.
  # loc     - The Integer location to search around (clamped to the text).
  #
  # Returns the Integer match position, or -1 when nothing matches.
  # Raises ArgumentError if text or pattern is nil.
  def match_main(text, pattern, loc)
    # Check for null inputs.
    if text.nil? || pattern.nil?
      raise ArgumentError.new("Null input. (match_main)")
    end

    loc = [0, [loc, text.length].min].max
    if text == pattern
      # Shortcut (potentially not guaranteed by the algorithm)
      0
    elsif text.empty?
      # Nothing to match
      -1
    elsif text[loc, pattern.length] == pattern
      # Perfect match at the perfect spot!  (Includes case of null pattern)
      loc
    else
      # Do a fuzzy compare.
      match_bitap(text, pattern, loc)
    end
  end

  # Locate the best instance of 'pattern' in 'text' near 'loc' using the
  # Bitap algorithm.
  #
  # text    - The String to search.
  # pattern - The String to search for (at most match_maxBits characters).
  # loc     - The Integer location to search around.
  #
  # Returns the Integer match position, or -1 when nothing matches.
  # Raises ArgumentError if the pattern is longer than match_maxBits.
  def match_bitap(text, pattern, loc)
    if pattern.length > match_maxBits
      # `raise`, not `throw`: throw is for catch/throw tags and would turn this
      # into an UncaughtThrowError with a mangled message.
      raise ArgumentError.new("Pattern too long")
    end

    # Initialise the alphabet.
    s = match_alphabet(pattern)

    # Compute and return the score for a match with e errors and x location.
    bitap_score = lambda do |e, x|
      accuracy = e.to_f / pattern.length
      proximity = (loc - x).abs
      if match_distance == 0
        # Dodge divide by zero error.
        proximity == 0 ? accuracy : 1.0
      else
        accuracy + (proximity.to_f / match_distance)
      end
    end

    # Highest score beyond which we give up.
    score_threshold = match_threshold
    # Is there a nearby exact match? (speedup)
    best_loc = text.index(pattern, loc)
    if best_loc
      score_threshold = [bitap_score.call(0, best_loc), score_threshold].min
      # What about in the other direction? (speedup)
      best_loc = text.rindex(pattern, loc + pattern.length)
      if best_loc
        score_threshold = [bitap_score.call(0, best_loc), score_threshold].min
      end
    end

    # Initialise the bit arrays.
    match_mask = 1 << (pattern.length - 1)
    best_loc = -1

    bin_max = pattern.length + text.length
    # Bit array of the previous error level; nil until the first pass is done.
    last_rd = nil
    pattern.length.times do |d|
      # Scan for the best match; each iteration allows for one more error.
      # Run a binary search to determine how far from 'loc' we can stray at this
      # error level.
      bin_min = 0
      bin_mid = bin_max
      while bin_min < bin_mid
        if bitap_score.call(d, loc + bin_mid) <= score_threshold
          bin_min = bin_mid
        else
          bin_max = bin_mid
        end
        bin_mid = (bin_max - bin_min) / 2 + bin_min
      end

      # Use the result from this iteration as the maximum for the next.
      bin_max = bin_mid
      start = [1, loc - bin_mid + 1].max
      finish = [loc + bin_mid, text.length].min + pattern.length

      rd = Array.new(finish + 2, 0)
      rd[finish + 1] = (1 << d) - 1
      finish.downto(start) do |j|
        # Positions past the end of the text match nothing.
        char_match = s[text[j - 1]] || 0
        if d == 0
          # First pass: exact match.
          rd[j] = ((rd[j + 1] << 1) | 1) & char_match
        else
          # Subsequent passes: fuzzy match.
          rd[j] = (((rd[j + 1] << 1) | 1) & char_match) |
            (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
        end
        if (rd[j] & match_mask).nonzero?
          score = bitap_score.call(d, j - 1)
          # This match will almost certainly be better than any existing match.
          # But check anyway.
          if score <= score_threshold
            # Told you so.
            score_threshold = score
            best_loc = j - 1
            if best_loc > loc
              # When passing loc, don't exceed our current distance from loc.
              start = [1, 2 * loc - best_loc].max
            else
              # Already passed loc, downhill from here on in.
              break
            end
          end
        end
      end

      # No hope for a (better) match at greater error levels.
      break if bitap_score.call(d + 1, loc) > score_threshold
      last_rd = rd
    end
    best_loc
  end

  # Initialise the alphabet for the Bitap algorithm.
  #
  # pattern - The String to encode.
  #
  # Returns a Hash mapping each character of the pattern to a bitmask with one
  # bit set per position at which it occurs (first character = highest bit).
  def match_alphabet(pattern)
    s = {}
    pattern.chars.each_with_index do |c, i|
      s[c] = (s[c] || 0) | (1 << (pattern.length - i - 1))
    end
    s
  end

  # Parse a textual representation of patches and return a list of patch
  # objects.
  #
  # textline - The String produced by patch_toText.
  #
  # Returns an Array of PatchObj.
  # Raises ArgumentError on a malformed header or an unknown line prefix.
  def patch_fromText(textline)
    return [] if textline.empty?

    patches = []
    text = textline.split("\n")
    text_pointer = 0
    patch_header = /\A@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@\z/
    while text_pointer < text.length
      m = text[text_pointer].match(patch_header)
      if m.nil?
        raise ArgumentError.new("Invalid patch string: #{text[text_pointer]}")
      end
      patch = PatchObj.new
      patches.push(patch)

      # Header indices are 1-based; an omitted length means 1, and a length of
      # 0 leaves the start index as written.
      patch.start1 = m[1].to_i
      if m[2].empty?
        patch.start1 -= 1
        patch.length1 = 1
      elsif m[2] == '0'
        patch.length1 = 0
      else
        patch.start1 -= 1
        patch.length1 = m[2].to_i
      end

      patch.start2 = m[3].to_i
      if m[4].empty?
        patch.start2 -= 1
        patch.length2 = 1
      elsif m[4] == '0'
        patch.length2 = 0
      else
        patch.start2 -= 1
        patch.length2 = m[4].to_i
      end

      text_pointer += 1

      while text_pointer < text.length
        if text[text_pointer].empty?
          # Blank line?  Whatever.
          text_pointer += 1
          next
        end

        sign = text[text_pointer][0]
        line = dmp_percent_decode(
          text[text_pointer][1..-1].force_encoding(Encoding::UTF_8))

        case sign
        when '-'
          # Deletion.
          patch.diffs.push([:delete, line])
        when '+'
          # Insertion.
          patch.diffs.push([:insert, line])
        when ' '
          # Minor equality
          patch.diffs.push([:equal, line])
        when '@'
          # Start of next patch.
          break
        else
          # Unknown line prefix.
          raise ArgumentError.new("Invalid patch mode \"#{sign}\" in: #{line}")
        end
        text_pointer += 1
      end
    end
    patches
  end

  # Take a list of patches and return a textual representation.
  #
  # NOTE(review): this relies on PatchObj#to_s, which in lib/patch_obj.rb still
  # calls URI.encode (removed in Ruby 3.0) -- confirm and port it separately.
  def patch_toText(patches)
    patches.join
  end

  # Increase the context until it is unique,
  # but don't let the pattern expand beyond match_maxBits.
  #
  # patch - The PatchObj to grow; modified in place.
  # text  - The source String the patch applies to.
  #
  # Returns nothing meaningful.
  def patch_addContext(patch, text)
    return if text.empty?

    pattern = text[patch.start2, patch.length1]
    padding = 0

    # Look for the first and last matches of pattern in text.  If two different
    # matches are found, increase the pattern length.
    while text.index(pattern) != text.rindex(pattern) &&
        pattern.length < match_maxBits - 2 * patch_margin
      padding += patch_margin
      pattern = text[[0, patch.start2 - padding].max...(patch.start2 + patch.length1 + padding)]
    end
    # Add one chunk for good luck.
    padding += patch_margin

    # Add the prefix.  String#[] returns nil when the start lies past the end
    # of the text, so coerce to '' before taking lengths below.
    prefix = text[[0, patch.start2 - padding].max...patch.start2].to_s
    patch.diffs.unshift([:equal, prefix]) unless prefix.empty?
    # Add the suffix.
    suffix = text[patch.start2 + patch.length1, padding].to_s
    patch.diffs.push([:equal, suffix]) unless suffix.empty?

    # Roll back the start points.
    patch.start1 -= prefix.length
    patch.start2 -= prefix.length
    # Extend the lengths.
    patch.length1 += prefix.length + suffix.length
    patch.length2 += prefix.length + suffix.length
  end

  # Compute a list of patches to turn text1 into text2.
  # Use diffs if provided, otherwise compute it ourselves.
  # There are four ways to call this function, depending on what data is
  # available to the caller:
  # Method 1:
  # a = text1, b = text2
  # Method 2:
  # a = diffs
  # Method 3 (optimal):
  # a = text1, b = diffs
  # Method 4 (deprecated, use method 3):
  # a = text1, b = text2, c = diffs
  #
  # Returns an Array of PatchObj.
  # Raises ArgumentError when the arguments match none of the four forms.
  def patch_make(*args)
    text1 = nil
    diffs = nil
    if args.length == 2 && args[0].is_a?(String) && args[1].is_a?(String)
      # Method 1: compute diffs from text1 and text2.
      text1 = args[0]
      diffs = diff_main(text1, args[1], true)
      if diffs.length > 2
        diff_cleanupSemantic(diffs)
        diff_cleanupEfficiency(diffs)
      end
    elsif args.length == 1 && args[0].is_a?(Array)
      # Method 2: compute text1 from diffs.
      diffs = args[0]
      text1 = diff_text1(diffs)
    elsif args.length == 2 && args[0].is_a?(String) && args[1].is_a?(Array)
      # Method 3: text1, diffs
      text1 = args[0]
      diffs = args[1]
    elsif args.length == 3 && args[0].is_a?(String) &&
        args[1].is_a?(String) && args[2].is_a?(Array)
      # Method 4: text1, text2, diffs
      # text2 is not used.
      text1 = args[0]
      diffs = args[2]
    else
      raise ArgumentError.new('Unknown call format to patch_make.')
    end

    return [] if diffs.empty?  # Get rid of the null case.

    patches = []
    patch = PatchObj.new
    char_count1 = 0  # Number of characters into the text1 string.
    char_count2 = 0  # Number of characters into the text2 string.
    prepatch_text = text1  # Recreate the patches to determine context info.
    postpatch_text = text1
    diffs.each_with_index do |diff, x|
      diff_type, diff_text = diff
      if patch.diffs.empty? && diff_type != :equal
        # A new patch starts here.
        patch.start1 = char_count1
        patch.start2 = char_count2
      end

      case diff_type
      when :insert
        patch.diffs.push(diff)
        patch.length2 += diff_text.length
        postpatch_text = postpatch_text[0...char_count2] + diff_text +
          postpatch_text[char_count2..-1]
      when :delete
        patch.length1 += diff_text.length
        patch.diffs.push(diff)
        postpatch_text = postpatch_text[0...char_count2] +
          postpatch_text[(char_count2 + diff_text.length)..-1]
      when :equal
        if diff_text.length <= 2 * patch_margin &&
            !patch.diffs.empty? && diffs.length != x + 1
          # Small equality inside a patch.
          patch.diffs.push(diff)
          patch.length1 += diff_text.length
          patch.length2 += diff_text.length
        elsif diff_text.length >= 2 * patch_margin
          # Time for a new patch.
          unless patch.diffs.empty?
            patch_addContext(patch, prepatch_text)
            patches.push(patch)
            patch = PatchObj.new
            # Unlike Unidiff, our patch lists have a rolling context.
            # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
            # Update prepatch text & pos to reflect the application of the
            # just completed patch.
            prepatch_text = postpatch_text
            char_count1 = char_count2
          end
        end
      end

      # Update the current character count.
      char_count1 += diff_text.length if diff_type != :insert
      char_count2 += diff_text.length if diff_type != :delete
    end

    # Pick up the leftover patch if not empty.
    unless patch.diffs.empty?
      patch_addContext(patch, prepatch_text)
      patches.push(patch)
    end

    patches
  end

  # Merge a set of patches onto the text.  Return a patched text, as well
  # as a list of true/false values indicating which patches were applied.
  #
  # patches - Array of PatchObj; left untouched (a deep copy is worked on).
  # text    - The String to patch.
  #
  # Returns [patched_text, results].  results has one entry per patch actually
  # attempted, i.e. after oversized patches have been split by patch_splitMax.
  def patch_apply(patches, text)
    return [text, []] if patches.empty?

    # Deep copy the patches so that no changes are made to originals.
    patches = Marshal.load(Marshal.dump(patches))

    null_padding = patch_addPadding(patches)
    text = null_padding + text + null_padding
    patch_splitMax(patches)

    # delta keeps track of the offset between the expected and actual location
    # of the previous patch.  If there are patches expected at positions 10 and
    # 20, but the first patch was found at 12, delta is 2 and the second patch
    # has an effective expected position of 22.
    delta = 0
    results = []
    patches.each_with_index do |patch, x|
      expected_loc = patch.start2 + delta
      text1 = diff_text1(patch.diffs)
      end_loc = -1
      if text1.length > match_maxBits
        # patch_splitMax will only provide an oversized pattern in the case of
        # a monster delete.
        start_loc = match_main(text, text1[0, match_maxBits], expected_loc)
        if start_loc != -1
          end_loc = match_main(text, text1[(text1.length - match_maxBits)..-1],
            expected_loc + text1.length - match_maxBits)
          if end_loc == -1 || start_loc >= end_loc
            # Can't find valid trailing context.  Drop this patch.
            start_loc = -1
          end
        end
      else
        start_loc = match_main(text, text1, expected_loc)
      end

      if start_loc == -1
        # No match found.  :(
        results[x] = false
        # Subtract the delta for this failed patch from subsequent patches.
        delta -= patch.length2 - patch.length1
      else
        # Found a match.  :)
        results[x] = true
        delta = start_loc - expected_loc
        # For a monster delete the matched region ends where the trailing
        # context ends.  That is an end *position*, so slice with a range;
        # passing it as a length made text2 run past the region.
        text2 = if end_loc == -1
          text[start_loc, text1.length]
        else
          text[start_loc...(end_loc + match_maxBits)]
        end

        if text1 == text2
          # Perfect match, just shove the replacement text in.
          text = text[0, start_loc] + diff_text2(patch.diffs) +
            text[(start_loc + text1.length)..-1]
        else
          # Imperfect match.
          # Run a diff to get a framework of equivalent indices.
          diffs = diff_main(text1, text2, false)
          if text1.length > match_maxBits &&
              diff_levenshtein(diffs).to_f / text1.length > patch_deleteThreshold
            # The end points match, but the content is unacceptably bad.
            results[x] = false
          else
            diff_cleanupSemanticLossless(diffs)
            index1 = 0
            patch.diffs.each do |op, data|
              if op != :equal
                index2 = diff_xIndex(diffs, index1)
              end
              if op == :insert
                # Insertion
                text = text[0, start_loc + index2] + data +
                  text[(start_loc + index2)..-1]
              elsif op == :delete
                # Deletion
                text = text[0, start_loc + index2] +
                  text[(start_loc + diff_xIndex(diffs, index1 + data.length))..-1]
              end
              index1 += data.length if op != :delete
            end
          end
        end
      end
    end

    # Strip the padding off.
    text = text[null_padding.length...-null_padding.length]
    [text, results]
  end

  # Add some padding on text start and end so that edges can match
  # something.  Intended to be called only from within patch_apply.
  #
  # patches - Array of PatchObj; modified in place.
  #
  # Returns the padding String (characters 1..patch_margin).
  def patch_addPadding(patches)
    padding_length = patch_margin
    null_padding = (1..padding_length).map { |x| x.chr(Encoding::UTF_8) }.join
    # Nothing to pad; avoids calling methods on nil below.
    return null_padding if patches.empty?

    # Bump all the patches forward.
    patches.each do |patch|
      patch.start1 += padding_length
      patch.start2 += padding_length
    end

    # Add some padding on start of first diff.
    patch = patches.first
    diffs = patch.diffs
    if diffs.empty? || diffs.first[0] != :equal
      # Add nullPadding equality.
      diffs.unshift([:equal, null_padding])
      patch.start1 -= padding_length  # Should be 0.
      patch.start2 -= padding_length  # Should be 0.
      patch.length1 += padding_length
      patch.length2 += padding_length
    elsif padding_length > diffs.first[1].length
      # Grow first equality.
      extra_length = padding_length - diffs.first[1].length
      diffs.first[1] = null_padding[diffs.first[1].length..-1] + diffs.first[1]
      patch.start1 -= extra_length
      patch.start2 -= extra_length
      patch.length1 += extra_length
      patch.length2 += extra_length
    end

    # Add some padding on end of last diff.
    patch = patches.last
    diffs = patch.diffs
    if diffs.empty? || diffs.last[0] != :equal
      # Add nullPadding equality.
      diffs.push([:equal, null_padding])
      patch.length1 += padding_length
      patch.length2 += padding_length
    elsif padding_length > diffs.last[1].length
      # Grow last equality.
      extra_length = padding_length - diffs.last[1].length
      diffs.last[1] += null_padding[0, extra_length]
      patch.length1 += extra_length
      patch.length2 += extra_length
    end

    null_padding
  end

  # Look through the patches and break up any which are longer than the
  # maximum limit of the match algorithm.
  #
  # patches - Array of PatchObj; modified in place (oversized entries are
  #           replaced by several smaller ones, and their diffs are consumed).
  #
  # Returns nil.
  def patch_splitMax(patches)
    patch_size = match_maxBits

    x = 0
    while x < patches.length
      if patches[x].length1 > patch_size
        big_patch = patches[x]
        # Remove the big old patch
        patches[x, 1] = []
        x -= 1
        start1 = big_patch.start1
        start2 = big_patch.start2
        pre_context = ''
        until big_patch.diffs.empty?
          # Create one of several smaller patches.
          patch = PatchObj.new
          empty = true
          patch.start1 = start1 - pre_context.length
          patch.start2 = start2 - pre_context.length
          unless pre_context.empty?
            patch.length1 = patch.length2 = pre_context.length
            patch.diffs.push([:equal, pre_context])
          end

          while !big_patch.diffs.empty? &&
              patch.length1 < patch_size - patch_margin
            diff = big_patch.diffs.first
            if diff[0] == :insert
              # Insertions are harmless.
              patch.length2 += diff[1].length
              start2 += diff[1].length
              patch.diffs.push(big_patch.diffs.shift)
              empty = false
            elsif diff[0] == :delete && patch.diffs.length == 1 &&
                patch.diffs.first[0] == :equal &&
                diff[1].length > 2 * patch_size
              # This is a large deletion.  Let it pass in one chunk.
              patch.length1 += diff[1].length
              start1 += diff[1].length
              empty = false
              patch.diffs.push(big_patch.diffs.shift)
            else
              # Deletion or equality.  Only take as much as we can stomach.
              diff_text = diff[1][0, patch_size - patch.length1 - patch_margin]
              patch.length1 += diff_text.length
              start1 += diff_text.length
              if diff[0] == :equal
                patch.length2 += diff_text.length
                start2 += diff_text.length
              else
                empty = false
              end
              patch.diffs.push([diff[0], diff_text])
              if diff_text == big_patch.diffs.first[1]
                big_patch.diffs.shift
              else
                big_patch.diffs.first[1] =
                  big_patch.diffs.first[1][diff_text.length..-1]
              end
            end
          end

          # Compute the head context for the next patch: the last patch_margin
          # characters, or the whole text when it is shorter than that (a
          # negative start index would return nil and drop the context).
          patched_text = diff_text2(patch.diffs)
          pre_context =
            patched_text[[0, patched_text.length - patch_margin].max..-1]

          # Append the end context for this patch.
          post_context = diff_text1(big_patch.diffs)[0...patch_margin] || ''
          unless post_context.empty?
            patch.length1 += post_context.length
            patch.length2 += post_context.length
            if !patch.diffs.empty? && patch.diffs.last[0] == :equal
              patch.diffs.last[1] += post_context
            else
              patch.diffs.push([:equal, post_context])
            end
          end
          unless empty
            x += 1
            patches[x, 0] = [patch]
          end
        end
      end
      x += 1
    end
  end

# NOTE(review): the indented comment line below is the verbatim remainder of
# the collapsed source line: a final `end` (presumably closing the enclosing
# class), then the next archive member's header and the start of its README
# text.  Kept as text so nothing is lost.
#   end diff-match-patch-0.1.0/README.md0000644000175000017500000000065513601103003015103 0ustar pravipravi# DiffMatchPatch A ruby implementation of the google diff-match-patch library. http://code.google.com/p/google-diff-match-patch/ The Diff Match and Patch libraries offer robust algorithms to perform the operations required for synchronizing plain text. This work was inspired by the diff_match_patch-ruby module.
(https://github.com/reima/diff_match_patch-ruby) Copyright (c) 2011, Jorge Kalmbach diff-match-patch-0.1.0/test/0000755000175000017500000000000013601103003014575 5ustar pravipravidiff-match-patch-0.1.0/test/diff_match_patch-test.rb0000644000175000017500000010664613601103003021357 0ustar pravipravirequire 'test/unit' require 'diff_match_patch' class DiffTest < Test::Unit::TestCase def setup @dmp = DiffMatchPatch.new end def test_diff_commonPrefix # Detect any common prefix. # Null case. assert_equal(0, @dmp.diff_commonPrefix('abc', 'xyz')) # Non-null case. assert_equal(4, @dmp.diff_commonPrefix('1234abcdef', '1234xyz')) # Whole case. assert_equal(4, @dmp.diff_commonPrefix('1234', '1234xyz')) end def test_diff_commonSuffix # Detect any common suffix. # Null case. assert_equal(0, @dmp.diff_commonSuffix('abc', 'xyz')) # Non-null case. assert_equal(4, @dmp.diff_commonSuffix('abcdef1234', 'xyz1234')) # Whole case. assert_equal(4, @dmp.diff_commonSuffix('1234', 'xyz1234')) end def test_diff_commonOverlap # Detect any suffix/prefix overlap. # Null case. assert_equal(0, @dmp.diff_commonOverlap('', 'abcd')) # Whole case. assert_equal(3, @dmp.diff_commonOverlap('abc', 'abcd')) # No overlap. assert_equal(0, @dmp.diff_commonOverlap('123456', 'abcd')) # Overlap. assert_equal(3, @dmp.diff_commonOverlap('123456xxx', 'xxxabcd')) # Unicode. # Some overly clever languages (C#) may treat ligatures as equal to their # component letters. E.g. U+FB01 == 'fi' assert_equal(0, @dmp.diff_commonOverlap('fi', '\ufb01i')); end def test_diff_halfMatch # Detect a halfmatch. @dmp.diff_timeout = 1 # No match. assert_equal(nil, @dmp.diff_halfMatch('1234567890', 'abcdef')) assert_equal(nil, @dmp.diff_halfMatch('12345', '23')) # Single Match. 
assert_equal( ['12', '90', 'a', 'z', '345678'], @dmp.diff_halfMatch('1234567890', 'a345678z') ) assert_equal( ['a', 'z', '12', '90', '345678'], @dmp.diff_halfMatch('a345678z', '1234567890') ) assert_equal( ['abc', 'z', '1234', '0', '56789'], @dmp.diff_halfMatch('abc56789z', '1234567890') ) assert_equal( ['a', 'xyz', '1', '7890', '23456'], @dmp.diff_halfMatch('a23456xyz', '1234567890') ) # Multiple Matches. assert_equal( ['12123', '123121', 'a', 'z', '1234123451234'], @dmp.diff_halfMatch('121231234123451234123121', 'a1234123451234z') ) assert_equal( ['', '-=-=-=-=-=', 'x', '', 'x-=-=-=-=-=-=-='], @dmp.diff_halfMatch('x-=-=-=-=-=-=-=-=-=-=-=-=', 'xx-=-=-=-=-=-=-=') ) assert_equal( ['-=-=-=-=-=', '', '', 'y', '-=-=-=-=-=-=-=y'], @dmp.diff_halfMatch('-=-=-=-=-=-=-=-=-=-=-=-=y', '-=-=-=-=-=-=-=yy') ) # Non-optimal halfmatch. # Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y # not -qHillo+x=HelloHe-w+Hulloy assert_equal( ['qHillo', 'w', 'x', 'Hulloy', 'HelloHe'], @dmp.diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy') ) # Optimal no halfmatch. @dmp.diff_timeout = 0 assert_equal(nil, @dmp.diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy')) end def test_diff_linesToChars # Convert lines down to characters. assert_equal( ["\x01\x02\x01", "\x02\x01\x02", ['', "alpha\n", "beta\n"]], @dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n") ) assert_equal( ['', "\x01\x02\x03\x03", ['', "alpha\r\n", "beta\r\n", "\r\n"]], @dmp.diff_linesToChars('', "alpha\r\nbeta\r\n\r\n\r\n") ) assert_equal( ["\x01", "\x02", ['', 'a', 'b']], @dmp.diff_linesToChars('a', 'b') ) # More than 256 to reveal any 8-bit limitations. 
n = 300 line_list = (1..n).map {|x| x.to_s + "\n" } char_list = (1..n).map {|x| x.chr(Encoding::UTF_8) } assert_equal(n, line_list.length) lines = line_list.join chars = char_list.join assert_equal(n, chars.length) line_list.unshift('') assert_equal([chars, '', line_list], @dmp.diff_linesToChars(lines, '')) end def test_diff_charsToLines # Convert chars up to lines. diffs = [[:equal, "\x01\x02\x01"], [:insert, "\x02\x01\x02"]] @dmp.diff_charsToLines(diffs, ['', "alpha\n", "beta\n"]) assert_equal( [[:equal, "alpha\nbeta\nalpha\n"], [:insert, "beta\nalpha\nbeta\n"]], diffs ) # More than 256 to reveal any 8-bit limitations. n = 300 line_list = (1..n).map {|x| x.to_s + "\n" } char_list = (1..n).map {|x| x.chr(Encoding::UTF_8) } assert_equal(n, line_list.length) lines = line_list.join chars = char_list.join assert_equal(n, chars.length) line_list.unshift('') diffs = [[:delete, chars]] @dmp.diff_charsToLines(diffs, line_list) assert_equal([[:delete, lines]], diffs) end def test_diff_cleanupMerge # Cleanup a messy diff. # Null case. diffs = [] @dmp.diff_cleanupMerge(diffs) assert_equal([], diffs) # No change case. diffs = [[:equal, 'a'], [:delete, 'b'], [:insert, 'c']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:equal, 'a'], [:delete, 'b'], [:insert, 'c']], diffs) # Merge equalities. diffs = [[:equal, 'a'], [:equal, 'b'], [:equal, 'c']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:equal, 'abc']], diffs) # Merge deletions. diffs = [[:delete, 'a'], [:delete, 'b'], [:delete, 'c']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:delete, 'abc']], diffs) # Merge insertions. diffs = [[:insert, 'a'], [:insert, 'b'], [:insert, 'c']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:insert, 'abc']], diffs) # Merge interweave. diffs = [ [:delete, 'a'], [:insert, 'b'], [:delete, 'c'], [:insert, 'd'], [:equal, 'e'], [:equal, 'f'] ] @dmp.diff_cleanupMerge(diffs) assert_equal([[:delete, 'ac'], [:insert, 'bd'], [:equal, 'ef']], diffs) # Prefix and suffix detection. 
diffs = [[:delete, 'a'], [:insert, 'abc'], [:delete, 'dc']] @dmp.diff_cleanupMerge(diffs) assert_equal( [[:equal, 'a'], [:delete, 'd'], [:insert, 'b'],[:equal, 'c']], diffs ) # Prefix and suffix detection with equalities. diffs = [ [:equal, 'x'], [:delete, 'a'], [:insert, 'abc'], [:delete, 'dc'], [:equal, 'y'] ] @dmp.diff_cleanupMerge(diffs) assert_equal( [[:equal, 'xa'], [:delete, 'd'], [:insert, 'b'], [:equal, 'cy']], diffs ) # Slide edit left. diffs = [[:equal, 'a'], [:insert, 'ba'], [:equal, 'c']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:insert, 'ab'], [:equal, 'ac']], diffs) # Slide edit right. diffs = [[:equal, 'c'], [:insert, 'ab'], [:equal, 'a']] @dmp.diff_cleanupMerge(diffs) assert_equal([[:equal, 'ca'], [:insert, 'ba']], diffs) # Slide edit left recursive. diffs = [ [:equal, 'a'], [:delete, 'b'], [:equal, 'c'], [:delete, 'ac'], [:equal, 'x'] ] @dmp.diff_cleanupMerge(diffs) assert_equal([[:delete, 'abc'], [:equal, 'acx']], diffs) # Slide edit right recursive. diffs = [ [:equal, 'x'], [:delete, 'ca'], [:equal, 'c'], [:delete, 'b'], [:equal, 'a'] ] @dmp.diff_cleanupMerge(diffs) assert_equal([[:equal, 'xca'], [:delete, 'cba']], diffs) end def test_diff_cleanupSemanticLossless # Slide diffs to match logical boundaries. # Null case. diffs = [] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal([], diffs) # Blank lines. diffs = [ [:equal, "AAA\r\n\r\nBBB"], [:insert, "\r\nDDD\r\n\r\nBBB"], [:equal, "\r\nEEE"] ] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal([ [:equal, "AAA\r\n\r\n"], [:insert, "BBB\r\nDDD\r\n\r\n"], [:equal, "BBB\r\nEEE"] ], diffs ) # Line boundaries. diffs = [[:equal, "AAA\r\nBBB"], [:insert, " DDD\r\nBBB"], [:equal, " EEE"]] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal( [[:equal, "AAA\r\n"], [:insert, "BBB DDD\r\n"], [:equal, "BBB EEE"]], diffs ) # Word boundaries. 
diffs = [[:equal, 'The c'], [:insert, 'ow and the c'], [:equal, 'at.']] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal( [[:equal, 'The '], [:insert, 'cow and the '], [:equal, 'cat.']], diffs ) # Alphanumeric boundaries. diffs = [[:equal, 'The-c'], [:insert, 'ow-and-the-c'], [:equal, 'at.']] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal( [[:equal, 'The-'], [:insert, 'cow-and-the-'], [:equal, 'cat.']], diffs ) # Hitting the start. diffs = [[:equal, 'a'], [:delete, 'a'], [:equal, 'ax']] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal([[:delete, 'a'], [:equal, 'aax']], diffs) # Hitting the end. diffs = [[:equal, 'xa'], [:delete, 'a'], [:equal, 'a']] @dmp.diff_cleanupSemanticLossless(diffs) assert_equal([[:equal, 'xaa'], [:delete, 'a']], diffs) end def test_diff_cleanupSemantic # Cleanup semantically trivial equalities. # Null case. diffs = [] @dmp.diff_cleanupSemantic(diffs) assert_equal([], diffs) # No elimination #1. diffs = [[:delete, 'ab'], [:insert, 'cd'], [:equal, '12'], [:delete, 'e']] @dmp.diff_cleanupSemantic(diffs) assert_equal( [[:delete, 'ab'], [:insert, 'cd'], [:equal, '12'], [:delete, 'e']], diffs ) # No elimination #2. diffs = [ [:delete, 'abc'], [:insert, 'ABC'], [:equal, '1234'], [:delete, 'wxyz'] ] @dmp.diff_cleanupSemantic(diffs) assert_equal( [[:delete, 'abc'], [:insert, 'ABC'], [:equal, '1234'], [:delete, 'wxyz']], diffs ) # Simple elimination. diffs = [[:delete, 'a'], [:equal, 'b'], [:delete, 'c']] @dmp.diff_cleanupSemantic(diffs) assert_equal([[:delete, 'abc'], [:insert, 'b']], diffs) # Backpass elimination. diffs = [ [:delete, 'ab'], [:equal, 'cd'], [:delete, 'e'], [:equal, 'f'], [:insert, 'g'] ] @dmp.diff_cleanupSemantic(diffs) assert_equal([[:delete, 'abcdef'], [:insert, 'cdfg']], diffs) # Multiple eliminations. 
diffs = [ [:insert, '1'], [:equal, 'A'], [:delete, 'B'], [:insert, '2'], [:equal, '_'], [:insert, '1'], [:equal, 'A'], [:delete, 'B'], [:insert, '2'] ] @dmp.diff_cleanupSemantic(diffs) assert_equal([[:delete, 'AB_AB'], [:insert, '1A2_1A2']], diffs) # Word boundaries. diffs = [[:equal, 'The c'], [:delete, 'ow and the c'], [:equal, 'at.']] @dmp.diff_cleanupSemantic(diffs) assert_equal( [[:equal, 'The '], [:delete, 'cow and the '], [:equal, 'cat.']], diffs ) # No overlap elimination. diffs =[[:delete, 'abcxx'],[:insert, 'xxdef']] @dmp.diff_cleanupSemantic(diffs) assert_equal([[:delete, 'abcxx'], [:insert, 'xxdef']], diffs) # Overlap elimination. diffs = [[:delete, 'abcxxx'], [:insert, 'xxxdef']] @dmp.diff_cleanupSemantic(diffs) assert_equal([[:delete, 'abc'], [:equal, 'xxx'], [:insert, 'def']], diffs) # Two overlap eliminations. diffs = [ [:delete, 'abcd1212'], [:insert, '1212efghi'], [:equal, '----'], [:delete, 'A3'], [:insert, '3BC'] ] @dmp.diff_cleanupSemantic(diffs) assert_equal([ [:delete, 'abcd'], [:equal, '1212'], [:insert, 'efghi'], [:equal, '----'], [:delete, 'A'], [:equal, '3'], [:insert, 'BC'] ], diffs ) end def test_diff_cleanupEfficiency # Cleanup operationally trivial equalities. @dmp.diff_editCost = 4 # Null case. diffs = [] @dmp.diff_cleanupEfficiency(diffs) assert_equal([], diffs) # No elimination. diffs = [ [:delete, 'ab'], [:insert, '12'], [:equal, 'wxyz'], [:delete, 'cd'], [:insert, '34'] ] @dmp.diff_cleanupEfficiency(diffs) assert_equal([ [:delete, 'ab'], [:insert, '12'], [:equal, 'wxyz'], [:delete, 'cd'], [:insert, '34'] ], diffs ) # Four-edit elimination. diffs = [ [:delete, 'ab'], [:insert, '12'], [:equal, 'xyz'], [:delete, 'cd'], [:insert, '34'] ] @dmp.diff_cleanupEfficiency(diffs) assert_equal([[:delete, 'abxyzcd'], [:insert, '12xyz34']], diffs) # Three-edit elimination. 
diffs = [[:insert, '12'], [:equal, 'x'], [:delete, 'cd'], [:insert, '34']] @dmp.diff_cleanupEfficiency(diffs) assert_equal([[:delete, 'xcd'], [:insert, '12x34']], diffs) # Backpass elimination. diffs = [ [:delete, 'ab'], [:insert, '12'], [:equal, 'xy'], [:insert, '34'], [:equal, 'z'], [:delete, 'cd'], [:insert, '56'] ] @dmp.diff_cleanupEfficiency(diffs) assert_equal([[:delete, 'abxyzcd'], [:insert, '12xy34z56']], diffs) # High cost elimination. @dmp.diff_editCost = 5 diffs = [ [:delete, 'ab'], [:insert, '12'], [:equal, 'wxyz'], [:delete, 'cd'], [:insert, '34'] ] @dmp.diff_cleanupEfficiency(diffs) assert_equal([[:delete, 'abwxyzcd'], [:insert, '12wxyz34']], diffs) @dmp.diff_editCost = 4 end def test_diff_prettyHtml # Pretty print. diffs = [[:equal, 'a\n'], [:delete, 'b'], [:insert, 'c&d']] assert_equal( '
<B>' + 'b</B>c&d', @dmp.diff_prettyHtml(diffs) ) end def test_diff_text # Compute the source and destination texts. diffs = [ [:equal, 'jump'], [:delete, 's'], [:insert, 'ed'], [:equal, ' over '], [:delete, 'the'], [:insert, 'a'], [:equal, ' lazy'] ] assert_equal('jumps over the lazy', @dmp.diff_text1(diffs)) assert_equal('jumped over a lazy', @dmp.diff_text2(diffs)) end def test_diff_delta # Convert a diff into delta string. diffs = [ [:equal, 'jump'], [:delete, 's'], [:insert, 'ed'], [:equal, ' over '], [:delete, 'the'], [:insert, 'a'], [:equal, ' lazy'], [:insert, 'old dog'] ] text1 = @dmp.diff_text1(diffs) assert_equal('jumps over the lazy', text1) delta = @dmp.diff_toDelta(diffs) assert_equal("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta) # Convert delta string into a diff. assert_equal(diffs, @dmp.diff_fromDelta(text1, delta)) # Generates error (19 != 20). assert_raise ArgumentError do @dmp.diff_fromDelta(text1 + 'x', delta) end # Generates error (19 != 18). assert_raise ArgumentError do @dmp.diff_fromDelta(text1[1..-1], delta) end # Test deltas with special characters. diffs = [ [:equal, "\u0680 \x00 \t %"], [:delete, "\u0681 \x01 \n ^"], [:insert, "\u0682 \x02 \\ |"] ] text1 = @dmp.diff_text1(diffs) assert_equal("\u0680 \x00 \t %\u0681 \x01 \n ^", text1) delta = @dmp.diff_toDelta(diffs) assert_equal("=7\t-7\t+%DA%82 %02 %5C %7C", delta) # Convert delta string into a diff. assert_equal(diffs, @dmp.diff_fromDelta(text1, delta)) # Verify pool of unchanged characters. diffs = [[:insert, "A-Z a-z 0-9 - _ . ! ~ * \' ( ) / ? : @ & = + $ , # "]] text2 = @dmp.diff_text2(diffs) assert_equal("A-Z a-z 0-9 - _ . ! ~ * \' ( ) / ? : @ & = + $ , # ", text2) delta = @dmp.diff_toDelta(diffs) assert_equal("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) / ? : @ & = + $ , # ", delta) # Convert delta string into a diff. assert_equal(diffs, @dmp.diff_fromDelta('', delta)) end def test_diff_xIndex # Translate a location in text1 to text2. # Translation on equality. 
diffs = [[:delete, 'a'], [:insert, '1234'], [:equal, 'xyz']] assert_equal(5, @dmp.diff_xIndex(diffs, 2)) # Translation on deletion. diffs = [[:equal, 'a'], [:delete, '1234'], [:equal, 'xyz']] assert_equal(1, @dmp.diff_xIndex(diffs, 3)) end def test_diff_levenshtein # Levenshtein with trailing equality. diffs = [[:delete, 'abc'], [:insert, '1234'], [:equal, 'xyz']] assert_equal(4, @dmp.diff_levenshtein(diffs)) # Levenshtein with leading equality. diffs = [[:equal, 'xyz'], [:delete, 'abc'], [:insert, '1234']] assert_equal(4, @dmp.diff_levenshtein(diffs)) # Levenshtein with middle equality. diffs = [[:delete, 'abc'], [:equal, 'xyz'], [:insert, '1234']] assert_equal(7, @dmp.diff_levenshtein(diffs)) end def test_diff_bisect # Normal. a = 'cat' b = 'map' # Since the resulting diff hasn't been normalized, it would be ok if # the insertion and deletion pairs are swapped. # If the order changes, tweak this test as required. diffs = [ [:delete, 'c'], [:insert, 'm'], [:equal, 'a'], [:delete, 't'], [:insert, 'p'] ] assert_equal(diffs, @dmp.diff_bisect(a, b, nil)) # Timeout. assert_equal( [[:delete, 'cat'], [:insert, 'map']], @dmp.diff_bisect(a, b, Time.now - 1) ) end def test_diff_main # Perform a trivial diff. # Null case. assert_equal([], @dmp.diff_main('', '', false)) # Equality. assert_equal([[:equal, 'abc']], @dmp.diff_main('abc', 'abc', false)) # Simple insertion. assert_equal( [[:equal, 'ab'], [:insert, '123'], [:equal, 'c']], @dmp.diff_main('abc', 'ab123c', false) ) # Simple deletion. assert_equal( [[:equal, 'a'], [:delete, '123'], [:equal, 'bc']], @dmp.diff_main('a123bc', 'abc', false) ) # Two insertions. assert_equal([ [:equal, 'a'], [:insert, '123'], [:equal, 'b'], [:insert, '456'], [:equal, 'c'] ], @dmp.diff_main('abc', 'a123b456c', false) ) # Two deletions. assert_equal([ [:equal, 'a'], [:delete, '123'], [:equal, 'b'], [:delete, '456'], [:equal, 'c'] ], @dmp.diff_main('a123b456c', 'abc', false) ) # Perform a real diff. # Switch off the timeout. 
@dmp.diff_timeout = 0

    # Simple cases.
    assert_equal(
      [[:delete, 'a'], [:insert, 'b']],
      @dmp.diff_main('a', 'b', false)
    )

    assert_equal(
      [
        [:delete, 'Apple'], [:insert, 'Banana'], [:equal, 's are a'],
        [:insert, 'lso'], [:equal, ' fruit.']
      ],
      @dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false)
    )

    assert_equal(
      [
        [:delete, 'a'], [:insert, "\u0680"], [:equal, 'x'],
        [:delete, "\t"], [:insert, "\0"]
      ],
      @dmp.diff_main("ax\t", "\u0680x\0", false)
    )

    # Overlaps.
    assert_equal(
      [
        [:delete, '1'], [:equal, 'a'], [:delete, 'y'],
        [:equal, 'b'], [:delete, '2'], [:insert, 'xab']
      ],
      @dmp.diff_main('1ayb2', 'abxab', false)
    )

    assert_equal(
      [[:insert, 'xaxcx'], [:equal, 'abc'], [:delete, 'y']],
      @dmp.diff_main('abcy', 'xaxcxabc', false)
    )

    assert_equal(
      [
        [:delete, 'ABCD'], [:equal, 'a'], [:delete, '='], [:insert, '-'],
        [:equal, 'bcd'], [:delete, '='], [:insert, '-'],
        [:equal, 'efghijklmnopqrs'], [:delete, 'EFGHIJKLMNOefg']
      ],
      @dmp.diff_main(
        'ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg',
        'a-bcd-efghijklmnopqrs',
        false
      )
    )

    # Large equality.
    assert_equal(
      [
        [:insert, ' '], [:equal, 'a'], [:insert, 'nd'],
        [:equal, ' [[Pennsylvania]]'], [:delete, ' and [[New']
      ],
      @dmp.diff_main(
        'a [[Pennsylvania]] and [[New',
        ' and [[Pennsylvania]]',
        false
      )
    )

    # Timeout.
    @dmp.diff_timeout = 0.1 # 100ms
    a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the " +
        "wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
    b = "I am the very model of a modern major general,\nI\'ve information " +
        "vegetable, animal, and mineral,\nI know the kings of England, and " +
        "I quote the fights historical,\nFrom Marathon to Waterloo, in " +
        "order categorical.\n"
    # Repeat both texts 1024 times so the diff cannot finish before the
    # timeout expires.
    a *= 1024
    b *= 1024
    start_time = Time.now
    @dmp.diff_main(a, b)
    end_time = Time.now
    # Test that we took at least the timeout period.
    assert_equal(true, @dmp.diff_timeout <= end_time - start_time)
    # Test that we didn't take forever (be forgiving).
# Theoretically this test could fail very occasionally if the # OS task swaps or locks up for a second at the wrong moment. assert_equal(true, @dmp.diff_timeout * 1000 * 2 > end_time - start_time) @dmp.diff_timeout = 0 # Test the linemode speedup. # Must be long to pass the 100 char cutoff. # Simple line-mode. a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n" + "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n" + "1234567890\n1234567890\n1234567890\n" b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n" + "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n" + "abcdefghij\nabcdefghij\nabcdefghij\n" assert_equal(@dmp.diff_main(a, b, false), @dmp.diff_main(a, b, true)) # Single line-mode. a = '123456789012345678901234567890123456789012345678901234567890' + '123456789012345678901234567890123456789012345678901234567890' + '1234567890' b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij' + 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij' assert_equal(@dmp.diff_main(a, b, false), @dmp.diff_main(a, b, true)) # Overlap line-mode. a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n" + "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n" + "1234567890\n1234567890\n1234567890\n" b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n" + "1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n" + "1234567890\n1234567890\nabcdefghij\n" diffs_linemode = @dmp.diff_main(a, b, false) diffs_textmode = @dmp.diff_main(a, b, true) assert_equal( @dmp.diff_text1(diffs_linemode), @dmp.diff_text1(diffs_textmode) ) assert_equal( @dmp.diff_text2(diffs_linemode), @dmp.diff_text2(diffs_textmode) ) # Test null inputs. assert_raise ArgumentError do @dmp.diff_main(nil, nil) end end def test_match_alphabet # Initialise the bitmasks for Bitap. # Unique. assert_equal({'a'=>4, 'b'=>2, 'c'=>1}, @dmp.match_alphabet('abc')) # Duplicates. 
assert_equal({'a'=>37, 'b'=>18, 'c'=>8}, @dmp.match_alphabet('abcaba')) end def test_match_bitap # Bitap algorithm. @dmp.match_distance = 100 @dmp.match_threshold = 0.5 # Exact matches. assert_equal(5, @dmp.match_bitap('abcdefghijk', 'fgh', 5)) assert_equal(5, @dmp.match_bitap('abcdefghijk', 'fgh', 0)) # Fuzzy matches. assert_equal(4, @dmp.match_bitap('abcdefghijk', 'efxhi', 0)) assert_equal(2, @dmp.match_bitap('abcdefghijk', 'cdefxyhijk', 5)) assert_equal(-1, @dmp.match_bitap('abcdefghijk', 'bxy', 1)) # Overflow. assert_equal(2, @dmp.match_bitap('123456789xx0', '3456789x0', 2)) # Threshold test. @dmp.match_threshold = 0.4 assert_equal(4, @dmp.match_bitap('abcdefghijk', 'efxyhi', 1)) @dmp.match_threshold = 0.3 assert_equal(-1, @dmp.match_bitap('abcdefghijk', 'efxyhi', 1)) @dmp.match_threshold = 0.0 assert_equal(1, @dmp.match_bitap('abcdefghijk', 'bcdef', 1)) @dmp.match_threshold = 0.5 # Multiple select. assert_equal(0, @dmp.match_bitap('abcdexyzabcde', 'abccde', 3)) assert_equal(8, @dmp.match_bitap('abcdexyzabcde', 'abccde', 5)) # Distance test. @dmp.match_distance = 10 # Strict location. assert_equal( -1, @dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24) ) assert_equal( 0, @dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdxxefg', 1) ) @dmp.match_distance = 1000 # Loose location. assert_equal( 0, @dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24) ) end def test_match_main # Full match. # Shortcut matches. assert_equal(0, @dmp.match_main('abcdef', 'abcdef', 1000)) assert_equal(-1, @dmp.match_main('', 'abcdef', 1)) assert_equal(3, @dmp.match_main('abcdef', '', 3)) assert_equal(3, @dmp.match_main('abcdef', 'de', 3)) # Beyond end match. assert_equal(3, @dmp.match_main("abcdef", "defy", 4)) # Oversized pattern. assert_equal(0, @dmp.match_main("abcdef", "abcdefy", 0)) # Complex match. assert_equal( 4, @dmp.match_main( 'I am the very model of a modern major general.', ' that berry ', 5 ) ) # Test null inputs. 
assert_raise ArgumentError do @dmp.match_main(nil, nil, 0) end end # Patch tests def test_patch_obj # Patch Object. p = PatchObj.new p.start1 = 20 p.start2 = 21 p.length1 = 18 p.length2 = 17 p.diffs = [ [:equal, 'jump'], [:delete, 's'], [:insert, 'ed'], [:equal, ' over '], [:delete, 'the'], [:insert, 'a'], [:equal, "\nlaz"] ] strp = p.to_s assert_equal( "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n", strp ) end def test_patch_fromText assert_equal([], @dmp.patch_fromText("")) [ "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n", "@@ -1 +1 @@\n-a\n+b\n", "@@ -1 +1 @@\n-a\n+b\n", "@@ -0,0 +1,3 @@\n+abc\n" ].each do |strp| assert_equal(strp, @dmp.patch_fromText(strp).first.to_s) end # Generates error. assert_raise ArgumentError do @dmp.patch_fromText('Bad\nPatch\n') end end def test_patch_toText [ "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n" ].each do |strp| p = @dmp.patch_fromText(strp) assert_equal(strp, @dmp.patch_toText(p)) end end def test_patch_addContext @dmp.patch_margin = 4 p = @dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").first @dmp.patch_addContext(p, 'The quick brown fox jumps over the lazy dog.') assert_equal( "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.to_s ) # Same, but not enough trailing context. p = @dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").first @dmp.patch_addContext(p, 'The quick brown fox jumps.') assert_equal( "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.to_s ) # Same, but not enough leading context. p = @dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").first @dmp.patch_addContext(p, 'The quick brown fox jumps.') assert_equal( "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.to_s ) # Same, but with ambiguity. p = @dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").first @dmp.patch_addContext( p, 'The quick brown fox jumps. The quick brown fox crashes.' 
); assert_equal( "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.to_s ) end def test_patch_make # Null case. patches = @dmp.patch_make('', '') assert_equal('', @dmp.patch_toText(patches)) text1 = 'The quick brown fox jumps over the lazy dog.' text2 = 'That quick brown fox jumped over a lazy dog.' # Text2+Text1 inputs. expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 " + "@@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n" # The second patch must be "-21,17 +21,18", # not "-22,17 +21,18" due to rolling context patches = @dmp.patch_make(text2, text1) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Text1+Text2 inputs. expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18" + " +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n" patches = @dmp.patch_make(text1, text2) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Diff input. diffs = @dmp.diff_main(text1, text2, false) patches = @dmp.patch_make(diffs) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Text1+Diff inputs. patches = @dmp.patch_make(text1, diffs) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Text1+Text2+Diff inputs (deprecated) patches = @dmp.patch_make(text1, text2, diffs) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Character encoding. patches = @dmp.patch_make( '`1234567890-=[]\\;\',./', '~!@#$%^&*()_+{}|:"<>?' ) assert_equal( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!" + "@\#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", @dmp.patch_toText(patches) ) # Character decoding. diffs = [ [:delete, '`1234567890-=[]\\;\',./'], [:insert, '~!@#$%^&*()_+{}|:"<>?'] ] assert_equal( diffs, @dmp.patch_fromText( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!" + "@\#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" ).first.diffs ) # Long string with repeats. 
text1 = 'abcdef' * 100 text2 = text1 + '123' expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n" patches = @dmp.patch_make(text1, text2) assert_equal(expectedPatch, @dmp.patch_toText(patches)) # Test null inputs. assert_raise ArgumentError do @dmp.patch_make(nil) end end def test_patch_splitMax # Assumes that dmp.Match_MaxBits is 32. patches = @dmp.patch_make( 'abcdefghijklmnopqrstuvwxyz01234567890', 'XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0' ) @dmp.patch_splitMax(patches) assert_equal( "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n "+ "ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n " + "wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n " + "45\n+X\n 67\n+X\n 89\n+X\n 0\n", @dmp.patch_toText(patches) ) patches = @dmp.patch_make( 'abcdef1234567890123456789012345678901234567890' + '123456789012345678901234567890uvwxyz', 'abcdefuvwxyz' ) oldToText = @dmp.patch_toText(patches) @dmp.patch_splitMax(patches) assert_equal(oldToText, @dmp.patch_toText(patches)) patches = @dmp.patch_make( '1234567890123456789012345678901234567890123456789012345678901234567890', 'abc' ) @dmp.patch_splitMax(patches) assert_equal( "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n" + "@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n" + "@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", @dmp.patch_toText(patches) ) patches = @dmp.patch_make( 'abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1', 'abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1' ) @dmp.patch_splitMax(patches) assert_equal( "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n" + "@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", @dmp.patch_toText(patches) ) end def test_patch_addPadding # Both edges full. 
patches = @dmp.patch_make('', 'test') assert_equal("@@ -0,0 +1,4 @@\n+test\n", @dmp.patch_toText(patches)) @dmp.patch_addPadding(patches) assert_equal( "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", @dmp.patch_toText(patches) ) # Both edges partial. patches = @dmp.patch_make('XY', 'XtestY') assert_equal("@@ -1,2 +1,6 @@\n X\n+test\n Y\n", @dmp.patch_toText(patches)) @dmp.patch_addPadding(patches) assert_equal( "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", @dmp.patch_toText(patches) ) # Both edges none. patches = @dmp.patch_make('XXXXYYYY', 'XXXXtestYYYY') assert_equal( "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", @dmp.patch_toText(patches) ) @dmp.patch_addPadding(patches) assert_equal( "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", @dmp.patch_toText(patches) ) end def test_patch_apply @dmp.match_distance = 1000 @dmp.match_threshold = 0.5 @dmp.patch_deleteThreshold = 0.5 # Null case. patches = @dmp.patch_make('', '') results = @dmp.patch_apply(patches, 'Hello world.') assert_equal(['Hello world.', []], results) # Exact match. patches = @dmp.patch_make( 'The quick brown fox jumps over the lazy dog.', 'That quick brown fox jumped over a lazy dog.' ) results = @dmp.patch_apply( patches, 'The quick brown fox jumps over the lazy dog.' ) assert_equal( ['That quick brown fox jumped over a lazy dog.', [true, true]], results ) # Partial match. results = @dmp.patch_apply( patches, 'The quick red rabbit jumps over the tired tiger.' ) assert_equal( ['That quick red rabbit jumped over a tired tiger.', [true, true]], results ) # Failed match. results = @dmp.patch_apply( patches, 'I am the very model of a modern major general.' ) assert_equal( ['I am the very model of a modern major general.', [false, false]], results ) # Big delete, small change. 
patches = @dmp.patch_make(
      'x1234567890123456789012345678901234567890123456789012345678901234567890y',
      'xabcy'
    )
    results = @dmp.patch_apply(
      patches,
      'x123456789012345678901234567890-----++++++++++-----' +
      '123456789012345678901234567890y'
    )
    assert_equal(['xabcy', [true, true]], results)

    # Big delete, big change 1.
    patches = @dmp.patch_make(
      'x1234567890123456789012345678901234567890123456789012345678901234567890y',
      'xabcy'
    )
    results = @dmp.patch_apply(
      patches,
      'x12345678901234567890---------------++++++++++---------------' +
      '12345678901234567890y'
    )
    assert_equal(
      [
        'xabc12345678901234567890---------------++++++++++---------------' +
        '12345678901234567890y',
        [false, true]
      ],
      results
    )

    # Big delete, big change 2.
    @dmp.patch_deleteThreshold = 0.6
    patches = @dmp.patch_make(
      'x1234567890123456789012345678901234567890123456789012345678901234567890y',
      'xabcy'
    )
    results = @dmp.patch_apply(
      patches,
      'x12345678901234567890---------------++++++++++---------------' +
      '12345678901234567890y'
    )
    assert_equal(['xabcy', [true, true]], results)
    @dmp.patch_deleteThreshold = 0.5

    # Compensate for failed patch.
    @dmp.match_threshold = 0.0
    @dmp.match_distance = 0
    patches = @dmp.patch_make(
      'abcdefghijklmnopqrstuvwxyz--------------------1234567890',
      'abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------' +
      '1234567YYYYYYYYYY890'
    )
    results = @dmp.patch_apply(
      patches,
      'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890'
    )
    assert_equal(
      [
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890',
        [false, true]
      ],
      results
    )
    @dmp.match_threshold = 0.5
    @dmp.match_distance = 1000

    # No side effects: applying the patches must leave them unchanged.
    patches = @dmp.patch_make('', 'test')
    patchstr = @dmp.patch_toText(patches)
    @dmp.patch_apply(patches, '')
    assert_equal(patchstr, @dmp.patch_toText(patches))

    # No side effects with major delete.
patches = @dmp.patch_make( 'The quick brown fox jumps over the lazy dog.', 'Woof' ) patchstr = @dmp.patch_toText(patches) @dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.') assert_equal(patchstr, @dmp.patch_toText(patches)) # Edge exact match. patches = @dmp.patch_make('', 'test') results = @dmp.patch_apply(patches, '') assert_equal(['test', [true]], results) # Near edge exact match. patches = @dmp.patch_make('XY', 'XtestY') results = @dmp.patch_apply(patches, 'XY') assert_equal(['XtestY', [true]], results) # Edge partial match. patches = @dmp.patch_make('y', 'y123') results = @dmp.patch_apply(patches, 'x') assert_equal(['x123', [true]], results) end end