discourse-diff-0.1.0/ 0000755 0001750 0001750 00000000000 13467254524 013766 5 ustar samyak samyak discourse-diff-0.1.0/lib/ 0000755 0001750 0001750 00000000000 13467254524 014534 5 ustar samyak samyak discourse-diff-0.1.0/lib/discourse/ 0000755 0001750 0001750 00000000000 13467254524 016534 5 ustar samyak samyak discourse-diff-0.1.0/lib/discourse/diff.rb 0000644 0001750 0001750 00000020405 13467254524 017772 0 ustar samyak samyak require "discourse/onpdiff"
require "cgi"
require "nokogiri"
module Discourse
class Diff
MAX_DIFFERENCE = 200
# Computes both diffs up front so the render methods only have to format them.
#
# @param before [String] the old version of the content
# @param after [String] the new version of the content
#
# @block_by_block_diff compares the top-level HTML elements of both versions
# (ONPDiff#diff); @line_by_line_diff compares the HTML-escaped source lines
# (ONPDiff#short_diff).
def initialize(before, after)
  @before, @after = before, after

  html_blocks = [@before, @after].map { |source| tokenize_html_blocks(source) }
  escaped_lines = [@before, @after].map { |source| tokenize_line(CGI.escapeHTML(source)) }

  @block_by_block_diff = ONPDiff.new(*html_blocks).diff
  @line_by_line_diff = ONPDiff.new(*escaped_lines).short_diff
end
# Renders the block-level diff as a single column of HTML.
#
# Unchanged blocks are emitted verbatim. A deleted block that is immediately
# followed by an added block (or the reverse) is treated as a modification and
# diffed token by token; any other deleted/added block is marked as a whole
# with the "del"/"ins" class.
#
# @return [String] the joined fragments, surrounded by newlines
def inline_html
  blocks = @block_by_block_diff
  fragments = []
  position = 0

  while position < blocks.length
    content, op_code = blocks[position]

    if op_code == :common
      fragments << content
      position += 1
      next
    end

    deletion = op_code == :delete
    counterpart = blocks[position + 1]

    if counterpart && counterpart[1] == (deletion ? :add : :delete)
      # Always diff old -> new, whichever of the two came first in the script.
      old_block, new_block = deletion ? [content, counterpart[0]] : [counterpart[0], content]
      token_diff = ONPDiff.new(tokenize_html(old_block), tokenize_html(new_block)).diff
      fragments << generate_inline_html(token_diff)
      position += 2 # the counterpart has been consumed as well
    else
      fragments << add_class_or_wrap_in_tags(content, deletion ? "del" : "ins")
      position += 1
    end
  end

  "\n#{fragments.join}\n"
end
# Renders the block-level diff as two columns of HTML: the old version with
# deletions marked and the new version with insertions marked.
#
# Unchanged blocks go to both columns. A delete/add pair of adjacent blocks is
# diffed token by token and split across the columns; an unpaired deleted
# block appears only on the left, an unpaired added block only on the right.
#
# @return [String] left column, newline, right column, newline
def side_by_side_html
  blocks = @block_by_block_diff
  left_column = []
  right_column = []
  position = 0

  while position < blocks.length
    content, op_code = blocks[position]

    if op_code == :common
      left_column << content
      right_column << content
      position += 1
      next
    end

    deletion = op_code == :delete
    counterpart = blocks[position + 1]

    if counterpart && counterpart[1] == (deletion ? :add : :delete)
      # Always diff old -> new, whichever of the two came first in the script.
      old_block, new_block = deletion ? [content, counterpart[0]] : [counterpart[0], content]
      token_diff = ONPDiff.new(tokenize_html(old_block), tokenize_html(new_block)).diff
      removed, added = generate_side_by_side_html(token_diff)
      left_column << removed
      right_column << added
      position += 2 # the counterpart has been consumed as well
    else
      if deletion
        left_column << add_class_or_wrap_in_tags(content, "del")
      else
        right_column << add_class_or_wrap_in_tags(content, "ins")
      end
      position += 1
    end
  end

  "#{left_column.join}\n#{right_column.join}\n"
end
# Renders the line-level diff of the escaped source as rows of two cells
# (old text, new text), each cell terminated by " | " and each row by a
# newline.
#
# Adjacent delete/add chunks are treated as a modification and diffed at word
# level; when the two token counts differ by more than MAX_DIFFERENCE the
# comparison falls back to whole lines.
#
# NOTE(review): the empty strings pushed onto the table contribute nothing to
# the output; they look like placeholders for table/row markup -- confirm.
#
# @return [String]
def side_by_side_markdown
  chunks = @line_by_line_diff
  table = [""]
  position = 0

  while position < chunks.length
    table << ""
    text, op_code = chunks[position]
    deletion = op_code == :delete
    counterpart = chunks[position + 1]

    if op_code == :common
      table << "#{text} | " << "#{text} | "
    elsif counterpart && counterpart[1] == (deletion ? :add : :delete)
      # Always diff old -> new, whichever of the two came first in the script.
      old_text, new_text = deletion ? [text, counterpart[0]] : [counterpart[0], text]
      old_tokens = tokenize_markdown(old_text)
      new_tokens = tokenize_markdown(new_text)
      if (old_tokens.length - new_tokens.length).abs > MAX_DIFFERENCE
        old_tokens = tokenize_line(old_text)
        new_tokens = tokenize_line(new_text)
      end
      word_diff = ONPDiff.new(old_tokens, new_tokens).short_diff
      removed, added = generate_side_by_side_markdown(word_diff)
      table << "#{removed.join} | " << "#{added.join} | "
      position += 1 # skip the counterpart that was just consumed
    elsif deletion
      table << "#{text} | " << " | "
    else
      table << " | " << "#{text} | "
    end

    table << "\n"
    position += 1
  end

  table << "\n"
  table.join
end
private
# Splits +text+ into lines. Each token is a run of non-newline characters
# followed by every CR/LF character that comes directly after it, so
# consecutive blank lines stay attached to the preceding line and leading
# newlines are dropped.
#
# @param text [String]
# @return [Array<String>]
def tokenize_line(text)
  line_with_terminators = /[^\r\n]+[\r\n]*/
  text.scan(line_with_terminators)
end
# Splits +text+ into word-level tokens.
#
# A run of word characters (+\w+) together with the spaces/tabs that directly
# follow it forms one token; every other character (punctuation, newlines,
# whitespace not preceded by a word) is a token of its own.
#
# A single regex scan replaces character-by-character indexing, which is
# quadratic on multibyte strings.
#
# @param text [String]
# @return [Array<String>]
def tokenize_markdown(text)
  text.scan(/\w+[ \t]*|\W/)
end
# Parses +html+ as a fragment and returns the serialized markup of each node
# matched by the "./*" search on that fragment, one string per node.
#
# @param html [String]
# @return [Array<String>]
def tokenize_html_blocks(html)
  fragment = Nokogiri::HTML.fragment(html)
  fragment.search("./*").map { |node| node.to_html }
end
# Splits one HTML block into tag and text tokens by delegating to
# HtmlTokenizer.tokenize.
#
# @param html [String]
# @return [Array<String>] the tokens collected by the tokenizer
def tokenize_html(html)
HtmlTokenizer.tokenize(html)
end
# Marks +html_or_text+ as deleted or inserted.
#
# * Plain text (anything that does not start with a tag) is wrapped in a
#   <klass>...</klass> element.
# * Markup that starts with a tag gets the class "diff-<klass>" added to that
#   first tag, either as a new class attribute or prepended to the existing
#   one. Nothing is added if the class is already present.
#
# The argument is never modified; a new string is returned.
#
# @param html_or_text [String] a tag/markup token or a text token
# @param klass [String] "del" or "ins"
# @return [String]
def add_class_or_wrap_in_tags(html_or_text, klass)
  marker = "diff-#{klass}"
  tag_end = html_or_text.index(">")

  unless html_or_text.start_with?("<") && tag_end
    return "<#{klass}>#{html_or_text}</#{klass}>"
  end

  class_at = html_or_text.index("class=")
  if class_at.nil? || class_at > tag_end
    # The first tag has no class attribute: add one right before its ">".
    return html_or_text.dup.insert(tag_end, " class=\"#{marker}\"")
  end

  # Lazy match so only this attribute's value is captured, not everything up
  # to the last quote in the string.
  classes = html_or_text[/class=(["'])(.*?)\1/m, 2]

  # An unquoted class attribute cannot be extended safely; leave it untouched.
  return html_or_text.dup if classes.nil? || classes.split.include?(marker)

  # Prepend the marker just after the opening quote of the attribute value.
  html_or_text.dup.insert(class_at + "class=".length + 1, "#{marker} ")
end
# Turns a token-level edit script into a list of HTML fragments for the
# single-column view: common tokens are kept as they are, deleted tokens are
# marked "del", added tokens are marked "ins". Entries with any other op code
# are skipped.
#
# @param diff [Array<Array>] pairs of [token, op_code]
# @return [Array<String>]
def generate_inline_html(diff)
  diff.each_with_object([]) do |(token, op_code), fragments|
    if op_code == :common
      fragments << token
    elsif op_code == :delete
      fragments << add_class_or_wrap_in_tags(token, "del")
    elsif op_code == :add
      fragments << add_class_or_wrap_in_tags(token, "ins")
    end
  end
end
# Splits a token-level edit script into the two columns of the side-by-side
# view. Common tokens go to both columns, deleted tokens (marked "del") only
# to the first, added tokens (marked "ins") only to the second.
#
# @param diff [Array<Array>] pairs of [token, op_code]
# @return [Array(Array<String>, Array<String>)] [old column, new column]
def generate_side_by_side_html(diff)
  removed = []
  added = []

  diff.each do |token, op_code|
    if op_code == :common
      removed << token
      added << token
    elsif op_code == :delete
      removed << add_class_or_wrap_in_tags(token, "del")
    elsif op_code == :add
      added << add_class_or_wrap_in_tags(token, "ins")
    end
  end

  [removed, added]
end
# Splits a word-level edit script into the old and new cell contents of the
# markdown view. Common tokens go to both lists, deleted tokens only to the
# first, added tokens only to the second.
#
# NOTE(review): deleted/added tokens are interpolated into a string without
# any surrounding markup, so they are not visually marked -- presumably some
# wrapper was intended here; confirm.
#
# @param diff [Array<Array>] pairs of [token, op_code]
# @return [Array(Array<String>, Array<String>)] [old tokens, new tokens]
def generate_side_by_side_markdown(diff)
  removed = []
  added = []

  diff.each do |token, op_code|
    if op_code == :common
      removed << token
      added << token
    elsif op_code == :delete
      removed << "#{token}"
    elsif op_code == :add
      added << "#{token}"
    end
  end

  [removed, added]
end
# SAX handler that flattens an HTML fragment into a list of tokens: one token
# per opening tag (with its attributes), one per closing tag, and word-level
# tokens for text.
class HtmlTokenizer < Nokogiri::XML::SAX::Document
  # Tags skipped by both start_element and end_element.
  USELESS_TAGS = %w{html body}.freeze
  # Tags for which no closing token is emitted.
  AUTOCLOSING_TAGS = %w{area base br col embed hr img input meta}.freeze

  attr_accessor :tokens

  # Parses +html+ and returns the collected tokens.
  #
  # @param html [String]
  # @return [Array<String>]
  def self.tokenize(html)
    me = new
    parser = Nokogiri::HTML::SAX::Parser.new(me)
    parser.parse("#{html}")
    me.tokens
  end

  def initialize
    @tokens = []
  end

  # Emits "<name attr="value" ...>". Attribute values are HTML-escaped, like
  # text in #characters, so a quote or angle bracket inside a value cannot
  # break out of the attribute when the token is rendered.
  def start_element(name, attributes = [])
    return if USELESS_TAGS.include?(name)
    attrs = attributes.map { |key, value| " #{key}=\"#{CGI.escapeHTML(value.to_s)}\"" }.join
    @tokens << "<#{name}#{attrs}>"
  end

  # Emits the matching "</name>" closing tag.
  def end_element(name)
    return if USELESS_TAGS.include?(name) || AUTOCLOSING_TAGS.include?(name)
    @tokens << "</#{name}>"
  end

  # Splits text into tokens (a word plus its trailing spaces/tabs, or a single
  # non-word character) and HTML-escapes each one.
  def characters(string)
    @tokens.concat string.scan(/\W|\w+[ \t]*/).map { |x| CGI.escapeHTML(x) }
  end
end
end
end
discourse-diff-0.1.0/lib/discourse/onpdiff.rb 0000644 0001750 0001750 00000006626 13467254524 020520 0 ustar samyak samyak # Use "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber and Gene Myers
# in http://www.itu.dk/stud/speciale/bepjea/xwebtex/litt/an-onp-sequence-comparison-algorithm.pdf
# Computes an edit script between two sequences +a+ and +b+.
#
#   ONPDiff.new(%w[a b c], %w[a c d]).diff
#   # => [["a", :common], ["b", :delete], ["c", :common], ["d", :add]]
#
# Elements found only in +a+ are tagged :delete, elements found only in +b+
# are tagged :add, shared elements are tagged :common.
class ONPDiff
  # @param a [Array] the old sequence (anything indexable with #length)
  # @param b [Array] the new sequence
  def initialize(a, b)
    @a, @b = a, b
    @m, @n = a.length, b.length
    @backtrack = []
    # The search below works with the shorter sequence in @a; remember the
    # swap so :add and :delete can be swapped back in the edit script.
    @reverse = @m > @n
    if @reverse
      @a, @b = @b, @a
      @m, @n = @n, @m
    end
    @offset = @m + 1
    @delta = @n - @m
  end

  # @return [Array<Array>] one [element, op] pair per element, op being
  #   :common, :delete or :add. Elements are the objects from the inputs.
  def diff
    @diff ||= build_edit_script(compose)
  end

  # Like #diff, but consecutive entries with the same op are merged into one
  # entry by appending with <<. The merged entries are copies: the input
  # sequences and their elements are left untouched.
  #
  # @return [Array<Array>]
  def short_diff
    @short_diff ||= build_short_edit_script(compose)
  end

  private

  # Finds the shortest edit path and returns its snake end points as [x, y]
  # pairs, ordered from the end of the sequences back to the start.
  def compose
    return @shortest_path if @shortest_path

    size = @m + @n + 3
    fp = Array.new(size, -1)     # furthest y reached on each diagonal
    @path = Array.new(size, -1)  # index into @backtrack per diagonal

    p = -1
    loop do
      p += 1
      (-p).upto(@delta - 1) do |k|
        fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
      end
      (@delta + p).downto(@delta + 1) do |k|
        fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
      end
      fp[@delta + @offset] = snake(@delta, fp[@delta - 1 + @offset] + 1, fp[@delta + 1 + @offset])
      break if fp[@delta + @offset] == @n
    end

    # Follow the back-pointers from the final diagonal to the origin.
    @shortest_path = []
    r = @path[@delta + @offset]
    until r == -1
      x, y, r = @backtrack[r]
      @shortest_path << [x, y]
    end
    @shortest_path
  end

  # Extends diagonal +k+ as far as the sequences match, starting from the
  # better of the two neighbouring positions, records the end point together
  # with a back-pointer to the snake it came from, and returns the reached y.
  def snake(k, p, pp)
    r = p > pp ? @path[k - 1 + @offset] : @path[k + 1 + @offset]
    y = [p, pp].max
    x = y - k
    while x < @m && y < @n && @a[x] == @b[y]
      x += 1
      y += 1
    end
    @path[k + @offset] = @backtrack.length
    @backtrack << [x, y, r]
    y
  end

  # Walks the path from the start of the sequences and yields each element
  # with its op code, in script order.
  def each_edit(shortest_path)
    px = py = 0
    shortest_path.reverse_each do |sx, sy|
      diagonal = sy - sx
      while px < sx || py < sy
        current = py - px
        if diagonal > current
          yield @b[py], (@reverse ? :delete : :add)
          py += 1
        elsif diagonal < current
          yield @a[px], (@reverse ? :add : :delete)
          px += 1
        else
          yield @a[px], :common
          px += 1
          py += 1
        end
      end
    end
  end

  def build_edit_script(shortest_path)
    ses = []
    each_edit(shortest_path) { |element, op| ses << [element, op] }
    ses
  end

  def build_short_edit_script(shortest_path)
    ses = []
    each_edit(shortest_path) do |element, op|
      if ses.last && ses.last[1] == op
        ses.last[0] << element
      else
        # Copy the first element of a run: appending to the original object
        # would modify the caller's sequence (and fail on frozen strings).
        ses << [element.dup, op]
      end
    end
    ses
  end
end
discourse-diff-0.1.0/.travis.yml 0000644 0001750 0001750 00000000130 13467254524 016071 0 ustar samyak samyak sudo: false
language: ruby
rvm:
- 2.3.1
before_install: gem install bundler -v 1.16.0
discourse-diff-0.1.0/LICENSE.txt 0000644 0001750 0001750 00000002067 13467254524 015616 0 ustar samyak samyak The MIT License (MIT)
Copyright (c) 2018 James Kiesel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
discourse-diff-0.1.0/Gemfile 0000644 0001750 0001750 00000000271 13467254524 015261 0 ustar samyak samyak source "https://rubygems.org"
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
gem 'nokogiri'
# Specify your gem's dependencies in discourse-diff.gemspec
gemspec
discourse-diff-0.1.0/.gitignore 0000644 0001750 0001750 00000000111 13467254524 015747 0 ustar samyak samyak /.bundle/
/.yardoc
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
discourse-diff-0.1.0/discourse-diff.gemspec 0000644 0001750 0001750 00000001573 13467254524 020247 0 ustar samyak samyak lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "discourse/diff"
# Gem metadata for discourse-diff.
Gem::Specification.new do |spec|
  spec.name = "discourse-diff"
  spec.version = "0.1.0"
  spec.authors = ["Discourse", "James Kiesel"]
  spec.email = ["contact@discourse.org", "james.kiesel@gmail.com"]
  spec.summary = %q{Discourse Diff provides inline html diffing for markdown blobs}
  spec.description = %q{This library has been extracted from [Discourse core](https://www.github.com/discourse/discourse)}
  spec.license = "MIT"

  # Package every file tracked by git except the specs.
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
  spec.require_paths = ["lib"]

  # lib/discourse/diff.rb requires nokogiri at load time, so it has to be
  # installed together with the gem, not only through the development Gemfile.
  spec.add_runtime_dependency "nokogiri", "~> 1.8"

  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", ">= 3.0.0"
end
discourse-diff-0.1.0/README.md 0000644 0001750 0001750 00000003426 13467254524 015252 0 ustar samyak samyak # Discourse::Diff
Discourse Diff provides inline HTML diffing for markdown blobs. This library has been extracted from [Discourse core](https://www.github.com/discourse/discourse).
The code lives in `lib/discourse/diff.rb` and `lib/discourse/onpdiff.rb`. To experiment with it, run `bin/console` for an interactive prompt.
## Installation
Add this line to your application's Gemfile:
```ruby
gem 'discourse-diff'
```
And then execute:
$ bundle
Or install it yourself as:
$ gem install discourse-diff
## Usage
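```ruby
require "discourse/diff"

# `before` and `after` are the old and new versions of the content, as strings
diff = Discourse::Diff.new(before, after)

diff.inline_html            # single-column HTML diff
diff.side_by_side_html      # old and new HTML, changes marked on each side
diff.side_by_side_markdown  # two-column diff of the HTML-escaped source lines
```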
## Development
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `discourse-diff.gemspec`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
## Contributing
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/discourse-diff. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
## License
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
## Code of Conduct
Everyone interacting in the Discourse::Diff project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/discourse-diff/blob/master/CODE_OF_CONDUCT.md).
discourse-diff-0.1.0/CODE_OF_CONDUCT.md 0000644 0001750 0001750 00000006236 13467254524 016574 0 ustar samyak samyak # Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of experience,
nationality, personal appearance, race, religion, or sexual identity and
orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at james.kiesel@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/
discourse-diff-0.1.0/Rakefile 0000644 0001750 0001750 00000000306 13467254524 015432 0 ustar samyak samyak require "bundler/gem_tasks"
require "rake/testtask"
# `rake test` runs every file matching test/**/*_test.rb with "test" and "lib"
# added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Running plain `rake` runs the test task.
task default: :test
discourse-diff-0.1.0/bin/ 0000755 0001750 0001750 00000000000 13467254524 014536 5 ustar samyak samyak discourse-diff-0.1.0/bin/setup 0000755 0001750 0001750 00000000203 13467254524 015617 0 ustar samyak samyak #!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
set -vx
bundle install
# Do any other automated setup that you need to do here
discourse-diff-0.1.0/bin/console 0000755 0001750 0001750 00000000535 13467254524 016131 0 ustar samyak samyak #!/usr/bin/env ruby
require "bundler/setup"
require "discourse/diff"
# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.
# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start
require "irb"
IRB.start(__FILE__)
discourse-diff-0.1.0/Gemfile.lock 0000644 0001750 0001750 00000001270 13467254524 016210 0 ustar samyak samyak PATH
remote: .
specs:
discourse-diff (0.1.0)
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.3)
mini_portile2 (2.3.0)
nokogiri (1.8.2)
mini_portile2 (~> 2.3.0)
rake (10.5.0)
rspec (3.0.0)
rspec-core (~> 3.0.0)
rspec-expectations (~> 3.0.0)
rspec-mocks (~> 3.0.0)
rspec-core (3.0.4)
rspec-support (~> 3.0.0)
rspec-expectations (3.0.4)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.0.0)
rspec-mocks (3.0.4)
rspec-support (~> 3.0.0)
rspec-support (3.0.4)
PLATFORMS
ruby
DEPENDENCIES
bundler (~> 1.16)
discourse-diff!
nokogiri
rake (~> 10.0)
rspec (~> 3.0.0)
BUNDLED WITH
1.16.0