reverse_markdown-3.0.0/0000755000004100000410000000000014727742461015135 5ustar www-datawww-datareverse_markdown-3.0.0/bin/0000755000004100000410000000000014727742461015705 5ustar www-datawww-datareverse_markdown-3.0.0/bin/reverse_markdown0000755000004100000410000000113014727742461021203 0ustar www-datawww-data#!/usr/bin/env ruby # Usage: reverse_markdown [FILE]... # Usage: cat FILE | reverse_markdown require 'reverse_markdown' require 'optparse' options = {} OptionParser.new do |opts| opts.banner = "Usage: reverse_markdown [options] " opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') { |v| ReverseMarkdown.config.unknown_tags = v } opts.on('-g', '--github_flavored bool', 'use github flavored markdown (default: false)') { |v| ReverseMarkdown.config.github_flavored = v } end.parse! puts ReverseMarkdown.convert(ARGF.read) reverse_markdown-3.0.0/.gitignore0000644000004100000410000000014014727742461017120 0ustar www-datawww-data*.gem .bundle .rvmrc .ruby-version .ruby-gemset .codeclimate Gemfile.lock pkg/* coverage/* TODO reverse_markdown-3.0.0/.tool-versions0000644000004100000410000000001314727742461017753 0ustar www-datawww-dataruby 3.3.5 reverse_markdown-3.0.0/.github/0000755000004100000410000000000014727742461016475 5ustar www-datawww-datareverse_markdown-3.0.0/.github/workflows/0000755000004100000410000000000014727742461020532 5ustar www-datawww-datareverse_markdown-3.0.0/.github/workflows/ci.yml0000644000004100000410000000217314727742461021653 0ustar www-datawww-dataname: CI on: push: branches: - master pull_request: branches: - master jobs: test: runs-on: ubuntu-latest strategy: matrix: ruby-version: [ '2.7', '3.0', '3.1', '3.2', '3.3', 'jruby-9.4' ] steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true - name: Install dependencies run: bundle install - name: Run tests run: bundle exec 
rspec - name: Run Code Climate Test Reporter run: | curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter chmod +x ./cc-test-reporter ./cc-test-reporter before-build - name: Run tests run: bundle exec rspec - name: Upload Code Climate Coverage Report if: matrix.ruby-version == '3.3' env: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} run: | ./cc-test-reporter after-build --exit-code $? reverse_markdown-3.0.0/lib/0000755000004100000410000000000014727742461015703 5ustar www-datawww-datareverse_markdown-3.0.0/lib/reverse_markdown.rb0000644000004100000410000000400514727742461021604 0ustar www-datawww-datarequire 'nokogiri' require 'reverse_markdown/version' require 'reverse_markdown/errors' require 'reverse_markdown/cleaner' require 'reverse_markdown/config' require 'reverse_markdown/converters' require 'reverse_markdown/converters/base' require 'reverse_markdown/converters/a' require 'reverse_markdown/converters/blockquote' require 'reverse_markdown/converters/br' require 'reverse_markdown/converters/bypass' require 'reverse_markdown/converters/code' require 'reverse_markdown/converters/del' require 'reverse_markdown/converters/div' require 'reverse_markdown/converters/drop' require 'reverse_markdown/converters/details' require 'reverse_markdown/converters/em' require 'reverse_markdown/converters/figcaption' require 'reverse_markdown/converters/figure' require 'reverse_markdown/converters/h' require 'reverse_markdown/converters/hr' require 'reverse_markdown/converters/ignore' require 'reverse_markdown/converters/img' require 'reverse_markdown/converters/li' require 'reverse_markdown/converters/ol' require 'reverse_markdown/converters/p' require 'reverse_markdown/converters/pass_through' require 'reverse_markdown/converters/pre' require 'reverse_markdown/converters/strong' require 'reverse_markdown/converters/table' require 'reverse_markdown/converters/td' require 'reverse_markdown/converters/text' 
require 'reverse_markdown/converters/tr' require 'reverse_markdown/converters/iframe' module ReverseMarkdown def self.convert(input, options = {}) config.with(options) do input = cleaner.force_encoding(input.to_s) root = case input when String then Nokogiri::HTML(input).root when Nokogiri::XML::Document then input.root when Nokogiri::XML::Node then input end root or return '' result = ReverseMarkdown::Converters.lookup(root.name).convert(root) cleaner.tidy(result) end end def self.config @config ||= Config.new yield @config if block_given? @config end def self.cleaner @cleaner ||= Cleaner.new end end reverse_markdown-3.0.0/lib/reverse_markdown/0000755000004100000410000000000014727742461021260 5ustar www-datawww-datareverse_markdown-3.0.0/lib/reverse_markdown/cleaner.rb0000644000004100000410000000537314727742461023226 0ustar www-datawww-datamodule ReverseMarkdown class Cleaner def tidy(string) result = remove_inner_whitespaces(string) result = remove_newlines(result) result = remove_leading_newlines(result) result = clean_tag_borders(result) clean_punctuation_characters(result) end def remove_newlines(string) string.gsub(/\n{3,}/, "\n\n") end def remove_leading_newlines(string) string.gsub(/\A\n+/, '') end def remove_inner_whitespaces(string) string.each_line.inject("") do |memo, line| memo + preserve_border_whitespaces(line) do line.strip.gsub(/[ \t]{2,}/, ' ') end end end # Find non-asterisk content that is enclosed by two or # more asterisks. Ensure that only one whitespace occurs # in the border area. # Same for underscores and brackets. 
def clean_tag_borders(string) result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match| preserve_border_whitespaces(match, default_border: ReverseMarkdown.config.tag_border) do match.strip.sub('** ', '**').sub(' **', '**') end end result = result.gsub(/\s?\_{2,}.*?\_{2,}\s?/) do |match| preserve_border_whitespaces(match, default_border: ReverseMarkdown.config.tag_border) do match.strip.sub('__ ', '__').sub(' __', '__') end end result = result.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match| preserve_border_whitespaces(match, default_border: ReverseMarkdown.config.tag_border) do match.strip.sub('~~ ', '~~').sub(' ~~', '~~') end end result.gsub(/\s?\[.*?\]\s?/) do |match| preserve_border_whitespaces(match) do match.strip.sub('[ ', '[').sub(' ]', ']') end end end def clean_punctuation_characters(string) string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, "\\1".strip + "\\2") end def force_encoding(string) ReverseMarkdown.config.force_encoding or return string string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') end private def preserve_border_whitespaces(string, options = {}, &block) return string if string =~ /\A\s*\Z/ default_border = options.fetch(:default_border, '') # If the string contains part of a link so the characters [,],(,) # then don't add any extra spaces default_border = '' if string =~ /[\[\(\]\)]/ string_start = present_or_default(string[/\A\s*/], default_border) string_end = present_or_default(string[/\s*\Z/], default_border) result = yield string_start + result + string_end end def present_or_default(string, default) if string.nil? || string.empty? 
default else string end end end end reverse_markdown-3.0.0/lib/reverse_markdown/config.rb0000644000004100000410000000155114727742461023054 0ustar www-datawww-datamodule ReverseMarkdown class Config attr_writer :unknown_tags, :github_flavored, :tag_border, :force_encoding def initialize @unknown_tags = :pass_through @github_flavored = false @force_encoding = false @em_delimiter = '_'.freeze @strong_delimiter = '**'.freeze @inline_options = {} @tag_border = ' '.freeze end def with(options = {}) @inline_options = options result = yield @inline_options = {} result end def unknown_tags @inline_options[:unknown_tags] || @unknown_tags end def github_flavored @inline_options[:github_flavored] || @github_flavored end def tag_border @inline_options[:tag_border] || @tag_border end def force_encoding @inline_options[:force_encoding] || @force_encoding end end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/0000755000004100000410000000000014727742461023452 5ustar www-datawww-datareverse_markdown-3.0.0/lib/reverse_markdown/converters/hr.rb0000644000004100000410000000025514727742461024412 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Hr < Base def convert(node, state = {}) "\n* * *\n" end end register :hr, Hr.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/ol.rb0000644000004100000410000000046214727742461024413 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Ol < Base def convert(node, state = {}) ol_count = state.fetch(:ol_count, 0) + 1 +"\n" << treat_children(node, state.merge(ol_count: ol_count)) << "\n" end end register :ol, Ol.new register :ul, Ol.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/div.rb0000644000004100000410000000036414727742461024564 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Div < Base def convert(node, state = {}) +"\n" << treat_children(node, state) << "\n" end end register :div, Div.new register :article, Div.new end end 
reverse_markdown-3.0.0/lib/reverse_markdown/converters/text.rb0000644000004100000410000000263314727742461024767 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Text < Base def convert(node, options = {}) if node.text.strip.empty? treat_empty(node) else treat_text(node) end end private def treat_empty(node) parent = node.parent.name.to_sym if [:ol, :ul].include?(parent) # Otherwise the identation is broken '' elsif node.text == ' ' # Regular whitespace text node ' ' else '' end end def treat_text(node) text = node.text text = preserve_nbsp(text) text = remove_border_newlines(text) text = remove_inner_newlines(text) text = escape_keychars(text) text = preserve_keychars_within_backticks(text) text = preserve_tags(text) text end def preserve_nbsp(text) text.gsub(/\u00A0/, " ") end def preserve_tags(text) text.gsub(/[<>]/, '>' => '\>', '<' => '\<') end def remove_border_newlines(text) text.gsub(/\A\n+/, '').gsub(/\n+\z/, '') end def remove_inner_newlines(text) text.tr("\r\n\t", ' ').squeeze(' ') end def preserve_keychars_within_backticks(text) text.gsub(/`.*?`/) do |match| match.gsub('\_', '_').gsub('\*', '*') end end end register :text, Text.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/del.rb0000644000004100000410000000107214727742461024543 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Del < Base def convert(node, state = {}) content = treat_children(node, state.merge(already_crossed_out: true)) if disabled? || content.strip.empty? || state[:already_crossed_out] content else "~~#{content}~~" end end def enabled? ReverseMarkdown.config.github_flavored end def disabled? !enabled? 
end end register :strike, Del.new register :s, Del.new register :del, Del.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/drop.rb0000644000004100000410000000021414727742461024740 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Drop < Base def convert(node, state = {}) '' end end end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/code.rb0000644000004100000410000000027014727742461024710 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Code < Base def convert(node, state = {}) "`#{node.text}`" end end register :code, Code.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/p.rb0000644000004100000410000000032514727742461024236 0ustar www-datawww-datamodule ReverseMarkdown module Converters class P < Base def convert(node, state = {}) +"\n\n" << treat_children(node, state).strip << "\n\n" end end register :p, P.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/base.rb0000644000004100000410000000124514727742461024713 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Base def treat_children(node, state) node.children.inject(+'') do |memo, child| memo << treat(child, state) end end def treat(node, state) ReverseMarkdown::Converters.lookup(node.name).convert(node, state) end def escape_keychars(string) string.gsub(/(? '\*', '_' => '\_') end def extract_title(node) title = escape_keychars(node['title'].to_s) title.empty? ? '' : %[ "#{title}"] end def extract_src(node) node['src'].to_s.empty? ? 
'' : node['src'].to_s end end end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/td.rb0000644000004100000410000000037014727742461024406 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Td < Base def convert(node, state = {}) content = treat_children(node, state) " #{content} |" end end register :td, Td.new register :th, Td.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/a.rb0000644000004100000410000000110314727742461024212 0ustar www-datawww-datamodule ReverseMarkdown module Converters class A < Base def convert(node, state = {}) name = treat_children(node, state) href = node['href'] title = extract_title(node) if href.to_s.empty? || name.empty? name else link = "[#{name}](#{href}#{title})" link.prepend(' ') if prepend_space?(node) link end end private def prepend_space?(node) node.at_xpath("preceding::text()[1]").to_s.end_with?('!') end end register :a, A.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/bypass.rb0000644000004100000410000000063514727742461025304 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Bypass < Base def convert(node, state = {}) treat_children(node, state) end end register :document, Bypass.new register :html, Bypass.new register :body, Bypass.new register :span, Bypass.new register :thead, Bypass.new register :tbody, Bypass.new register :tfoot, Bypass.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/img.rb0000644000004100000410000000043514727742461024555 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Img < Base def convert(node, state = {}) alt = node['alt'] src = node['src'] title = extract_title(node) " ![#{alt}](#{src}#{title})" end end register :img, Img.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/pass_through.rb0000644000004100000410000000023214727742461026502 0ustar www-datawww-datamodule ReverseMarkdown module Converters class PassThrough < Base def convert(node, state = {}) 
node.to_s end end end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/iframe.rb0000644000004100000410000000027714727742461025250 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Iframe < Base def convert(node, state = {}) extract_src(node) end end register :iframe, Iframe.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/strong.rb0000644000004100000410000000067214727742461025320 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Strong < Base def convert(node, state = {}) content = treat_children(node, state.merge(already_strong: true)) if content.strip.empty? || state[:already_strong] content else "#{content[/^\s*/]}**#{content.strip}**#{content[/\s*$/]}" end end end register :strong, Strong.new register :b, Strong.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/table.rb0000644000004100000410000000033114727742461025063 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Table < Base def convert(node, state = {}) +"\n\n" << treat_children(node, state) << "\n" end end register :table, Table.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/li.rb0000644000004100000410000000170314727742461024404 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Li < Base def convert(node, state = {}) contains_child_paragraph = node.first_element_child ? node.first_element_child.name == 'p' : false content_node = contains_child_paragraph ? node.first_element_child : node content = treat_children(content_node, state) indentation = indentation_from(state) prefix = prefix_for(node) "#{indentation}#{prefix}#{content.chomp}\n" + (contains_child_paragraph ? "\n" : '') end def prefix_for(node) if node.parent.name == 'ol' index = node.parent.xpath('li').index(node) "#{index.to_i + 1}. 
" else '- ' end end def indentation_from(state) length = state.fetch(:ol_count, 0) ' ' * [length - 1, 0].max end end register :li, Li.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/ignore.rb0000644000004100000410000000037714727742461025271 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Ignore < Base def convert(node, state = {}) '' # noop end end register :colgroup, Ignore.new register :col, Ignore.new register :head, Ignore.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/h.rb0000644000004100000410000000057714727742461024237 0ustar www-datawww-datamodule ReverseMarkdown module Converters class H < Base def convert(node, state = {}) prefix = '#' * node.name[/\d/].to_i ["\n", prefix, ' ', treat_children(node, state), "\n"].join end end register :h1, H.new register :h2, H.new register :h3, H.new register :h4, H.new register :h5, H.new register :h6, H.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/figcaption.rb0000644000004100000410000000045314727742461026124 0ustar www-datawww-datamodule ReverseMarkdown module Converters class FigCaption < Base def convert(node, state = {}) if node.text.strip.empty? "" else +"\n" << "_#{node.text.strip}_" << "\n" end end end register :figcaption, FigCaption.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/br.rb0000644000004100000410000000025014727742461024377 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Br < Base def convert(node, state = {}) " \n" end end register :br, Br.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/details.rb0000644000004100000410000000111314727742461025420 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Details < Base def convert(node, state = {}) content = treat_children(node, state.merge(already_processed: true)) if disabled? || content.strip.empty? || state[:already_processed] content else "##{content}" end end def enabled? 
ReverseMarkdown.config.github_flavored end def disabled? !enabled? end end register :details, Details.new register :summary, Details.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/tr.rb0000644000004100000410000000103714727742461024425 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Tr < Base def convert(node, state = {}) content = treat_children(node, state).rstrip result = "|#{content}\n" table_header_row?(node) ? result + underline_for(node) : result end def table_header_row?(node) node.element_children.all? {|child| child.name.to_sym == :th} end def underline_for(node) "| " + (['---'] * node.element_children.size).join(' | ') + " |\n" end end register :tr, Tr.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/blockquote.rb0000644000004100000410000000054514727742461026153 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Blockquote < Base def convert(node, state = {}) content = treat_children(node, state).strip content = ReverseMarkdown.cleaner.remove_newlines(content) +"\n\n> " << content.lines.to_a.join('> ') << "\n\n" end end register :blockquote, Blockquote.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/em.rb0000644000004100000410000000064414727742461024404 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Em < Base def convert(node, state = {}) content = treat_children(node, state.merge(already_italic: true)) if content.strip.empty? 
|| state[:already_italic] content else "#{content[/^\s*/]}_#{content.strip}_#{content[/\s*$/]}" end end end register :em, Em.new register :i, Em.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/figure.rb0000644000004100000410000000036214727742461025261 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Figure < Base def convert(node, state = {}) content = treat_children(node, state) "\n#{content.strip}\n" end end register :figure, Figure.new end end reverse_markdown-3.0.0/lib/reverse_markdown/converters/pre.rb0000644000004100000410000000206414727742461024567 0ustar www-datawww-datamodule ReverseMarkdown module Converters class Pre < Base def convert(node, state = {}) content = treat_children(node, state) if ReverseMarkdown.config.github_flavored +"\n```#{language(node)}\n" << content.strip << "\n```\n" else +"\n\n " << content.lines.to_a.join(" ") << "\n\n" end end private # Override #treat as proposed in https://github.com/xijo/reverse_markdown/pull/69 def treat(node, state) case node.name when 'code', 'text' node.text.strip when 'br' "\n" else super end end def language(node) lang = language_from_highlight_class(node) lang || language_from_confluence_class(node) end def language_from_highlight_class(node) node.parent['class'].to_s[/highlight-([a-zA-Z0-9]+)/, 1] end def language_from_confluence_class(node) node['class'].to_s[/brush:\s?(:?.*);/, 1] end end register :pre, Pre.new end end reverse_markdown-3.0.0/lib/reverse_markdown/version.rb0000644000004100000410000000005714727742461023274 0ustar www-datawww-datamodule ReverseMarkdown VERSION = '3.0.0' end reverse_markdown-3.0.0/lib/reverse_markdown/converters.rb0000644000004100000410000000165014727742461024001 0ustar www-datawww-datamodule ReverseMarkdown module Converters def self.register(tag_name, converter) @@converters ||= {} @@converters[tag_name.to_sym] = converter end def self.unregister(tag_name) @@converters.delete(tag_name.to_sym) end def self.lookup(tag_name) 
@@converters[tag_name.to_sym] or default_converter(tag_name) end private def self.default_converter(tag_name) case ReverseMarkdown.config.unknown_tags.to_sym when :pass_through ReverseMarkdown::Converters::PassThrough.new when :drop ReverseMarkdown::Converters::Drop.new when :bypass ReverseMarkdown::Converters::Bypass.new when :raise raise UnknownTagError, "unknown tag: #{tag_name}" else raise InvalidConfigurationError, "unknown value #{ReverseMarkdown.config.unknown_tags.inspect} for ReverseMarkdown.config.unknown_tags" end end end end reverse_markdown-3.0.0/lib/reverse_markdown/errors.rb0000644000004100000410000000022714727742461023122 0ustar www-datawww-datamodule ReverseMarkdown class Error < StandardError end class UnknownTagError < Error end class InvalidConfigurationError < Error end end reverse_markdown-3.0.0/spec/0000755000004100000410000000000014727742461016067 5ustar www-datawww-datareverse_markdown-3.0.0/spec/spec_helper.rb0000644000004100000410000000112014727742461020677 0ustar www-datawww-datarequire 'simplecov' require 'debug' unless RUBY_ENGINE == 'jruby' SimpleCov.profiles.define 'gem' do add_filter '/spec/' add_filter '/autotest/' add_group 'Libraries', '/lib/' end SimpleCov.start 'gem' require 'reverse_markdown' RSpec.configure do |config| config.after(:each) do ReverseMarkdown.instance_variable_set(:@config, nil) end config.around(jruby: :exclude) do |example| if RUBY_ENGINE == 'jruby' example.metadata[:skip] = true else example.call end end end def node_for(html) Nokogiri::HTML.parse(html).root.children.last.child end reverse_markdown-3.0.0/spec/assets/0000755000004100000410000000000014727742461017371 5ustar www-datawww-datareverse_markdown-3.0.0/spec/assets/basic.html0000644000004100000410000000272014727742461021341 0ustar www-datawww-data plain text

h1

h2

h3

h4

h5
h6
em tag content before and after empty em tags before and after em tags containing whitespace before
and after em tags containing whitespace double em tags

double em tags in p tag

a em with leading and trailing whitespace a em with extra leading and trailing whitespace strong tag content before and after empty strong tags before and after strong tags containing whitespace before
and after strong tags containing whitespace double strong tags

double strong tags in p tag

before double strong tags containing whitespace after a strong with leading and trailing whitespace a strong with extra leading and trailing whitespace b tag content i tag content br tags become double space followed by newline
before hr
after hr
section 1
section 2
reverse_markdown-3.0.0/spec/assets/quotation.html0000644000004100000410000000030514727742461022300 0ustar www-datawww-data
      Block of code
    

First quoted paragraph

Second quoted paragraph

reverse_markdown-3.0.0/spec/assets/escapables.html0000644000004100000410000000035114727742461022360 0ustar www-datawww-data some text... **two asterisks** ***three asterisks*** __two underscores__ ___three underscores___ some text...
var theoretical_max_infin = 1.0;
reverse_markdown-3.0.0/spec/assets/html_fragment.html0000644000004100000410000000005714727742461023110 0ustar www-datawww-datanaked text 1

paragraph text

naked text 2reverse_markdown-3.0.0/spec/assets/minimum.html0000644000004100000410000000004214727742461021726 0ustar www-datawww-data reverse_markdown-3.0.0/spec/assets/anchors.html0000644000004100000410000000230614727742461021715 0ustar www-datawww-data some text... Foobar Fubar Strong foobar There should be no extra space before and after the anchor (stripped). Exception: after an !there should be an extra space. Even with stripped elements inbetween: !there should be an extra space. ignore anchor tags with no link text not ignore An Image anchor tags with images pass through the text of internal jumplinks without treating them as links pass through the text of anchor tags with no href without treating them as links some text... foobar image foobar image 2 some text... reverse_markdown-3.0.0/spec/assets/code.html0000644000004100000410000000056214727742461021174 0ustar www-datawww-data
pre block
code block
pre code block

Paragraph with inline code block

var this;
this.is("A multi line code block")
console.log("Yup, it is")
	
Code with indentation:
tell application "Foo"
    beep
end tell
reverse_markdown-3.0.0/spec/assets/from_the_wild.html0000644000004100000410000000047714727742461023111 0ustar www-datawww-data

.
*** intentcast
: logo design
.

I\_AM\_HELPFUL reverse_markdown-3.0.0/spec/assets/lists.html0000644000004100000410000000460114727742461021416 0ustar www-datawww-data

some text...

  1. ordered list entry
  2. ordered list entry 2
  1. list entry 1st hierarchy
    • nested unsorted list entry
      1. deep nested list entry

a nested list with no whitespace:

a nested list with lots of whitespace:

  1. one
    1. one one
    2. one two
  2. two
    1. two one
      1. two one one
      2. two one two
    2. two two
  3. three

a nested list between adjacent list items

text after the list reverse_markdown-3.0.0/spec/assets/tables.html0000644000004100000410000000207714727742461021537 0ustar www-datawww-data some text...
header 1 header 2 header 3
data 1-1 data 2-1 data 3-1
data 1-2 data 2-2 data 3-2
footer 1 footer 2 footer 3
header oblique header bold header code
data oblique data bold data code
some text... reverse_markdown-3.0.0/spec/assets/full_example.html0000644000004100000410000000145614727742461022742 0ustar www-datawww-data
  1. li 1
    • eins
    • eins
    • eins
  1. li 1
  2. li 2

h1

h2

h3

h4

Hallo em Text

strong

      Block of code
    

First quoted paragraph

Second quoted paragraph

link
reverse_markdown-3.0.0/spec/assets/iframe.html0000644000004100000410000000031614727742461021522 0ustar www-datawww-data

Welcome to My Page

This is a sample paragraph before the iframe.

This is a sample paragraph after the iframe.

reverse_markdown-3.0.0/spec/assets/unknown_tags.html0000644000004100000410000000015414727742461022774 0ustar www-datawww-data

Foo with bar

reverse_markdown-3.0.0/spec/assets/paragraphs.html0000644000004100000410000000062114727742461022406 0ustar www-datawww-data

First content

Second content

Complex

        Content
      

Trailing whitespace:

Trailing non-breaking space: 

Combination: 

reverse_markdown-3.0.0/spec/lib/0000755000004100000410000000000014727742461016635 5ustar www-datawww-datareverse_markdown-3.0.0/spec/lib/reverse_markdown_spec.rb0000644000004100000410000000311314727742461023547 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/minimum.html') } let(:document) { Nokogiri::HTML(input) } it "parses nokogiri documents" do expect { ReverseMarkdown.convert(document) }.not_to raise_error end it "parses nokogiri elements" do expect { ReverseMarkdown.convert(document.root) }.not_to raise_error end it "parses string input" do expect { ReverseMarkdown.convert(input) }.not_to raise_error end it "behaves in a sane way when root element is nil" do expect(ReverseMarkdown.convert(nil)).to eq '' end describe '#config' do it 'stores a given configuration option' do ReverseMarkdown.config.github_flavored = true expect(ReverseMarkdown.config.github_flavored).to eq true end it 'can be used as a block configurator as well' do ReverseMarkdown.config do |config| expect(config.github_flavored).to eq false config.github_flavored = true end expect(ReverseMarkdown.config.github_flavored).to eq true end describe 'force_encoding option', jruby: :exclude do it 'raises invalid byte sequence in UTF-8 exception' do # Older versions of ruby used to raise ArgumentError here. Remove when we drop support for 3.1. 
expect { ReverseMarkdown.convert("hi \255") }.to raise_error { [Encoding::CompatibilityError, ArgumentError].include?(_1.class) } end it 'handles invalid byte sequence if option is set' do expect(ReverseMarkdown.convert("hi \255", force_encoding: true)).to eq "hi\n\n" end end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/0000755000004100000410000000000014727742461022212 5ustar www-datawww-datareverse_markdown-3.0.0/spec/lib/reverse_markdown/converters_spec.rb0000644000004100000410000000111414727742461025740 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters do before { ReverseMarkdown.config.unknown_tags = :raise } let(:converters) { ReverseMarkdown::Converters } describe '.register and .unregister' do it 'adds a converter mapping to the list' do expect { converters.lookup(:foo) }.to raise_error ReverseMarkdown::UnknownTagError converters.register :foo, :foobar expect(converters.lookup(:foo)).to eq :foobar converters.unregister :foo expect { converters.lookup(:foo) }.to raise_error ReverseMarkdown::UnknownTagError end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/cleaner_spec.rb0000644000004100000410000001252214727742461025164 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Cleaner do let(:cleaner) { ReverseMarkdown::Cleaner.new } describe '#remove_newlines' do it 'removes more than 2 subsequent newlines' do result = cleaner.remove_newlines("foo\n\n\nbar") expect(result).to eq "foo\n\nbar" end it 'skips single and double newlines' do result = cleaner.remove_newlines("foo\nbar\n\nbaz") expect(result).to eq "foo\nbar\n\nbaz" end end describe '#remove_inner_whitespaces' do it 'removes duplicate whitespaces from the string' do result = cleaner.remove_inner_whitespaces('foo bar') expect(result).to eq "foo bar" end it 'performs changes for multiple lines' do result = cleaner.remove_inner_whitespaces("foo bar\nbar foo") expect(result).to eq "foo bar\nbar foo" end it 'keeps leading whitespaces' do 
result = cleaner.remove_inner_whitespaces(" foo bar\n bar foo") expect(result).to eq " foo bar\n bar foo" end it 'keeps trailing whitespaces' do result = cleaner.remove_inner_whitespaces("foo \n") expect(result).to eq "foo \n" end it 'keeps trailing newlines' do result = cleaner.remove_inner_whitespaces("foo\n") expect(result).to eq "foo\n" end it 'removes tabs as well' do result = cleaner.remove_inner_whitespaces("foo\t \tbar") expect(result).to eq "foo bar" end it 'keeps lines that only contain whitespace' do result = cleaner.remove_inner_whitespaces("foo \nbar \n \n \nfoo") expect(result).to eq "foo \nbar \n \n \nfoo" end end describe '#clean_punctuation_characters' do it 'removes whitespace between tag end and punctuation characters' do input = "**fat** . ~~strike~~ ? __italic__ ! " result = cleaner.clean_punctuation_characters(input) expect(result).to eq "**fat**. ~~strike~~? __italic__! " end end describe '#clean_tag_borders' do context 'with default_border is set to space' do before { ReverseMarkdown.config.tag_border = ' ' } it 'removes not needed whitespaces from strong tags' do input = "foo ** foobar ** bar" result = cleaner.clean_tag_borders(input) expect(result).to eq "foo **foobar** bar" end it 'remotes leading or trailing whitespaces independently' do input = "1 **fat ** 2 ** fat** 3" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 **fat** 2 **fat** 3" end it 'adds whitespaces if there are none' do input = "1**fat**2" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 **fat** 2" end it "doesn't add whitespaces to underscore'ed elements if they are part of links" do input = "![im__age](sou__rce)" result = cleaner.clean_tag_borders(input) expect(result).to eq "![im__age](sou__rce)" end it "still cleans up whitespaces that aren't inside a link" do input = "now __italic __with following [under__scored](link)" result = cleaner.clean_tag_borders(input) expect(result).to eq "now __italic__ with following 
[under__scored](link)" end it 'cleans italic stuff as well' do input = "1 __italic __ 2 __ italic__ 3__italic __4" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 __italic__ 2 __italic__ 3 __italic__ 4" end it 'cleans strikethrough stuff as well' do input = "1 ~~italic ~~ 2 ~~ italic~~ 3~~italic ~~4" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 ~~italic~~ 2 ~~italic~~ 3 ~~italic~~ 4" end end context 'with default_border set to no space' do before { ReverseMarkdown.config.tag_border = '' } it 'removes not needed whitespaces from strong tags' do input = "foo ** foobar ** bar" result = cleaner.clean_tag_borders(input) expect(result).to eq "foo **foobar** bar" end it 'remotes leading or trailing whitespaces independently' do input = "1 **fat ** 2 ** fat** 3" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 **fat** 2 **fat** 3" end it 'adds whitespaces if there are none' do input = "1**fat**2" result = cleaner.clean_tag_borders(input) expect(result).to eq "1**fat**2" end it "doesn't add whitespaces to underscore'ed elements if they are part of links" do input = "![im__age](sou__rce)" result = cleaner.clean_tag_borders(input) expect(result).to eq "![im__age](sou__rce)" end it "still cleans up whitespaces that aren't inside a link" do input = "now __italic __with following [under__scored](link)" result = cleaner.clean_tag_borders(input) expect(result).to eq "now __italic__with following [under__scored](link)" end it 'cleans italic stuff as well' do input = "1 __italic __ 2 __ italic__ 3__italic __4" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 __italic__ 2 __italic__ 3__italic__4" end it 'cleans strikethrough stuff as well' do input = "1 ~~italic ~~ 2 ~~ italic~~ 3~~italic ~~4" result = cleaner.clean_tag_borders(input) expect(result).to eq "1 ~~italic~~ 2 ~~italic~~ 3~~italic~~4" end end end end 
reverse_markdown-3.0.0/spec/lib/reverse_markdown/config_spec.rb0000644000004100000410000000131414727742461025015 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Config do describe '#with' do let(:config) { ReverseMarkdown.config } it 'takes additional options into account' do config.with(github_flavored: :foobar) do expect(ReverseMarkdown.config.github_flavored).to eq :foobar end end it 'returns the result of a given block' do expect(config.with { :something }).to eq :something end it 'resets to original settings afterwards' do config.github_flavored = :foo config.with(github_flavored: :bar) do expect(ReverseMarkdown.config.github_flavored).to eq :bar end expect(ReverseMarkdown.config.github_flavored).to eq :foo end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/0000755000004100000410000000000014727742461024404 5ustar www-datawww-datareverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/pre_spec.rb0000644000004100000410000000472314727742461026537 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Pre do let(:converter) { ReverseMarkdown::Converters::Pre.new } context 'for standard markdown' do before { ReverseMarkdown.config.github_flavored = false } it 'converts with indentation' do node = node_for("
puts foo
") expect(converter.convert(node)).to include " puts foo\n" end it 'preserves new lines as
' do node = node_for("
one
two
three
") expect(converter.convert(node)).to include "\n\n one\n two\n three\n\n" end it 'preserves new lines as
and \n' do node = node_for("
one\ntwo\nthree
four
") expect(converter.convert(node)).to include "\n\n one\n two\n three\n four\n\n" end it 'handles code tags correctly' do node = node_for("
foobar
") expect(converter.convert(node)).to eq "\n\n foobar\n\n" end it 'handles indented correctly' do node = node_for("
if foo\n  return bar\nend
") expect(converter.convert(node)).to eq "\n\n if foo\n return bar\n end\n\n" end end context 'for github_flavored markdown' do before { ReverseMarkdown.config.github_flavored = true } it 'converts with backticks' do node = node_for("
puts foo
") expect(converter.convert(node)).to include "```\nputs foo\n```" end it 'preserves new lines' do node = node_for("
foo
bar
") expect(converter.convert(node)).to include "```\nfoo\nbar\n```" end it 'preserves new lines as
and \n' do node = node_for("
one\ntwo\nthree
four
") expect(converter.convert(node)).to include "```\none\ntwo\nthree\nfour\n```" end it 'handles code tags correctly' do node = node_for("
foobar
") expect(converter.convert(node)).to include "```\nfoobar\n```" end context 'syntax highlighting' do it 'works for "highlight-lang" mechanism' do div = node_for("
puts foo
") pre = div.children.first expect(converter.convert(pre)).to include "```ruby\n" end it 'works for the confluence mechanism' do pre = node_for("
puts foo
") expect(converter.convert(pre)).to include "```html/xml\n" end end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/text_spec.rb0000644000004100000410000000357014727742461026734 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Text do let(:converter) { ReverseMarkdown::Converters::Text.new } it 'treats newline within text as a single whitespace' do input = node_for("

foo\nbar

") result = converter.convert(input) expect(result).to eq 'foo bar' end it 'handles windows-style \r\n correctly' do input = node_for("

foo \r\n\r\n bar

") result = converter.convert(input) expect(result).to eq 'foo bar' end it 'removes leading newlines' do input = node_for("

\n\nfoo bar

") result = converter.convert(input) expect(result).to eq 'foo bar' end it 'removes trailing newlines' do input = node_for("

foo bar\n\n

") result = converter.convert(input) expect(result).to eq 'foo bar' end it 'keeps nbsps' do input = node_for("

foo\u00A0bar \u00A0

") result = converter.convert(input) expect(result).to eq "foo bar  " end it 'keeps escaped HTML-ish characters' do input = node_for("

<foo>

") result = converter.convert(input) expect(result).to eq '\' end context 'within backticks' do it "preserves single underscores" do input = node_for("

`foo_bar`

") result = converter.convert(input) expect(result).to eq '`foo_bar`' end it "preserves multiple underscores" do input = node_for("

`foo_bar __example__`

") result = converter.convert(input) expect(result).to eq '`foo_bar __example__`' end it "preserves single asterisks" do input = node_for("

`def foo *args`

") result = converter.convert(input) expect(result).to eq '`def foo *args`' end it "preserves multiple asterisks" do input = node_for("

`def foo 2***3`

") result = converter.convert(input) expect(result).to eq '`def foo 2***3`' end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/details_spec.rb0000644000004100000410000000123314727742461027367 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Details do let(:converter) { ReverseMarkdown::Converters::Details.new } context 'for standard markdown' do before { ReverseMarkdown.config.github_flavored = false } it 'handles details tags correctly' do node = node_for("
foo
") expect(converter.convert(node)).to include "foo" end end context 'for github_flavored markdown' do before { ReverseMarkdown.config.github_flavored = true } it 'handles details tags correctly' do node = node_for("
foo
") expect(converter.convert(node)).to include "#foo" end end endreverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/li_spec.rb0000644000004100000410000000045714727742461026355 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Li do let(:converter) { ReverseMarkdown::Converters::Li.new } it 'does not fail without a valid parent context' do input = node_for("
  • foo
  • ") result = converter.convert(input) expect(result).to eq "- foo\n" end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/blockquote_spec.rb0000644000004100000410000000112314727742461030110 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Blockquote do let(:converter) { ReverseMarkdown::Converters::Blockquote.new } it 'converts nested elements as well' do input = node_for("
    • foo
    ") result = converter.convert(input) expect(result).to eq "\n\n> - foo\n\n" end it 'can deal with paragraphs inside' do input = node_for("

    Some text.

    Some more text.

    ") result = converter.convert(input) expect(result).to eq "\n\n> Some text.\n> \n> Some more text.\n\n" end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/br_spec.rb0000644000004100000410000000036414727742461026351 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Br do let(:converter) { ReverseMarkdown::Converters::Br.new } it 'just converts into two spaces and a newline' do expect(converter.convert(:anything)).to eq " \n" end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/strong_spec.rb0000644000004100000410000000127414727742461027263 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Strong do let(:converter) { ReverseMarkdown::Converters::Strong.new } it 'returns an empty string if the node is empty' do input = node_for('') expect(converter.convert(input)).to eq '' end it 'returns just the content if the strong tag is nested in another strong' do input = node_for('foo') expect(converter.convert(input.children.first, already_strong: true)).to eq 'foo' end it 'moves border whitespaces outside of the delimiters tag' do input = node_for(" \n foo ") expect(converter.convert(input)).to eq " **foo** " end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/del_spec.rb0000644000004100000410000000232014727742461026504 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Del do let(:converter) { ReverseMarkdown::Converters::Del.new } context 'with github_flavored = true' do before { ReverseMarkdown.config.github_flavored = true } it 'converts the input as expected' do input = node_for('deldeldel') expect(converter.convert(input)).to eq '~~deldeldel~~' end it 'converts the input as expected' do input = node_for('strike that') expect(converter.convert(input)).to eq '~~strike that~~' end it 'skips empty tags' do input = node_for('') expect(converter.convert(input)).to eq '' end it 'knows about its enabled/disabled state' do 
expect(converter).to be_enabled expect(converter).not_to be_disabled end end context 'with github_flavored = false' do before { ReverseMarkdown.config.github_flavored = false } it 'does not convert anything' do input = node_for('deldeldel') expect(converter.convert(input)).to eq 'deldeldel' end it 'knows about its enabled/disabled state' do expect(converter).not_to be_enabled expect(converter).to be_disabled end end end reverse_markdown-3.0.0/spec/lib/reverse_markdown/converters/figure_spec.rb0000644000004100000410000000115114727742461027222 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown::Converters::Figure do let(:converter) { ReverseMarkdown::Converters::Figure.new } it 'handles figure tags with figcaption correctly' do node = node_for("
    img_alt
    Figure Caption
    ") expect(converter.convert(node)).to eq "\n![img_alt](image.jpg)\n_Figure Caption_\n" end it 'handles figure tags without figcaption correctly' do node = node_for("
    img_alt
    ") expect(converter.convert(node)).to eq "\n![img_alt](image.jpg)\n" end end reverse_markdown-3.0.0/spec/components/0000755000004100000410000000000014727742461020254 5ustar www-datawww-datareverse_markdown-3.0.0/spec/components/basic_spec.rb0000644000004100000410000000357414727742461022705 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/basic.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to match /plain text ?\n/ } it { is_expected.to match /# h1\n/ } it { is_expected.to match /## h2\n/ } it { is_expected.to match /### h3\n/ } it { is_expected.to match /#### h4\n/ } it { is_expected.to match /##### h5\n/ } it { is_expected.to match /###### h6\n/ } it { is_expected.to match /_em tag content_/ } it { is_expected.to match /before and after empty em tags/ } it { is_expected.to match /before and after em tags containing whitespace/ } it { is_expected.to match /_double em tags_/ } it { is_expected.to match /_double em tags in p tag_/ } it { is_expected.to match /a _em with leading and trailing_ whitespace/ } it { is_expected.to match /a _em with extra leading and trailing_ whitespace/ } it { is_expected.to match /\*\*strong tag content\*\*/ } it { is_expected.to match /before and after empty strong tags/ } it { is_expected.to match /before and after strong tags containing whitespace/ } it { is_expected.to match /\*\*double strong tags\*\*/ } it { is_expected.to match /\*\*double strong tags in p tag\*\*/ } it { is_expected.to match /before \*\*double strong tags containing whitespace\*\* after/ } it { is_expected.to match /a \*\*strong with leading and trailing\*\* whitespace/ } it { is_expected.to match /a \*\*strong with extra leading and trailing\*\* whitespace/ } it { is_expected.to match /_i tag content_/ } it { is_expected.to match /\*\*b tag content\*\*/ } it { is_expected.to match /br tags become double space followed by newline \n/ } 
#it { should match /br tags XXX \n/ } it { is_expected.to match /before hr \n\* \* \*\n after hr/ } it { is_expected.to match /section 1\n ?\nsection 2/ } end reverse_markdown-3.0.0/spec/components/tables_spec.rb0000644000004100000410000000135214727742461023066 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/tables.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to match /\n\| header 1 \| header 2 \| header 3 \|\n\| --- \| --- \| --- \|\n/ } it { is_expected.to match /\n\| data 1-1 \| data 2-1 \| data 3-1 \|\n/ } it { is_expected.to match /\n\| data 1-2 \| data 2-2 \| data 3-2 \|\n/ } it { is_expected.to match /\n\| footer 1 \| footer 2 \| footer 3 \|\n/ } it { is_expected.to match /\n\| _header oblique_ \| \*\*header bold\*\* \| `header code` \|\n| --- \| --- \| --- \|\n/ } it { is_expected.to match /\n\| _data oblique_ \| \*\*data bold\*\* \| `data code` \|\n/ } end reverse_markdown-3.0.0/spec/components/quotation_spec.rb0000644000004100000410000000053514727742461023641 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/quotation.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to match /^ Block of code$/ } it { is_expected.to include "\n> First quoted paragraph\n> \n> Second quoted paragraph" } end reverse_markdown-3.0.0/spec/components/lists_spec.rb0000644000004100000410000000550114727742461022752 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/lists.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to match /\n- unordered list entry\n/ } it { is_expected.to match /\n- unordered list entry 2\n/ } it { is_expected.to match /\n1. ordered list entry\n/ } it { is_expected.to match /\n2. 
ordered list entry 2\n/ } it { is_expected.to match /\n1. list entry 1st hierarchy\n/ } it { is_expected.to match /\n {2}- nested unsorted list entry\n/ } it { is_expected.to match /\n {4}1. deep nested list entry\n/ } context "nested list with no whitespace" do it { is_expected.to match /\n- item a\n/ } it { is_expected.to match /\n- item b\n/ } it { is_expected.to match /\n {2}- item bb\n/ } it { is_expected.to match /\n {2}- item bc\n/ } end context "nested list with lots of whitespace" do it { is_expected.to match /\n- item wa \n/ } it { is_expected.to match /\n- item wb \n/ } it { is_expected.to match /\n - item wbb \n/ } it { is_expected.to match /\n - item wbc \n/ } end context "lists containing links" do it { is_expected.to match /\n- \[1 Basic concepts\]\(Basic_concepts\)\n/ } it { is_expected.to match /\n- \[2 History of the idea\]\(History_of_the_idea\)\n/ } it { is_expected.to match /\n- \[3 Intelligence explosion\]\(Intelligence_explosion\)\n/ } end context "lists containing embedded

    tags" do it { is_expected.to match /\n- I want to have a party at my house!\n/ } it { is_expected.to match /\n- I don't want to cleanup after the party!\n/ } end context "list item containing multiple

    tags" do xit { is_expected.to match /\n- li 1, p 1\n\n- li 1, p 2\n/ } end context 'it produces correct numbering' do it { is_expected.to include "1. one" } it { is_expected.to include " 1. one one" } it { is_expected.to include " 2. one two" } it { is_expected.to include "2. two" } it { is_expected.to include " 1. two one" } it { is_expected.to include " 1. two one one" } it { is_expected.to include " 2. two one two" } it { is_expected.to include " 2. two two" } it { is_expected.to include "3. three" } end context "properly embeds a nested list between adjacent list items" do it { is_expected.to match /\n- alpha\n/ } it { is_expected.to match /\n- bravo/ } it { is_expected.to match /\n - bravo alpha\n/ } it { is_expected.to match /\n - bravo bravo/ } it { is_expected.to match /\n - bravo bravo alpha/ } it { is_expected.to match /\n- charlie\n/ } it { is_expected.to match /\n- delta\n/ } end context "text following list should have a new line separator" do it { is_expected.to match /\n- item followed with a text\n\n text after the list/ } end end reverse_markdown-3.0.0/spec/components/html_fragment_spec.rb0000644000004100000410000000043614727742461024445 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/html_fragment.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to eq("naked text 1\n\nparagraph text\n\nnaked text 2") } end reverse_markdown-3.0.0/spec/components/iframe_spec.rb0000644000004100000410000000067414727742461023065 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/iframe.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it do expected = <<~MD # Welcome to My Page This is a sample paragraph before the iframe. https://www.example.com This is a sample paragraph after the iframe. 
MD expect(subject).to eq expected end end reverse_markdown-3.0.0/spec/components/paragraphs_spec.rb0000644000004100000410000000110314727742461023736 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/paragraphs.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.not_to start_with "\n\n" } it { is_expected.to start_with "First content\n\nSecond content\n\n" } it { is_expected.to include "\n\n_Complex_\n\n Content" } it { is_expected.to include "**Trailing whitespace:**" } it { is_expected.to include "**Trailing non-breaking space: **" } it { is_expected.to include "**_Combination: _**" } end reverse_markdown-3.0.0/spec/components/from_the_wild_spec.rb0000644000004100000410000000102014727742461024426 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/from_the_wild.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it "should make sense of strong-crazy markup (as seen in the wild)" do expect(subject).to include "**. 
\n \\*\\*\\* intentcast** : logo design \n **.**\n\n" end it "should not over escape * or _" do expect(subject).to include '[![](example.com/foo_bar.png) I\_AM\_HELPFUL](example.com/foo_bar)' end end reverse_markdown-3.0.0/spec/components/code_spec.rb0000644000004100000410000000161714727742461022532 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/code.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to match /inline `code` block/ } it { is_expected.to match /\ var this\;\n this\.is/ } it { is_expected.to match /block"\)\n console/ } context "with github style code blocks" do subject { ReverseMarkdown.convert(input, github_flavored: true) } it { is_expected.to match /inline `code` block/ } it { is_expected.to match /```\nvar this\;\nthis/ } it { is_expected.to match /it is"\) ?\n```/ } end context "code with indentation" do subject { ReverseMarkdown.convert(input) } it { is_expected.to match(/^ tell application "Foo"\n/) } it { is_expected.to match(/^ beep\n/) } it { is_expected.to match(/^ end tell\n/) } end end reverse_markdown-3.0.0/spec/components/escapables_spec.rb0000644000004100000410000000125114727742461023714 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/escapables.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } context "multiple asterisks" do it { is_expected.to include ' \*\*two asterisks\*\* ' } it { is_expected.to include ' \*\*\*three asterisks\*\*\* ' } end context "multiple underscores" do it { is_expected.to include ' \_\_two underscores\_\_ ' } it { is_expected.to include ' \_\_\_three underscores\_\_\_ ' } end context "underscores within words in code blocks" do it { is_expected.to include ' var theoretical_max_infin = 1.0;' } end end 
reverse_markdown-3.0.0/spec/components/unknown_tags_spec.rb0000644000004100000410000000213314727742461024327 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/unknown_tags.html') } let(:document) { Nokogiri::HTML(input) } let(:result) { ReverseMarkdown.convert(input) } context 'with unknown_tags = :pass_through' do before { ReverseMarkdown.config.unknown_tags = :pass_through } it { expect(result).to include "Foo with bar" } end context 'with unknown_tags = :raise' do before { ReverseMarkdown.config.unknown_tags = :raise } it { expect { result }.to raise_error(ReverseMarkdown::UnknownTagError) } end context 'with unknown_tags = :drop' do before { ReverseMarkdown.config.unknown_tags = :drop } it { expect(result).to eq '' } end context 'with unknown_tags = :bypass' do before { ReverseMarkdown.config.unknown_tags = :bypass } it { expect(result).to eq "Foo with bar\n\n" } end context 'with unknown_tags = :something_wrong' do before { ReverseMarkdown.config.unknown_tags = :something_wrong } it { expect { result }.to raise_error(ReverseMarkdown::InvalidConfigurationError) } end end reverse_markdown-3.0.0/spec/components/anchors_spec.rb0000644000004100000410000000300114727742461023242 0ustar www-datawww-datarequire 'spec_helper' describe ReverseMarkdown do let(:input) { File.read('spec/assets/anchors.html') } let(:document) { Nokogiri::HTML(input) } subject { ReverseMarkdown.convert(input) } it { is_expected.to include '[Foobar](http://foobar.com)' } it { is_expected.to include '[Fubar](http://foobar.com "f\*\*\*\*\* up beyond all recognition")' } it { is_expected.to include '[**Strong foobar**](http://strong.foobar.com)' } it { is_expected.to include ' ![](http://foobar.com/logo.png) ' } it { is_expected.to include ' ![foobar image](http://foobar.com/foobar.png) ' } it { is_expected.to include ' ![foobar image 2](http://foobar.com/foobar2.png "this is the foobar image 2") ' } it { is_expected.to include 'no extra 
space before and after the anchor ([stripped](http://foobar.com)).'} it { is_expected.to include 'after an ! [there](http://not.an.image.foobar.com) should be an extra space.'} it { is_expected.to include 'with stripped elements inbetween: ! [there](http://still.not.an.image.foobar.com) should be an extra space.'} context "links to ignore" do it { is_expected.to include ' ignore anchor tags with no link text ' } it { is_expected.to include ' not ignore [![An Image](image.png)](foo.html) anchor tags with images' } it { is_expected.to include ' pass through the text of [internal jumplinks](#content) without treating them as links ' } it { is_expected.to include ' pass through the text of anchor tags with no href without treating them as links ' } end end reverse_markdown-3.0.0/spec/html_to_markdown_to_html_spec.rb0000644000004100000410000000552614727742461024534 0ustar www-datawww-data# coding:utf-8 require 'kramdown' require 'spec_helper' describe 'Round trip: HTML to markdown (via reverse_markdown) to HTML (via redcarpet)' do # helpers def roundtrip_should_preserve(input) output = html2markdown2html input expect(normalize_html(output)).to eq normalize_html(input) end def html2markdown2html(orig_html) markdown = ReverseMarkdown.convert orig_html new_html = Kramdown::Document.new(markdown).to_html new_html end def normalize_html(html) squeeze_whitespace(html).gsub('> <', '><').strip end def squeeze_whitespace(string) string.tr("\n\t", ' ').squeeze(' ').gsub(/\A \z/, '') end # specs it "should preserve

    blocks" do roundtrip_should_preserve('

    some text

    ') end it "should preserve unordered lists" do roundtrip_should_preserve("
    1. Bird
    2. McHale
    3. Parish
    ") end it "should preserve ordered lists" do roundtrip_should_preserve("
    • Bird
    • McHale
    • Parish
    ") end it "should preserve lists with paragraphs" do roundtrip_should_preserve("
    • Bird

    • McHale

    • Parish

    ") end it "should preserve
    tags" do roundtrip_should_preserve("
    ") end it "should preserve tags" do roundtrip_should_preserve("

    yes!

    ") end it "should preserve links inside tags" do roundtrip_should_preserve(%{

    Western philosophy

    }) end it "should preserve tags" do roundtrip_should_preserve("

    yes!

    ") end it "should preserve
    tags" do roundtrip_should_preserve("

    yes!
    \n we can!

    ") end it "should preserve tags" do roundtrip_should_preserve(%{

    This is an example inline link.

    }) roundtrip_should_preserve(%{

    This link has no title attribute.

    }) end it "should preserve tags" do roundtrip_should_preserve(%{

    My Dog

    }) roundtrip_should_preserve(%{

    My Dog

    }) end it "should preserve code blocks" do roundtrip_should_preserve(%{

    This is a normal paragraph:

    This is a code block. 
    }) end it "should preserve code blocks with embedded whitespace" do roundtrip_should_preserve(%{

    Here is an example of AppleScript:

    tell application Foo
              beep
          end tell
          
    }) end end reverse_markdown-3.0.0/.rspec0000644000004100000410000000001014727742461016241 0ustar www-datawww-data--color reverse_markdown-3.0.0/Rakefile0000644000004100000410000000052014727742461016577 0ustar www-datawww-datarequire 'bundler/gem_tasks' if File.exist?('.codeclimate') ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip end require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) task :default => :spec desc 'Open an irb session preloaded with this library' task :console do sh 'irb -I lib -r reverse_markdown.rb' end reverse_markdown-3.0.0/reverse_markdown.gemspec0000644000004100000410000000222714727742461022062 0ustar www-datawww-data# -*- encoding: utf-8 -*- $:.push File.expand_path("../lib", __FILE__) require "reverse_markdown/version" Gem::Specification.new do |s| s.name = "reverse_markdown" s.version = ReverseMarkdown::VERSION s.authors = ["Johannes Opper"] s.email = ["johannes.opper@gmail.com"] s.homepage = "http://github.com/xijo/reverse_markdown" s.summary = %q{Convert html code into markdown.} s.description = %q{Map simple html back into markdown, e.g. 
if you want to import existing html data in your application.} s.licenses = ["WTFPL"] s.files = `git ls-files`.split("\n") s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } s.require_paths = ["lib"] # specify any dependencies here; for example: s.add_dependency 'nokogiri' s.add_development_dependency 'rspec' s.add_development_dependency 'simplecov' s.add_development_dependency 'rake' s.add_development_dependency 'kramdown' s.add_development_dependency 'debug' unless RUBY_ENGINE == 'jruby' s.add_development_dependency 'codeclimate-test-reporter' end reverse_markdown-3.0.0/Gemfile0000644000004100000410000000014414727742461016427 0ustar www-datawww-datasource "http://rubygems.org" # Specify your gem's dependencies in reverse_markdown.gemspec gemspec reverse_markdown-3.0.0/LICENSE0000644000004100000410000000074214727742461016145 0ustar www-datawww-data DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE Version 2, December 2004 Copyright (C) 2014 Johannes Opper Everyone is permitted to copy and distribute verbatim or modified copies of this license document, and changing it is allowed as long as the name is changed. DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. You just DO WHAT THE FUCK YOU WANT TO. reverse_markdown-3.0.0/README.md0000644000004100000410000001033414727742461016415 0ustar www-datawww-data# Summary Transform html into markdown. Useful for example if you want to import html into your markdown based application. 
![Build Status](https://github.com/xijo/reverse_markdown/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/reverse_markdown.svg)](http://badge.fury.io/rb/reverse_markdown) [![Code Climate](https://codeclimate.com/github/xijo/reverse_markdown.svg)](https://codeclimate.com/github/xijo/reverse_markdown) [![Test Coverage](https://api.codeclimate.com/v1/badges/6ce481ba7ae6f57dc4d3/test_coverage)](https://codeclimate.com/github/xijo/reverse_markdown/test_coverage) ## Changelog See [Change Log](CHANGELOG.md) ## Requirements 1. [Nokogiri](http://nokogiri.org/) 2. Ruby 2.7.0 or higher ## Installation Install the gem ```sh [sudo] gem install reverse_markdown ``` or add it to your Gemfile ```ruby gem 'reverse_markdown' ``` ## Features - Supports all the established html tags like `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `p`, `em`, `strong`, `i`, `b`, `blockquote`, `code`, `img`, `a`, `hr`, `li`, `ol`, `ul`, `table`, `tr`, `th`, `td`, `br`, `figure` - Module based - if you miss a tag, just add it - Can deal with nested lists - Inline and block code is supported - Supports blockquote # Usage ## Ruby You can convert html content as string or Nokogiri document: ```ruby input = '<strong>feelings</strong>' result = ReverseMarkdown.convert input result.inspect # " **feelings** " ```` ## Command line It's also possible to convert html files to markdown using the binary: ```sh $ reverse_markdown file.html > file.md $ cat file.html | reverse_markdown > file.md ```` ## Configuration The following options are available: - `unknown_tags` (default `pass_through`) - how to handle unknown tags. 
Valid options are: - `pass_through` - Include the unknown tag completely into the result - `drop` - Drop the unknown tag and its content - `bypass` - Ignore the unknown tag but try to convert its content - `raise` - Raise an error to let you know - `github_flavored` (default `false`) - use [github flavored markdown](https://help.github.com/articles/github-flavored-markdown) (so far only code blocks are supported) - `tag_border` (default `' '`) - how to handle tag borders. Valid options are: - `' '` - Add whitespace if there is none at tag borders. - `''` - Do not add whitespace. ### As options Just pass your chosen configuration options in after the input. The given options will last for this operation only. ```ruby ReverseMarkdown.convert(input, unknown_tags: :raise, github_flavored: true) ``` ### Preconfigure Or configure it block style on an initializer level. These configurations will last for all conversions until they are set to something different. ```ruby ReverseMarkdown.config do |config| config.unknown_tags = :bypass config.github_flavored = true config.tag_border = '' end ``` # Related stuff - [Write custom converters](https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter) - Wiki entry about how to write your own converter - [html_massage](https://github.com/harlantwood/html_massage) - A gem by Harlan T. 
Wood to convert regular sites into markdown using reverse_markdown - [word-to-markdown](https://github.com/benbalter/word-to-markdown) - Convert word docs into markdown while using reverse_markdown, by Ben Balter - [markdown syntax](http://daringfireball.net/projects/markdown) - The markdown syntax specification - [github flavored markdown](https://help.github.com/articles/github-flavored-markdown) - Githubs extension to markdown - [wmd-editor](http://wmd-editor.com) - Markdown flavored text editor # Thanks Thanks to all [contributors](https://github.com/xijo/reverse_markdown/graphs/contributors) and all other helpers: - [Empact](https://github.com/Empact) Ben Woosley - [harlantwood](https://github.com/harlantwood) Harlan T. Wood - [aprescott](https://github.com/aprescott) Adam Prescott - [danschultzer](https://github.com/danschultzer) Dan Schultzer - [Benjamin-Dobell](https://github.com/Benjamin-Dobell) Benjamin Dobell - [schkovich](https://github.com/schkovich) Goran Miskovic - [craig-day](https://github.com/craig-day) Craig Day - [grmartin](https://github.com/grmartin) Glenn R. Martin - [willglynn](https://github.com/willglynn) Will Glynn reverse_markdown-3.0.0/CHANGELOG.md0000644000004100000410000000727614727742461016762 0ustar www-datawww-data# Change Log All notable changes to this project will be documented in this file. ## 3.0.0 - October 2024 - BREAKING: Dropped support for ruby 2.6.0 and lower - Bugfix for missing newline behind `ol`, thanks @Kevinrob, see #104 - Add support for `iframe` tags, thanks @gagandeepsinghj, see #102 - Support for frozen string literals, thanks @pat, see #105 ## 2.1.1 - October 2021 - Fixes unintentional newline characters within lists with paragraphs, thanks @diogoosorio, see #93 - Lets \n to be present in
     tag. solves #77 #78, thanks @shivabhusal
    
    ## 2.1.0 - May 2020
    - Add support for `figure` tags, see #86, thanks @anshul78
    
    ## 2.0.0 - March 2020
    - BREAKING: Dropped support for ruby 1.9.3
    - Add support for `details` and `summary` tags, see #85
    
    ## 1.4.0 - January 2020
    - BREAKING: jump links will no longer be ignored but treated as links, see #82
    
    ## 1.3.0 - September 2019
    - Add support for `s` HTML tag, thanks @fauno
    
    ## 1.2.0 - August 2019
    - Handle windows `\r\n` within text blocks, thanks for reporting @krisdigital
    - Handle paragraphs in `li` tags, thanks @gstamp
    
    ## 1.1.0 - April 2018
    - Support Jruby, thanks @grddev (#71)
    - Bypass `<span>` tags, thanks @mu-is-too-short (#70)
    
    ## 1.0.5 - February 2018
    - Fix newline handling within pre tags, thanks @niallcolfer (#69)
    
    ## 1.0.4 - November 2017
    - Make blockquote behave as true block, thanks for reporting @kanedo (#67)
    
    ## 1.0.3 - Apr 2016
    ### Changes
    - Use tag_border option while cleaning up, thanks @AlexanderPruss (#66)
    
    ## 1.0.2 - Apr 2016
    ### Changes
    - Handle edge case: exclamation mark before links, thanks @Easy-D (#57)
    
    ## 1.0.1 - Jan 2016
    ### Changes
    - Prevent double escaping of * and _, thanks @craig-day (#61)
    
    ## 1.0.0 - Nov 2015
    ### Changes
    - BREAKING: Parsing was significantly improved, thanks @craig-day (#60)
      Please update your custom converters to accept and use the state hash, for
      examples look into existing standard converters.
    - Use OptionParser for command line options, thanks @grmartin (#55)
    - Tag border behavior is now configurable with the `tag_border` option, thanks @faheemmughal (#59)
    - Preserve > and < from original markup, thanks @willglynn (#58)
    
    ## 0.8.2 - May 2015
    ### Changes
    - Don't add whitespaces in links and images if they contain underscores
    
    ## 0.8.1 - April 2015
    ### Changes
    - Don't add newlines after nested lists
    
    ## 0.8.0 - April 2015
    ### Added
    - `article` tag is now supported and treated like a div
    
    ### Changed
    - Special characters are treated correctly inside of backticks, see (#47)
    
    ## 0.7.0 - February 2015
    ### Added
    - pre-tags now support github and confluence syntax highlighting
    
    ## 0.6.1 - January 2015
    ### Changed
    - Setting config options in block style will last for all following `convert` calls.
    - Inline config options are only applied to this particular operation
    
    ### Removed
    - `config.reset` is removed
    
    ## 0.6.0 - September 2014
    ### Added
    - Ignore `col` and `colgroup` tags
    - Bypass `thead` and `tbody` tags to show the tables correctly
    
    ### Changed
    - Eliminate ruby warnings on load (thx @vsipuli)
    - Treat newlines within text nodes as space
    - Remove whitespace between inline tags and punctuation characters
    
    
    ## 0.5.1 - April 2014
    ### Added
    - Adds support for ruby version 1.9.3 back in
    - More options for handling of unknown tags
    
    ### Changed
    - Bugfixes in `li` indentation behavior
    
    
    ## 0.5.0 - March 2014
    **There were some breaking changes, please make sure you don't miss them:**
    
    1. Only ruby versions 2.0.0 or above are supported
    2. There is no `Mapper` class any more. Just use `ReverseMarkdown.convert(input, options)`
    3. Config option `github_style_code_blocks` changed its name to `github_flavored`
    
    Please open an issue and let me know about it if you have any trouble with the new version.