/* Handle to the Ruby `Rinku` module; assigned once in Init_rinku(). */
static VALUE rb_mRinku;
/* Result of html_is_tag(): whether the markup is an opening or closing tag. */
typedef enum {
	HTML_TAG_NONE  = 0,	/* not the tag that was asked for */
	HTML_TAG_OPEN  = 1,	/* `<name ...>` */
	HTML_TAG_CLOSE = 2	/* `</name ...>` */
} html_tag;
/* Bit flags selecting which kinds of links rinku_autolink() generates. */
typedef enum {
	AUTOLINK_URLS   = 0x01,	/* link URLs (including bare `www.` hosts) */
	AUTOLINK_EMAILS = 0x02,	/* link e-mail addresses */
	AUTOLINK_ALL    = (AUTOLINK_URLS | AUTOLINK_EMAILS)
} autolink_mode;
/*
 * Signature shared by the matchers stored in g_callbacks.
 *
 * rinku_autolink() calls a matcher with: a `rewind` out-parameter whose value
 * is subtracted from the amount of preceding text copied to the output, the
 * scratch buffer that receives the link, a pointer to the trigger character,
 * the number of bytes between the previous link and the trigger, the number
 * of bytes remaining from the trigger onwards, and the caller's flags.
 * A return value of 0 means "no link here"; otherwise it is the number of
 * bytes consumed after the trigger position.
 */
typedef size_t (*autolink_parse_cb)(
	size_t *rewind,
	struct buf *link,
	uint8_t *data,
	size_t offset,
	size_t size,
	unsigned int flags);
/*
 * Action attached to each "active" input byte by rinku_autolink().
 * Values 1..3 double as indices into the g_callbacks and g_hrefs tables.
 */
typedef enum {
	AUTOLINK_ACTION_NONE     = 0,
	AUTOLINK_ACTION_WWW      = 1,
	AUTOLINK_ACTION_EMAIL    = 2,
	AUTOLINK_ACTION_URL      = 3,
	AUTOLINK_ACTION_SKIP_TAG = 4
} autolink_action;
/*
 * Matcher for each autolink_action, indexed by the action's numeric value
 * (rinku_autolink() does `g_callbacks[(int)action]`). The order of the
 * entries must therefore stay in step with the autolink_action enum.
 * There is no entry for AUTOLINK_ACTION_SKIP_TAG; rinku_autolink() handles
 * that action itself before indexing this table.
 */
static autolink_parse_cb g_callbacks[] = {
NULL,
sd_autolink__www, /* 1 */
sd_autolink__email,/* 2 */
sd_autolink__url, /* 3 */
};
static const char *g_hrefs[] = {
NULL,
"data, link->size);
}
/*
 * Rinku assumes valid HTML encoding for all input, but there's still
 * the case where a link can contain a double quote `"` that allows XSS.
 *
 * We need to properly escape the character we use for the `href` attribute
 * declaration.
 *
 * Appends the `size` bytes at `link` to `ob`, replacing every `"` with the
 * `&quot;` entity so that the link cannot terminate the attribute value
 * early. All other bytes are copied through unchanged.
 */
static void print_link(struct buf *ob, const char *link, size_t size)
{
	size_t i = 0, org;

	while (i < size) {
		/* copy the run of bytes up to the next double quote verbatim */
		org = i;
		while (i < size && link[i] != '"')
			i++;

		if (i > org)
			bufput(ob, link + org, i - org);

		if (i >= size)
			break;

		/* emit the entity instead of the raw quote, then step over it */
		BUFPUTSL(ob, "&quot;");
		i++;
	}
}
/*
 * From sundown/html/html.c
 *
 * Checks whether the `tag_size` bytes at `tag_data` start with the tag
 * `tagname`, i.e. `<tagname` or `</tagname` followed by whitespace or `>`.
 * The name comparison is byte-exact (case-sensitive).
 *
 * Returns HTML_TAG_OPEN or HTML_TAG_CLOSE on a match, HTML_TAG_NONE otherwise
 * (including when the input is shorter than 3 bytes or ends right after the
 * name).
 */
static int
html_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
{
	size_t pos = 1;
	int is_closing = 0;
	const char *name = tagname;

	if (tag_size < 3 || tag_data[0] != '<')
		return HTML_TAG_NONE;

	if (tag_data[pos] == '/') {
		is_closing = 1;
		pos++;
	}

	/* walk the tag name and the input in lock-step */
	while (pos < tag_size && *name != 0) {
		if (tag_data[pos] != *name)
			return HTML_TAG_NONE;
		pos++;
		name++;
	}

	/* input exhausted before a delimiter could follow the name */
	if (pos == tag_size)
		return HTML_TAG_NONE;

	/* the name must be followed by whitespace or the end of the tag */
	if (!isspace(tag_data[pos]) && tag_data[pos] != '>')
		return HTML_TAG_NONE;

	return is_closing ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
}
/*
 * Computes how many bytes of `text` (which starts at a `<`) the scanner
 * should jump over.
 *
 * For an ordinary tag this is the offset of the tag's `>`. If the tag is an
 * opening tag listed in the NULL-terminated `skip_tags` array, the offset
 * instead points at the `>` of the matching closing tag, so that everything
 * inside the element is skipped. When no `>` (or closing tag) is found the
 * result is `size`.
 *
 * Nothing is written to `ob` here; the parameter is currently unused.
 */
static size_t
autolink__skip_tag(
	struct buf *ob,
	const uint8_t *text,
	size_t size,
	const char **skip_tags)
{
	size_t pos;
	const char **tag;

	/* end of the tag we are standing on */
	for (pos = 0; pos < size && text[pos] != '>'; pos++)
		;

	/* is it the opening tag of one of the elements to skip? */
	for (tag = skip_tags; *tag != NULL; tag++) {
		if (html_is_tag(text, size, *tag) == HTML_TAG_OPEN)
			break;
	}

	if (*tag == NULL)
		return pos;

	/* hunt for the matching closing tag */
	for (;;) {
		while (pos < size && text[pos] != '<')
			pos++;

		if (pos == size)
			break;

		if (html_is_tag(text + pos, size - pos, *tag) == HTML_TAG_CLOSE)
			break;

		pos++;
	}

	/* ...and move to the `>` that ends it */
	while (pos < size && text[pos] != '>')
		pos++;

	return pos;
}
/*
 * Scans the `size` bytes at `text` for URLs and/or e-mail addresses, as
 * selected by `mode`, and appends the text to `ob` with every match wrapped
 * in an anchor element.
 *
 * - `flags` is forwarded unchanged to the matchers in g_callbacks.
 * - `link_attr`, when non-NULL, must be NUL-terminated; it is emitted
 *   verbatim after the `href` attribute (leading whitespace is skipped).
 * - `skip_tags` is a NULL-terminated list of tag names, forwarded to
 *   autolink__skip_tag() so that the contents of those elements are not
 *   scanned.
 * - `link_text_cb`, when non-NULL, is called with `payload` to write the
 *   visible text of each link; otherwise autolink__print is used.
 *
 * Returns the number of links generated. When the result is 0, no content
 * has been appended to `ob` and the caller should use the input text as-is.
 */
int
rinku_autolink(
	struct buf *ob,
	const uint8_t *text,
	size_t size,
	autolink_mode mode,
	unsigned int flags,
	const char *link_attr,
	const char **skip_tags,
	void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload),
	void *payload)
{
	size_t i, end, last_link_found = 0;
	struct buf *link;
	char active_chars[256];
	int link_count = 0;

	if (!text || size == 0)
		return 0;

	/* allocated only after the early return above, so it cannot leak */
	link = bufnew(16);

	/* map every byte that may start (or be part of) a link to its action */
	memset(active_chars, 0x0, sizeof(active_chars));

	active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;

	if (mode & AUTOLINK_EMAILS)
		active_chars['@'] = AUTOLINK_ACTION_EMAIL;

	if (mode & AUTOLINK_URLS) {
		active_chars['w'] = AUTOLINK_ACTION_WWW;
		active_chars['W'] = AUTOLINK_ACTION_WWW;
		active_chars[':'] = AUTOLINK_ACTION_URL;
	}

	if (link_text_cb == NULL)
		link_text_cb = &autolink__print;

	if (link_attr != NULL) {
		/* <ctype.h> functions require an unsigned char value */
		while (isspace((unsigned char)*link_attr))
			link_attr++;
	}

	bufgrow(ob, size);

	/*
	 * `i` is the start of the input not yet copied to `ob`;
	 * `end` is the current scan position.
	 */
	i = end = 0;

	while (i < size) {
		size_t rewind, link_end;
		char action = 0;

		while (end < size && (action = active_chars[text[end]]) == 0)
			end++;

		if (end == size) {
			/* flush the tail only if something was linked before it */
			if (link_count > 0)
				bufput(ob, text + i, end - i);
			break;
		}

		if (action == AUTOLINK_ACTION_SKIP_TAG) {
			end += autolink__skip_tag(ob,
				text + end, size - end, skip_tags);
			continue;
		}

		link->size = 0;
		link_end = g_callbacks[(int)action](
			&rewind, link, (uint8_t *)text + end,
			end - last_link_found,
			size - end, flags);

		/* print the link */
		if (link_end > 0) {
			/* text before the link, minus what the matcher claimed */
			bufput(ob, text + i, end - i - rewind);

			bufputs(ob, g_hrefs[(int)action]);
			print_link(ob, link->data, link->size);

			if (link_attr) {
				BUFPUTSL(ob, "\" ");
				bufputs(ob, link_attr);
				bufputc(ob, '>');
			} else {
				BUFPUTSL(ob, "\">");
			}

			link_text_cb(ob, link, payload);
			/* close the anchor opened by the g_hrefs prefix */
			BUFPUTSL(ob, "</a>");

			link_count++;
			i = end + link_end;
			last_link_found = end = i;
		} else {
			end = end + 1;
		}
	}

	bufrelease(link);
	return link_count;
}
/**
 * Ruby code
 */

/*
 * Link-text callback used when auto_link is given a block: calls the block
 * (passed as `block`) with the link as a Ruby String and appends the String
 * it returns to `link_text`. A non-String return value makes Check_Type
 * raise.
 */
static void
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
{
	VALUE rb_url = rb_str_new(link->data, link->size);
	VALUE rb_replacement = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_url);

	Check_Type(rb_replacement, T_STRING);

	bufput(link_text,
		RSTRING_PTR(rb_replacement),
		RSTRING_LEN(rb_replacement));
}
/*
 * Converts a Ruby Array of Strings into a NULL-terminated array of C strings.
 *
 * Raises (via Check_Type / StringValueCStr) if `rb_skip` is not an Array or
 * if any element is not a String usable as a C string.
 *
 * The returned array is allocated with xmalloc and must be released by the
 * caller with xfree. The strings themselves are not copied: each entry is
 * the pointer returned by StringValueCStr for the corresponding element, so
 * the Ruby strings must outlive the array.
 */
const char **rinku_load_tags(VALUE rb_skip)
{
	const char **skip_tags;
	long i, count;

	Check_Type(rb_skip, T_ARRAY);
	count = RARRAY_LEN(rb_skip);

	/*
	 * Validate every element before allocating: both calls below can raise,
	 * and a raise after xmalloc would leak the array.
	 */
	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		Check_Type(tag, T_STRING);
		(void)StringValueCStr(tag);
	}

	skip_tags = xmalloc(sizeof(*skip_tags) * ((size_t)count + 1));

	for (i = 0; i < count; ++i) {
		VALUE tag = rb_ary_entry(rb_skip, i);
		/* already validated above, so this is not expected to raise */
		skip_tags[i] = StringValueCStr(tag);
	}

	skip_tags[count] = NULL;
	return skip_tags;
}
/*
 * Document-method: auto_link
 *
 * call-seq:
 *   auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
 *   auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
 *
 * Parses a block of text looking for "safe" urls or email addresses,
 * and turns them into HTML links with the given attributes.
 *
 * NOTE: The block of text may or may not be HTML; if the text is HTML,
 * Rinku will skip the relevant tags to prevent double-linking and linking
 * inside `pre` blocks by default.
 *
 * NOTE: If the input text is HTML, it's expected to be already escaped.
 * Rinku will perform no escaping.
 *
 * NOTE: Currently the following protocols are considered safe and are the
 * only ones that will be autolinked.
 *
 *   http:// https:// ftp:// mailto://
 *
 * Email addresses are also autolinked by default. URLs without a protocol
 * specifier but starting with 'www.' will also be autolinked, defaulting to
 * the 'http://' protocol.
 *
 * - `text` is a string in plain text or HTML markup. If the string is formatted in
 * HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
 * tags.
 *
 * - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
 * which specifies which kind of links will be auto-linked. Any other symbol
 * raises a TypeError.
 *
 * - `link_attr` is a string containing the link attributes for each link that
 * will be generated. These attributes are not sanitized and will be included as-is
 * in each generated link, e.g.
 *
 *   ~~~~~ruby
 *   auto_link('http://www.pokemon.com', :all, 'target="_blank"')
 *   # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
 *   ~~~~~
 *
 * This string can be autogenerated from a hash using the Rails `tag_options` helper.
 *
 * - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
 * when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
 * which is initially `["a", "pre", "code", "kbd", "script"]`.
 *
 * - `flags` is an optional Integer bitmask that is passed on to the link
 * matchers. Pass `Rinku::AUTOLINK_SHORT_DOMAINS` to recognize 'http://foo' as
 * a valid domain instead of requiring at least one '.'. It defaults to 0.
 *
 * - `&block` is an optional block argument. If a block is passed, it will
 * be yielded for each found link in the text, and its return value will be used instead
 * of the name of the link. E.g.
 *
 *   ~~~~~ruby
 *   auto_link('Check it out at http://www.pokemon.com') do |url|
 *     "THE POKEMAN WEBSITEZ"
 *   end
 *   # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
 *   ~~~~~~
 *
 * Returns `text` itself when no link was found, otherwise a new String with
 * the same encoding as `text`.
 */
static VALUE
rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
{
	static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};

	VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
	struct buf *output_buf;
	int link_mode = AUTOLINK_ALL, count;
	unsigned int link_flags = 0;
	const char *link_attr = NULL;
	const char **skip_tags = NULL;
	ID mode_sym;

	rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
		&rb_html, &rb_skip, &rb_flags, &rb_block);

	/*
	 * Every check that can raise runs before anything is allocated, so a
	 * bad argument cannot leak the tag list or the output buffer.
	 */
	Check_Type(rb_text, T_STRING);

	if (!NIL_P(rb_mode)) {
		Check_Type(rb_mode, T_SYMBOL);
		mode_sym = SYM2ID(rb_mode);
	} else {
		mode_sym = rb_intern("all");
	}

	if (!NIL_P(rb_html)) {
		Check_Type(rb_html, T_STRING);
		link_attr = RSTRING_PTR(rb_html);
	}

	if (!NIL_P(rb_flags)) {
		Check_Type(rb_flags, T_FIXNUM);
		link_flags = FIX2INT(rb_flags);
	}

	if (mode_sym == rb_intern("all"))
		link_mode = AUTOLINK_ALL;
	else if (mode_sym == rb_intern("email_addresses"))
		link_mode = AUTOLINK_EMAILS;
	else if (mode_sym == rb_intern("urls"))
		link_mode = AUTOLINK_URLS;
	else
		rb_raise(rb_eTypeError,
			"Invalid linking mode (possible values are :all, :urls, :email_addresses)");

	/* explicit argument, else the module-wide setting, else the built-ins */
	if (NIL_P(rb_skip))
		rb_skip = rb_iv_get(self, "@skip_tags");

	if (NIL_P(rb_skip)) {
		skip_tags = SKIP_TAGS;
	} else {
		skip_tags = rinku_load_tags(rb_skip);
	}

	output_buf = bufnew(32);

	/*
	 * NOTE(review): when a block is given, autolink_callback may still raise
	 * from inside rinku_autolink (block error or non-String result); in that
	 * case `output_buf` and a heap-allocated `skip_tags` are not released.
	 */
	count = rinku_autolink(
		output_buf,
		(const uint8_t *)RSTRING_PTR(rb_text),
		(size_t)RSTRING_LEN(rb_text),
		(autolink_mode)link_mode,
		link_flags,
		link_attr,
		skip_tags,
		RTEST(rb_block) ? &autolink_callback : NULL,
		(void*)rb_block);

	if (count == 0)
		result = rb_text;
	else {
		result = rb_str_new(output_buf->data, output_buf->size);
		rb_enc_copy(result, rb_text);
	}

	/* only a list built by rinku_load_tags() is heap-allocated */
	if (skip_tags != SKIP_TAGS)
		xfree((void *)skip_tags);

	bufrelease(output_buf);
	return result;
}
/*
 * Extension entry point: defines the `Rinku` module, its variadic
 * `auto_link` method and the `AUTOLINK_SHORT_DOMAINS` flag constant.
 */
void RUBY_EXPORT Init_rinku()
{
	VALUE short_domains = INT2FIX(SD_AUTOLINK_SHORT_DOMAINS);

	rb_mRinku = rb_define_module("Rinku");

	/* arity -1: arguments arrive as (argc, argv, self) */
	rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
	rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", short_domains);
}
rinku-master/test/ 0000755 0001750 0001750 00000000000 12136201504 014632 5 ustar avtobiff avtobiff rinku-master/test/autolink_test.rb 0000644 0001750 0001750 00000031635 12136201504 020054 0 ustar avtobiff avtobiff # encoding: utf-8
# Put the `lib` directory that sits next to this file's directory at the front
# of the load path, so that `require 'rinku'` below resolves to it first.
rootdir = File.dirname(File.dirname(__FILE__))
$LOAD_PATH.unshift "#{rootdir}/lib"
require 'test/unit'
require 'cgi'
require 'uri'
require 'rinku'
# Test::Unit suite exercising Rinku.auto_link.
#
# NOTE(review): in this copy many expected strings are identical to their
# inputs (e.g. `"#{url}"`), and generate_result accepts an `href` it never
# uses. That suggests the HTML markup inside the string literals was lost
# before this copy was made -- confirm against the upstream test file before
# relying on any expectation below.
class RedcarpetAutolinkTest < Test::Unit::TestCase
SAFE_CHARS = "{}[]~'"
# Asserts that auto-linking `url` with the default options yields `expected`.
def assert_linked(expected, url)
assert_equal expected, Rinku.auto_link(url)
end
# Smoke test: makes no assertion, it only has to complete without crashing.
# NOTE(review): `mode=:all` assigns a local and passes `:all` positionally.
def test_segfault
Rinku.auto_link("a+b@d.com+e@f.com", mode=:all)
end
# A URL containing double quotes, the character print_link escapes in the C code.
def test_escapes_quotes
assert_linked %(http://website.com/"onmouseover=document.body.style.backgroundColor="pink";//),
%(http://website.com/"onmouseover=document.body.style.backgroundColor="pink";//)
end
# Exercises the module-level Rinku.skip_tags setting (accessor defined outside this file).
def test_global_skip_tags
assert_equal Rinku.skip_tags, nil
Rinku.skip_tags = ['pre']
assert_equal Rinku.skip_tags, ['pre']
Rinku.skip_tags = ['pa']
url = 'This is just a http://www.pokemon.com test'
assert_equal Rinku.auto_link(url), url
Rinku.skip_tags = nil
assert_not_equal Rinku.auto_link(url), url
end
# A single trailing punctuation character must stay outside the link.
def test_auto_link_with_single_trailing_punctuation_and_space
url = "http://www.youtube.com"
url_result = generate_result(url)
assert_equal url_result, Rinku.auto_link(url)
["?", "!", ".", ",", ":"].each do |punc|
assert_equal "link: #{url_result}#{punc} foo?", Rinku.auto_link("link: #{url}#{punc} foo?")
end
end
# A lone `<` that never closes must be returned unchanged.
def test_does_not_segfault
assert_linked "< this is just a test", "< this is just a test"
end
# Passes an explicit skip_tags list (["div", "a"]) as the fourth argument.
def test_skips_tags
html = <<-html
This is just a test. http://www.pokemon.com
More test
http://www.amd.com
CODE www.less.es
html
result = <<-result
This is just a test. http://www.pokemon.com
More test
http://www.amd.com
CODE www.less.es
result
assert_equal result, Rinku.auto_link(html, :all, nil, ["div", "a"])
end
# URLs ending in (), [] or {} groups, alone and wrapped in the same bracket type.
def test_auto_link_with_brackets
link1_raw = 'http://en.wikipedia.org/wiki/Sprite_(computer_graphics)'
link1_result = generate_result(link1_raw)
assert_equal link1_result, Rinku.auto_link(link1_raw)
assert_equal "(link: #{link1_result})", Rinku.auto_link("(link: #{link1_raw})")
link2_raw = 'http://en.wikipedia.org/wiki/Sprite_[computer_graphics]'
link2_result = generate_result(link2_raw)
assert_equal link2_result, Rinku.auto_link(link2_raw)
assert_equal "[link: #{link2_result}]", Rinku.auto_link("[link: #{link2_raw}]")
link3_raw = 'http://en.wikipedia.org/wiki/Sprite_{computer_graphics}'
link3_result = generate_result(link3_raw)
assert_equal link3_result, Rinku.auto_link(link3_raw)
assert_equal "{link: #{link3_result}}", Rinku.auto_link("{link: #{link3_raw}}")
end
# Several trailing punctuation characters (`).`) must stay outside the link.
def test_auto_link_with_multiple_trailing_punctuations
url = "http://youtube.com"
url_result = generate_result(url)
assert_equal url_result, Rinku.auto_link(url)
assert_equal "(link: #{url_result}).", Rinku.auto_link("(link: #{url}).")
end
# The block's return value (first 7 characters plus '...') replaces the link text.
def test_auto_link_with_block
url = "http://api.rubyonrails.com/Foo.html"
email = "fantabulous@shiznadel.ic"
assert_equal %(#{url[0...7]}...
#{email[0...7]}...
), Rinku.auto_link("#{url}
#{email}
") { |_url| _url[0...7] + '...'}
end
# The block returns different text for image URLs than for other links.
def test_auto_link_with_block_with_html
pic = "http://example.com/pic.png"
url = "http://example.com/album?a&b=c"
expect = %(My pic:
-- full album here #{generate_result(url)})
text = "My pic: #{pic} -- full album here #{CGI.escapeHTML url}"
assert_equal expect, Rinku.auto_link(text) { |link|
if link =~ /\.(jpg|gif|png|bmp|tif)$/i
%(
)
else
link
end
}
end
# Each input must come back unchanged.
def test_auto_link_already_linked
linked1 = generate_result('Ruby On Rails', 'http://www.rubyonrails.com')
linked2 = %('www.example.com')
linked3 = %('www.example.com')
linked4 = %('www.example.com')
linked5 = %('close www.example.com')
assert_equal linked1, Rinku.auto_link(linked1)
assert_equal linked2, Rinku.auto_link(linked2)
assert_equal linked3, Rinku.auto_link(linked3)
assert_equal linked4, Rinku.auto_link(linked4)
assert_equal linked5, Rinku.auto_link(linked5)
linked_email = %Q(Mail me)
assert_equal linked_email, Rinku.auto_link(linked_email)
end
# URLs that are immediately followed by a newline.
def test_auto_link_at_eol
url1 = "http://api.rubyonrails.com/Foo.html"
url2 = "http://www.ruby-doc.org/core/Bar.html"
assert_equal %(#{url1}
#{url2}
), Rinku.auto_link("#{url1}
#{url2}
")
end
# The block receives the matched URL and its return value is used in the output.
def test_block
link = Rinku.auto_link("Find ur favorite pokeman @ http://www.pokemon.com") do |url|
assert_equal url, "http://www.pokemon.com"
"POKEMAN WEBSITE"
end
assert_equal link, "Find ur favorite pokeman @ POKEMAN WEBSITE"
end
# Basic case: a bare URL as the whole input.
def test_autolink_works
url = "http://example.com/"
assert_linked "#{url}", url
end
# Passes Rinku::AUTOLINK_SHORT_DOMAINS (and then 0) as the fifth `flags` argument.
def test_autolink_options_for_short_domains
url = "http://google"
linked_url = "#{url}"
flags = Rinku::AUTOLINK_SHORT_DOMAINS
# Specifying use short_domains in the args
url = "http://google"
linked_url = "#{url}"
assert_equal Rinku.auto_link(url, nil, nil, nil, flags), linked_url
# Specifying no short_domains in the args
url = "http://google"
linked_url = "#{url}"
assert_equal Rinku.auto_link(url, nil, nil, nil, 0), url
end
# "www" in the middle of a word must not be linked.
def test_not_autolink_www
assert_linked "Awww... man", "Awww... man"
end
# Dashes inside the path are part of the URL.
def test_does_not_terminate_on_dash
url = "http://example.com/Notification_Center-GitHub-20101108-140050.jpg"
assert_linked "#{url}", url
end
# A URL wrapped in `<` and `>`.
def test_does_not_include_trailing_gt
url = "http://example.com"
assert_linked "<#{url}>", "<#{url}>"
end
# A URL with a `#fragment`.
def test_links_with_anchors
url = "https://github.com/github/hubot/blob/master/scripts/cream.js#L20-20"
assert_linked "#{url}", url
end
# A list of URL shapes, each HTML-escaped with CGI.escapeHTML before linking.
def test_links_like_rails
urls = %w(http://www.rubyonrails.com
http://www.rubyonrails.com:80
http://www.rubyonrails.com/~minam
https://www.rubyonrails.com/~minam
http://www.rubyonrails.com/~minam/url%20with%20spaces
http://www.rubyonrails.com/foo.cgi?something=here
http://www.rubyonrails.com/foo.cgi?something=here&and=here
http://www.rubyonrails.com/contact;new
http://www.rubyonrails.com/contact;new%20with%20spaces
http://www.rubyonrails.com/contact;new?with=query&string=params
http://www.rubyonrails.com/~minam/contact;new?with=query&string=params
http://en.wikipedia.org/wiki/Wikipedia:Today%27s_featured_picture_%28animation%29/January_20%2C_2007
http://www.mail-archive.com/rails@lists.rubyonrails.org/
http://www.amazon.com/Testing-Equal-Sign-In-Path/ref=pd_bbs_sr_1?ie=UTF8&s=books&qid=1198861734&sr=8-1
http://en.wikipedia.org/wiki/Sprite_(computer_graphics)
http://en.wikipedia.org/wiki/Texas_hold%27em
https://www.google.com/doku.php?id=gps:resource:scs:start
)
urls.each do |url|
assert_linked %(#{CGI.escapeHTML url}), CGI.escapeHTML(url)
end
end
# Larger mix of URLs and e-mail addresses embedded in sentences, at line
# starts/ends and next to punctuation.
def test_links_like_autolink_rails
email_raw = 'david@loudthinking.com'
email_result = %{#{email_raw}}
email2_raw = '+david@loudthinking.com'
email2_result = %{#{email2_raw}}
link_raw = 'http://www.rubyonrails.com'
link_result = %{#{link_raw}}
link_result_with_options = %{#{link_raw}}
link2_raw = 'www.rubyonrails.com'
link2_result = %{#{link2_raw}}
link3_raw = 'http://manuals.ruby-on-rails.com/read/chapter.need_a-period/103#page281'
link3_result = %{#{link3_raw}}
link4_raw = CGI.escapeHTML 'http://foo.example.com/controller/action?parm=value&p2=v2#anchor123'
link4_result = %{#{link4_raw}}
link5_raw = 'http://foo.example.com:3000/controller/action'
link5_result = %{#{link5_raw}}
link6_raw = 'http://foo.example.com:3000/controller/action+pack'
link6_result = %{#{link6_raw}}
link7_raw = CGI.escapeHTML 'http://foo.example.com/controller/action?parm=value&p2=v2#anchor-123'
link7_result = %{#{link7_raw}}
link8_raw = 'http://foo.example.com:3000/controller/action.html'
link8_result = %{#{link8_raw}}
link9_raw = 'http://business.timesonline.co.uk/article/0,,9065-2473189,00.html'
link9_result = %{#{link9_raw}}
link10_raw = 'http://www.mail-archive.com/ruby-talk@ruby-lang.org/'
link10_result = %{#{link10_raw}}
assert_linked %(Go to #{link_result} and say hello to #{email_result}), "Go to #{link_raw} and say hello to #{email_raw}"
assert_linked %(Link #{link_result}
), "Link #{link_raw}
"
assert_linked %(#{link_result} Link
), "#{link_raw} Link
"
assert_linked %(Go to #{link_result}.), %(Go to #{link_raw}.)
assert_linked %(Go to #{link_result}, then say hello to #{email_result}.
), %(Go to #{link_raw}, then say hello to #{email_raw}.
)
assert_linked %(Link #{link2_result}
), "Link #{link2_raw}
"
assert_linked %(#{link2_result} Link
), "#{link2_raw} Link
"
assert_linked %(Go to #{link2_result}.), %(Go to #{link2_raw}.)
assert_linked %(Say hello to #{email_result}, then go to #{link2_result},
), %(Say hello to #{email_raw}, then go to #{link2_raw},
)
assert_linked %(Link #{link3_result}
), "Link #{link3_raw}
"
assert_linked %(#{link3_result} Link
), "#{link3_raw} Link
"
assert_linked %(Go to #{link3_result}.), %(Go to #{link3_raw}.)
assert_linked %(Go to #{link3_result}. seriously, #{link3_result}? i think I'll say hello to #{email_result}. instead.
), %(Go to #{link3_raw}. seriously, #{link3_raw}? i think I'll say hello to #{email_raw}. instead.
)
assert_linked %(Link #{link4_result}
), "Link #{link4_raw}
"
assert_linked %(#{link4_result} Link
), "#{link4_raw} Link
"
assert_linked %(#{link5_result} Link
), "#{link5_raw} Link
"
assert_linked %(#{link6_result} Link
), "#{link6_raw} Link
"
assert_linked %(#{link7_result} Link
), "#{link7_raw} Link
"
assert_linked %(Link #{link8_result}
), "Link #{link8_raw}
"
assert_linked %(#{link8_result} Link
), "#{link8_raw} Link
"
assert_linked %(Go to #{link8_result}.), %(Go to #{link8_raw}.)
assert_linked %(Go to #{link8_result}. seriously, #{link8_result}? i think I'll say hello to #{email_result}. instead.
), %(Go to #{link8_raw}. seriously, #{link8_raw}? i think I'll say hello to #{email_raw}. instead.
)
assert_linked %(Link #{link9_result}
), "Link #{link9_raw}
"
assert_linked %(#{link9_result} Link
), "#{link9_raw} Link
"
assert_linked %(Go to #{link9_result}.), %(Go to #{link9_raw}.)
assert_linked %(Go to #{link9_result}. seriously, #{link9_result}? i think I'll say hello to #{email_result}. instead.
), %(Go to #{link9_raw}. seriously, #{link9_raw}? i think I'll say hello to #{email_raw}. instead.
)
assert_linked %(#{link10_result} Link
), "#{link10_raw} Link
"
assert_linked email2_result, email2_raw
assert_linked "#{link_result} #{link_result} #{link_result}", "#{link_raw} #{link_raw} #{link_raw}"
assert_linked 'Ruby On Rails', 'Ruby On Rails'
end
# Only defined when String supports encodings (String#force_encoding exists).
if "".respond_to?(:force_encoding)
# The result must carry the same encoding as the input string.
def test_copies_source_encoding
str = "http://www.bash.org"
ret = Rinku.auto_link str
assert_equal str.encoding, ret.encoding
str.encode! 'binary'
ret = Rinku.auto_link str
assert_equal str.encoding, ret.encoding
end
end
# Builds the expected output for a link whose text is `link_text`.
# NOTE(review): `href` defaults to `link_text` but is not used in the returned
# string as written here -- confirm the intended template.
def generate_result(link_text, href = nil)
href ||= link_text
%{#{CGI.escapeHTML link_text}}
end
end