twitter-text-1.13.4/ 0000755 0001750 0001750 00000000000 12667350232 014474 5 ustar sudheesh sudheesh twitter-text-1.13.4/spec/ 0000755 0001750 0001750 00000000000 12667350232 015426 5 ustar sudheesh sudheesh twitter-text-1.13.4/spec/twitter_text_spec.rb 0000644 0001750 0001750 00000000631 12667350232 021533 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# This example group is only defined on Ruby < 1.9, the only interpreters
# where the body below (which assigns $KCODE) is evaluated.
major, minor, patch = RUBY_VERSION.split('.')
if major.to_i == 1 && minor.to_i < 9
  describe "base" do
    # Switch to a non-UTF-8 KCODE for the example, then set it back to 'u'.
    before { $KCODE = 'NONE' }
    after  { $KCODE = 'u' }

    it "should raise with invalid KCODE on Ruby < 1.9" do
      lambda { require 'twitter-text' }.should raise_error
    end
  end
end
twitter-text-1.13.4/spec/spec_helper.rb 0000644 0001750 0001750 00000007571 12667350232 020256 0 ustar sudheesh sudheesh $TESTING=true
# Ruby 1.8 encoding check: set $KCODE to 'u' on interpreters older than 1.9.
major, minor, patch = RUBY_VERSION.split('.')
$KCODE = 'u' if major.to_i == 1 && minor.to_i < 9
# Append the gem's lib/ directory (one level above spec/) to the load path.
$:.push File.join(File.dirname(__FILE__), '..', 'lib')
require 'nokogiri'
require 'json'
require 'simplecov'
# Coverage is started here, before the library under test is required below.
# Files under 'lib' are reported in a group named 'Libraries'.
SimpleCov.start do
add_group 'Libraries', 'lib'
end
require File.expand_path('../../lib/twitter-text', __FILE__)
require File.expand_path('../test_urls', __FILE__)
# Mix the TestUrls module (loaded from spec/test_urls above) into the examples.
RSpec.configure do |config|
config.include TestUrls
end
# Matcher: passes when Twitter::Extractor.extract_urls returns at least one
# URL for the string under test.
RSpec::Matchers.define :match_autolink_expression do
  match do |string|
    extracted = Twitter::Extractor.extract_urls(string)
    !extracted.empty?
  end
end
# Matcher: passes when the first :valid_url regex match in `text`, with
# surrounding whitespace stripped, equals the URL under test.
#
# text - the string that is searched for the URL.
RSpec::Matchers.define :match_autolink_expression_in do |text|
  match do |url|
    @match_data = Twitter::Regex[:valid_url].match(text)
    @match_data && @match_data.to_s.strip == url
  end

  failure_message_for_should do |url|
    # @match_data is nil when the regex found nothing, which is one of the
    # ways this matcher fails; never call #captures on it unguarded.
    outcome = @match_data ? "the match was '#{@match_data.captures}'" : "nothing matched"
    "Expected to find url '#{url}' in text '#{text}', but #{outcome}"
  end
end
# Matcher: passes when the HTML under test contains an anchor whose href is
# `url` and whose text equals `inner_text` (or `url` when no inner text is
# given).
RSpec::Matchers.define :have_autolinked_url do |url, inner_text|
  match do |text|
    @link = Nokogiri::HTML(text).search("a[@href='#{url}']")
    expected_text = inner_text || url
    @link.inner_text == expected_text
  end

  failure_message_for_should do |text|
    inner_text_note = inner_text ? ", inner_text '#{inner_text}'" : nil
    "Expected url '#{url}'#{inner_text_note} to be autolinked in '#{text}'"
  end
end
# Matcher: passes when the HTML under test contains an `a.username` link whose
# text is the expected name and whose href is the twitter.com URL built from
# the screen name.
#
# screen_name - screen name used to build the expected href.
# inner_text  - optional expected link text; defaults to screen_name.
RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text|
  expected = inner_text || screen_name

  match do |text|
    @link = Nokogiri::HTML(text).search("a.username")
    # Use plain boolean comparisons: a `.should` in here would raise on a
    # mismatch instead of returning false, which breaks `should_not` usage.
    !@link.empty? &&
      @link.inner_text == expected &&
      @link.first['href'] == "https://twitter.com/#{screen_name}"
  end

  failure_message_for_should do |text|
    if @link.first
      "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not."
    else
      "Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found."
    end
  end

  failure_message_for_should_not do |text|
    "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does."
  end

  description do
    "contain a link with the name and href pointing to the expected screen_name"
  end
end
# Matcher: passes when the HTML under test contains an `a.list-slug` link
# whose text is the expected list path and whose href is the downcased
# twitter.com URL built from the list path.
#
# list_path  - "user/list" path used to build the expected href.
# inner_text - optional expected link text; defaults to list_path.
RSpec::Matchers.define :link_to_list_path do |list_path, inner_text|
  expected = inner_text || list_path

  match do |text|
    @link = Nokogiri::HTML(text).search("a.list-slug")
    # Use plain boolean comparisons: a `.should` in here would raise on a
    # mismatch instead of returning false, which breaks `should_not` usage.
    !@link.empty? &&
      @link.inner_text == expected &&
      @link.first['href'] == "https://twitter.com/#{list_path}".downcase
  end

  failure_message_for_should do |text|
    if @link.first
      "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not."
    else
      "Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found."
    end
  end

  failure_message_for_should_not do |text|
    "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does."
  end

  description do
    "contain a link with the list title and an href pointing to the list path"
  end
end
# Matcher: passes when the HTML under test contains an anchor pointing at the
# twitter.com search URL for `hashtag` (leading '#' encoded as %23) and the
# anchor text equals the hashtag.
RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
  match do |text|
    search_href = "https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}"
    @link = Nokogiri::HTML(text).search("a[@href='#{search_href}']")
    @link.inner_text == hashtag
  end

  failure_message_for_should do |text|
    if @link.first
      "Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}"
    else
      "Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found."
    end
  end

  failure_message_for_should_not do |text|
    "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does."
  end
end
twitter-text-1.13.4/spec/rewriter_spec.rb 0000644 0001750 0001750 00000040002 12667350232 020624 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
describe Twitter::Rewriter do
# Default fixtures returning nil; contexts below redefine them as needed.
def original_text
  nil
end

def url
  nil
end
# Callback handed to the rewriter methods. It records the arguments of every
# invocation in @block_args and returns the replacement text.
#
# After one call @block_args is that call's argument list; from the second
# call on it is a list of argument lists, one per call.
def block(*args)
  case @block_args
  when Array
    # Second call: wrap the flat first-call list before appending.
    @block_args = [@block_args] unless @block_args.first.is_a?(Array)
    @block_args.push(args)
  else
    @block_args = args
  end
  "[rewritten]"
end
# Username rewriting. Each context supplies original_text; the shared before
# hook passes it through rewrite_usernames_or_lists with the recording `block`
# helper as the callback.
describe "rewrite usernames" do #{{{
  before do
    @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "username preceded by a space" do
    def original_text; "hello @jacob"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "hello [rewritten]"
    end
  end

  context "username at beginning of line" do
    def original_text; "@jacob you're cool"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "[rewritten] you're cool"
    end
  end

  context "username preceded by word character" do
    def original_text; "meet@the beach"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "meet@the beach"
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "great.[rewritten]"
    end
  end

  context "username containing non-word characters" do
    def original_text; "@jacob&^$%^"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "[rewritten]&^$%^"
    end
  end

  context "username over twenty characters" do
    def original_text
      @twenty_character_username = "zach" * 5
      "@" + @twenty_character_username + "1"
    end

    it "should be rewritten" do
      @block_args.should == ["@", @twenty_character_username, nil]
      @rewritten_text.should == "[rewritten]1"
    end
  end

  context "username followed by japanese" do
    def original_text; "@jacobの"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "[rewritten]の"
    end
  end

  context "username preceded by japanese" do
    def original_text; "あ@jacob"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "あ[rewritten]"
    end
  end

  context "username surrounded by japanese" do
    def original_text; "あ@jacobの"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "あ[rewritten]の"
    end
  end

  context "username using full-width at-sign" do
    # U+FF20 FULLWIDTH COMMERCIAL AT, built with pack so it cannot be
    # confused with (or silently normalised to) the ASCII "@".
    def full_width_at; [0xFF20].pack('U'); end

    def original_text
      "#{full_width_at}jacob"
    end

    it "should be rewritten" do
      # NOTE(review): assumes the rewriter yields the at-sign character exactly
      # as it appears in the input - confirm against Twitter::Rewriter.
      @block_args.should == [full_width_at, "jacob", nil]
      @rewritten_text.should == "[rewritten]"
    end
  end
end #}}}
# List rewriting. Each context supplies original_text; the shared before hook
# passes it through rewrite_usernames_or_lists with the recording `block`
# helper, whose third argument is the list slug (nil when there is none).
describe "rewrite lists" do #{{{
  before do
    @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "slug preceded by a space" do
    def original_text; "hello @jacob/my-list"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", "/my-list"]
      @rewritten_text.should == "hello [rewritten]"
    end
  end

  context "username followed by a slash but no list" do
    def original_text; "hello @jacob/ my-list"; end

    # The username is still rewritten; only the list part is left alone.
    it "should rewrite only the username" do
      @block_args.should == ["@", "jacob", nil]
      @rewritten_text.should == "hello [rewritten]/ my-list"
    end
  end

  context "empty username followed by a list" do
    def original_text; "hello @/my-list"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "hello @/my-list"
    end
  end

  context "list slug at beginning of line" do
    def original_text; "@jacob/my-list"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", "/my-list"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "username preceded by alpha-numeric character" do
    def original_text; "meet@jacob/my-list"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "meet@jacob/my-list"
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob/my-list"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", "/my-list"]
      @rewritten_text.should == "great.[rewritten]"
    end
  end

  context "username containing non-word characters" do
    def original_text; "@jacob/my-list&^$%^"; end

    it "should be rewritten" do
      @block_args.should == ["@", "jacob", "/my-list"]
      @rewritten_text.should == "[rewritten]&^$%^"
    end
  end

  context "list slug over twenty-five characters" do
    def original_text
      @twentyfive_character_list = "a" * 25
      "@jacob/#{@twentyfive_character_list}12345"
    end

    it "should rewrite only the first twenty-five characters of the slug" do
      @block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"]
      @rewritten_text.should == "[rewritten]12345"
    end
  end
end #}}}
# Hashtag rewriting. Each context supplies original_text; the shared before
# hook passes it through rewrite_hashtags with the recording `block` helper,
# which receives the hash character and the hashtag text.
describe "rewrite hashtags" do #{{{
  before do
    @rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block))
  end

  context "with an all numeric hashtag" do
    def original_text; "#123"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "#123"
    end
  end

  context "with a hashtag with alphanumeric characters" do
    def original_text; "#ab1d"; end

    it "should be rewritten" do
      @block_args.should == ["#", "ab1d"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "with a hashtag with underscores" do
    def original_text; "#a_b_c_d"; end

    it "should be rewritten" do
      @block_args.should == ["#", "a_b_c_d"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "with a hashtag that is preceded by a word character" do
    def original_text; "ab#cd"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "ab#cd"
    end
  end

  context "with a hashtag that starts with a number but has word characters" do
    def original_text; "#2ab"; end

    it "should be rewritten" do
      @block_args.should == ["#", "2ab"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "with multiple valid hashtags" do
    def original_text; "I'm frickin' awesome #ab #cd #ef"; end

    it "rewrites each hashtag" do
      @block_args.should == [["#", "ab"], ["#", "cd"], ["#", "ef"]]
      @rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]"
    end
  end

  context "with a hashtag preceded by a ." do
    def original_text; "ok, great.#abc"; end

    it "should be rewritten" do
      @block_args.should == ["#", "abc"]
      @rewritten_text.should == "ok, great.[rewritten]"
    end
  end

  context "with a hashtag preceded by a &" do
    # The input must actually contain "&#" for this case to test anything.
    def original_text; "&#nbsp;"; end

    it "should not be rewritten" do
      @block_args.should be_nil
      @rewritten_text.should == "&#nbsp;"
    end
  end

  context "with a hashtag that ends in an !" do
    def original_text; "#great!"; end

    it "should be rewritten, but should not include the !" do
      @block_args.should == ["#", "great"]
      @rewritten_text.should == "[rewritten]!"
    end
  end

  context "with a hashtag followed by Japanese" do
    def original_text; "#twj_devの"; end

    it "should be rewritten" do
      @block_args.should == ["#", "twj_devの"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "with a hashtag preceded by a full-width space" do
    # U+3000 IDEOGRAPHIC SPACE, built with pack so the invisible character is
    # unambiguous in both the input and the expectation.
    def full_width_space; [0x3000].pack('U'); end
    def original_text; "#{full_width_space}#twj_dev"; end

    it "should be rewritten" do
      @block_args.should == ["#", "twj_dev"]
      @rewritten_text.should == "#{full_width_space}[rewritten]"
    end
  end

  context "with a hashtag followed by a full-width space" do
    def full_width_space; [0x3000].pack('U'); end
    def original_text; "#twj_dev#{full_width_space}"; end

    it "should be rewritten" do
      @block_args.should == ["#", "twj_dev"]
      @rewritten_text.should == "[rewritten]#{full_width_space}"
    end
  end

  context "with a hashtag using full-width hash" do
    # U+FF03 FULLWIDTH NUMBER SIGN.
    def full_width_hash; [0xFF03].pack('U'); end
    def original_text; "#{full_width_hash}twj_dev"; end

    it "should be rewritten" do
      # NOTE(review): assumes the rewriter yields the hash character exactly
      # as it appears in the input - confirm against Twitter::Rewriter.
      @block_args.should == [full_width_hash, "twj_dev"]
      @rewritten_text.should == "[rewritten]"
    end
  end

  context "with a hashtag containing an accented latin character" do
    def original_text
      # the hashtag is #éhashtag
      "##{[0x00e9].pack('U')}hashtag"
    end

    it "should be rewritten" do
      @block_args.should == ["#", "éhashtag"]
      @rewritten_text.should == "[rewritten]"
    end
  end
end #}}}
# URL rewriting. Contexts supply original_text (and may override url); the
# shared before hook passes original_text through rewrite_urls with the
# recording `block` helper, which receives the matched URL.
describe "rewrite urls" do #{{{
  def url; "http://www.google.com"; end

  before do
    @rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block))
  end

  context "when embedded in plain text" do
    def original_text; "On my search engine #{url} I found good links."; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "On my search engine [rewritten] I found good links."
    end
  end

  context "when surrounded by Japanese;" do
    def original_text; "いまなにしてる#{url}いまなにしてる"; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる"
    end
  end

  context "with a path surrounded by parentheses;" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "I found a neatness ([rewritten])"
    end

    context "when the URL ends with a slash;" do
      def url; "http://www.google.com/"; end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end
    end

    context "when the URL has a path;" do
      def url; "http://www.google.com/fsdfasdf"; end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end
    end
  end

  context "when path contains parens" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "I found a neatness ([rewritten])"
    end

    context "wikipedia" do
      def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end
    end

    context "IIS session" do
      def url; "http://msdn.com/S(deadbeef)/page.htm"; end

      it "should be rewritten" do
        @block_args.should == [url]
        @rewritten_text.should == "I found a neatness ([rewritten])"
      end
    end

    context "unbalanced parens" do
      def url; "http://example.com/i_has_a_("; end

      it "should be rewritten" do
        @block_args.should == ["http://example.com/i_has_a_"]
        @rewritten_text.should == "I found a neatness ([rewritten]()"
      end
    end

    context "balanced parens with a double quote inside" do
      def url; "http://foo.bar.com/foo_(\")_bar" end

      it "should be rewritten" do
        @block_args.should == ["http://foo.bar.com/foo_"]
        @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
      end
    end

    context "balanced parens hiding XSS" do
      def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end

      it "should be rewritten" do
        @block_args.should == ["http://x.xx.com/"]
        @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
      end
    end
  end

  context "when preceded by a :" do
    def original_text; "Check this out @hoverbird:#{url}"; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "Check this out @hoverbird:[rewritten]"
    end
  end

  context "with a URL ending in allowed punctuation" do
    it "does not consume ending punctuation" do
      %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
        # The block parameter must not be named `url`: that would shadow the
        # `url` helper and turn the assertion into a tautology.
        Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |rewritten_url|
          rewritten_url.should == url
          "[rewritten]"
        end.should == "[rewritten]#{char}"
      end
    end
  end

  context "with a URL preceded in forbidden characters" do
    it "should be rewritten" do
      %w| \ ' / ! = |.each do |char|
        Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |_rewritten_url|
          "[rewritten]"
        end.should == "#{char}[rewritten]"
      end
    end
  end

  context "when embedded in a link tag" do
    # NOTE(review): the surrounding tag markup was missing from this fixture;
    # it is reconstructed here so the input matches the context description.
    def original_text; "<link rel='true'>#{url}</link>"; end

    it "should be rewritten" do
      @block_args.should == [url]
      @rewritten_text.should == "<link rel='true'>[rewritten]</link>"
    end
  end

  context "with multiple URLs" do
    def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end

    it "should autolink each one" do
      @block_args.should == [["http://www.links.org"], ["http://www.foo.org"]]
      @rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]"
    end
  end

  context "with multiple URLs in different formats" do
    def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end

    it "should autolink each one, in the proper order" do
      @block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]
      @rewritten_text.should == "[rewritten] [rewritten] [rewritten]"
    end
  end

  context "with a URL having a long TLD" do
    def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end

    it "should autolink it" do
      @block_args.should == ["http://golem.mobi/0912/71607.html"]
      @rewritten_text.should == "Yahoo integriert Facebook [rewritten]"
    end
  end

  context "with a url lacking the protocol" do
    def original_text; "I like www.foobar.com dudes"; end

    it "does not link at all" do
      @block_args.should be_nil
      @rewritten_text.should == "I like www.foobar.com dudes"
    end
  end

  context "with a @ in a URL" do
    context "with XSS attack" do
      def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end

      it "should not allow XSS follwing @" do
        @block_args.should == ["http://x.xx.com/"]
        @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
      end
    end

    context "with a username not followed by a /" do
      def original_text; "http://example.com/@foobar"; end

      it "should link url" do
        @block_args.should == ["http://example.com/@foobar"]
        @rewritten_text.should == "[rewritten]"
      end
    end

    context "with a username followed by a /" do
      def original_text; "http://example.com/@foobar/"; end

      it "should not link the username but link full url" do
        @block_args.should == ["http://example.com/@foobar/"]
        @rewritten_text.should == "[rewritten]"
      end
    end
  end
end #}}}
end
# vim: foldmethod=marker
twitter-text-1.13.4/spec/validation_spec.rb 0000644 0001750 0001750 00000003022 12667350232 021114 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Bare host class that mixes in Twitter::Validation; the examples below call
# tweet_invalid? on instances of it.
class TestValidation
include Twitter::Validation
end
# Specs for Twitter::Validation#tweet_invalid?, exercised through
# TestValidation instances. The method is expected to return false for a
# valid tweet and a symbol naming the problem otherwise.
describe Twitter::Validation do
  it "should disallow invalid BOM character" do
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters
  end

  it "should disallow invalid U+FFFF character" do
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters
  end

  it "should disallow direction change characters" do
    [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map { |cp| [cp].pack('U') }.each do |char|
      TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters
    end
  end

  it "should disallow non-Unicode" do
    TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters
  end

  it "should allow <= 140 combined accent characters" do
    # 'U*' is required to pack both code points ("e" + U+0301 combining
    # acute); a single 'U' directive would pack only the "e".
    # NOTE(review): relies on tweet_invalid? counting the composed form as
    # one character - confirm against Twitter::Validation.
    char = [0x65, 0x0301].pack('U*')
    TestValidation.new.tweet_invalid?(char * 139).should == false
    TestValidation.new.tweet_invalid?(char * 140).should == false
    TestValidation.new.tweet_invalid?(char * 141).should == :too_long
  end

  it "should allow <= 140 multi-byte characters" do
    char = [0x1d106].pack('U')
    TestValidation.new.tweet_invalid?(char * 139).should == false
    TestValidation.new.tweet_invalid?(char * 140).should == false
    TestValidation.new.tweet_invalid?(char * 141).should == :too_long
  end
end
twitter-text-1.13.4/spec/autolinking_spec.rb 0000644 0001750 0001750 00000074420 12667350232 021320 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Bare host class that mixes in Twitter::Autolink; the examples below call
# auto_link and related methods on instances of it.
class TestAutolink
include Twitter::Autolink
end
describe Twitter::Autolink do
# Default fixtures returning nil; contexts below redefine them as needed.
def original_text
  nil
end

def url
  nil
end
describe "auto_link_custom" do
# Autolink the context's original_text, when it defines one, and keep the
# resulting HTML in @autolinked_text for the examples.
before do
  if original_text
    @autolinked_text = TestAutolink.new.auto_link(original_text)
  end
end
# Username autolinking. Each context supplies original_text; the examples
# read the autolinked HTML from @autolinked_text.
describe "username autolinking" do
  context "username preceded by a space" do
    def original_text; "hello @jacob"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username in camelCase" do
    def original_text() "@jaCob iS cOoL" end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jaCob')
    end
  end

  context "username at beginning of line" do
    def original_text; "@jacob you're cool"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username preceded by word character" do
    def original_text; "meet@the beach"; end

    it "should not be linked" do
      Nokogiri::HTML(@autolinked_text).search('a').should be_empty
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username containing non-word characters" do
    def original_text; "@zach&^$%^"; end

    # The title used to say "should not be linked", contradicting the assertion.
    it "should be linked up to the first non-word character" do
      @autolinked_text.should link_to_screen_name('zach')
    end
  end

  context "username over twenty characters" do
    def original_text
      @twenty_character_username = "zach" * 5
      "@" + @twenty_character_username + "1"
    end

    # The title used to say "should not be linked", contradicting the assertion.
    it "should link only the first twenty characters" do
      @autolinked_text.should link_to_screen_name(@twenty_character_username)
    end
  end

  context "username followed by japanese" do
    def original_text; "@jacobの"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username preceded by japanese" do
    def original_text; "あ@matz"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('matz')
    end
  end

  context "username surrounded by japanese" do
    def original_text; "あ@yoshimiの"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('yoshimi')
    end
  end

  context "username using full-width at-sign" do
    def original_text
      "#{[0xFF20].pack('U')}jacob"
    end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end
end
# List path autolinking. Contexts that define original_text read the
# autolinked HTML from @autolinked_text; the first context links explicitly
# because it needs the :suppress_lists option.
describe "list path autolinking" do
  context "when List is not available" do
    it "should not be linked" do
      @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
      @autolinked_text.should_not link_to_list_path('jacob/my-list')
      @autolinked_text.should include('my-list')
    end
  end

  context "slug preceded by a space" do
    def original_text; "hello @jacob/my-list"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username followed by a slash but no list" do
    def original_text; "hello @jacob/ my-list"; end

    it "should link the username but NOT the list" do
      @autolinked_text.should_not link_to_list_path('jacob/my-list')
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "empty username followed by a list" do
    def original_text; "hello @/my-list"; end

    it "should NOT be linked" do
      Nokogiri::HTML(@autolinked_text).search('a').should be_empty
    end
  end

  context "list slug at beginning of line" do
    def original_text; "@jacob/my-list"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username preceded by alpha-numeric character" do
    def original_text; "meet@the/beach"; end

    it "should not be linked" do
      Nokogiri::HTML(@autolinked_text).search('a').should be_empty
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob/my-list"; end

    it "should be linked" do
      # @autolinked_text already holds auto_link(original_text) from the
      # enclosing before hook, so it is not recomputed here.
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username containing non-word characters" do
    def original_text; "@zach/test&^$%^"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('zach/test')
    end
  end

  context "list slug over twenty-five characters" do
    def original_text
      @twentyfive_character_list = "jack/" + ("a" * 25)
      "@#{@twentyfive_character_list}12345"
    end

    it "should link only the first twenty-five characters of the slug" do
      @autolinked_text.should link_to_list_path(@twentyfive_character_list)
    end
  end
end
# Hashtag autolinking. Each context supplies original_text; the examples read
# the autolinked HTML from @autolinked_text.
describe "hashtag autolinking" do
  context "with an all numeric hashtag" do
    def original_text; "#123"; end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag('#123')
    end
  end

  context "with a hashtag with alphanumeric characters" do
    def original_text; "#ab1d"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#ab1d')
    end
  end

  context "with a hashtag with underscores" do
    def original_text; "#a_b_c_d"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag(original_text)
    end
  end

  context "with a hashtag that is preceded by a word character" do
    def original_text; "ab#cd"; end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag(original_text)
    end
  end

  context "with a page anchor in a url" do
    def original_text; "Here's my url: http://foobar.com/#home"; end

    it "should not link the hashtag" do
      @autolinked_text.should_not have_autolinked_hashtag('#home')
    end

    it "should link the url" do
      @autolinked_text.should have_autolinked_url('http://foobar.com/#home')
    end
  end

  context "with a hashtag that starts with a number but has word characters" do
    def original_text; "#2ab"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag(original_text)
    end
  end

  context "with multiple valid hashtags" do
    def original_text; "I'm frickin' awesome #ab #cd #ef"; end

    it "links each hashtag" do
      @autolinked_text.should have_autolinked_hashtag('#ab')
      @autolinked_text.should have_autolinked_hashtag('#cd')
      @autolinked_text.should have_autolinked_hashtag('#ef')
    end
  end

  context "with a hashtag preceded by a ." do
    def original_text; "ok, great.#abc"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#abc')
    end
  end

  context "with a hashtag preceded by a &" do
    # The input must actually contain "&#" for this case to test anything.
    def original_text; "&#nbsp;"; end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
    end
  end

  context "with a hashtag that ends in an !" do
    def original_text; "#great!"; end

    it "should be linked, but should not include the !" do
      @autolinked_text.should have_autolinked_hashtag('#great')
    end
  end

  context "with a hashtag followed by Japanese" do
    def original_text; "#twj_devの"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_devの')
    end
  end

  context "with a hashtag preceded by a full-width space" do
    def original_text; "#{[0x3000].pack('U')}#twj_dev"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag followed by a full-width space" do
    def original_text; "#twj_dev#{[0x3000].pack('U')}"; end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag using full-width hash" do
    def original_text; "#{[0xFF03].pack('U')}twj_dev"; end

    it "should be linked" do
      link = Nokogiri::HTML(@autolinked_text).search('a')
      (link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
      link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev'
    end
  end

  context "with a hashtag containing an accented latin character" do
    def original_text
      # the hashtag is #éhashtag
      "##{[0x00e9].pack('U')}hashtag"
    end

    it "should be linked" do
      # Comparing against the plain, unlinked text would contradict "should
      # be linked"; check for the anchor like the sibling contexts do.
      @autolinked_text.should have_autolinked_hashtag(original_text)
    end
  end
end
describe "URL autolinking" do
def url; "http://www.google.com"; end
context "when embedded in plain text" do
def original_text; "On my search engine #{url} I found good links."; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "when surrounded by Japanese;" do
def original_text; "いまなにしてる#{url}いまなにしてる"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with a path surrounded by parentheses;" do
def original_text; "I found a neatness (#{url})"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
context "when the URL ends with a slash;" do
def url; "http://www.google.com/"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "when the URL has a path;" do
def url; "http://www.google.com/fsdfasdf"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
end
context "when path contains parens" do
def original_text; "I found a neatness (#{url})"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
context "wikipedia" do
def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "IIS session" do
def url; "http://msdn.com/S(deadbeef)/page.htm"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "unbalanced parens" do
def url; "http://example.com/i_has_a_("; end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://example.com/i_has_a_")
end
end
context "balanced parens with a double quote inside" do
def url; "http://foo.com/foo_(\")_bar" end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://foo.com/foo_")
end
end
context "balanced parens hiding XSS" do
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://x.xx.com/")
end
end
end
context "when preceded by a :" do
def original_text; "Check this out @hoverbird:#{url}"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with a URL ending in allowed punctuation" do
it "does not consume ending punctuation" do
matcher = TestAutolink.new
%w| ? ! , . : ; ] ) } = \ ' |.each do |char|
matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
end
end
end
context "with a URL preceded in forbidden characters" do
it "should be linked" do
matcher = TestAutolink.new
%w| \ ' / ! = |.each do |char|
matcher.auto_link("#{char}#{url}").should have_autolinked_url(url)
end
end
end
context "when embedded in a link tag" do
def original_text; "#{url}"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with multiple URLs" do
def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
it "should autolink each one" do
@autolinked_text.should have_autolinked_url('http://www.links.org')
@autolinked_text.should have_autolinked_url('http://www.foo.org')
end
end
context "with multiple URLs in different formats" do
def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
it "should autolink each one, in the proper order" do
@autolinked_text.should have_autolinked_url('http://foo.com')
@autolinked_text.should have_autolinked_url('https://bar.com')
@autolinked_text.should have_autolinked_url('http://mail.foobar.org')
end
end
context "with a URL having a long TLD" do
def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
it "should autolink it" do
@autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
end
end
context "with a url lacking the protocol" do
def original_text; "I like www.foobar.com dudes"; end
it "does not link at all" do
link = Nokogiri::HTML(@autolinked_text).search('a')
link.should be_empty
end
end
# URLs whose path contains "@": the text is linked as a URL, and an "@name"
# path segment is not turned into a screen-name link.
context "with a @ in a URL" do
context "with XSS attack" do
# Attribute-injection payload after the "@"; only the part of the URL before
# it is expected as the href.
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
it "should not allow XSS follwing @" do
@autolinked_text.should have_autolinked_url('http://x.xx.com/')
end
end
context "with a username not followed by a /" do
def original_text; 'http://example.com/@foobar'; end
it "should link url" do
@autolinked_text.should have_autolinked_url('http://example.com/@foobar')
end
end
context "with a username followed by a /" do
def original_text; 'http://example.com/@foobar/'; end
it "should not link the username but link full url" do
@autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
# The "@foobar" path segment must not become a separate mention link.
@autolinked_text.should_not link_to_screen_name('foobar')
end
end
end
# Inputs with long runs of punctuation or unusual path shapes; each example
# checks that the expected URL is still linked.
context "regex engine quirks" do
context "does not spiral out of control on repeated periods" do
# The trailing run of periods is not part of the expected link.
def original_text; "Test a ton of periods http://example.com/path.........................................."; end
it "should autolink" do
@autolinked_text.should have_autolinked_url('http://example.com/path')
end
end
# NOTE(review): this context is titled "repeated dashes" but its sample text
# describes a single-character file extension (".p") -- confirm the intent.
context "does not spiral out of control on repeated dashes" do
def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end
it "should autolink" do
@autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188')
end
end
end
end
# Examples that run the combined auto_link entry point over text mixing URLs,
# hashtags and mentions.
describe "Autolink all" do
  before do
    @linker = TestAutolink.new
  end

  it "should allow url/hashtag overlap" do
    overlapping = 'https://twitter.com/#search'
    @linker.auto_link(overlapping).should have_autolinked_url(overlapping)
  end

  it "should not add invalid option in HTML tags" do
    options = { :hashtag_class => 'hashtag_classname' }
    auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", options)
    auto_linked.should have_autolinked_url('https://twitter.com/')
    # Neither the option name nor its value may show up in the output.
    ['hashtag_class', 'hashtag_classname'].each do |leaked|
      auto_linked.should_not include(leaked)
    end
  end

  it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do
    # U+10400 lies outside the Basic Multilingual Plane.
    astral = [0x10400].pack('U')
    auto_linked = @linker.auto_link("#{astral} #hashtag #{astral} @mention #{astral} http://twitter.com/")
    auto_linked.should have_autolinked_hashtag('#hashtag')
    auto_linked.should link_to_screen_name('mention')
    auto_linked.should have_autolinked_url('http://twitter.com/')
  end
end
end
describe "autolinking options" do
before do
@linker = TestAutolink.new
end
# With :url_entities supplied, the anchor keeps the short t.co href while the
# js-display-url span carries the display_url text (without the ellipsis), and
# the document text is the expanded URL.
# NOTE(review): the entity's indices (84, 103) do not correspond to the
# 19-character input text -- presumably the entity is matched by its "url";
# confirm.
it "should show display_url when :url_entities provided" do
linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{
"url" => "http://t.co/0JG5Mcq",
"display_url" => "blog.twitter.com/2011/05/twitte…",
"expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
"indices" => [
84,
103
]
}])
html = Nokogiri::HTML(linked)
html.search('a').should_not be_empty
html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
# Four spans are expected to carry the default off-screen inline style.
html.search('span[@style="position:absolute;left:-9999px;"]').size.should == 4
end
# :invisible_tag_attrs replaces the attributes on the hidden spans: four spans
# are expected to carry the custom style value.
it "should accept invisible_tag_attrs option" do
linked = @linker.auto_link("http://t.co/0JG5Mcq",
{
:url_entities => [{
"url" => "http://t.co/0JG5Mcq",
"display_url" => "blog.twitter.com/2011/05/twitte…",
"expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
"indices" => [
0,
19
]
}],
:invisible_tag_attrs => "style='dummy;'"
})
html = Nokogiri::HTML(linked)
html.search('span[@style="dummy;"]').size.should == 4
end
# Same display_url rendering as above, but going through auto_link_entities
# with a symbol-keyed entity passed directly.
it "should show display_url if available in entity" do
linked = @linker.auto_link_entities("http://t.co/0JG5Mcq",
[{
:url => "http://t.co/0JG5Mcq",
:display_url => "blog.twitter.com/2011/05/twitte…",
:expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
:indices => [0, 19]
}]
)
html = Nokogiri::HTML(linked)
html.search('a').should_not be_empty
html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
end
# The :class option value must appear in the generated markup.
it "should apply :class as a CSS class" do
  target_url = 'http://example.com/'
  output = @linker.auto_link("http://example.com/", :class => 'myclass')
  output.should have_autolinked_url(target_url)
  output.should match(/myclass/)
end
# :url_class affects URL links only. Three scenarios are checked in sequence.
it "should apply :url_class only on URL" do
# 1) No option: a linked URL carries no class attribute at all.
linked = @linker.auto_link("http://twitter.com")
linked.should have_autolinked_url('http://twitter.com')
linked.should_not match(/class/)
# 2) With :url_class: the URL link gets exactly that class.
linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass')
linked.should have_autolinked_url('http://twitter.com')
linked.should match(/class=\"testClass\"/)
# 3) Hashtags and mentions keep their own classes and ignore :url_class.
linked = @linker.auto_link("#hash @tw", :url_class => 'testClass')
linked.should match(/class=\"tweet-url hashtag\"/)
linked.should match(/class=\"tweet-url username\"/)
linked.should_not match(/class=\"testClass\"/)
end
# Without options, generated links contain "nofollow".
it "should add rel=nofollow by default" do
  target_url = 'http://example.com/'
  output = @linker.auto_link("http://example.com/")
  output.should have_autolinked_url(target_url)
  output.should match(/nofollow/)
end
# With :username_include_symbol the link text keeps the leading "@".
it "should include the '@' symbol in a username when passed :username_include_symbol" do
  options = { :username_include_symbol => true }
  @linker.auto_link("@user", options).should link_to_screen_name('user', '@user')
end
# Same option applied to a list path: the link text keeps the leading "@".
it "should include the '@' symbol in a list when passed :username_include_symbol" do
  options = { :username_include_symbol => true }
  @linker.auto_link("@user/list", options).should link_to_list_path('user/list', '@user/list')
end
# :suppress_no_follow removes "nofollow" from the generated link.
it "should not add rel=nofollow when passed :suppress_no_follow" do
  target_url = 'http://example.com/'
  output = @linker.auto_link("http://example.com/", :suppress_no_follow => true)
  output.should have_autolinked_url(target_url)
  output.should_not match(/nofollow/)
end
# Without options, no target attribute is emitted.
it "should not add a target attribute by default" do
  target_url = 'http://example.com/'
  output = @linker.auto_link("http://example.com/")
  output.should have_autolinked_url(target_url)
  output.should_not match(/target=/)
end
# The :target option value is emitted as the link's target attribute.
it "should respect the :target option" do
  target_url = 'http://example.com/'
  output = @linker.auto_link("http://example.com/", :target => 'mywindow')
  output.should have_autolinked_url(target_url)
  output.should match(/target="mywindow"/)
end
# The value returned by :username_url_block becomes the mention's href.
it "should customize href by username_url_block option" do
  href_builder = lambda { |_entity| "dummy" }
  output = @linker.auto_link("@test", :username_url_block => href_builder)
  output.should have_autolinked_url('dummy', 'test')
end
# The value returned by :list_url_block becomes the list link's href.
it "should customize href by list_url_block option" do
  href_builder = lambda { |_entity| "dummy" }
  output = @linker.auto_link("@test/list", :list_url_block => href_builder)
  output.should have_autolinked_url('dummy', 'test/list')
end
# The value returned by :hashtag_url_block becomes the hashtag's href.
it "should customize href by hashtag_url_block option" do
  href_builder = lambda { |_entity| "dummy" }
  output = @linker.auto_link("#hashtag", :hashtag_url_block => href_builder)
  output.should have_autolinked_url('dummy', '#hashtag')
end
# The value returned by :cashtag_url_block becomes the cashtag's href.
it "should customize href by cashtag_url_block option" do
  href_builder = lambda { |_entity| "dummy" }
  output = @linker.auto_link("$CASH", :cashtag_url_block => href_builder)
  output.should have_autolinked_url('dummy', '$CASH')
end
# The value returned by :link_url_block becomes the URL link's href while the
# link text stays the original URL.
it "should customize href by link_url_block option" do
  href_builder = lambda { |_entity| "dummy" }
  output = @linker.auto_link("http://example.com/", :link_url_block => href_builder)
  output.should have_autolinked_url('dummy', 'http://example.com/')
end
# :link_attribute_block receives an entity and its attributes hash; attributes
# it adds must appear only on the anchor of the entity it chose.
it "should modify link attributes by link_attribute_block" do
  linked = @linker.auto_link("#hash @mention",
    :link_attribute_block => lambda{|entity, attributes|
      attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag]
    }
  )
  # Patterns are anchored on the opening "<a" so that each one inspects the
  # attributes of a single anchor tag (a bare leading "]+" would demand a
  # literal "]" and could never match the generated HTML).
  linked.should match(/<a[^>]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/)
  linked.should_not match(/<a[^>]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/)
  # The option name itself must not leak into the markup.
  linked.should_not match(/link_attribute_block/i)

  linked = @linker.auto_link("@mention http://twitter.com/",
    :link_attribute_block => lambda{|entity, attributes|
      attributes["dummy-url-attr"] = entity[:url] if entity[:url]
    }
  )
  linked.should_not match(/<a[^>]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/)
  linked.should match(/<a[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"/)
end
# :link_text_block receives an entity and its default link text and returns the
# text to place inside the anchor.
it "should modify link text by link_text_block" do
  linked = @linker.auto_link("#hash @mention",
    :link_text_block => lambda{|entity, text|
      entity[:hashtag] ? "#replaced" : "pre_#{text}_post"
    }
  )
  # Anchored on the opening "<a" tag so the replaced text is checked as the
  # full content of one anchor.
  linked.should match(/<a[^>]+>#replaced<\/a>/)
  linked.should match(/<a[^>]+>pre_mention_post<\/a>/)

  # Combined with :symbol_tag / :text_with_symbol_tag, the text handed to the
  # block is expected to already contain the <s>/<b> wrappers.
  linked = @linker.auto_link("#hash @mention", {
    :link_text_block => lambda{|entity, text|
      "pre_#{text}_post"
    },
    :symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true
  })
  linked.should match(/<a[^>]+>pre_<s>#<\/s><b>hash<\/b>_post<\/a>/)
  linked.should match(/<a[^>]+>pre_<s>@<\/s><b>mention<\/b>_post<\/a>/)
end
# :url_target sets the target attribute on URL links only; hashtag and mention
# anchors must not receive one.
it "should apply :url_target only to auto-linked URLs" do
  auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'})
  auto_linked.should have_autolinked_hashtag('#hashtag')
  auto_linked.should link_to_screen_name('mention')
  auto_linked.should have_autolinked_url('http://test.com/')
  # Each pattern starts at "<a" and stays inside one tag ([^>]) so that it
  # inspects a single anchor.
  auto_linked.should_not match(/<a[^>]+hashtag[^>]+target[^>]+>/)
  auto_linked.should_not match(/<a[^>]+username[^>]+target[^>]+>/)
  auto_linked.should match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/)
end
# :target_blank => true adds target="_blank" to the generated anchors.
# NOTE(review): the description says "only to auto-linked URLs", yet the
# assertions require target="_blank" on the hashtag and mention anchors as
# well -- confirm which of the two reflects the intended behaviour.
it "should apply target='_blank' only to auto-linked URLs when :target_blank is set to true" do
  auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:target_blank => true})
  auto_linked.should have_autolinked_hashtag('#hashtag')
  auto_linked.should link_to_screen_name('mention')
  auto_linked.should have_autolinked_url('http://test.com/')
  # Patterns are anchored on the opening "<a" so each inspects one anchor tag.
  auto_linked.should match(/<a[^>]+hashtag[^>]+target=\"_blank\"[^>]*>/)
  auto_linked.should match(/<a[^>]+username[^>]+target=\"_blank\"[^>]*>/)
  auto_linked.should match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/)
end
end
# Exercises link_url_with_entity (invoked through send) with entities that
# carry a display_url and an expanded_url. Double quotes in the result are
# normalised to single quotes before comparison.
# NOTE(review): the first three expected strings contain no markup even though
# :invisible_tag_attrs is passed and the last example is the one titled
# "should not create spans" -- the span markup may be missing from this copy of
# the file; confirm the expected values against upstream before relying on them.
describe "link_url_with_entity" do
before do
@linker = TestAutolink.new
end
it "should use display_url and expanded_url" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "twitter.com",
:expanded_url => "http://twitter.com/"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "http://twitter.com/";
end
it "should correctly handle display_url ending with '…'" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "twitter.com…",
:expanded_url => "http://twitter.com/abcdefg"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "http://twitter.com/abcdefg…";
end
it "should correctly handle display_url starting with '…'" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "…tter.com/abcdefg",
:expanded_url => "http://twitter.com/abcdefg"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "…http://twitter.com/abcdefg";
end
# When display_url and expanded_url differ in host, the plain display_url is
# expected as the result.
it "should not create spans if display_url and expanded_url are on different domains" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "pic.twitter.com/xyz",
:expanded_url => "http://twitter.com/foo/statuses/123/photo/1"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "pic.twitter.com/xyz"
end
end
# :symbol_tag wraps the leading symbol (@, #, $) in the given tag and
# :text_with_symbol_tag wraps the text that follows it. Patterns check both the
# opening and the closing tag so a missing or misplaced opening tag is caught.
describe "symbol_tag" do
  before do
    @linker = TestAutolink.new
  end

  it "should put :symbol_tag around symbol" do
    @linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true}).should match(/<s>@<\/s>mention/)
    @linker.auto_link("#hash", {:symbol_tag => 's'}).should match(/<s>#<\/s>hash/)
    result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true})
    result.should match(/<b>@<\/b>mention/)
    result.should match(/<b>#<\/b>hash/)
    result.should match(/<b>\$<\/b>CASH/)
  end

  it "should put :text_with_symbol_tag around text" do
    result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'})
    result.should match(/<b>mention<\/b>/)
    result.should match(/<b>hash<\/b>/)
    result.should match(/<b>CASH<\/b>/)
  end

  it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do
    result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true})
    result.should match(/<s>@<\/s><b>mention<\/b>/)
    result.should match(/<s>#<\/s><b>hash<\/b>/)
    result.should match(/<s>\$<\/s><b>CASH<\/b>/)
  end
end
# html_escape must replace &, <, >, double and single quotes with their HTML
# entities, escape input that is already escaped a second time, and pass nil
# through unchanged.
describe "html_escape" do
  before do
    @linker = TestAutolink.new
  end

  it "should escape html entities properly" do
    # Single characters.
    @linker.html_escape("&").should == "&amp;"
    @linker.html_escape(">").should == "&gt;"
    @linker.html_escape("<").should == "&lt;"
    @linker.html_escape("\"").should == "&quot;"
    @linker.html_escape("'").should == "&#39;"
    # Combinations and markup.
    @linker.html_escape("&<>\"").should == "&amp;&lt;&gt;&quot;"
    @linker.html_escape("<div>").should == "&lt;div&gt;"
    @linker.html_escape("a&b").should == "a&amp;b"
    @linker.html_escape("<a href=\"https://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;https://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
    # Already-escaped input is escaped again rather than detected.
    @linker.html_escape("&amp;").should == "&amp;amp;"
    @linker.html_escape(nil).should == nil
  end
end
end
twitter-text-1.13.4/spec/extractor_spec.rb 0000644 0001750 0001750 00000032227 12667350232 021006 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Minimal host class that mixes in Twitter::Extractor so the specs can call the
# extraction methods on an instance.
class TestExtractor
include Twitter::Extractor
end
describe Twitter::Extractor do
before do
@extractor = TestExtractor.new
end
# Screen-name extraction without position information.
describe "mentions" do
  context "single screen name alone " do
    it "should be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice")
      names.should == ["alice"]
    end

    it "should be linked with _" do
      names = @extractor.extract_mentioned_screen_names("@alice_adams")
      names.should == ["alice_adams"]
    end

    it "should be linked if numeric" do
      names = @extractor.extract_mentioned_screen_names("@1234")
      names.should == ["1234"]
    end
  end

  context "multiple screen names" do
    it "should both be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice @bob")
      names.should == ["alice", "bob"]
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text" do
      names = @extractor.extract_mentioned_screen_names("waiting for @alice to arrive")
      names.should == ["alice"]
    end

    it "should be linked in Japanese text" do
      names = @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")
      names.should == ["alice"]
    end

    it "should ignore mentions preceded by !, @, #, $, %, & or *" do
      ['!', '@', '#', '$', '%', '&', '*'].each do |prefix|
        @extractor.extract_mentioned_screen_names("f#{prefix}@kn").should == []
      end
    end
  end

  it "should accept a block arugment and call it in order" do
    # Each yielded name consumes the head of the list; nothing may remain.
    remaining = ["alice", "bob"]
    @extractor.extract_mentioned_screen_names("@alice @bob") do |screen_name|
      screen_name.should == remaining.shift
    end
    remaining.should == []
  end
end
# Screen-name extraction including [start, end] character indices; the span
# includes the leading "@".
describe "mentions with indices" do
  context "single screen name alone " do
    it "should be linked and the correct indices" do
      expected = [{:screen_name => "alice", :indices => [0, 6]}]
      @extractor.extract_mentioned_screen_names_with_indices("@alice").should == expected
    end

    it "should be linked with _ and the correct indices" do
      expected = [{:screen_name => "alice_adams", :indices => [0, 12]}]
      @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == expected
    end

    it "should be linked if numeric and the correct indices" do
      expected = [{:screen_name => "1234", :indices => [0, 5]}]
      @extractor.extract_mentioned_screen_names_with_indices("@1234").should == expected
    end
  end

  context "multiple screen names" do
    it "should both be linked with the correct indices" do
      expected = [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "bob", :indices => [7, 11]}
      ]
      @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should == expected
    end

    it "should be linked with the correct indices even when repeated" do
      expected = [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "alice", :indices => [7, 13]},
        {:screen_name => "bob", :indices => [14, 18]}
      ]
      @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should == expected
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text with the correct indices" do
      expected = [{:screen_name => "alice", :indices => [12, 18]}]
      @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == expected
    end

    it "should be linked in Japanese text with the correct indices" do
      expected = [{:screen_name => "alice", :indices => [1, 7]}]
      @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == expected
    end
  end

  it "should accept a block arugment and call it in order" do
    pending_entries = [
      {:screen_name => "alice", :indices => [0, 6]},
      {:screen_name => "bob", :indices => [7, 11]}
    ]
    @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |screen_name, first_index, last_index|
      entry = pending_entries.shift
      screen_name.should == entry[:screen_name]
      first_index.should == entry[:indices].first
      last_index.should == entry[:indices].last
    end
    pending_entries.should == []
  end

  it "should extract screen name in text with supplementary character" do
    # The expected indices place the mention at 2 after a U+10400 and a space.
    astral = [0x10400].pack('U')
    expected = [{:screen_name => "alice", :indices => [2, 8]}]
    @extractor.extract_mentioned_screen_names_with_indices("#{astral} @alice").should == expected
  end
end
# Reply detection: a screen name counts as a reply target only at the start of
# the text (optionally after whitespace).
describe "replies" do
  context "should be extracted from" do
    it "should extract from lone name" do
      reply_to = @extractor.extract_reply_screen_name("@alice")
      reply_to.should == "alice"
    end

    it "should extract from the start" do
      reply_to = @extractor.extract_reply_screen_name("@alice reply text")
      reply_to.should == "alice"
    end

    it "should extract preceded by a space" do
      reply_to = @extractor.extract_reply_screen_name(" @alice reply text")
      reply_to.should == "alice"
    end

    it "should extract preceded by a full-width space" do
      full_width_space = [0x3000].pack('U')
      reply_to = @extractor.extract_reply_screen_name("#{full_width_space}@alice reply text")
      reply_to.should == "alice"
    end
  end

  context "should not be extracted from" do
    it "should not be extracted when preceded by text" do
      reply_to = @extractor.extract_reply_screen_name("reply @alice text")
      reply_to.should == nil
    end

    it "should not be extracted when preceded by puctuation" do
      %w(. / _ - + # ! @).each do |prefix|
        @extractor.extract_reply_screen_name("#{prefix}@alice text").should == nil
      end
    end
  end

  context "should accept a block arugment" do
    it "should call the block on match" do
      @extractor.extract_reply_screen_name("@alice") do |screen_name|
        screen_name.should == "alice"
      end
    end

    it "should not call the block on no match" do
      invocations = 0
      @extractor.extract_reply_screen_name("not a reply") do |_screen_name|
        invocations += 1
      end
      invocations.should == 0
    end
  end
end
# URL extraction without indices, driven by the TestUrls fixture lists.
describe "urls" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        first_found = @extractor.extract_urls(url).first
        first_found.should include(url)
      end

      it "should match the URL #{url} when it's embedded in other text" do
        first_found = @extractor.extract_urls("Sweet url: #{url} I found. #awesome").first
        first_found.should include(url)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      found = @extractor.extract_urls("http://tld-too-short.x")
      found.should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url}" do
        found = @extractor.extract_urls(url)
        found.size.should == 1
        # Only the first 20 characters of each fixture are expected back.
        only_url = found.first
        only_url.should_not == url
        only_url.should == url[0...20]
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        found = @extractor.extract_urls("Sweet url: #{url} I found. #awesome")
        found.size.should == 1
        only_url = found.first
        only_url.should_not == url
        only_url.should == url[0...20]
      end
    end
  end
end
# URL extraction including [start, end] character indices.
describe "urls with indices" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        found = @extractor.extract_urls_with_indices(url)
        found.size.should == 1
        entity = found.first
        entity[:url].should include(url)
        entity[:indices].first.should == 0
        entity[:indices].last.should == url.chars.to_a.size
      end

      it "should match the URL #{url} when it's embedded in other text" do
        # "Sweet url: " is 11 characters long, hence the offset below.
        found = @extractor.extract_urls_with_indices("Sweet url: #{url} I found. #awesome")
        found.size.should == 1
        entity = found.first
        entity[:url].should include(url)
        entity[:indices].first.should == 11
        entity[:indices].last.should == 11 + url.chars.to_a.size
      end
    end

    it "should extract URL in text with supplementary character" do
      astral = [0x10400].pack('U')
      expected = [{:url => "http://twitter.com", :indices => [2, 20]}]
      @extractor.extract_urls_with_indices("#{astral} http://twitter.com").should == expected
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      found = @extractor.extract_urls_with_indices("http://tld-too-short.x")
      found.should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
        found = @extractor.extract_urls_with_indices(url)
        found.size.should == 1
        entity = found.first
        entity[:url].should_not include(url)
        entity[:url].should include(url[0...20])
        entity[:indices].first.should == 0
        entity[:indices].last.should == 20
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        found = @extractor.extract_urls_with_indices("Sweet url: #{url} I found. #awesome")
        found.size.should == 1
        entity = found.first
        entity[:url].should_not include(url)
        entity[:url].should include(url[0...20])
        entity[:indices].first.should == 11
        entity[:indices].last.should == 31
      end
    end
  end
end
# Hashtag extraction without indices; extracted values omit the leading "#".
describe "hashtags" do
  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        tags = @extractor.extract_hashtags("##{hashtag}")
        tags.should == [hashtag]
      end

      it "should extract ##{hashtag} within text" do
        tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
        tags.should == [hashtag]
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          tags = @extractor.extract_hashtags("##{hashtag}")
          tags.should == [hashtag]
        end

        it "should extract ##{hashtag} within text" do
          tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
          tags.should == [hashtag]
        end
      end

      # The multiplication and division signs end the hashtag.
      it "should not allow the multiplication character" do
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")
        tags.should == ["pre"]
      end

      it "should not allow the division character" do
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")
        tags.should == ["pre"]
      end
    end
  end

  it "should not extract numeric hashtags" do
    tags = @extractor.extract_hashtags("#1234")
    tags.should == []
  end

  it "should extract hashtag followed by punctuations" do
    tags = @extractor.extract_hashtags("#test1: #test2; #test3\"")
    tags.should == ["test1", "test2" ,"test3"]
  end
end
# Hashtag extraction including [start, end] character indices.
describe "hashtags with indices" do
  # Asserts that exactly one hashtag is found in +text+, that it equals
  # +hashtag+, and that it spans offset .. offset + length + 1 (the extra 1
  # accounts for the "#").
  def match_hashtag_in_text(hashtag, text, offset = 0)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 1
    entity = found.first
    entity[:hashtag].should == hashtag
    entity[:indices].first.should == offset
    entity[:indices].last.should == offset + hashtag.chars.to_a.size + 1
  end

  # Asserts that no hashtag at all is found in +text+.
  def not_match_hashtag_in_text(text)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 0
  end

  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        match_hashtag_in_text(hashtag, "##{hashtag}")
      end

      it "should extract ##{hashtag} within text" do
        # "pre-text " is 9 characters long.
        match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          match_hashtag_in_text(hashtag, "##{hashtag}")
        end

        it "should extract ##{hashtag} within text" do
          match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
        end
      end

      it "should not allow the multiplication character" do
        multiplication_sign = [0xd7].pack('U')
        match_hashtag_in_text("pre", "#pre#{multiplication_sign}post", 0)
      end

      it "should not allow the division character" do
        division_sign = [0xf7].pack('U')
        match_hashtag_in_text("pre", "#pre#{division_sign}post", 0)
      end
    end
  end

  it "should not extract numeric hashtags" do
    not_match_hashtag_in_text("#1234")
  end

  it "should extract hashtag in text with supplementary character" do
    astral = [0x10400].pack('U')
    match_hashtag_in_text("hashtag", "#{astral} #hashtag", 2)
  end
end
end
twitter-text-1.13.4/spec/hithighlighter_spec.rb 0000644 0001750 0001750 00000006055 12667350232 021776 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Minimal host class that mixes in Twitter::HitHighlighter so the specs can
# call hit_highlight on an instance.
class TestHitHighlighter
include Twitter::HitHighlighter
end
describe Twitter::HitHighlighter do
describe "highlight" do
before do
@highlighter = TestHitHighlighter.new
end
# Tag selection: hits are wrapped in <em> by default and in the tag named by
# the :tag option otherwise. The expected strings must contain the highlight
# markup; without it they would equal the unhighlighted input.
context "with options" do
  before do
    @original = "Testing this hit highliter"
    # [13, 16] covers the word "hit".
    @hits = [[13,16]]
  end

  it "should default to <em> tags" do
    @highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
  end

  it "should allow tag override" do
    @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
  end
end
# Highlighting plain text: every [start, end) hit range is wrapped in <em>.
context "without links" do
  before do
    @original = "Hey! this is a test tweet"
  end

  it "should return original when no hits are provided" do
    @highlighter.hit_highlight(@original).should == @original
  end

  it "should highlight one hit" do
    @highlighter.hit_highlight(@original, [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
  end

  it "should highlight two hits" do
    @highlighter.hit_highlight(@original, [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
  end

  it "should correctly highlight first-word hits" do
    @highlighter.hit_highlight(@original, [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
  end

  it "should correctly highlight last-word hits" do
    @highlighter.hit_highlight(@original, [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
  end
end
# Highlighting text that already contains <a> markup. In these examples the hit
# indices count text characters only (markup is skipped), and the inserted <em>
# tags must coexist with the existing anchors.
context "with links" do
  it "should highlight with a single link" do
    @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
  end

  it "should highlight with link at the end" do
    @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
  end

  it "should highlight with a link at the beginning" do
    @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
  end

  it "should highlight an entire link" do
    @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
  end

  it "should highlight within a link" do
    @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
  end

  it "should highlight around a link" do
    @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
  end

  it "should fail gracefully with bad hits" do
    # The hit end (20) lies beyond the text; highlighting stops at the end.
    @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
  end

  it "should not mess up with touching tags" do
    @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
  end
end
end
end
twitter-text-1.13.4/spec/regex_spec.rb 0000644 0001750 0001750 00000002132 12667350232 020075 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Checks the URL and list-name patterns exposed by Twitter::Regex, using the
# TestUrls fixture lists and the custom matchers match_autolink_expression /
# match_autolink_expression_in.
describe "Twitter::Regex regular expressions" do
describe "matching URLS" do
# One pair of examples is generated per valid fixture URL.
TestUrls::VALID.each do |url|
it "should match the URL #{url}" do
url.should match_autolink_expression
end
it "should match the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
url.should match_autolink_expression_in(text)
end
end
end
describe "invalid URLS" do
it "does not link urls with invalid characters" do
TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
end
end
describe "matching List names" do
it "should match if less than 25 characters" do
name = "Shuffleboard Community"
name.length.should < 25
name.should match(Twitter::Regex::REGEXEN[:list_name])
end
# NOTE(review): the description says "should not match", yet the example
# asserts that the over-long name DOES match. Presumably the pattern is
# unanchored and matches a prefix of the name -- confirm whether the
# description or the assertion reflects the intent.
it "should not match if greater than 25 characters" do
name = "Most Glorious Shady Meadows Shuffleboard Community"
name.length.should > 25
name.should match(Twitter::Regex[:list_name])
end
end
end
twitter-text-1.13.4/spec/unicode_spec.rb 0000644 0001750 0001750 00000001665 12667350232 020423 0 ustar sudheesh sudheesh # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
# Twitter::Unicode exposes code points as constants (e.g. UFEB6) that are
# defined on first reference.
describe Twitter::Unicode do
# Statement order matters here: the constant must be undefined before the first
# reference and defined afterwards. This example therefore relies on UFEB6 not
# having been referenced before it runs.
it "should lazy-init constants" do
Twitter::Unicode.const_defined?(:UFEB6).should == false
Twitter::Unicode::UFEB6.should_not be_nil
Twitter::Unicode::UFEB6.should be_kind_of(String)
Twitter::Unicode.const_defined?(:UFEB6).should == true
end
it "should return corresponding character" do
Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U')
end
# Lowercase hex digits resolve to the same value as the uppercase form.
it "should allow lowercase notation" do
Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6
Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6
end
# An underscore after the "U" prefix is accepted as well.
it "should allow underscore notation" do
Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6
Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6
end
# A name that does not follow the constant pattern raises NameError.
it "should raise on invalid codepoints" do
lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError)
end
end
twitter-text-1.13.4/spec/test_urls.rb 0000644 0001750 0001750 00000005265 12667350232 020007 0 ustar sudheesh sudheesh # encoding: utf-8
# URL fixtures shared by the specs. Each list is assigned only when it is not
# already defined, so loading this file more than once does not redefine it.
module TestUrls
  # URLs the extractor is expected to accept.
  VALID = %w(
    http://google.com
    http://foobar.com/#
    http://google.com/#foo
    http://google.com/#search?q=iphone%20-filter%3Alinks
    http://twitter.com/#search?q=iphone%20-filter%3Alinks
    http://somedomain.com/index.php?path=/abc/def/
    http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html
    http://somehost.com:3000
    http://xo.com/~matthew+%-x
    http://en.wikipedia.org/wiki/Primer_(film)
    http://www.ams.org/bookstore-getitem/item=mbk-59
    http://chilp.it/?77e8fd
    http://tell.me/why
    http://longtlds.info
    http://✪df.ws/ejp
    http://日本.com
    http://search.twitter.com/search?q=avro&lang=en
    http://mrs.domain-dash.biz
    http://x.com/has/one/char/domain
    http://t.co/nwcLTFF
    http://sub_domain-dash.twitter.com
    http://a.b.cd
    http://a_b.c-d.com
    http://a-b.b.com
    http://twitter-dash.com
    http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx
    www.foobar.com
    WWW.FOOBAR.COM
    www.foobar.co.jp
    http://t.co
    t.co/nwcLTFF
    http://foobar.みんな
    http://foobar.中国
    http://foobar.پاکستان
    https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-
  ) unless defined?(TestUrls::VALID)

  # URLs the extractor is expected to reject. The last five entries embed
  # the directional-formatting characters U+202A..U+202E in the host name.
  INVALID = (
    %w(
      http://no-tld
      http://tld-too-short.x
      http://-doman_dash.com
      http://_leadingunderscore.twitter.com
      http://trailingunderscore_.twitter.com
      http://-leadingdash.twitter.com
      http://trailingdash-.twitter.com
      http://-leadingdash.com
      http://trailingdash-.com
      http://no_underscores.com
      http://test.c_o_m
      http://test.c-o-m
    ) +
    [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map { |codepoint| "http://twitt#{[codepoint].pack('U')}er.com" }
  ) unless defined?(TestUrls::INVALID)

  # t.co links followed by trailing characters.
  TCO = %w(
    http://t.co/P53cv5yO!
    http://t.co/fQJmiPGg***
    http://t.co/pbY2NfTZ's
    http://t.co/2vYHpAc5;
    http://t.co/ulYGBYSo:
    http://t.co/GeT4bSiw=win
    http://t.co/8MkmHU0k+fun
    http://t.co/TKLp64dY.yes,
    http://t.co/8vuO27cI$$
    http://t.co/rPYTvdA8/
    http://t.co/WvtMw5ku%
    http://t.co/8t7G3ddS#
    http://t.co/nfHNJDV2/#!
    http://t.co/gK6NOXHs[good]
    http://t.co/dMrT0o1Y]bad
    http://t.co/FNkPfmii-
    http://t.co/sMgS3pjI_oh
    http://t.co/F8Dq3Plb~
    http://t.co/ivvH58vC&help
    http://t.co/iUBL15zD|NZ5KYLQ8
  ) unless defined?(TestUrls::TCO)
end
twitter-text-1.13.4/Gemfile 0000644 0001750 0001750 00000000137 12667350232 015770 0 ustar sudheesh sudheesh source "http://rubygems.org"
# The gem's dependencies are declared in twitter-text.gemspec; the `gemspec`
# directive below tells Bundler to read them from there.
gemspec
twitter-text-1.13.4/lib/ 0000755 0001750 0001750 00000000000 12667350232 015242 5 ustar sudheesh sudheesh twitter-text-1.13.4/lib/assets/ 0000755 0001750 0001750 00000000000 12667350232 016544 5 ustar sudheesh sudheesh twitter-text-1.13.4/lib/assets/tld_lib.yml 0000644 0001750 0001750 00000021017 12667350232 020701 0 ustar sudheesh sudheesh ---
country:
- ac
- ad
- ae
- af
- ag
- ai
- al
- am
- an
- ao
- aq
- ar
- as
- at
- au
- aw
- ax
- az
- ba
- bb
- bd
- be
- bf
- bg
- bh
- bi
- bj
- bl
- bm
- bn
- bo
- bq
- br
- bs
- bt
- bv
- bw
- by
- bz
- ca
- cc
- cd
- cf
- cg
- ch
- ci
- ck
- cl
- cm
- cn
- co
- cr
- cu
- cv
- cw
- cx
- cy
- cz
- de
- dj
- dk
- dm
- do
- dz
- ec
- ee
- eg
- eh
- er
- es
- et
- eu
- fi
- fj
- fk
- fm
- fo
- fr
- ga
- gb
- gd
- ge
- gf
- gg
- gh
- gi
- gl
- gm
- gn
- gp
- gq
- gr
- gs
- gt
- gu
- gw
- gy
- hk
- hm
- hn
- hr
- ht
- hu
- id
- ie
- il
- im
- in
- io
- iq
- ir
- is
- it
- je
- jm
- jo
- jp
- ke
- kg
- kh
- ki
- km
- kn
- kp
- kr
- kw
- ky
- kz
- la
- lb
- lc
- li
- lk
- lr
- ls
- lt
- lu
- lv
- ly
- ma
- mc
- md
- me
- mf
- mg
- mh
- mk
- ml
- mm
- mn
- mo
- mp
- mq
- mr
- ms
- mt
- mu
- mv
- mw
- mx
- my
- mz
- na
- nc
- ne
- nf
- ng
- ni
- nl
- 'no'
- np
- nr
- nu
- nz
- om
- pa
- pe
- pf
- pg
- ph
- pk
- pl
- pm
- pn
- pr
- ps
- pt
- pw
- py
- qa
- re
- ro
- rs
- ru
- rw
- sa
- sb
- sc
- sd
- se
- sg
- sh
- si
- sj
- sk
- sl
- sm
- sn
- so
- sr
- ss
- st
- su
- sv
- sx
- sy
- sz
- tc
- td
- tf
- tg
- th
- tj
- tk
- tl
- tm
- tn
- to
- tp
- tr
- tt
- tv
- tw
- tz
- ua
- ug
- uk
- um
- us
- uy
- uz
- va
- vc
- ve
- vg
- vi
- vn
- vu
- wf
- ws
- ye
- yt
- za
- zm
- zw
- "ελ"
- "бел"
- "мкд"
- "мон"
- "рф"
- "срб"
- "укр"
- "қаз"
- "հայ"
- "الاردن"
- "الجزائر"
- "السعودية"
- "المغرب"
- "امارات"
- "ایران"
- "بھارت"
- "تونس"
- "سودان"
- "سورية"
- "عراق"
- "عمان"
- "فلسطين"
- "قطر"
- "مصر"
- "مليسيا"
- "پاکستان"
- "भारत"
- "বাংলা"
- "ভারত"
- "ਭਾਰਤ"
- "ભારત"
- "இந்தியா"
- "இலங்கை"
- "சிங்கப்பூர்"
- "భారత్"
- "ලංකා"
- "ไทย"
- "გე"
- "中国"
- "中國"
- "台湾"
- "台灣"
- "新加坡"
- "澳門"
- "香港"
- "한국"
generic:
- abb
- abbott
- abogado
- academy
- accenture
- accountant
- accountants
- aco
- active
- actor
- ads
- adult
- aeg
- aero
- afl
- agency
- aig
- airforce
- airtel
- allfinanz
- alsace
- amsterdam
- android
- apartments
- app
- aquarelle
- archi
- army
- arpa
- asia
- associates
- attorney
- auction
- audio
- auto
- autos
- axa
- azure
- band
- bank
- bar
- barcelona
- barclaycard
- barclays
- bargains
- bauhaus
- bayern
- bbc
- bbva
- bcn
- beer
- bentley
- berlin
- best
- bet
- bharti
- bible
- bid
- bike
- bing
- bingo
- bio
- biz
- black
- blackfriday
- bloomberg
- blue
- bmw
- bnl
- bnpparibas
- boats
- bond
- boo
- boots
- boutique
- bradesco
- bridgestone
- broker
- brother
- brussels
- budapest
- build
- builders
- business
- buzz
- bzh
- cab
- cafe
- cal
- camera
- camp
- cancerresearch
- canon
- capetown
- capital
- caravan
- cards
- care
- career
- careers
- cars
- cartier
- casa
- cash
- casino
- cat
- catering
- cba
- cbn
- ceb
- center
- ceo
- cern
- cfa
- cfd
- chanel
- channel
- chat
- cheap
- chloe
- christmas
- chrome
- church
- cisco
- citic
- city
- claims
- cleaning
- click
- clinic
- clothing
- cloud
- club
- coach
- codes
- coffee
- college
- cologne
- com
- commbank
- community
- company
- computer
- condos
- construction
- consulting
- contractors
- cooking
- cool
- coop
- corsica
- country
- coupons
- courses
- credit
- creditcard
- cricket
- crown
- crs
- cruises
- cuisinella
- cymru
- cyou
- dabur
- dad
- dance
- date
- dating
- datsun
- day
- dclk
- deals
- degree
- delivery
- delta
- democrat
- dental
- dentist
- desi
- design
- dev
- diamonds
- diet
- digital
- direct
- directory
- discount
- dnp
- docs
- dog
- doha
- domains
- doosan
- download
- drive
- durban
- dvag
- earth
- eat
- edu
- education
- email
- emerck
- energy
- engineer
- engineering
- enterprises
- epson
- equipment
- erni
- esq
- estate
- eurovision
- eus
- events
- everbank
- exchange
- expert
- exposed
- express
- fage
- fail
- faith
- family
- fan
- fans
- farm
- fashion
- feedback
- film
- finance
- financial
- firmdale
- fish
- fishing
- fit
- fitness
- flights
- florist
- flowers
- flsmidth
- fly
- foo
- football
- forex
- forsale
- forum
- foundation
- frl
- frogans
- fund
- furniture
- futbol
- fyi
- gal
- gallery
- game
- garden
- gbiz
- gdn
- gent
- genting
- ggee
- gift
- gifts
- gives
- giving
- glass
- gle
- global
- globo
- gmail
- gmo
- gmx
- gold
- goldpoint
- golf
- goo
- goog
- google
- gop
- gov
- graphics
- gratis
- green
- gripe
- group
- guge
- guide
- guitars
- guru
- hamburg
- hangout
- haus
- healthcare
- help
- here
- hermes
- hiphop
- hitachi
- hiv
- hockey
- holdings
- holiday
- homedepot
- homes
- honda
- horse
- host
- hosting
- hoteles
- hotmail
- house
- how
- hsbc
- ibm
- icbc
- ice
- icu
- ifm
- iinet
- immo
- immobilien
- industries
- infiniti
- info
- ing
- ink
- institute
- insure
- int
- international
- investments
- ipiranga
- irish
- ist
- istanbul
- itau
- iwc
- java
- jcb
- jetzt
- jewelry
- jlc
- jll
- jobs
- joburg
- jprs
- juegos
- kaufen
- kddi
- kim
- kitchen
- kiwi
- koeln
- komatsu
- krd
- kred
- kyoto
- lacaixa
- lancaster
- land
- lasalle
- lat
- latrobe
- law
- lawyer
- lds
- lease
- leclerc
- legal
- lexus
- lgbt
- liaison
- lidl
- life
- lighting
- limited
- limo
- link
- live
- lixil
- loan
- loans
- lol
- london
- lotte
- lotto
- love
- ltda
- lupin
- luxe
- luxury
- madrid
- maif
- maison
- man
- management
- mango
- market
- marketing
- markets
- marriott
- mba
- media
- meet
- melbourne
- meme
- memorial
- men
- menu
- miami
- microsoft
- mil
- mini
- mma
- mobi
- moda
- moe
- mom
- monash
- money
- montblanc
- mormon
- mortgage
- moscow
- motorcycles
- mov
- movie
- movistar
- mtn
- mtpc
- museum
- nadex
- nagoya
- name
- navy
- nec
- net
- netbank
- network
- neustar
- new
- news
- nexus
- ngo
- nhk
- nico
- ninja
- nissan
- nokia
- nra
- nrw
- ntt
- nyc
- office
- okinawa
- omega
- one
- ong
- onl
- online
- ooo
- oracle
- orange
- org
- organic
- osaka
- otsuka
- ovh
- page
- panerai
- paris
- partners
- parts
- party
- pet
- pharmacy
- philips
- photo
- photography
- photos
- physio
- piaget
- pics
- pictet
- pictures
- pink
- pizza
- place
- play
- plumbing
- plus
- pohl
- poker
- porn
- post
- praxi
- press
- pro
- prod
- productions
- prof
- properties
- property
- pub
- qpon
- quebec
- racing
- realtor
- realty
- recipes
- red
- redstone
- rehab
- reise
- reisen
- reit
- ren
- rent
- rentals
- repair
- report
- republican
- rest
- restaurant
- review
- reviews
- rich
- ricoh
- rio
- rip
- rocks
- rodeo
- rsvp
- ruhr
- run
- ryukyu
- saarland
- sakura
- sale
- samsung
- sandvik
- sandvikcoromant
- sanofi
- sap
- sarl
- saxo
- sca
- scb
- schmidt
- scholarships
- school
- schule
- schwarz
- science
- scor
- scot
- seat
- seek
- sener
- services
- sew
- sex
- sexy
- shiksha
- shoes
- show
- shriram
- singles
- site
- ski
- sky
- skype
- sncf
- soccer
- social
- software
- sohu
- solar
- solutions
- sony
- soy
- space
- spiegel
- spreadbetting
- srl
- starhub
- statoil
- studio
- study
- style
- sucks
- supplies
- supply
- support
- surf
- surgery
- suzuki
- swatch
- swiss
- sydney
- systems
- taipei
- tatamotors
- tatar
- tattoo
- tax
- taxi
- team
- tech
- technology
- tel
- telefonica
- temasek
- tennis
- thd
- theater
- tickets
- tienda
- tips
- tires
- tirol
- today
- tokyo
- tools
- top
- toray
- toshiba
- tours
- town
- toyota
- toys
- trade
- trading
- training
- travel
- trust
- tui
- ubs
- university
- uno
- uol
- vacations
- vegas
- ventures
- vermögensberater
- vermögensberatung
- versicherung
- vet
- viajes
- video
- villas
- vin
- vision
- vista
- vistaprint
- vlaanderen
- vodka
- vote
- voting
- voto
- voyage
- wales
- walter
- wang
- watch
- webcam
- website
- wed
- wedding
- weir
- whoswho
- wien
- wiki
- williamhill
- win
- windows
- wine
- wme
- work
- works
- world
- wtc
- wtf
- xbox
- xerox
- xin
- xperia
- xxx
- xyz
- yachts
- yandex
- yodobashi
- yoga
- yokohama
- youtube
- zip
- zone
- zuerich
- "дети"
- "ком"
- "москва"
- "онлайн"
- "орг"
- "рус"
- "сайт"
- "קום"
- "بازار"
- "شبكة"
- "كوم"
- "موقع"
- "कॉम"
- "नेट"
- "संगठन"
- "คอม"
- "みんな"
- "グーグル"
- "コム"
- "世界"
- "中信"
- "中文网"
- "企业"
- "佛山"
- "信息"
- "健康"
- "八卦"
- "公司"
- "公益"
- "商城"
- "商店"
- "商标"
- "在线"
- "大拿"
- "娱乐"
- "工行"
- "广东"
- "慈善"
- "我爱你"
- "手机"
- "政务"
- "政府"
- "新闻"
- "时尚"
- "机构"
- "淡马锡"
- "游戏"
- "点看"
- "移动"
- "组织机构"
- "网址"
- "网店"
- "网络"
- "谷歌"
- "集团"
- "飞利浦"
- "餐厅"
- "닷넷"
- "닷컴"
- "삼성"
- onion
twitter-text-1.13.4/lib/twitter-text.rb 0000644 0001750 0001750 00000000666 12667350232 020263 0 ustar sudheesh sudheesh major, minor, _patch = RUBY_VERSION.split('.')
# Ruby 1.8 KCODE check. Not needed on 1.9 and later: $KCODE is only inspected
# when the interpreter version is below 1.9.
legacy_ruby = major.to_i == 1 && minor.to_i < 9
if legacy_ruby && $KCODE[0].chr !~ /u/i
  raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'")
end
# false on Ruby 1.8, true on everything newer.
$RUBY_1_9 = !legacy_ruby
# Load the library components in this fixed order.
%w(
  deprecation
  regex
  rewriter
  autolink
  extractor
  unicode
  validation
  hit_highlighter
).each { |component| require "twitter-text/#{component}" }
twitter-text-1.13.4/lib/twitter-text/ 0000755 0001750 0001750 00000000000 12667350232 017726 5 ustar sudheesh sudheesh twitter-text-1.13.4/lib/twitter-text/hash_helper.rb 0000644 0001750 0001750 00000001171 12667350232 022535 0 ustar sudheesh sudheesh module Twitter
module HashHelper
  # Return a copy of +hash+ in which every key that can be converted with
  # +to_sym+ has been replaced by the resulting symbol. Keys that cannot be
  # converted are kept unchanged. The argument itself is not modified.
  #
  #   HashHelper.symbolize_keys('name' => 'Rob', 'years' => '28')
  #   #=> { :name => "Rob", :years => "28" }
  def self.symbolize_keys(hash)
    copy = hash.dup
    symbolize_keys!(copy)
  end

  # Destructive variant of +symbolize_keys+: rewrites the keys of +hash+ in
  # place and returns that same hash.
  def self.symbolize_keys!(hash)
    hash.keys.each do |original|
      converted = begin
        original.to_sym
      rescue StandardError
        # Conversion failed; keep the key as it is.
        original
      end
      # A nil conversion result also falls back to the original key.
      hash[converted || original] = hash.delete(original)
    end
    hash
  end
end
end
twitter-text-1.13.4/lib/twitter-text/extractor.rb 0000644 0001750 0001750 00000030114 12667350232 022265 0 ustar sudheesh sudheesh # encoding: UTF-8
class String
  # Number of characters (not bytes) in this string.
  #
  # When the string responds to +codepoints+, +length+ is used directly;
  # otherwise the characters are counted through +chars+.
  def char_length
    if respond_to? :codepoints
      length
    else
      chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size
    end
  end

  # Returns this string as an array of single-character strings.
  #
  # The array is rebuilt on every call. The result used to be memoized in an
  # instance variable on the string, which returned stale characters after the
  # string was mutated and raised when the string was frozen.
  def to_char_a
    return chars.to_a if chars.kind_of?(Enumerable)

    # +chars+ is not enumerable here: slice out each element and pack it as a
    # UTF-8 character.
    (0...char_length).map { |i| [chars.slice(i)].pack('U') }
  end
end
# Character-offset counterparts of MatchData#begin and MatchData#end.
class MatchData
  # Character offset at which group +n+ starts. When the matched string
  # responds to +codepoints+ the value of +begin+ is returned as is; otherwise
  # it is converted by measuring the prefix that precedes it.
  def char_begin(n)
    return self.begin(n) if string.respond_to?(:codepoints)

    string[0, self.begin(n)].char_length
  end

  # Character offset just past the end of group +n+; same strategy as
  # +char_begin+.
  def char_end(n)
    return self.end(n) if string.respond_to?(:codepoints)

    string[0, self.end(n)].char_length
  end
end
module Twitter
# A module for including Tweet parsing in a class. This module provides functions for the extraction and processing
# of usernames, lists, URLs and hashtags.
module Extractor extend self
# Remove overlapping entities.
# Entities are ordered by start index; an entity is dropped when it starts
# before the end of the most recently kept entity. A new array is returned
# and the argument is left untouched.
def remove_overlapping_entities(entities)
  kept = []
  entities.sort_by { |entity| entity[:indices].first }.each do |entity|
    previous = kept.last
    overlaps = previous && previous[:indices].last > entity[:indices].first
    kept << entity unless overlaps
  end
  kept
end
# Extracts all usernames, lists, hashtags, cashtags and URLs in the Tweet text
# along with the indices for where each entity occurred. Overlapping entities
# are removed. If the text is nil or contains no entity, an empty array
# is returned.
#
# +options+ is passed through to the URL extraction.
#
# If a block is given then it will be called for each entity.
def extract_entities_with_indices(text, options = {}, &block)
  # Gather the entities of every kind, then concatenate them in this order.
  found = [
    extract_urls_with_indices(text, options),
    extract_hashtags_with_indices(text, :check_url_overlap => false),
    extract_mentions_or_lists_with_indices(text),
    extract_cashtags_with_indices(text)
  ].inject { |all, part| all + part }
  return [] if found.empty?

  found = remove_overlapping_entities(found)
  found.each(&block) if block_given?
  found
end
# Extracts a list of all usernames mentioned in the Tweet text. If the
# text is nil or contains no username mentions, an empty array
# is returned.
#
# If a block is given then it will be called for each username.
def extract_mentioned_screen_names(text, &block) # :yields: username
  mentions = extract_mentioned_screen_names_with_indices(text)
  screen_names = mentions.map { |mention| mention[:screen_name] }
  screen_names.each(&block) if block
  screen_names
end
# Extracts a list of all usernames mentioned in the Tweet text
# along with the indices for where the mention occurred. List mentions are
# skipped. If the text is nil or contains no username mentions, an empty
# array is returned.
#
# If a block is given, then it will be called with each username, the start
# index, and the end index in the text.
def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end
  return [] unless text

  mentions = []
  extract_mentions_or_lists_with_indices(text) do |name, slug, from, to|
    # A non-empty slug marks a list mention, which is not a plain username.
    mentions << {:screen_name => name, :indices => [from, to]} if slug.empty?
  end

  if block_given?
    mentions.each do |mention|
      from, to = mention[:indices]
      yield mention[:screen_name], from, to
    end
  end
  mentions
end
# Extracts a list of all usernames or lists mentioned in the Tweet text
# along with the indices for where the mention occurred. If the
# text is nil or contains no username or list mentions, an empty array
# is returned.
#
# Each entry is a hash with :screen_name, :list_slug and :indices.
#
# If a block is given, then it will be called with each username, list slug, the start
# index, and the end index in the text. The list_slug will be an empty string
# if this is a username mention.
def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
  # Cheap pre-check. It uses the shared at-sign pattern rather than a private
  # literal so that it always agrees with the mention pattern scanned below.
  return [] unless text =~ Twitter::Regex[:at_signs]

  possible_entries = []
  text.to_s.scan(Twitter::Regex[:valid_mention_or_list]) do |_before, _at, screen_name, list_slug|
    match_data = Regexp.last_match
    # Skip candidates whose following text matches :end_mention_match.
    next if match_data.post_match =~ Twitter::Regex[:end_mention_match]

    # Group 3 is the screen name; step back one character to include the at sign.
    start_position = match_data.char_begin(3) - 1
    end_position = match_data.char_end(list_slug.nil? ? 3 : 4)
    possible_entries << {
      :screen_name => screen_name,
      :list_slug => list_slug || "",
      :indices => [start_position, end_position]
    }
  end

  if block_given?
    possible_entries.each do |mention|
      yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
    end
  end
  possible_entries
end
# Extracts the username replied to in the Tweet text. If the
# text is nil or is not a reply, nil is returned.
#
# If a block is given then it will be called with the username replied to (if any)
def extract_reply_screen_name(text) # :yields: username
  return nil unless text

  reply = text.match(Twitter::Regex[:valid_reply])
  return unless reply.respond_to?(:captures)
  # Not a reply when the text right after the name matches :end_mention_match.
  return if reply.post_match =~ Twitter::Regex[:end_mention_match]

  name = reply.captures.first
  yield name if block_given?
  name
end
# Extracts a list of all URLs included in the Tweet text. If the
# text is nil or contains no URLs, an empty array
# is returned.
#
# If a block is given then it will be called for each URL.
def extract_urls(text, &block) # :yields: url
  entities = extract_urls_with_indices(text)
  found = entities.map { |entity| entity[:url] }
  found.each(&block) if block
  found
end
# Extracts every URL found in the Tweet text together with its character
# offsets. Returns an array of hashes of the form
# { :url => String, :indices => [start, end] }; an empty array is returned
# when the text is nil or contains no URLs.
#
# options[:extract_url_without_protocol] (true in the default options hash)
# controls whether protocol-less URLs are returned. Note that the default
# only applies when +options+ is omitted entirely: a caller-supplied hash
# without that key disables protocol-less extraction.
#
# If a block is given then it will be called for each URL with the url, the
# start index and the end index.
def extract_urls_with_indices(text, options = {:extract_url_without_protocol => true}) # :yields: url, start, end
# Quick reject: protocol-less extraction needs at least a "." in the text,
# otherwise at least a ":" is required.
return [] unless text && (options[:extract_url_without_protocol] ? text.index(".") : text.index(":"))
urls = []
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
valid_url_match_data = $~
# Group 3 of :valid_url is the URL itself (without the preceding character).
start_position = valid_url_match_data.char_begin(3)
end_position = valid_url_match_data.char_end(3)
# If protocol is missing and domain contains non-ASCII characters,
# extract ASCII-only domains.
if !protocol
# Skip when protocol-less URLs are disabled or the preceding text rules one out.
next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars]
last_url = nil
domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain|
# Offsets of the ASCII domain are relative to the domain, so shift them
# by the URL's start position.
last_url = {
:url => ascii_domain,
:indices => [start_position + $~.char_begin(0),
start_position + $~.char_end(0)]
}
# Keep the domain when a path follows, when it is a special short domain,
# or when it is not an invalid short domain.
if path ||
ascii_domain =~ Twitter::Regex[:valid_special_short_domain] ||
ascii_domain !~ Twitter::Regex[:invalid_short_domain]
urls << last_url
end
end
# no ASCII-only domain found. Skip the entire URL
next unless last_url
# last_url only contains domain. Need to add path and query if they exist.
if path
# With a path present, last_url was already pushed by the loop above; it
# is the same hash object, so it is extended in place here.
last_url[:url] = url.sub(domain, last_url[:url])
last_url[:indices][1] = end_position
end
else
# In the case of t.co URLs, don't allow additional path characters
if url =~ Twitter::Regex[:valid_tco_url]
# Keep only the part matched by :valid_tco_url and recompute the end offset.
url = $&
end_position = start_position + url.char_length
end
urls << {
:url => url,
:indices => [start_position, end_position]
}
end
end
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
urls
end
# Extracts a list of all hashtags included in the Tweet text. If the
# text is nil or contains no hashtags, an empty array
# is returned. The array returned will not include the leading #
# character.
#
# If a block is given then it will be called for each hashtag.
def extract_hashtags(text, &block) # :yields: hashtag_text
  entities = extract_hashtags_with_indices(text)
  names = entities.map { |entity| entity[:hashtag] }
  names.each(&block) if block
  names
end
# Extracts all hashtags included in the Tweet text along with the indices
# for where each hashtag occurred. Returns an array of hashes of the form
# { :hashtag => String, :indices => [start, end] }. The :hashtag value does
# not include the leading hash character; the start index points at it.
# If the text is nil or contains no hashtags, an empty array is returned.
#
# With options[:check_url_overlap] set (as in the default options hash),
# hashtags that overlap an extracted URL are dropped.
#
# If a block is given then it will be called for each hashtag with the
# hashtag text, the start index and the end index.
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end
  # Cheap pre-check. Accept the ASCII "#" as well as the fullwidth number
  # sign U+FF03; the class previously listed "#" twice, which rejected
  # fullwidth hashtags before the hashtag pattern was ever consulted.
  return [] unless text =~ /[#\uff03]/

  tags = []
  text.scan(Twitter::Regex[:valid_hashtag]) do |_before, _hash, hash_text|
    match_data = $~
    # Group 2 is the hash character, group 3 the hashtag text.
    start_position = match_data.char_begin(2)
    end_position = match_data.char_end(3)
    after = $'
    # Skip candidates whose following text matches :end_hashtag_match.
    unless after =~ Twitter::Regex[:end_hashtag_match]
      tags << {
        :hashtag => hash_text,
        :indices => [start_position, end_position]
      }
    end
  end

  if options[:check_url_overlap]
    # extract URLs
    urls = extract_urls_with_indices(text)
    unless urls.empty?
      tags.concat(urls)
      # remove duplicates
      tags = remove_overlapping_entities(tags)
      # remove URL entities
      tags.reject! { |entity| !entity[:hashtag] }
    end
  end

  tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
  tags
end
# Extracts a list of all cashtags included in the Tweet text. If the
# text is nil or contains no cashtags, an empty array
# is returned. The array returned will not include the leading $
# character.
#
# If a block is given then it will be called for each cashtag.
def extract_cashtags(text, &block) # :yields: cashtag_text
  entities = extract_cashtags_with_indices(text)
  symbols = entities.map { |entity| entity[:cashtag] }
  symbols.each(&block) if block
  symbols
end
# Extracts all cashtags included in the Tweet text along with the indices
# for where each cashtag occurred. Returns an array of hashes of the form
# { :cashtag => String, :indices => [start, end] }. The :cashtag value does
# not include the leading $ character; the start index points at it.
# If the text is nil or contains no cashtags, an empty array is returned.
#
# If a block is given then it will be called for each cashtag with the
# cashtag text, the start index and the end index.
def extract_cashtags_with_indices(text) # :yields: cashtag_text, start, end
  return [] unless text =~ /\$/

  cashtags = []
  text.scan(Twitter::Regex[:valid_cashtag]) do |_before, _dollar, symbol|
    match = Regexp.last_match
    # Group 2 is the dollar sign, group 3 the cashtag text.
    cashtags << {
      :cashtag => symbol,
      :indices => [match.char_begin(2), match.char_end(3)]
    }
  end

  if block_given?
    cashtags.each do |tag|
      yield tag[:cashtag], tag[:indices].first, tag[:indices].last
    end
  end
  cashtags
end
end
end
twitter-text-1.13.4/lib/twitter-text/rewriter.rb 0000644 0001750 0001750 00000004052 12667350232 022117 0 ustar sudheesh sudheesh module Twitter
# A module providing base methods to rewrite usernames, lists, hashtags and URLs.
module Rewriter extend self
# Rebuilds +text+ with every entity replaced by the value the block returns
# for it. Entities may expose their offsets either through an +indices+
# method or through an :indices key. The block receives the entity and the
# array of characters of the text; text between entities is copied as is.
def rewrite_entities(text, entities)
  chars = text.to_s.to_char_a
  indices_of = lambda do |entity|
    entity.respond_to?(:indices) ? entity.indices : entity[:indices]
  end

  # Walk the entities in order of their start offset.
  ordered = entities.sort_by { |entity| indices_of.call(entity).first }

  pieces = []
  cursor = 0
  ordered.each do |entity|
    span = indices_of.call(entity)
    pieces << chars[cursor...span.first]
    pieces << yield(entity, chars)
    cursor = span.last
  end
  # Whatever follows the last entity.
  pieces << chars[cursor..-1]
  pieces.flatten.join
end
# These methods are deprecated, will be removed in future.
extend Deprecation
# Applies the rewriters selected in +options+ to +text+ and returns the
# result. Recognised keys are :hashtags, :urls and :usernames_or_lists; each
# value is a callable that is passed as the block to the matching
# rewrite_* method. Rewriters run in that order, each one operating on the
# output of the previous one. Keys that are absent leave the text unchanged.
def rewrite(text, options = {})
  # The inject block must take both the accumulator and the key. With a
  # single parameter the block received the text instead of the key, so no
  # option was ever found and the text was always returned untouched.
  [:hashtags, :urls, :usernames_or_lists].inject(text) do |rewritten, key|
    handler = options[key]
    handler ? send(:"rewrite_#{key}", rewritten, &handler) : rewritten
  end
end
deprecate :rewrite, :rewrite_entities
# Replaces every username or list mention in +text+ with the value returned
# by the block. The block receives the at-sign character found in the text,
# the screen name, and the list slug (nil for a plain username mention).
def rewrite_usernames_or_lists(text)
  mentions = Extractor.extract_mentions_or_lists_with_indices(text)
  rewrite_entities(text, mentions) do |mention, chars|
    at_sign = chars[mention[:indices].first]
    slug = mention[:list_slug]
    yield(at_sign, mention[:screen_name], slug.empty? ? nil : slug)
  end
end
deprecate :rewrite_usernames_or_lists, :rewrite_entities
# Replaces every hashtag in +text+ with the value returned by the block.
# The block receives the hash character found in the text and the hashtag
# text.
def rewrite_hashtags(text)
  hashtags = Extractor.extract_hashtags_with_indices(text)
  rewrite_entities(text, hashtags) do |hashtag, chars|
    hash_sign = chars[hashtag[:indices].first]
    yield(hash_sign, hashtag[:hashtag])
  end
end
deprecate :rewrite_hashtags, :rewrite_entities
# Replaces every URL in +text+ with the value returned by the block, which
# receives the URL string. URLs without a protocol are not rewritten.
def rewrite_urls(text)
  links = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
  rewrite_entities(text, links) { |link, _chars| yield(link[:url]) }
end
deprecate :rewrite_urls, :rewrite_entities
end
end
twitter-text-1.13.4/lib/twitter-text/unicode.rb 0000644 0001750 0001750 00000001624 12667350232 021704 0 ustar sudheesh sudheesh module Twitter
# This module lazily defines constants of the form Uxxxx for all Unicode
# codepoints from U0000 to U10FFFF. The value of each constant is the
# frozen UTF-8 string for the codepoint. An underscore may follow the "U",
# and hex digits may be given in either case.
# Examples:
#   copyright = Unicode::U00A9
#   euro = Unicode::U20AC
#   infinity = Unicode::U221E
#
module Unicode
  CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/

  # Invoked for an unknown constant. A name of the right form is turned into
  # a real constant holding the character; any other name raises NameError.
  def self.const_missing(name)
    codepoint = CODEPOINT_REGEX.match(name.to_s)
    # Raise an error for constants that are not Unicode.
    raise NameError, "Uninitialized constant: Unicode::#{name}" unless codepoint

    # Define the constant so later references no longer reach this hook.
    const_set(name, [codepoint[1].to_i(16)].pack("U").freeze)
  end
end
end
twitter-text-1.13.4/lib/twitter-text/regex.rb 0000644 0001750 0001750 00000034431 12667350232 021372 0 ustar sudheesh sudheesh # encoding: UTF-8
module Twitter
# A collection of regular expressions for parsing Tweet text. The regular expression
# list is frozen at load time to ensure immutability. These regular expressions are
# used throughout the Twitter classes. Special care has been taken to make
# sure these regular expressions work with Tweets in all languages.
class Regex
require 'yaml'
REGEXEN = {} # :nodoc:
# Builds the text for one codepoint, or a "from-to" range of codepoints,
# for use inside a regular-expression character class.
#
# When $RUBY_1_9 is set, each codepoint is written as a \u{xxxx} escape
# (hex, padded to at least four digits); otherwise the character itself is
# emitted.
def self.regex_range(from, to = nil) # :nodoc:
  endpoints = to ? [from, to] : [from]
  rendered = endpoints.map do |codepoint|
    if $RUBY_1_9
      "\\u{#{codepoint.to_s(16).rjust(4, '0')}}"
    else
      [codepoint].pack('U')
    end
  end
  rendered.join('-')
end
# Top-level domain lists, loaded from lib/assets/tld_lib.yml under the
# project root (three directory levels above this file).
TLDS = YAML.load_file(
  File.expand_path(File.join('..', '..', '..', 'lib', 'assets', 'tld_lib.yml'), __FILE__)
)
# Space is more than %20; U+3000, for example, is the full-width space used with Kanji. Provide a shorthand
# to access both the list of characters and a pattern suitable for use with String#split.
# Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
#
# UNICODE_SPACES is a frozen array of one-character strings, one per
# codepoint listed below (ranges are expanded by the flatten call).
UNICODE_SPACES = [
(0x0009..0x000D).to_a, # White_Space # Cc [5] ..
0x0020, # White_Space # Zs SPACE
0x0085, # White_Space # Cc
0x00A0, # White_Space # Zs NO-BREAK SPACE
0x1680, # White_Space # Zs OGHAM SPACE MARK
0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
0x2028, # White_Space # Zl LINE SEPARATOR
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
].flatten.map{|c| [c].pack('U*')}.freeze
# Character class matching any single one of the characters above.
REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o
# Characters not allowed in Tweets, as a frozen array of one-character
# strings.
INVALID_CHARACTERS = [
0xFFFE, 0xFEFF, # BOM
0xFFFF, # Special
0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
].map{|cp| [cp].pack('U') }.freeze
# Character class matching any single one of the characters above.
REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o
# List name pattern: one ASCII letter followed by up to 24 further characters
# (letters, digits, "_", "-", or U+0080..U+00FF). The pattern is unanchored.
# Two spellings are needed because the \u escapes are only used on Ruby 1.9+,
# Ruby 2+, JRuby and Rubinius.
major, minor, _patch = RUBY_VERSION.split('.')
if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE))
REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/
else
# This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius.
# The pattern is therefore built with eval from a constant string, so it is
# only compiled when this branch actually runs.
REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/")
end
# Latin accented characters
# Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
# Also excludes 0xf7, the division sign
#
# LATIN_ACCENTS is a frozen string of character-class content (ranges and
# single codepoints produced by regex_range); it is meant to be interpolated
# between "[" and "]".
LATIN_ACCENTS = [
regex_range(0xc0, 0xd6),
regex_range(0xd8, 0xf6),
regex_range(0xf8, 0xff),
regex_range(0x0100, 0x024f),
regex_range(0x0253, 0x0254),
regex_range(0x0256, 0x0257),
regex_range(0x0259),
regex_range(0x025b),
regex_range(0x0263),
regex_range(0x0268),
regex_range(0x026f),
regex_range(0x0272),
regex_range(0x0289),
regex_range(0x028b),
regex_range(0x02bb),
regex_range(0x0300, 0x036f),
regex_range(0x1e00, 0x1eff)
].join('').freeze
# One or more consecutive characters from the set above.
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
# Character-class content (a frozen string, built like LATIN_ACCENTS) covering
# four codepoint ranges.
# NOTE(review): judging by the name these are right-to-left script blocks
# (Arabic, Hebrew and related) -- confirm against the Unicode block list.
RTL_CHARACTERS = [
regex_range(0x0600,0x06FF),
regex_range(0x0750,0x077F),
regex_range(0x0590,0x05FF),
regex_range(0xFE70,0xFEFF)
].join('').freeze
# Character-class fragments that are interpolated between "[" and "]" further
# down. PUNCTUATION_CHARS is single-quoted so that the backslashes in front
# of "[", "]" and "`" reach the regular expression unchanged.
PUNCTUATION_CHARS = '!"#$%&\'()*+,-./:;<=>?@\[\]^_\`{|}~'
# ASCII whitespace: space, tab, newline, vertical tab, form feed, carriage return.
SPACE_CHARS = " \t\n\x0B\f\r"
# The range 0x00-0x1F plus 0x7F.
CTRL_CHARS = "\x00-\x1F\x7F"
# A hashtag must contain at least one unicode letter or mark, as well as numbers, underscores, and select special characters.
HASHTAG_ALPHA = /[\p{L}\p{M}]/
HASHTAG_ALPHANUMERIC = /[\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
# What may precede the hash character: start or end of the string, or any
# character outside the excluded set.
HASHTAG_BOUNDARY = /\A|\z|[^&\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
# Groups: $1 boundary, $2 hash character, $3 hashtag text.
# The hash character is either the ASCII "#" or the fullwidth number sign
# U+FF03; the alternation previously listed "#" twice, so fullwidth hashtags
# were never matched. A hash character directly followed by U+FE0F or U+20E3
# is not treated as a hashtag.
HASHTAG = /(#{HASHTAG_BOUNDARY})(#|\uff03)(?!\ufe0f|\u20e3)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io
REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
# Used in Extractor for final filtering: a hashtag candidate is discarded
# when the text right after it starts with a hash character or with "://".
# Both the ASCII "#" and the fullwidth number sign U+FF03 count as hash
# characters; the class previously listed "#" twice.
REGEXEN[:end_hashtag_match] = /\A(?:[#\uff03]|:\/\/)/o
# What may precede a mention: any character outside the excluded set, the
# start of a line, or an "RT" marker (optionally followed by ":").
# The excluded set contains both the ASCII "@" and the fullwidth commercial
# at U+FF20; the class previously listed "@" twice.
REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_!#\$%&*@\uff20]|^|(?:^|[^a-zA-Z0-9_+~.-])[rR][tT]:?)/o
# The characters that start a mention: ASCII "@" and fullwidth U+FF20.
# This class also listed "@" twice, so fullwidth mentions were never matched.
REGEXEN[:at_signs] = /[@\uff20]/
REGEXEN[:valid_mention_or_list] = /
(#{REGEXEN[:valid_mention_preceding_chars]}) # $1: Preceeding character
(#{REGEXEN[:at_signs]}) # $2: At mark
([a-zA-Z0-9_]{1,20}) # $3: Screen name
(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})? # $4: List (optional)
/ox
# A reply starts (after optional spaces) with an at sign and a screen name;
# the screen name is captured in $1.
REGEXEN[:valid_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
# Used in Extractor for final filtering
REGEXEN[:end_mention_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
# URL related hash regex collection
# What may precede a URL: the start of a line or any character outside the
# excluded set. The excluded set covers ASCII letters and digits, "$", the
# invalid characters, and both widths of the at sign and hash character
# (ASCII "@" / "#" and fullwidth U+FF20 / U+FF03). The class previously
# listed the ASCII "@" and "#" twice each.
REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@\uff20$#\uff03#{INVALID_CHARACTERS.join('')}]|^)/io
# A protocol-less URL is not extracted when the text before it ends with one
# of these characters.
REGEXEN[:invalid_url_without_protocol_preceding_chars] = /[-_.\/]$/
# Character class (as a string) for a single character allowed in a domain
# label: anything that is not punctuation, whitespace, a control character,
# an invalid character or a Unicode space.
DOMAIN_VALID_CHARS = "[^#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
# One label followed by a dot. Labels start and end with a valid character;
# subdomain labels may contain "_" and "-" in between, while the domain name
# label may only contain "-".
REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
# Generic and country-code TLDs come from the TLDS lists. The lookahead
# requires that the TLD is not followed by an ASCII letter, digit or "@".
REGEXEN[:valid_gTLD] = %r{
(?:
(?:#{TLDS['generic'].join('|')})
(?=[^0-9a-z@]|$)
)
}ix
REGEXEN[:valid_ccTLD] = %r{
(?:
(?:#{TLDS['country'].join('|')})
(?=[^0-9a-z@]|$)
)
}ix
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i
# The country-code TLDs "co" and "tv", which get special treatment for short
# domains (see :valid_special_short_domain below).
REGEXEN[:valid_special_cctld] = %r{
(?:
(?:co|tv)
(?=[^0-9a-z@]|$)
)
}ix
# Any number of subdomains, a domain name, then a TLD or punycode suffix.
REGEXEN[:valid_domain] = /(?:
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
)/iox
# This is used in Extractor
# to pull domains made of ASCII letters, digits, "-", "_" and Latin accented
# characters out of a protocol-less match.
REGEXEN[:valid_ascii_domain] = /
(?:(?:[A-Za-z0-9\-_]|#{REGEXEN[:latin_accents]})+\.)+
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
/iox
# This is used in Extractor for stricter t.co URL extraction
REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/[a-z0-9]+/i
# This is used in Extractor to filter out unwanted URLs.
# Both patterns match a whole string consisting of a single domain name label
# plus a TLD: any country-code TLD for the "invalid" pattern, only the
# special ones for the "valid special" pattern.
REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io
# One or more decimal digits.
REGEXEN[:valid_port_number] = /[0-9]+/
# A single character allowed anywhere inside a URL path.
REGEXEN[:valid_general_url_path_chars] = /[a-z\p{Cyrillic}0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&\|@#{LATIN_ACCENTS}]/io
# Allow URL paths to contain up to two nested levels of balanced parens
# 1. Used in Wikipedia URLs like /Primer_(film)
# 2. Used in IIS sessions like /S(dfd346)/
# 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
REGEXEN[:valid_url_balanced_parens] = /
\(
(?:
#{REGEXEN[:valid_general_url_path_chars]}+
|
# allow one nested level of balanced parentheses
(?:
#{REGEXEN[:valid_general_url_path_chars]}*
\(
#{REGEXEN[:valid_general_url_path_chars]}+
\)
#{REGEXEN[:valid_general_url_path_chars]}*
)
)
\)
/iox
# Valid end-of-path characters (so /foo. does not gobble the period).
# 1. Allow = for empty URL parameters and other URL-join artifacts
REGEXEN[:valid_url_path_ending_chars] = /[a-z\p{Cyrillic}0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
# A path segment: either path characters (optionally with balanced
# parenthesised parts) finished by a valid ending character, or path
# characters finished by a slash.
REGEXEN[:valid_url_path] = /(?:
(?:
#{REGEXEN[:valid_general_url_path_chars]}*
(?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
#{REGEXEN[:valid_url_path_ending_chars]}
)|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
)/iox
# Characters allowed inside a query string, and the narrower set it may end with.
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/\-]/i
# The complete URL pattern. The capture groups are numbered in the inline
# comments below; Extractor takes the URL offsets from group 3.
REGEXEN[:valid_url] = %r{
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter
( # $3 URL
(https?:\/\/)? # $4 Protocol (optional)
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
)
)
}iox
REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
# These URL validation pattern strings are based on the ABNF from RFC 3986
REGEXEN[:validate_url_unreserved] = /[a-z\p{Cyrillic}0-9\-._~]/i
REGEXEN[:validate_url_pct_encoded] = /(?:%[0-9a-f]{2})/i
REGEXEN[:validate_url_sub_delims] = /[!$&'()*+,;=]/i
# Path character: unreserved, percent-encoded, sub-delimiter, or one of
# ":", "|" and "@".
REGEXEN[:validate_url_pchar] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}|
[:\|@]
)/iox
REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
# Userinfo may be empty: the group is repeated zero or more times.
REGEXEN[:validate_url_userinfo] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}|
:
)*/iox
# One decimal octet of an IPv4 address, written as alternatives for
# 0-9, 10-99, 100-199, 200-249 and 250-255.
REGEXEN[:validate_url_dec_octet] = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i
REGEXEN[:validate_url_ipv4] =
/(?:#{REGEXEN[:validate_url_dec_octet]}(?:\.#{REGEXEN[:validate_url_dec_octet]}){3})/iox
# Punting on real IPv6 validation for now
REGEXEN[:validate_url_ipv6] = /(?:\[[a-f0-9:\.]+\])/i
# Also punting on IPvFuture for now
REGEXEN[:validate_url_ip] = /(?:
#{REGEXEN[:validate_url_ipv4]}|
#{REGEXEN[:validate_url_ipv6]}
)/iox
# This is more strict than the rfc specifies
# Sub-domain segments allow "_" and "-" between the first and last character;
# domain segments and TLDs allow only "-", and a TLD must start with a letter.
REGEXEN[:validate_url_subdomain_segment] = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain_segment] = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain_tld] = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain] = /(?:(?:#{REGEXEN[:validate_url_subdomain_segment]}\.)*
(?:#{REGEXEN[:validate_url_domain_segment]}\.)
#{REGEXEN[:validate_url_domain_tld]})/iox
# Host: an IP literal or a domain name.
REGEXEN[:validate_url_host] = /(?:
#{REGEXEN[:validate_url_ip]}|
#{REGEXEN[:validate_url_domain]}
)/iox
# Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
# These mirror the three ASCII segment patterns above but additionally accept
# any character outside \x00-\x7f wherever a letter or digit is accepted.
REGEXEN[:validate_url_unicode_subdomain_segment] =
/(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain_segment] =
/(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain_tld] =
/(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain] = /(?:(?:#{REGEXEN[:validate_url_unicode_subdomain_segment]}\.)*
(?:#{REGEXEN[:validate_url_unicode_domain_segment]}\.)
#{REGEXEN[:validate_url_unicode_domain_tld]})/iox
REGEXEN[:validate_url_unicode_host] = /(?:
#{REGEXEN[:validate_url_ip]}|
#{REGEXEN[:validate_url_unicode_domain]}
)/iox
# Port: one to five ASCII digits.
REGEXEN[:validate_url_port] = /[0-9]{1,5}/
# Authority = optional "userinfo@", a host, optional ":port". The two
# variants differ only in which host pattern they embed.
REGEXEN[:validate_url_unicode_authority] = %r{
(?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo
(#{REGEXEN[:validate_url_unicode_host]}) # $2 host
(?::(#{REGEXEN[:validate_url_port]}))? # $3 port
}iox
REGEXEN[:validate_url_authority] = %r{
(?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo
(#{REGEXEN[:validate_url_host]}) # $2 host
(?::(#{REGEXEN[:validate_url_port]}))? # $3 port
}iox
# Path, query and fragment may each be empty (every group is starred).
REGEXEN[:validate_url_path] = %r{(/#{REGEXEN[:validate_url_pchar]}*)*}i
REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
# Modified version of RFC 3986 Appendix B
# Splits a whole string into scheme, authority, path, query and fragment
# captures without validating the individual parts.
REGEXEN[:validate_url_unencoded] = %r{
\A # Full URL
(?:
([^:/?#]+):// # $1 Scheme
)?
([^/?#]*) # $2 Authority
([^?#]*) # $3 Path
(?:
\?([^#]*) # $4 Query
)?
(?:
\#(.*) # $5 Fragment
)?\Z
}ix
# Any single character from RTL_CHARACTERS.
REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
# Freeze every stored pattern so callers cannot modify the shared objects.
REGEXEN.each_pair{|k,v| v.freeze }
# Return the regular expression for a given key. If the key
# is not a known symbol a nil will be returned.
def self.[](key)
# Plain hash lookup on the REGEXEN table.
REGEXEN[key]
end
end
end
twitter-text-1.13.4/lib/twitter-text/deprecation.rb 0000644 0001750 0001750 00000000677 12667350232 022562 0 ustar sudheesh sudheesh module Twitter
module Deprecation
  # Class-level macro that turns an existing instance method into a
  # deprecated one.
  #
  # The current implementation stays reachable as +deprecated_<method>+.
  # +method+ itself is redefined to emit a warning and then forward all
  # arguments and the block to that preserved implementation.
  #
  # method::     name of the method to deprecate
  # new_method:: optional name of the replacement, mentioned in the warning
  def deprecate(method, new_method = nil)
    preserved_name = :"deprecated_#{method}"

    notice = "Deprecation: `#{method}` is deprecated."
    if new_method
      notice << " Please use `#{new_method}` instead."
    end

    # Keep the original body under the preserved name before replacing it.
    alias_method(preserved_name, method)

    define_method(method) do |*args, &block|
      warn notice
      send(preserved_name, *args, &block)
    end
  end
end
end
twitter-text-1.13.4/lib/twitter-text/autolink.rb 0000644 0001750 0001750 00000054370 12667350232 022112 0 ustar sudheesh sudheesh # encoding: UTF-8
require 'set'
require 'twitter-text/hash_helper'
module Twitter
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
# usernames, lists, hashtags and URLs.
module Autolink extend self
# Default CSS class for auto-linked lists
DEFAULT_LIST_CLASS = "tweet-url list-slug".freeze
# Default CSS class for auto-linked usernames
DEFAULT_USERNAME_CLASS = "tweet-url username".freeze
# Default CSS class for auto-linked hashtags
DEFAULT_HASHTAG_CLASS = "tweet-url hashtag".freeze
# Default CSS class for auto-linked cashtags
DEFAULT_CASHTAG_CLASS = "tweet-url cashtag".freeze
# Default URL base for auto-linked usernames
DEFAULT_USERNAME_URL_BASE = "https://twitter.com/".freeze
# Default URL base for auto-linked lists
DEFAULT_LIST_URL_BASE = "https://twitter.com/".freeze
# Default URL base for auto-linked hashtags
DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%23".freeze
# Default URL base for auto-linked cashtags
DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%24".freeze
# Default attributes for invisible span tag
DEFAULT_INVISIBLE_TAG_ATTRS = "style='position:absolute;left:-9999px;'".freeze
# Option values that auto_link_entities merges the caller's options over.
# There is deliberately no :url_class entry here.
DEFAULT_OPTIONS = {
:list_class => DEFAULT_LIST_CLASS,
:username_class => DEFAULT_USERNAME_CLASS,
:hashtag_class => DEFAULT_HASHTAG_CLASS,
:cashtag_class => DEFAULT_CASHTAG_CLASS,
:username_url_base => DEFAULT_USERNAME_URL_BASE,
:list_url_base => DEFAULT_LIST_URL_BASE,
:hashtag_url_base => DEFAULT_HASHTAG_URL_BASE,
:cashtag_url_base => DEFAULT_CASHTAG_URL_BASE,
:invisible_tag_attrs => DEFAULT_INVISIBLE_TAG_ATTRS
}.freeze
def auto_link_with_json(text, json, options = {})
  # Every value of +json+ is flattened into a single entity list, and each
  # entity is converted to the shape auto_link_entities expects.
  # be careful not to alter arguments received
  entities = json.values.flatten.map do |raw_entity|
    converted = HashHelper.symbolize_keys(raw_entity)
    # An entity carrying :text is treated as a hashtag.
    converted[:hashtag] = converted[:text] if converted[:text]
    converted
  end

  auto_link_entities(text, entities, options)
end
def auto_link_entities(text, entities, options = {}, &block)
  # Nothing to link: hand the caller's text back untouched.
  return text if entities.empty?

  # NOTE deprecate these attributes not options keys in options hash, then use html_attrs
  settings = DEFAULT_OPTIONS.merge(options)

  # Keys that are not linker options are moved out of +settings+ and become
  # HTML attributes shared by every generated link.
  shared_attrs = extract_html_attrs_from_options!(settings)
  unless settings[:suppress_no_follow]
    shared_attrs[:rel] ||= "nofollow"
  end
  if settings[:target_blank] == true
    shared_attrs[:target] = "_blank"
  end
  settings[:html_attrs] = shared_attrs

  # The rewriter receives a copy of the text and calls the block per entity;
  # the first recognised key on the entity picks the link builder.
  Twitter::Rewriter.rewrite_entities(text.dup, entities) do |entity, chars|
    if entity[:url] then link_to_url(entity, chars, settings, &block)
    elsif entity[:hashtag] then link_to_hashtag(entity, chars, settings, &block)
    elsif entity[:screen_name] then link_to_screen_name(entity, chars, settings, &block)
    elsif entity[:cashtag] then link_to_cashtag(entity, chars, settings, &block)
    end
  end
end
# Add tags around the usernames, lists, hashtags and URLs in the provided text.
# The tags can be controlled with the following entries in the options hash:
# Also any elements in the options hash will be converted to HTML attributes
# and place in the tag.
#
# :url_class:: class to add to url tags
# :list_class:: class to add to list tags
# :username_class:: class to add to username tags
# :hashtag_class:: class to add to hashtag tags
# :cashtag_class:: class to add to cashtag tags
# :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
# :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
# :hashtag_url_base:: the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this.
# :cashtag_url_base:: the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this.
# :invisible_tag_attrs:: HTML attribute to add to invisible span tags
# :username_include_symbol:: place the @ symbol within username and list links
# :suppress_lists:: disable auto-linking to lists
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :url_target:: the value for target attribute on URL links.
# :target_blank:: adds target="_blank" to all auto_linked items username / hashtag / cashtag links / urls
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link(text, options = {}, &block)
  # All entity kinds are extracted at once; the extractor is asked not to
  # extract URLs that lack a protocol.
  entities = Extractor.extract_entities_with_indices(text, :extract_url_without_protocol => false)
  auto_link_entities(text, entities, options, &block)
end
# Add tags around the usernames and lists in the provided text. The
# tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and place in the tag.
#
# :list_class:: class to add to list tags
# :username_class:: class to add to username tags
# :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
# :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
# :username_include_symbol:: place the @ symbol within username and list links
# :suppress_lists:: disable auto-linking to lists
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_usernames_or_lists(text, options = {}, &block) # :yields: list_or_username
  # Only mention and list entities are extracted and linked.
  mentions = Extractor.extract_mentions_or_lists_with_indices(text)
  auto_link_entities(text, mentions, options, &block)
end
# Add tags around the hashtags in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and place in the tag.
#
# :hashtag_class:: class to add to hashtag tags
# :hashtag_url_base:: the value for href attribute. The hashtag text (minus the #) will be appended at the end of this.
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_hashtags(text, options = {}, &block) # :yields: hashtag_text
  # Only hashtag entities are extracted and linked.
  hashtags = Extractor.extract_hashtags_with_indices(text)
  auto_link_entities(text, hashtags, options, &block)
end
# Add tags around the cashtags in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and place in the tag.
#
# :cashtag_class:: class to add to cashtag tags
# :cashtag_url_base:: the value for href attribute. The cashtag text (minus the $) will be appended at the end of this.
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_cashtags(text, options = {}, &block) # :yields: cashtag_text
  # Only cashtag entities are extracted and linked.
  cashtags = Extractor.extract_cashtags_with_indices(text)
  auto_link_entities(text, cashtags, options, &block)
end
# Add tags around the URLs in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and place in the tag.
#
# :url_class:: class to add to url tags
# :invisible_tag_attrs:: HTML attribute to add to invisible span tags
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :url_target:: the value for target attribute on URL links.
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_urls(text, options = {}, &block)
  # Only URL entities are extracted; the extractor is asked not to extract
  # URLs that lack a protocol.
  urls = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
  auto_link_entities(text, urls, options, &block)
end
# These methods are deprecated, will be removed in future.
# Makes the `deprecate` macro callable in this module body.
extend Deprecation
# Deprecated: Please use auto_link_urls instead.
# Add tags around the URLs in the provided text.
# Any elements in the href_options hash will be converted to HTML attributes
# and place in the tag.
# Unless href_options contains :suppress_no_follow
# the rel="nofollow" attribute will be added.
alias :auto_link_urls_custom :auto_link_urls
# Wrap the alias so that calling it warns and points at auto_link_urls.
deprecate :auto_link_urls_custom, :auto_link_urls
private
# Replacement entity for each character that is unsafe inside HTML text or a
# double- or single-quoted attribute value.
HTML_ENTITIES = {
  '&' => '&amp;',
  '>' => '&gt;',
  '<' => '&lt;',
  '"' => '&quot;',
  "'" => '&#39;'
}.freeze

# Escapes &, <, >, " and ' in +text+ so it can be embedded in HTML.
#
# text:: any object; it is converted with +to_s+ before escaping
#
# Returns the escaped String, or +text+ itself when it is nil or false.
def html_escape(text)
  text && text.to_s.gsub(/[&"'><]/) { |character| HTML_ENTITIES[character] }
end
# NOTE We will make this private in future.
public :html_escape
# Options which should not be passed as HTML attributes
OPTIONS_NOT_ATTRIBUTES = Set.new([
  :url_class,
  :list_class,
  :username_class,
  :hashtag_class,
  :cashtag_class,
  :username_url_base,
  :list_url_base,
  :hashtag_url_base,
  :cashtag_url_base,
  :username_url_block,
  :list_url_block,
  :hashtag_url_block,
  :cashtag_url_block,
  :link_url_block,
  :username_include_symbol,
  :suppress_lists,
  :suppress_no_follow,
  :url_entities,
  :invisible_tag_attrs,
  :symbol_tag,
  :text_with_symbol_tag,
  :url_target,
  :target_blank,
  :link_attribute_block,
  :link_text_block
]).freeze

# Splits +options+ in place: every entry whose key is not a known linker
# option is removed from +options+ and collected into the returned hash of
# HTML attributes. Known option keys are left in +options+.
def extract_html_attrs_from_options!(options)
  collected = {}
  options.reject! do |name, setting|
    next false if OPTIONS_NOT_ATTRIBUTES.include?(name)

    collected[name] = setting
    true
  end
  collected
end
def url_entities_hash(url_entities)
  # Index the given URL entities by their :url value; a nil argument yields
  # an empty index.
  index = {}
  (url_entities || {}).each do |entity|
    # be careful not to alter arguments received
    symbolized = HashHelper.symbolize_keys(entity)
    index[symbolized[:url]] = symbolized
  end
  index
end
def link_to_url(entity, chars, options = {})
  url = entity[:url]

  # A caller-supplied :link_url_block may rewrite the href.
  url_block = options[:link_url_block]
  href = url_block ? url_block.call(url) : url

  # NOTE auto link to urls do not use any default values and options
  # like url_class but use suppress_no_follow.
  html_attrs = options[:html_attrs].dup
  html_attrs[:class] = options[:url_class] if options.key?(:url_class)
  # add target attribute only if :url_target is specified
  html_attrs[:target] = options[:url_target] if options.key?(:url_target)

  # use entity from urlEntities if available
  url_entity = url_entities_hash(options[:url_entities])[url] || entity

  link_text =
    if url_entity[:display_url]
      html_attrs[:title] ||= url_entity[:expanded_url]
      link_url_with_entity(url_entity, options)
    else
      html_escape(url)
    end

  link_to_text(entity, link_text, href, html_attrs, options)
end
# Builds the inner HTML for a link whose entity carries a display URL.
#
# entity::  hash with :display_url and :expanded_url
# options:: linker options; :invisible_tag_attrs supplies the attributes for
#           the spans that must be copied but not displayed
#
# Returns an HTML string. When the display URL (minus ellipses) is a
# substring of the expanded URL the result is a sequence of spans, otherwise
# it is just the escaped display URL.
def link_url_with_entity(entity, options)
  display_url = entity[:display_url]
  expanded_url = entity[:expanded_url]
  invisible_tag_attrs = options[:invisible_tag_attrs] || DEFAULT_INVISIBLE_TAG_ATTRS

  # Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
  # should contain the full original URL (expanded_url), not the display URL.
  #
  # Method: Whenever possible, we actually emit HTML that contains expanded_url, and hide
  # (via invisible_tag_attrs) those parts that should not be displayed because they are not
  # part of display_url. Hidden elements still get copied.
  # Note that display:none doesn't work here. Elements with display:none don't get copied.
  #
  # Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we
  # wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
  # everything with the tco-ellipsis class.
  #
  # Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1
  # For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
  # For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
  display_url_sans_ellipses = display_url.gsub("…", "")

  if expanded_url.include?(display_url_sans_ellipses)
    before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
    preceding_ellipsis = /\A…/.match(display_url).to_s
    following_ellipsis = /…\z/.match(display_url).to_s

    # As an example: The user tweets "hi http://longdomainname.com/foo"
    # This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
    # This will get rendered as:
    #
    #   span.tco-ellipsis   displayed, not copied:  "…" plus an invisible &nbsp;
    #   invisible span      copied, not displayed:  "http://longdomai"
    #   span.js-display-url displayed and copied:   "nname.com/foo"
    #   invisible span      copied, not displayed:  text after the display URL
    #   span.tco-ellipsis   displayed, not copied:  an invisible &nbsp; plus "…"
    #
    # The &nbsp; keeps the ellipsis from touching the URL should the onCopy
    # handler not run.
    %(<span class="tco-ellipsis">#{preceding_ellipsis}<span #{invisible_tag_attrs}>&nbsp;</span></span>) <<
      %(<span #{invisible_tag_attrs}>#{html_escape(before_display_url)}</span>) <<
      %(<span class="js-display-url">#{html_escape(display_url_sans_ellipses)}</span>) <<
      %(<span #{invisible_tag_attrs}>#{html_escape(after_display_url)}</span>) <<
      %(<span class="tco-ellipsis"><span #{invisible_tag_attrs}>&nbsp;</span>#{following_ellipsis}</span>)
  else
    html_escape(display_url)
  end
end
def link_to_hashtag(entity, chars, options = {})
  # The symbol actually typed in the text, read at the entity's start index.
  hash_symbol = chars[entity[:indices].first]

  # A block, when given, may transform the hashtag text.
  tag_text = entity[:hashtag]
  tag_text = yield(tag_text) if block_given?

  css_class = options[:hashtag_class].to_s
  css_class += ' rtl' if tag_text.match(Twitter::Regex::REGEXEN[:rtl_chars])

  url_block = options[:hashtag_url_block]
  href = url_block ? url_block.call(tag_text) : "#{options[:hashtag_url_base]}#{tag_text}"

  defaults = {
    :class => css_class,
    # FIXME As our conformance test, hash in title should be half-width,
    # this should be bug of conformance data.
    :title => "##{tag_text}"
  }
  # Caller-supplied attributes override the defaults above.
  html_attrs = defaults.merge(options[:html_attrs])

  link_to_text_with_symbol(entity, hash_symbol, tag_text, href, html_attrs, options)
end
def link_to_cashtag(entity, chars, options = {})
  # The symbol actually typed in the text, read at the entity's start index.
  dollar_symbol = chars[entity[:indices].first]

  # A block, when given, may transform the cashtag text.
  symbol_text = entity[:cashtag]
  symbol_text = yield(symbol_text) if block_given?

  url_block = options[:cashtag_url_block]
  href = url_block ? url_block.call(symbol_text) : "#{options[:cashtag_url_base]}#{symbol_text}"

  defaults = {
    :class => "#{options[:cashtag_class]}",
    :title => "$#{symbol_text}"
  }
  # Caller-supplied attributes override the defaults above.
  html_attrs = defaults.merge(options[:html_attrs])

  link_to_text_with_symbol(entity, dollar_symbol, symbol_text, href, html_attrs, options)
end
def link_to_screen_name(entity, chars, options = {})
  # Screen name immediately followed by the list slug (if there is one).
  full_name = "#{entity[:screen_name]}#{entity[:list_slug]}"

  # The link text starts as a copy of the name; a block may transform it.
  link_text = full_name.dup
  link_text = yield(link_text) if block_given?

  at_symbol = chars[entity[:indices].first]
  html_attrs = options[:html_attrs].dup

  slug = entity[:list_slug]
  if slug && !slug.empty? && !options[:suppress_lists]
    # List link: the URL block receives the untransformed name.
    url_block = options[:list_url_block]
    href = url_block ? url_block.call(full_name) : "#{options[:list_url_base]}#{full_name}"
    html_attrs[:class] ||= "#{options[:list_class]}"
  else
    # Username link: the URL block receives the (possibly transformed) text,
    # while the default href uses the untransformed name.
    url_block = options[:username_url_block]
    href = url_block ? url_block.call(link_text) : "#{options[:username_url_base]}#{full_name}"
    html_attrs[:class] ||= "#{options[:username_class]}"
  end

  link_to_text_with_symbol(entity, at_symbol, link_text, href, html_attrs, options)
end
# Builds a link for an entity that is written as a symbol plus text
# (@name, #tag, $CASH).
#
# entity::     the entity being linked, passed through to link_to_text
# symbol::     the leading symbol as it appeared in the text
# text::       the text after the symbol; it is HTML-escaped here
# href::       link target
# attributes:: HTML attributes for the anchor
# options::    :symbol_tag and :text_with_symbol_tag wrap the symbol and the
#              text in an element of that name; :username_include_symbol
#              places an at sign inside the link
#
# Returns the generated HTML string.
def link_to_text_with_symbol(entity, symbol, text, href, attributes = {}, options = {})
  symbol_tag = options[:symbol_tag]
  tagged_symbol = symbol_tag ? "<#{symbol_tag}>#{symbol}</#{symbol_tag}>" : symbol

  text = html_escape(text)
  text_tag = options[:text_with_symbol_tag]
  tagged_text = text_tag ? "<#{text_tag}>#{text}</#{text_tag}>" : text

  # An at sign stays outside the link unless :username_include_symbol is
  # set; any other symbol is always part of the link.
  if options[:username_include_symbol] || symbol !~ Twitter::Regex::REGEXEN[:at_signs]
    "#{link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)}"
  else
    "#{tagged_symbol}#{link_to_text(entity, tagged_text, href, attributes, options)}"
  end
end
# Renders the final anchor element for an entity.
#
# entity::     the entity being linked; only handed to the option blocks
# text::       inner HTML of the link (already escaped by the caller)
# href::       link target, stored into +attributes+ as :href
# attributes:: HTML attributes for the anchor; modified in place
# options::    :link_attribute_block is called with (entity, attributes) and
#              may modify the attributes; :link_text_block is called with
#              (entity, text) and its result replaces the text
#
# Returns the anchor as an HTML string.
def link_to_text(entity, text, href, attributes = {}, options = {})
  attributes[:href] = href
  options[:link_attribute_block].call(entity, attributes) if options[:link_attribute_block]
  text = options[:link_text_block].call(entity, text) if options[:link_text_block]
  %(<a#{tag_attrs(attributes)}>#{text}</a>)
end
# Attributes rendered as name="name" when truthy and omitted otherwise.
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze

# Serialises an attribute hash into a string of ` name="value"` pairs,
# ordered by attribute name. Nil values are skipped, Array values are
# compacted and joined with spaces, and names and values are HTML-escaped.
def tag_attrs(attributes)
  rendered = ""
  attributes.keys.sort_by { |name| name.to_s }.each do |name|
    value = attributes[name]
    value = (value ? name : nil) if BOOLEAN_ATTRIBUTES.include?(name)
    next if value.nil?

    value = value.compact.join(" ") if Array === value
    rendered << %( #{html_escape(name)}="#{html_escape(value)}")
  end
  rendered
end
end
end
twitter-text-1.13.4/lib/twitter-text/validation.rb 0000644 0001750 0001750 00000010764 12667350232 022415 0 ustar sudheesh sudheesh require 'unf'
module Twitter
module Validation extend self
# Longest length tweet_invalid? accepts before reporting :too_long.
MAX_LENGTH = 140
# Default lengths charged for shortened URLs; tweet_length merges the
# caller's options over these.
DEFAULT_TCO_URL_LENGTHS = {
:short_url_length => 23,
:short_url_length_https => 23,
:characters_reserved_per_media => 23
}.freeze
# Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
# (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
# string no matter which actual form was transmitted. For example:
#
# U+0065 Latin Small Letter E
# + U+0301 Combining Acute Accent
# ----------
# = 2 bytes, 2 characters, displayed as é (1 visual glyph)
# … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
#
# The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
#
def tweet_length(text, options = {})
  url_lengths = DEFAULT_TCO_URL_LENGTHS.merge(options)

  # Start from the number of code points in the NFC-normalised text.
  total = text.to_nfc.unpack("U*").length

  # Each extracted URL is charged at the shortened-link length rather than
  # its literal length.
  Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
    charged =
      if url.downcase =~ /^https:\/\//
        url_lengths[:short_url_length_https]
      else
        url_lengths[:short_url_length]
      end
    total += start_position - end_position + charged
  end

  total
end
# Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
# before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
# will allow quicker feedback.
#
# Returns false if this text is valid. Otherwise one of the following Symbols will be returned:
#
# :too_long:: if the text is too long
# :empty:: if the text is nil or empty
# :invalid_characters:: if the text contains non-Unicode or any of the disallowed Unicode characters
def tweet_invalid?(text)
  return :empty unless text && !text.empty?

  begin
    return :too_long if tweet_length(text) > MAX_LENGTH

    contains_invalid = Twitter::Regex::INVALID_CHARACTERS.any? do |invalid_char|
      text.include?(invalid_char)
    end
    return :invalid_characters if contains_invalid
  rescue ArgumentError
    # non-Unicode value.
    return :invalid_characters
  end

  false
end
def valid_tweet_text?(text)
  # tweet_invalid? yields false for valid text and a reason Symbol otherwise.
  failure_reason = tweet_invalid?(text)
  !failure_reason
end
def valid_username?(username)
  return false unless username && !username.empty?

  mentions = Twitter::Extractor.extract_mentioned_screen_names(username)
  # Should extract the username minus the @ sign, hence the [1..-1]
  mentions.size == 1 && mentions.first == username[1..-1]
end
# The :valid_mention_or_list pattern anchored to the whole string; used by
# valid_list?.
VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
# Checks whether +username_list+ is exactly one list reference.
#
# username_list:: the candidate String; nil and empty values are rejected
#
# Returns true when the whole string matches VALID_LIST_RE with an empty
# first capture and a non-empty fourth capture, false otherwise.
def valid_list?(username_list)
  # Same nil/empty guard as the other validators; without it a nil argument
  # raised NoMethodError.
  return false if !username_list || username_list.empty?

  match = username_list.match(VALID_LIST_RE)
  # Must have matched and had nothing before or after
  !!(match && match[1] == "" && match[4] && !match[4].empty?)
end
def valid_hashtag?(hashtag)
  return false unless hashtag && !hashtag.empty?

  found = Twitter::Extractor.extract_hashtags(hashtag)
  # Should extract the hashtag minus the # sign, hence the [1..-1]
  found.size == 1 && found.first == hashtag[1..-1]
end
def valid_url?(url, unicode_domains=true, require_protocol=true)
  return false if !url || url.empty?

  # Split the whole string into its components; the split must consume the
  # entire input.
  parts = url.match(Twitter::Regex[:validate_url_unencoded])
  return false unless parts && parts.to_s == url

  scheme, authority, path, query, fragment = parts.captures

  # When a protocol is required it must be a well-formed scheme and be
  # http or https.
  if require_protocol
    scheme_ok = valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i)
    return false unless scheme_ok
  end

  # The path is mandatory (it may be empty); query and fragment are optional.
  return false unless valid_match?(path, Twitter::Regex[:validate_url_path])
  return false unless valid_match?(query, Twitter::Regex[:validate_url_query], true)
  return false unless valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true)

  # The authority is checked against the unicode or the ASCII-only pattern.
  if unicode_domains
    valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority]) || false
  else
    valid_match?(authority, Twitter::Regex[:validate_url_authority])
  end
end
private
# Tells whether +regex+ matches +string+ and the match text equals the whole
# string.
#
# With +optional+ false the result is truthy only for such a match; a
# missing string or a failed match gives nil or false.
# With +optional+ true a nil/false string counts as valid (true), and a
# present string must match in full.
def valid_match?(string, regex, optional=false)
  if optional
    return true unless string

    found = string.match(regex)
    found ? found.to_s == string : false
  else
    return string unless string

    found = string.match(regex)
    return found unless found

    found.to_s == string
  end
end
end
end
twitter-text-1.13.4/lib/twitter-text/hit_highlighter.rb 0000644 0001750 0001750 00000005320 12667350232 023415 0 ustar sudheesh sudheesh module Twitter
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
# Useful with the results returned from the Search API.
module HitHighlighter extend self
# Default tag name used for hit highlighting. Frozen so the shared constant
# cannot be modified in place.
DEFAULT_HIGHLIGHT_TAG = "em".freeze
# Add tags around the hits provided in the text. The
# hits should be an array of (start, end) index pairs, relative to the original
# text, before auto-linking (but the text may already be auto-linked if desired)
#
# The tags can be overridden using the :tag option. For example:
#
# irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
# => "test <strong>hit</strong> here"
# Wraps each hit range of +text+ in an opening and a closing highlight tag.
#
# text::    the text to highlight; it may already contain markup
# hits::    array of [start, end] offset pairs; offsets count only the
#           characters outside of existing tags
# options:: :tag overrides the tag name (default DEFAULT_HIGHLIGHT_TAG)
#
# Returns +text+ itself when +hits+ is empty, otherwise a new String.
def hit_highlight(text, hits = [], options = {})
  return text if hits.empty?

  tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
  # Offsets at even positions open a highlight, odd positions close it.
  tags = ["<" + tag_name + ">", "</" + tag_name + ">"]

  # Splitting on angle brackets leaves plain text at even indices and the
  # inside of existing tags at odd indices.
  chunks = text.split(/[<>]/)

  result = []
  chunk_index, chunk = 0, chunks[0]
  chunk_chars = chunk.to_s.to_char_a
  prev_chunks_len = 0
  chunk_cursor = 0
  start_in_chunk = false

  hits.flatten.each_with_index do |hit, index|
    tag = tags[index % 2]
    placed = false

    # Emit whole chunks until the one containing this offset is current.
    until chunk.nil? || hit < prev_chunks_len + chunk.length do
      result << chunk_chars[chunk_cursor..-1]
      if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
        result << tag
        placed = true
      end

      # correctly handle highlights that end on the final character.
      if (tag_text = chunks[chunk_index + 1])
        result << "<#{tag_text}>"
      end

      prev_chunks_len += chunk_chars.length
      chunk_cursor = 0
      chunk_index += 2
      chunk = chunks[chunk_index]
      chunk_chars = chunk.to_s.to_char_a
      start_in_chunk = false
    end

    if !placed && !chunk.nil?
      hit_spot = hit - prev_chunks_len
      result << chunk_chars[chunk_cursor...hit_spot] << tag
      chunk_cursor = hit_spot
      # Remember whether a highlight was opened inside the current chunk.
      start_in_chunk = (index % 2 == 0)
      placed = true
    end

    # ultimate fallback, hits that run off the end get a closing tag
    result << tag unless placed
  end

  if chunk
    # Emit whatever remains of the current chunk, then every later chunk,
    # restoring the brackets around the tag chunks.
    if chunk_cursor < chunk_chars.length
      result << chunk_chars[chunk_cursor..-1]
    end
    (chunk_index + 1).upto(chunks.length - 1).each do |i|
      result << (i.even? ? chunks[i] : "<#{chunks[i]}>")
    end
  end

  result.flatten.join
end
end
end
twitter-text-1.13.4/.rspec 0000644 0001750 0001750 00000000030 12667350232 015602 0 ustar sudheesh sudheesh --color
--format=nested
twitter-text-1.13.4/README.rdoc 0000644 0001750 0001750 00000006254 12667350232 016311 0 ustar sudheesh sudheesh {rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
== twitter-text
A gem that provides text processing routines for Twitter Tweets. The major
reason for this is to unify the various auto-linking and extraction of
usernames, lists, hashtags and URLs.
== Extraction Examples
# Extraction
class MyClass
include Twitter::Extractor
usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
# usernames = ["twitter", "jack"]
end
# Extraction with a block argument
class MyClass
include Twitter::Extractor
extract_reply_screen_name("@twitter are you hiring?") do |username|
# username = "twitter"
end
end
== Auto-linking Examples
# Auto-link
class MyClass
include Twitter::Autolink
html = auto_link("link @user, please #request")
end
# For Ruby on Rails you want to add this to app/helpers/application_helper.rb
module ApplicationHelper
include Twitter::Autolink
end
# Now the auto_link function is available in every view. So in index.html.erb:
<%= auto_link("link @user, please #request") %>
=== Usernames
Username extraction and linking matches all valid Twitter usernames but does
not verify that the username is a valid Twitter account.
=== Lists
Auto-link and extract list names when they are written in @user/list-name
format.
=== Hashtags
Auto-link and extract hashtags, where a hashtag can contain most letters or
numbers but cannot be solely numbers and cannot contain punctuation.
=== URLs
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
a space to separate normal text from URLs making it difficult to identify where
the URL ends and the text starts.
For this reason twitter-text currently does not support extracting or auto-linking
of URLs immediately followed by non-Latin characters.
Example: "http://twitter.com/は素晴らしい" .
The normal text is "は素晴らしい" and is not part of the URL even though
it isn't space separated.
=== International
Special care has been taken to be sure that auto-linking and extraction work
in Tweets of all languages. This means that languages without spaces between
words should work equally well.
=== Hit Highlighting
Use this to provide emphasis around the "hits" returned from the Search API. It is
built to work against text that has already been auto-linked.
=== Thanks
Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
* At Twitter …
* Matt Sanford - http://github.com/mzsanford
* Raffi Krikorian - http://github.com/r
* Ben Cherry - http://github.com/bcherry
* Patrick Ewing - http://github.com/hoverbird
* Jeff Smick - http://github.com/sprsquish
* Kenneth Kufluk - https://github.com/kennethkufluk
* Keita Fujii - https://github.com/keitaf
* Yoshimasa Niwa - https://github.com/niw
* Patches from the community …
* Jean-Philippe Bougie - http://github.com/jpbougie
* Erik Michaels-Ober - https://github.com/sferik
* Anyone who has filed an issue. It helps. Really.
=== Copyright and License
Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
twitter-text-1.13.4/LICENSE 0000644 0001750 0001750 00000023610 12667350232 015503 0 ustar sudheesh sudheesh Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this work except in compliance with the License.
You may obtain a copy of the License below, or at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
twitter-text-1.13.4/test/ 0000755 0001750 0001750 00000000000 12670063203 015444 5 ustar sudheesh sudheesh twitter-text-1.13.4/test/twitter-text-conformance/ 0000755 0001750 0001750 00000000000 12670063203 022420 5 ustar sudheesh sudheesh twitter-text-1.13.4/test/twitter-text-conformance/README.md 0000644 0001750 0001750 00000011722 12670063203 023702 0 ustar sudheesh sudheesh
## Purpose
This conformance package provides a cross-platform definition of the test cases for auto linking, extracting and hit
highlighting of Tweets. The primary use for this is the twitter-text-* libraries; both those managed by Twitter and
those created by the community.
The reason for this conformance suite is to provide a way to keep the various implementations of Twitter text handling
working in a consistent and interoperable way. While anyone can feel free to implement this logic however they choose
the recommendation to developers is to use libraries which pass this conformance suite.
## Format
The test cases are stored in YAML files. There is one YAML file for each major operation type, and within those files
there is one section for each publicly accessible API. Each test case is defined by:
* description: This provides a meaningful name for the test case, for use as an error message if a test fails.
* text: The input text of the Tweet.
* expected: What results are expected for this input text
## Guidelines for use
If you are creating a new twitter-text library in a different programming language please follow these few guidelines:
1. Create a test which reads these files and executes the test cases.
1.a. Do not convert these files to test cases statically. These test cases will change over time.
2. Be sure to implement all of the publicly accessible APIs (the keys to the YAML file)
3. Only expose the public API method and not the underlying regular expressions
3.a. If your language or environment does not allow for this, please add a comment to that effect
3.b. This prevents breakage when regular expressions need to change in fundamental ways
## Submitting new conformance tests
* You can [fork the github repository](https://github.com/twitter/twitter-text) to add tests and send a pull request
* You can [open an issue on github](https://github.com/twitter/twitter-text/issues)
* Please be sure to provide example input and output as well as a brief description of the problem.
## Changelog
* v1.4.9 - 2011-12-01 [ Git tag v1.4.9 ]
* [FIX] Apply stricter parsing of t.co URLs
* [FIX] Extract @mention and hashtag before newline
* [FIX] Extract URLs without protocol on ccTLD domain with slash
* v1.4.8 - 2011-11-02 [ Git tag v1.4.8 ]
* [FIX] Extract URLs without protocol in CJK text
* [FIX] Do not extract URL in hashtag
* [FIX] Extract hashtag after bracket
* [FIX] Extract URL with '?' in fragment
* v1.4.7 - 2011-10-04 [ Git tag v1.4.7 ]
* [FIX] Extract URLs followed by punctuations
* [FIX] Extract URLs without protocol in CJK text
* [FIX] Extract URLs with '.'
* v1.4.5 - 2011-09-20 [ Git tag v1.4.5 ]
* [FIX] Extract URLs without protocol
* [FIX] Extract URLs with '.', '|' and '&'
* v1.4.4 - 2011-08-05 [ Git tag v1.4.4 ]
* [FIX] Support ş (U+015F) in hashtags
* [FIX] Support latin accents in URL paths
* [FIX] Add a test for a common cause for runaway regex engines
* v1.4.3 - 2011-07-13 [ Git tag v1.4.3 ]
* [FIX] Japanese autolink including long vowel mark (chouon)
* [FIX] Japanese autolink after a full-width exclamation point
* [FIX] Japanese autolink including ideographic iteration mark
* [FIX] Add hashtag extraction with indices test for new language hashtags
* [FIX] Add hashtag extraction with indices test for multiple latin hashtags
* v1.4.2 - 2011-07-08 [ Git tag v1.4.2 ]
* [FIX] Additional Japanese hashtag autolinking tests
* v1.4.1 - 2011-05-18 [ Git tag v1.4.1 ]
* [FIX] Add support for Russian hashtags
* [FIX] Add support for Korean hashtags
* [FIX] Add support for Japanese hashtags (Katakana, Hiragana and Kanji)
* [FIX] Add support for autolinking punycode domain names and TLDs (via punycode)
* [DOC] Update README and License
* v1.3.1 - 2010-12-03 - [ Git tag v1.3.1 ]
* [DOC] Updated README with Changelog section
* [FIX] Autolink URLs with paths ending in + and -
* [FIX] Extract URLs with paths ending in + and -
* v1.3.0 - 2010-12-03 - [ Git tag v1.3.0 ]
* [NOTE] First tagged version (sorry)
* [DOC] Updated README file with guidelines for use and format information
* [FIX] Do not autolink URLs without protocols
* [FIX] Do not extract URLs without protocols
* v1.0.0 - 2010-01-21 - [ Git tag v1.0.0 (retroactively) ]
* Initial version
## Copyright and License
Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this work except in compliance with the License.
You may obtain a copy of the License in the LICENSE file, or at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
twitter-text-1.13.4/test/twitter-text-conformance/Gemfile 0000644 0001750 0001750 00000000056 12670063203 023714 0 ustar sudheesh sudheesh source "https://rubygems.org"
gem 'nokogiri'
twitter-text-1.13.4/test/twitter-text-conformance/autolink.yml 0000644 0001750 0001750 00000152207 12670063203 025000 0 ustar sudheesh sudheesh tests:
usernames:
- description: "Autolink trailing username"
text: "text @username"
expected: "text @username"
- description: "Autolink username at the beginning"
text: "@username text"
expected: "@username text"
- description: "DO NOT Autolink username preceded by a letter"
text: "meet@the beach"
expected: "meet@the beach"
- description: "Autolink username preceded by punctuation"
text: "great.@username"
expected: "great.@username"
- description: "Autolink username followed by punctuation"
text: "@username&^$%^"
expected: "@username&^$%^"
- description: "Autolink username followed by Japanese"
text: "@usernameの"
expected: "@usernameの"
- description: "Autolink username preceded by Japanese"
text: "あ@username"
expected: "あ@username"
- description: "Autolink username surrounded by Japanese"
text: "あ@usernameの"
expected: "あ@usernameの"
- description: "Autolink username in compressed RT"
text: "RT@username: long Tweet is loooong"
expected: "RT@username: long Tweet is loooong"
- description: "Autolink alternate RT format in middle of text"
text: "Check out RT:@username yas"
expected: "Check out RT:@username yas"
- description: "DO NOT Autolink domain of email address ending in RT like support@example.com"
text: "Email support@example.com"
expected: "Email support@example.com"
- description: "DO NOT Autolink username followed by accented latin characters"
text: "@aliceìnheiro something something"
expected: "@aliceìnheiro something something"
- description: "DO NOT Autolink username @_ in @_@"
text: "oh, snap! @_@"
expected: "oh, snap! @_@"
- description: "Autolink username with full-width at sign (U+FF20)"
text: "@username"
expected: "@username"
- description: "DO NOT Autolink username over 20 characters"
text: "@username9012345678901"
expected: "@username9012345678901"
- description: "Autolink two usernames"
text: "@foo @bar"
expected: "@foo @bar"
- description: "Autolink usernames followed by :"
text: "@foo: @bar"
expected: "@foo: @bar"
- description: "Autolink usernames that are followed by international characters"
text: "@foo îs in the house"
expected: "@foo îs in the house"
- description: "Preserve case when linking a username"
text: "@MixedCase"
expected: "@MixedCase"
lists:
- description: "Autolink list preceded by a space"
text: "text @username/list"
expected: "text @username/list"
- description: "DO NOT Autolink list when space follows slash"
text: "text @username/ list"
expected: "text @username/ list"
- description: "DO NOT Autolink list with empty username"
text: "text @/list"
expected: "text @/list"
- description: "Autolink list at the beginning"
text: "@username/list"
expected: "@username/list"
- description: "DO NOT Autolink list preceded by letter"
text: "meet@the/beach"
expected: "meet@the/beach"
- description: "Autolink list preceded by punctuation"
text: "great.@username/list"
expected: "great.@username/list"
- description: "Autolink list followed by punctuation"
text: "@username/list&^$%^"
expected: "@username/list&^$%^"
- description: "Autolink list name over 25 characters (truncated to 25)"
text: "@username/list567890123456789012345A"
expected: "@username/list567890123456789012345A"
- description: "Autolink list that contains an _"
text: "text @username/list_name"
expected: "text @username/list_name"
- description: "Autolink list that contains a -"
text: "text @username/list-name"
expected: "text @username/list-name"
- description: "Autolink list that contains a number"
text: "text @username/list123"
expected: "text @username/list123"
- description: "DO NOT Autolink list starting with a number"
text: "@username/1list"
expected: "@username/1list"
hashtags:
- description: "Autolink trailing hashtag"
text: "text #hashtag"
expected: "text #hashtag"
- description: "Autolink alphanumeric hashtag (letter-number-letter)"
text: "text #hash0tag"
expected: "text #hash0tag"
- description: "Autolink alphanumeric hashtag (number-letter)"
text: "text #1tag"
expected: "text #1tag"
- description: "Autolink hashtag with underscore"
text: "text #hash_tag"
expected: "text #hash_tag"
- description: "DO NOT Autolink all-numeric hashtags"
text: "text #1234"
expected: "text #1234"
- description: "DO NOT Autolink hashtag preceded by a letter"
text: "text#hashtag"
expected: "text#hashtag"
- description: "DO NOT Autolink hashtag that begins with \ufe0f (Emoji style hash sign)"
text: "#️hashtag"
expected: "#️hashtag"
- description: "DO NOT Autolink hashtag that begins with \u20e3 (Keycap style hash sign)"
text: "#⃣hashtag"
expected: "#⃣hashtag"
- description: "Autolink multiple hashtags"
text: "text #hashtag1 #hashtag2"
expected: "text #hashtag1#hashtag2"
- description: "Autolink hashtag preceded by a period"
text: "text.#hashtag"
expected: "text.#hashtag"
- description: "DO NOT Autolink hashtag preceded by &"
text: "&#nbsp;"
expected: "&#nbsp;"
- description: "Autolink hashtag followed by ! (! not included)"
text: "text #hashtag!"
expected: "text #hashtag!"
- description: "Autolink two hashtags separated by a slash"
text: "text #dodge/#answer"
expected: "text #dodge/#answer"
- description: "Autolink hashtag before a slash"
text: "text #dodge/answer"
expected: "text #dodge/answer"
- description: "Autolink hashtag after a slash"
text: "text dodge/#answer"
expected: "text dodge/#answer"
- description: "Autolink hashtag followed by Japanese"
text: "text #hashtagの"
expected: "text #hashtagの"
- description: "Autolink hashtag preceded by full-width space (U+3000)"
text: "text #hashtag"
expected: "text #hashtag"
- description: "Autolink hashtag followed by full-width space (U+3000)"
text: "#hashtag text"
expected: "#hashtag text"
- description: "Autolink hashtag with full-width hash (U+FF03)"
text: "#hashtag"
expected: "#hashtag"
- description: "Autolink hashtag with accented character at the start"
text: "#éhashtag"
expected: "#éhashtag"
- description: "Autolink hashtag with accented character at the end"
text: "#hashtagé"
expected: "#hashtagé"
- description: "Autolink hashtag with accented character in the middle"
text: "#hashétag"
expected: "#hashétag"
- description: "Autolink hashtags in Korean"
text: "What is #트위터 anyway?"
expected: "What is #트위터 anyway?"
- description: "Autolink hashtags in Russian"
text: "What is #ашок anyway?"
expected: "What is #ашок anyway?"
- description: "Autolink a katakana hashtag preceded by a space and followed by a space"
text: "カタカナ #カタカナ カタカナ"
expected: "カタカナ #カタカナ カタカナ"
- description: "Autolink a katakana hashtag preceded by a space and followed by a bracket"
text: "カタカナ #カタカナ」カタカナ"
expected: "カタカナ #カタカナ」カタカナ"
- description: "Autolink a katakana hashtag preceded by a space and followed by a edge"
text: "カタカナ #カタカナ"
expected: "カタカナ #カタカナ"
- description: "Autolink a katakana hashtag preceded by a bracket and followed by a space"
text: "カタカナ「#カタカナ カタカナ"
expected: "カタカナ「#カタカナ カタカナ"
- description: "Autolink a katakana hashtag preceded by a bracket and followed by a bracket"
text: "カタカナ「#カタカナ」カタカナ"
expected: "カタカナ「#カタカナ」カタカナ"
- description: "Autolink a katakana hashtag preceded by a bracket and followed by a edge"
text: "カタカナ「#カタカナ"
expected: "カタカナ「#カタカナ"
- description: "Autolink a katakana hashtag preceded by a edge and followed by a space"
text: "#カタカナ カタカナ"
expected: "#カタカナ カタカナ"
- description: "Autolink a katakana hashtag preceded by a edge and followed by a bracket"
text: "#カタカナ」カタカナ"
expected: "#カタカナ」カタカナ"
- description: "Autolink a katakana hashtag preceded by a edge and followed by a edge"
text: "#カタカナ"
expected: "#カタカナ"
- description: "Autolink a katakana hashtag with a voiced sounds mark followed by a space"
text: "#ハッシュタグ テスト"
expected: "#ハッシュタグ テスト"
- description: "Autolink a katakana hashtag with a voiced sounds mark followed by numbers"
text: "#ハッシュタグ123"
expected: "#ハッシュタグ123"
- description: "Autolink a katakana hashtag with another voiced sounds mark"
text: "#パピプペポ"
expected: "#パピプペポ"
- description: "Autolink a kanji hashtag preceded by a space and followed by a space"
text: "漢字 #漢字 漢字"
expected: "漢字 #漢字 漢字"
- description: "Autolink a kanji hashtag preceded by a space and followed by a bracket"
text: "漢字 #漢字」漢字"
expected: "漢字 #漢字」漢字"
- description: "Autolink a kanji hashtag preceded by a space and followed by a edge"
text: "漢字 #漢字"
expected: "漢字 #漢字"
- description: "Autolink a kanji hashtag preceded by a bracket and followed by a space"
text: "漢字「#漢字 漢字"
expected: "漢字「#漢字 漢字"
- description: "Autolink a kanji hashtag preceded by a bracket and followed by a bracket"
text: "漢字「#漢字」漢字"
expected: "漢字「#漢字」漢字"
- description: "Autolink a kanji hashtag preceded by a bracket and followed by a edge"
text: "漢字「#漢字"
expected: "漢字「#漢字"
- description: "Autolink a kanji hashtag preceded by a edge and followed by a space"
text: "#漢字 漢字"
expected: "#漢字 漢字"
- description: "Autolink a kanji hashtag preceded by a edge and followed by a bracket"
text: "#漢字」漢字"
expected: "#漢字」漢字"
- description: "Autolink a kanji hashtag preceded by a edge and followed by a edge"
text: "#漢字"
expected: "#漢字"
- description: "Autolink a kanji hashtag preceded by an ideographic comma, followed by an ideographic period"
text: "これは、#大丈夫。"
expected: "これは、#大丈夫。"
- description: "Autolink a hiragana hashtag preceded by a space and followed by a space"
text: "ひらがな #ひらがな ひらがな"
expected: "ひらがな #ひらがな ひらがな"
- description: "Autolink a hiragana hashtag preceded by a space and followed by a bracket"
text: "ひらがな #ひらがな」ひらがな"
expected: "ひらがな #ひらがな」ひらがな"
- description: "Autolink a hiragana hashtag preceded by a space and followed by a edge"
text: "ひらがな #ひらがな"
expected: "ひらがな #ひらがな"
- description: "Autolink a hiragana hashtag preceded by a bracket and followed by a space"
text: "ひらがな「#ひらがな ひらがな"
expected: "ひらがな「#ひらがな ひらがな"
- description: "Autolink a hiragana hashtag preceded by a bracket and followed by a bracket"
text: "ひらがな「#ひらがな」ひらがな"
expected: "ひらがな「#ひらがな」ひらがな"
- description: "Autolink a hiragana hashtag preceded by a bracket and followed by a edge"
text: "ひらがな「#ひらがな"
expected: "ひらがな「#ひらがな"
- description: "Autolink a hiragana hashtag preceded by a edge and followed by a space"
text: "#ひらがな ひらがな"
expected: "#ひらがな ひらがな"
- description: "Autolink a hiragana hashtag preceded by a edge and followed by a bracket"
text: "#ひらがな」ひらがな"
expected: "#ひらがな」ひらがな"
- description: "Autolink a hiragana hashtag preceded by a edge and followed by a edge"
text: "#ひらがな"
expected: "#ひらがな"
- description: "Autolink a Kanji/Katakana mix hashtag"
text: "日本語ハッシュタグ #日本語ハッシュタグ"
expected: "日本語ハッシュタグ #日本語ハッシュタグ"
- description: "DO NOT autolink a hashtag without a preceding space"
text: "日本語ハッシュタグ#日本語ハッシュタグ"
expected: "日本語ハッシュタグ#日本語ハッシュタグ"
- description: "DO NOT include a punctuation in a hashtag"
text: "#日本語ハッシュタグ。"
expected: "#日本語ハッシュタグ。"
- description: "Autolink a hashtag after a punctuation"
text: "日本語ハッシュタグ。#日本語ハッシュタグ"
expected: "日本語ハッシュタグ。#日本語ハッシュタグ"
- description: "Autolink a hashtag with chouon"
text: "長音ハッシュタグ。#サッカー"
expected: "長音ハッシュタグ。#サッカー"
- description: "Autolink a hashtag with half-width chouon"
text: "長音ハッシュタグ。#サッカー"
expected: "長音ハッシュタグ。#サッカー"
- description: "Autolink a hashtag with half-width # after full-width !"
text: "できましたよー!#日本語ハッシュタグ。"
expected: "できましたよー!#日本語ハッシュタグ。"
- description: "Autolink a hashtag with full-width # after full-width !"
text: "できましたよー!#日本語ハッシュタグ。"
expected: "できましたよー!#日本語ハッシュタグ。"
- description: "Autolink a hashtag containing ideographic iteration mark"
text: "#云々"
expected: "#云々"
- description: "Autolink multiple hashtags in multiple languages"
text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!"
expected: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!"
- description: "Autolink should allow for ş (U+015F) in a hashtag"
text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş"
expected: "Here’s a test tweet for you: #Ateş#qrşt#ştu#ş"
- description: "Autolink a hashtag with Latin extended character"
text: "#mûǁae"
expected: "#mûǁae"
# Please be careful with changes to this test case - what looks like "á" is really a + U+0301, and many editors will silently convert this to U+00E1.
- description: "Autolink hashtags with combining diacritics"
text: "#táim #hag̃ua"
expected: "#táim#hag̃ua"
- description: "Autolink Arabic hashtag"
text: "Arabic hashtag: #فارسی #لس_آنجلس"
expected: "Arabic hashtag: #فارسی#لس_آنجلس"
- description: "Autolink Thai hashtag"
text: "Thai hashtag: #รายละเอียด"
expected: "Thai hashtag: #รายละเอียด"
urls:
- description: "Autolink URL with pipe character"
text: "text http://example.com/pipe|character?yes|pipe|character"
expected: "text http://example.com/pipe|character?yes|pipe|character"
- description: "Autolink trailing url"
text: "text http://example.com"
expected: "text http://example.com"
- description: "Autolink url in mid-text"
text: "text http://example.com more text"
expected: "text http://example.com more text"
- description: "Autolink url in Japanese text"
text: "いまなにしてるhttp://example.comいまなにしてる"
expected: "いまなにしてるhttp://example.comいまなにしてる"
- description: "Autolink url surrounded by parentheses does not capture them"
text: "text (http://example.com)"
expected: "text (http://example.com)"
- description: "Autolink url with path surrounded by parentheses does not capture them"
text: "text (http://example.com/test)"
expected: "text (http://example.com/test)"
- description: "Autolink url with embedded parentheses"
text: "text http://msdn.com/S(deadbeef)/page.htm"
expected: "text http://msdn.com/S(deadbeef)/page.htm"
- description: "Autolink url with embedded parentheses without linking surrounding parentheses"
text: "text (URL in parentheses http://msdn.com/S(deadbeef))"
expected: "text (URL in parentheses http://msdn.com/S(deadbeef))"
- description: "Autolink Rdio #music url with double balanced nested parentheses"
text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/"
expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/"
- description: "Autolink Rdio #music url with double balanced nested parentheses without linking surrounding parentheses"
text: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)"
expected: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)"
- description: "Autolink url followed by nested parentheses (without them)"
text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))"
expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))"
- description: "Autolink url followed by completely unbalanced nested parentheses (without them)"
text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited"
expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited"
- description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
expected: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
- description: "Autolink url with balanced parens hiding XSS"
text: 'text http://foo.com/("onclick="alert(1)")'
expected: 'text http://foo.com/("onclick="alert(1)")'
- description: "Autolink url should NOT capture unbalanced parens"
text: "Parenthetically bad http://example.com/i_has_a_) thing"
expected: "Parenthetically bad http://example.com/i_has_a_) thing"
- description: "Autolink url containing unicode characters"
text: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp"
expected: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp"
- description: "Autolink url with .co. under TLD"
text: "test http://www.example.co.jp"
expected: "test http://www.example.co.jp"
- description: "Autolink url with .sx TLD"
text: "test http://www.example.sx"
expected: "test http://www.example.sx"
- description: "DO NOT Autolink url containing ! character in the domain"
text: "badly formatted http://foo!bar.com"
expected: "badly formatted http://foo!bar.com"
- description: "DO NOT Autolink url containing _ character in the domain"
text: "badly formatted http://foo_bar.com"
expected: "badly formatted http://foo_bar.com"
- description: "Autolink url preceded by :"
text: "text:http://example.com"
expected: "text:http://example.com"
- description: "Autolink url followed by ? (without it)"
text: "text http://example.com?"
expected: "text http://example.com?"
- description: "Autolink url followed by ! (without it)"
text: "text http://example.com!"
expected: "text http://example.com!"
- description: "Autolink url followed by , (without it)"
text: "text http://example.com,"
expected: "text http://example.com,"
- description: "Autolink url with path followed by a comma (wihout the comma)"
text: "In http://example.com/test, Douglas explains 42."
expected: "In http://example.com/test, Douglas explains 42."
- description: "Autolink url followed by . (without it)"
text: "text http://example.com."
expected: "text http://example.com."
- description: "Autolink url followed by : (without it)"
text: "text http://example.com:"
expected: "text http://example.com:"
- description: "Autolink url followed by ; (without it)"
text: "text http://example.com;"
expected: "text http://example.com;"
- description: "Autolink url followed by ] (without it)"
text: "text http://example.com]"
expected: "text http://example.com]"
- description: "Autolink url followed by ) (without it)"
text: "text http://example.com)"
expected: "text http://example.com)"
- description: "Autolink url followed by } (without it)"
text: "text http://example.com}"
expected: "text http://example.com}"
- description: "Autolink url followed by = (without it)"
text: "text http://example.com="
expected: "text http://example.com="
- description: "Autolink url followed by ' (without it)"
text: "text http://example.com'"
expected: "text http://example.com'"
- description: "Autolink url preceded by /"
text: "text /http://example.com"
expected: "text /http://example.com"
- description: "Autolink url preceded by !"
text: "text !http://example.com"
expected: "text !http://example.com"
- description: "DO NOT Autolink url preceded by ="
text: "text =http://example.com"
expected: "text =http://example.com"
- description: "Autolink url surrounded by double quotes"
text: "text \"http://example.com\""
expected: "text \"http://example.com\""
- description: "DO NOT Autolink url preceded by @"
text: "@http://example.com"
expected: "@http://example.com"
- description: "DO NOT Autolink domain in email address"
text: "foo@bar.com"
expected: "foo@bar.com"
- description: "Autolink url embedded in link tag"
text: "http://example.com"
expected: "http://example.com"
- description: "Autolink multiple urls"
text: "http://example.com https://sslexample.com http://sub.example.com"
expected: "http://example.comhttps://sslexample.comhttp://sub.example.com"
- description: "Autolink url with long TLD"
text: "http://example.mobi/path"
expected: "http://example.mobi/path"
- description: "Autolink url containing ending with #value (not as url + hashtag)"
text: "http://foo.com/?#foo"
expected: "http://foo.com/?#foo"
- description: "DO NOT Autolink url without protocol (with www)"
text: "www.example.biz"
expected: "www.example.biz"
- description: "DO NOT Autolink url without protocol (with WWW)"
text: "WWW.EXAMPLE.BIZ"
expected: "WWW.EXAMPLE.BIZ"
- description: "DO NOT Autolink URL without protocol and without www (ending in .com)"
text: "foo.com"
expected: "foo.com"
- description: "DO NOT Autolink URL without protocol and without www (ending in .org)"
text: "foo.org"
expected: "foo.org"
- description: "DO NOT Autolink URL without protocol and without www (ending in .net)"
text: "foo.net"
expected: "foo.net"
- description: "DO NOT Autolink URL without protocol and without www (ending in .gov)"
text: "foo.gov"
expected: "foo.gov"
- description: "DO NOT Autolink URL without protocol and without www (ending in .edu)"
text: "foo.edu"
expected: "foo.edu"
- description: "DO NOT Autolink URL without protocol and without www not ending in /.(edu|com|gov|org|net)/"
text: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk"
expected: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk"
- description: "Multiple URLs with different protocols but not without a protocol"
text: "http://foo.com AND https://bar.com AND www.foobar.com"
expected: "http://foo.com AND https://bar.com AND www.foobar.com"
- description: "Autolink raw domain followed by domain only links the first"
text: "See http://example.com example.com"
expected: "See http://example.com example.com"
- description: "Autolink url that includes @-sign and numeric dir under it"
text: "http://www.flickr.com/photos/29674651@N00/4382024406"
expected: "http://www.flickr.com/photos/29674651@N00/4382024406"
- description: "Autolink url that includes @-sign and non-numeric dir under it"
text: "http://www.flickr.com/photos/29674651@N00/foobar"
expected: "http://www.flickr.com/photos/29674651@N00/foobar"
- description: "Autolink url with a hashtag-looking fragment"
text: "http://www.example.com/#answer"
expected: "http://www.example.com/#answer"
- description: "Autolink URL with only a domain followed by a period doesn't swallow the period."
text: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL."
expected: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL."
- description: "Autolink URL with a path followed by a period doesn't swallow the period."
text: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL."
expected: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL."
- description: "Autolink URL with a query followed by a period doesn't swallow the period."
text: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL."
expected: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL."
- description: "Autolink URL with a hyphen in the domain name"
text: "Czech out sweet deals at http://mrs.domain-dash.biz ok?"
expected: "Czech out sweet deals at http://mrs.domain-dash.biz ok?"
- description: "Autolink an IDN (punycode) domain and TLD"
text: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/"
expected: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/"
- description: "Autolink URL should NOT autolink www...foo"
text: "Is www...foo a valid URL?"
expected: "Is www...foo a valid URL?"
- description: "Autolink URL should NOT autolink www.-foo.com"
text: "Is www.-foo.com a valid URL?"
expected: "Is www.-foo.com a valid URL?"
- description: "Autolink URL should NOT autolink a domain with a valid dash but no protocol"
text: "Is www.foo-bar.com a valid URL?"
expected: "Is www.foo-bar.com a valid URL?"
- description: "Autolink URL should autolink a domain with a valid dash and a protocol"
text: "Is http://www.foo-bar.com a valid URL?"
expected: "Is http://www.foo-bar.com a valid URL?"
- description: "Autolink URL should link search urls (with &lang=, not 〈)"
text: "Check out http://search.twitter.com/#!/search?q=avro&lang=en"
expected: "Check out http://search.twitter.com/#!/search?q=avro&lang=en"
- description: "Autolink URL should link urls with very long paths"
text: "Check out http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
expected: "Check out http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- description: "Autolink URL should HTML escape the URL"
text: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/"
expected: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/"
- description: "Autolink URL should autolink a URL with a - or + at the end of the path"
text: "Go to http://example.com/a+ or http://example.com/a-"
expected: "Go to http://example.com/a+ or http://example.com/a-"
- description: "Autolink URL should autolink a URL with a - or + at the end of the path and query parameters"
text: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that"
expected: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that"
- description: "Autolink URL should autolink URLs with longer paths ending in -"
text: "Go to http://example.com/view/slug-url-?foo=bar"
expected: "Go to http://example.com/view/slug-url-?foo=bar"
- description: "Autolink URL should NOT link URLs with domains beginning in a space"
text: "@user Try http:// example.com/path"
expected: "@user Try http:// example.com/path"
- description: "Autolink URL should NOT link URLs with domains beginning in a non-breaking space (U+00A0)"
text: "@user Try http:// example.com/path"
expected: "@user Try http:// example.com/path"
- description: "Autolink URL should link paths containing accented characters"
text: "See: http://example.com/café"
expected: "See: http://example.com/café"
- description: "Autolink URL should link paths containing Cyrillic characters"
text: "Go to http://example.com/Русские_слова maybe?"
expected: "Go to http://example.com/Русские_слова maybe?"
- description: "Autolink URL should not link URL without protocol"
text: "See: www.twitter.com or twitter.com/twitter"
expected: "See: www.twitter.com or twitter.com/twitter"
- description: "Autolink t.co URL followed by punctuation"
text: "See: http://t.co/abcde's page"
expected: "See: http://t.co/abcde's page"
- description: "DO NOT autolink URL if preceded by $"
text: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA"
expected: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA"
cashtags:
- description: "Autolink a cashtag"
text: "$STOCK"
expected: "$STOCK"
- description: "Autolink a cashtag in text"
text: "Text $STOCK text $symbol text"
expected: "Text $STOCK text $symbol text"
all:
- description: "Autolink all does not break on URL with @"
text: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you."
expected: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you."
- description: "Correctly handles URL followed directly by @user"
text: "See: http://example.com/@user"
expected: "See: http://example.com/@user"
- description: "Correctly handles URL params containing @user"
text: "See: http://example.com/?@user=@user"
expected: "See: http://example.com/?@user=@user"
- description: "Correctly handles URL with an @user followed by trailing /"
text: "See: http://example.com/@user/"
expected: "See: http://example.com/@user/"
- description: "Does not allow an XSS after an @"
text: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//"
expected: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//"
- description: "DO NOT autolink URLs if preceded by # or @"
text: "#https://twitter.com @https://twitter.com"
expected: "#https://twitter.com @https://twitter.com"
- description: "Autolink url with a hashtag-looking fragment"
text: "http://www.example.com/#answer"
expected: "http://www.example.com/#answer"
- description: "Autolink hashtag if followed by . and TLD"
text: "#twitter.com #twitter.co.jp"
expected: "#twitter.com #twitter.co.jp"
- description: "Autolink @mention if followed by . and TLD"
text: "@twitter.com @twitter.co.jp"
expected: "@twitter.com @twitter.co.jp"
- description: "Autolink a cashtag"
text: "$STOCK"
expected: "$STOCK"
json:
- description: "Do not autolink if JSON is empty."
text: "This is a tweet with no entity."
json: '{"hashtags":[], "urls":[], "user_mentions":[]}'
expected: "This is a tweet with no entity."
- description: "Autolink username"
text: "text @username"
json: '{"hashtags":[], "urls":[], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [5, 14]}]}'
expected: "text @username"
- description: "Autolink hashtag"
text: "text #hashtag"
json: '{"hashtags":[{"text":"hashtag", "indices":[5,13]}], "urls":[], "user_mentions":[]}'
expected: "text #hashtag"
- description: "Autolink URL"
text: "text http://t.co/gksG6xlq"
json: '{"hashtags":[], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[]}'
expected: "text http://twitter.com/"
- description: "Autolink all"
text: "text http://t.co/gksG6xlq text #hashtag text @username"
json: '{"hashtags":[{"text":"hashtag", "indices":[31,39]}], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [45, 54]}]}'
expected: "text http://twitter.com/ text #hashtag text @username"
twitter-text-1.13.4/test/twitter-text-conformance/tlds.yml 0000644 0001750 0001750 00000371236 12670063203 024125 0 ustar sudheesh sudheesh ---
tests:
country:
- description: ac is a valid country tld
text: https://twitter.ac
expected:
- https://twitter.ac
- description: ad is a valid country tld
text: https://twitter.ad
expected:
- https://twitter.ad
- description: ae is a valid country tld
text: https://twitter.ae
expected:
- https://twitter.ae
- description: af is a valid country tld
text: https://twitter.af
expected:
- https://twitter.af
- description: ag is a valid country tld
text: https://twitter.ag
expected:
- https://twitter.ag
- description: ai is a valid country tld
text: https://twitter.ai
expected:
- https://twitter.ai
- description: al is a valid country tld
text: https://twitter.al
expected:
- https://twitter.al
- description: am is a valid country tld
text: https://twitter.am
expected:
- https://twitter.am
- description: an is a valid country tld
text: https://twitter.an
expected:
- https://twitter.an
- description: ao is a valid country tld
text: https://twitter.ao
expected:
- https://twitter.ao
- description: aq is a valid country tld
text: https://twitter.aq
expected:
- https://twitter.aq
- description: ar is a valid country tld
text: https://twitter.ar
expected:
- https://twitter.ar
- description: as is a valid country tld
text: https://twitter.as
expected:
- https://twitter.as
- description: at is a valid country tld
text: https://twitter.at
expected:
- https://twitter.at
- description: au is a valid country tld
text: https://twitter.au
expected:
- https://twitter.au
- description: aw is a valid country tld
text: https://twitter.aw
expected:
- https://twitter.aw
- description: ax is a valid country tld
text: https://twitter.ax
expected:
- https://twitter.ax
- description: az is a valid country tld
text: https://twitter.az
expected:
- https://twitter.az
- description: ba is a valid country tld
text: https://twitter.ba
expected:
- https://twitter.ba
- description: bb is a valid country tld
text: https://twitter.bb
expected:
- https://twitter.bb
- description: bd is a valid country tld
text: https://twitter.bd
expected:
- https://twitter.bd
- description: be is a valid country tld
text: https://twitter.be
expected:
- https://twitter.be
- description: bf is a valid country tld
text: https://twitter.bf
expected:
- https://twitter.bf
- description: bg is a valid country tld
text: https://twitter.bg
expected:
- https://twitter.bg
- description: bh is a valid country tld
text: https://twitter.bh
expected:
- https://twitter.bh
- description: bi is a valid country tld
text: https://twitter.bi
expected:
- https://twitter.bi
- description: bj is a valid country tld
text: https://twitter.bj
expected:
- https://twitter.bj
- description: bl is a valid country tld
text: https://twitter.bl
expected:
- https://twitter.bl
- description: bm is a valid country tld
text: https://twitter.bm
expected:
- https://twitter.bm
- description: bn is a valid country tld
text: https://twitter.bn
expected:
- https://twitter.bn
- description: bo is a valid country tld
text: https://twitter.bo
expected:
- https://twitter.bo
- description: bq is a valid country tld
text: https://twitter.bq
expected:
- https://twitter.bq
- description: br is a valid country tld
text: https://twitter.br
expected:
- https://twitter.br
- description: bs is a valid country tld
text: https://twitter.bs
expected:
- https://twitter.bs
- description: bt is a valid country tld
text: https://twitter.bt
expected:
- https://twitter.bt
- description: bv is a valid country tld
text: https://twitter.bv
expected:
- https://twitter.bv
- description: bw is a valid country tld
text: https://twitter.bw
expected:
- https://twitter.bw
- description: by is a valid country tld
text: https://twitter.by
expected:
- https://twitter.by
- description: bz is a valid country tld
text: https://twitter.bz
expected:
- https://twitter.bz
- description: ca is a valid country tld
text: https://twitter.ca
expected:
- https://twitter.ca
- description: cc is a valid country tld
text: https://twitter.cc
expected:
- https://twitter.cc
- description: cd is a valid country tld
text: https://twitter.cd
expected:
- https://twitter.cd
- description: cf is a valid country tld
text: https://twitter.cf
expected:
- https://twitter.cf
- description: cg is a valid country tld
text: https://twitter.cg
expected:
- https://twitter.cg
- description: ch is a valid country tld
text: https://twitter.ch
expected:
- https://twitter.ch
- description: ci is a valid country tld
text: https://twitter.ci
expected:
- https://twitter.ci
- description: ck is a valid country tld
text: https://twitter.ck
expected:
- https://twitter.ck
- description: cl is a valid country tld
text: https://twitter.cl
expected:
- https://twitter.cl
- description: cm is a valid country tld
text: https://twitter.cm
expected:
- https://twitter.cm
- description: cn is a valid country tld
text: https://twitter.cn
expected:
- https://twitter.cn
- description: co is a valid country tld
text: https://twitter.co
expected:
- https://twitter.co
- description: cr is a valid country tld
text: https://twitter.cr
expected:
- https://twitter.cr
- description: cu is a valid country tld
text: https://twitter.cu
expected:
- https://twitter.cu
- description: cv is a valid country tld
text: https://twitter.cv
expected:
- https://twitter.cv
- description: cw is a valid country tld
text: https://twitter.cw
expected:
- https://twitter.cw
- description: cx is a valid country tld
text: https://twitter.cx
expected:
- https://twitter.cx
- description: cy is a valid country tld
text: https://twitter.cy
expected:
- https://twitter.cy
- description: cz is a valid country tld
text: https://twitter.cz
expected:
- https://twitter.cz
- description: de is a valid country tld
text: https://twitter.de
expected:
- https://twitter.de
- description: dj is a valid country tld
text: https://twitter.dj
expected:
- https://twitter.dj
- description: dk is a valid country tld
text: https://twitter.dk
expected:
- https://twitter.dk
- description: dm is a valid country tld
text: https://twitter.dm
expected:
- https://twitter.dm
- description: do is a valid country tld
text: https://twitter.do
expected:
- https://twitter.do
- description: dz is a valid country tld
text: https://twitter.dz
expected:
- https://twitter.dz
- description: ec is a valid country tld
text: https://twitter.ec
expected:
- https://twitter.ec
- description: ee is a valid country tld
text: https://twitter.ee
expected:
- https://twitter.ee
- description: eg is a valid country tld
text: https://twitter.eg
expected:
- https://twitter.eg
- description: eh is a valid country tld
text: https://twitter.eh
expected:
- https://twitter.eh
- description: er is a valid country tld
text: https://twitter.er
expected:
- https://twitter.er
- description: es is a valid country tld
text: https://twitter.es
expected:
- https://twitter.es
- description: et is a valid country tld
text: https://twitter.et
expected:
- https://twitter.et
- description: eu is a valid country tld
text: https://twitter.eu
expected:
- https://twitter.eu
- description: fi is a valid country tld
text: https://twitter.fi
expected:
- https://twitter.fi
- description: fj is a valid country tld
text: https://twitter.fj
expected:
- https://twitter.fj
- description: fk is a valid country tld
text: https://twitter.fk
expected:
- https://twitter.fk
- description: fm is a valid country tld
text: https://twitter.fm
expected:
- https://twitter.fm
- description: fo is a valid country tld
text: https://twitter.fo
expected:
- https://twitter.fo
- description: fr is a valid country tld
text: https://twitter.fr
expected:
- https://twitter.fr
- description: ga is a valid country tld
text: https://twitter.ga
expected:
- https://twitter.ga
- description: gb is a valid country tld
text: https://twitter.gb
expected:
- https://twitter.gb
- description: gd is a valid country tld
text: https://twitter.gd
expected:
- https://twitter.gd
- description: ge is a valid country tld
text: https://twitter.ge
expected:
- https://twitter.ge
- description: gf is a valid country tld
text: https://twitter.gf
expected:
- https://twitter.gf
- description: gg is a valid country tld
text: https://twitter.gg
expected:
- https://twitter.gg
- description: gh is a valid country tld
text: https://twitter.gh
expected:
- https://twitter.gh
- description: gi is a valid country tld
text: https://twitter.gi
expected:
- https://twitter.gi
- description: gl is a valid country tld
text: https://twitter.gl
expected:
- https://twitter.gl
- description: gm is a valid country tld
text: https://twitter.gm
expected:
- https://twitter.gm
- description: gn is a valid country tld
text: https://twitter.gn
expected:
- https://twitter.gn
- description: gp is a valid country tld
text: https://twitter.gp
expected:
- https://twitter.gp
- description: gq is a valid country tld
text: https://twitter.gq
expected:
- https://twitter.gq
- description: gr is a valid country tld
text: https://twitter.gr
expected:
- https://twitter.gr
- description: gs is a valid country tld
text: https://twitter.gs
expected:
- https://twitter.gs
- description: gt is a valid country tld
text: https://twitter.gt
expected:
- https://twitter.gt
- description: gu is a valid country tld
text: https://twitter.gu
expected:
- https://twitter.gu
- description: gw is a valid country tld
text: https://twitter.gw
expected:
- https://twitter.gw
- description: gy is a valid country tld
text: https://twitter.gy
expected:
- https://twitter.gy
- description: hk is a valid country tld
text: https://twitter.hk
expected:
- https://twitter.hk
- description: hm is a valid country tld
text: https://twitter.hm
expected:
- https://twitter.hm
- description: hn is a valid country tld
text: https://twitter.hn
expected:
- https://twitter.hn
- description: hr is a valid country tld
text: https://twitter.hr
expected:
- https://twitter.hr
- description: ht is a valid country tld
text: https://twitter.ht
expected:
- https://twitter.ht
- description: hu is a valid country tld
text: https://twitter.hu
expected:
- https://twitter.hu
- description: id is a valid country tld
text: https://twitter.id
expected:
- https://twitter.id
- description: ie is a valid country tld
text: https://twitter.ie
expected:
- https://twitter.ie
- description: il is a valid country tld
text: https://twitter.il
expected:
- https://twitter.il
- description: im is a valid country tld
text: https://twitter.im
expected:
- https://twitter.im
- description: in is a valid country tld
text: https://twitter.in
expected:
- https://twitter.in
- description: io is a valid country tld
text: https://twitter.io
expected:
- https://twitter.io
- description: iq is a valid country tld
text: https://twitter.iq
expected:
- https://twitter.iq
- description: ir is a valid country tld
text: https://twitter.ir
expected:
- https://twitter.ir
- description: is is a valid country tld
text: https://twitter.is
expected:
- https://twitter.is
- description: it is a valid country tld
text: https://twitter.it
expected:
- https://twitter.it
- description: je is a valid country tld
text: https://twitter.je
expected:
- https://twitter.je
- description: jm is a valid country tld
text: https://twitter.jm
expected:
- https://twitter.jm
- description: jo is a valid country tld
text: https://twitter.jo
expected:
- https://twitter.jo
- description: jp is a valid country tld
text: https://twitter.jp
expected:
- https://twitter.jp
- description: ke is a valid country tld
text: https://twitter.ke
expected:
- https://twitter.ke
- description: kg is a valid country tld
text: https://twitter.kg
expected:
- https://twitter.kg
- description: kh is a valid country tld
text: https://twitter.kh
expected:
- https://twitter.kh
- description: ki is a valid country tld
text: https://twitter.ki
expected:
- https://twitter.ki
- description: km is a valid country tld
text: https://twitter.km
expected:
- https://twitter.km
- description: kn is a valid country tld
text: https://twitter.kn
expected:
- https://twitter.kn
- description: kp is a valid country tld
text: https://twitter.kp
expected:
- https://twitter.kp
- description: kr is a valid country tld
text: https://twitter.kr
expected:
- https://twitter.kr
- description: kw is a valid country tld
text: https://twitter.kw
expected:
- https://twitter.kw
- description: ky is a valid country tld
text: https://twitter.ky
expected:
- https://twitter.ky
- description: kz is a valid country tld
text: https://twitter.kz
expected:
- https://twitter.kz
- description: la is a valid country tld
text: https://twitter.la
expected:
- https://twitter.la
- description: lb is a valid country tld
text: https://twitter.lb
expected:
- https://twitter.lb
- description: lc is a valid country tld
text: https://twitter.lc
expected:
- https://twitter.lc
- description: li is a valid country tld
text: https://twitter.li
expected:
- https://twitter.li
- description: lk is a valid country tld
text: https://twitter.lk
expected:
- https://twitter.lk
- description: lr is a valid country tld
text: https://twitter.lr
expected:
- https://twitter.lr
- description: ls is a valid country tld
text: https://twitter.ls
expected:
- https://twitter.ls
- description: lt is a valid country tld
text: https://twitter.lt
expected:
- https://twitter.lt
- description: lu is a valid country tld
text: https://twitter.lu
expected:
- https://twitter.lu
- description: lv is a valid country tld
text: https://twitter.lv
expected:
- https://twitter.lv
- description: ly is a valid country tld
text: https://twitter.ly
expected:
- https://twitter.ly
- description: ma is a valid country tld
text: https://twitter.ma
expected:
- https://twitter.ma
- description: mc is a valid country tld
text: https://twitter.mc
expected:
- https://twitter.mc
- description: md is a valid country tld
text: https://twitter.md
expected:
- https://twitter.md
- description: me is a valid country tld
text: https://twitter.me
expected:
- https://twitter.me
- description: mf is a valid country tld
text: https://twitter.mf
expected:
- https://twitter.mf
- description: mg is a valid country tld
text: https://twitter.mg
expected:
- https://twitter.mg
- description: mh is a valid country tld
text: https://twitter.mh
expected:
- https://twitter.mh
- description: mk is a valid country tld
text: https://twitter.mk
expected:
- https://twitter.mk
- description: ml is a valid country tld
text: https://twitter.ml
expected:
- https://twitter.ml
- description: mm is a valid country tld
text: https://twitter.mm
expected:
- https://twitter.mm
- description: mn is a valid country tld
text: https://twitter.mn
expected:
- https://twitter.mn
- description: mo is a valid country tld
text: https://twitter.mo
expected:
- https://twitter.mo
- description: mp is a valid country tld
text: https://twitter.mp
expected:
- https://twitter.mp
- description: mq is a valid country tld
text: https://twitter.mq
expected:
- https://twitter.mq
- description: mr is a valid country tld
text: https://twitter.mr
expected:
- https://twitter.mr
- description: ms is a valid country tld
text: https://twitter.ms
expected:
- https://twitter.ms
- description: mt is a valid country tld
text: https://twitter.mt
expected:
- https://twitter.mt
- description: mu is a valid country tld
text: https://twitter.mu
expected:
- https://twitter.mu
- description: mv is a valid country tld
text: https://twitter.mv
expected:
- https://twitter.mv
- description: mw is a valid country tld
text: https://twitter.mw
expected:
- https://twitter.mw
- description: mx is a valid country tld
text: https://twitter.mx
expected:
- https://twitter.mx
- description: my is a valid country tld
text: https://twitter.my
expected:
- https://twitter.my
- description: mz is a valid country tld
text: https://twitter.mz
expected:
- https://twitter.mz
- description: na is a valid country tld
text: https://twitter.na
expected:
- https://twitter.na
- description: nc is a valid country tld
text: https://twitter.nc
expected:
- https://twitter.nc
- description: ne is a valid country tld
text: https://twitter.ne
expected:
- https://twitter.ne
- description: nf is a valid country tld
text: https://twitter.nf
expected:
- https://twitter.nf
- description: ng is a valid country tld
text: https://twitter.ng
expected:
- https://twitter.ng
- description: ni is a valid country tld
text: https://twitter.ni
expected:
- https://twitter.ni
- description: nl is a valid country tld
text: https://twitter.nl
expected:
- https://twitter.nl
- description: no is a valid country tld
text: https://twitter.no
expected:
- https://twitter.no
- description: np is a valid country tld
text: https://twitter.np
expected:
- https://twitter.np
- description: nr is a valid country tld
text: https://twitter.nr
expected:
- https://twitter.nr
- description: nu is a valid country tld
text: https://twitter.nu
expected:
- https://twitter.nu
- description: nz is a valid country tld
text: https://twitter.nz
expected:
- https://twitter.nz
- description: om is a valid country tld
text: https://twitter.om
expected:
- https://twitter.om
- description: pa is a valid country tld
text: https://twitter.pa
expected:
- https://twitter.pa
- description: pe is a valid country tld
text: https://twitter.pe
expected:
- https://twitter.pe
- description: pf is a valid country tld
text: https://twitter.pf
expected:
- https://twitter.pf
- description: pg is a valid country tld
text: https://twitter.pg
expected:
- https://twitter.pg
- description: ph is a valid country tld
text: https://twitter.ph
expected:
- https://twitter.ph
- description: pk is a valid country tld
text: https://twitter.pk
expected:
- https://twitter.pk
- description: pl is a valid country tld
text: https://twitter.pl
expected:
- https://twitter.pl
- description: pm is a valid country tld
text: https://twitter.pm
expected:
- https://twitter.pm
- description: pn is a valid country tld
text: https://twitter.pn
expected:
- https://twitter.pn
- description: pr is a valid country tld
text: https://twitter.pr
expected:
- https://twitter.pr
- description: ps is a valid country tld
text: https://twitter.ps
expected:
- https://twitter.ps
- description: pt is a valid country tld
text: https://twitter.pt
expected:
- https://twitter.pt
- description: pw is a valid country tld
text: https://twitter.pw
expected:
- https://twitter.pw
- description: py is a valid country tld
text: https://twitter.py
expected:
- https://twitter.py
- description: qa is a valid country tld
text: https://twitter.qa
expected:
- https://twitter.qa
- description: re is a valid country tld
text: https://twitter.re
expected:
- https://twitter.re
- description: ro is a valid country tld
text: https://twitter.ro
expected:
- https://twitter.ro
- description: rs is a valid country tld
text: https://twitter.rs
expected:
- https://twitter.rs
- description: ru is a valid country tld
text: https://twitter.ru
expected:
- https://twitter.ru
- description: rw is a valid country tld
text: https://twitter.rw
expected:
- https://twitter.rw
- description: sa is a valid country tld
text: https://twitter.sa
expected:
- https://twitter.sa
- description: sb is a valid country tld
text: https://twitter.sb
expected:
- https://twitter.sb
- description: sc is a valid country tld
text: https://twitter.sc
expected:
- https://twitter.sc
- description: sd is a valid country tld
text: https://twitter.sd
expected:
- https://twitter.sd
- description: se is a valid country tld
text: https://twitter.se
expected:
- https://twitter.se
- description: sg is a valid country tld
text: https://twitter.sg
expected:
- https://twitter.sg
- description: sh is a valid country tld
text: https://twitter.sh
expected:
- https://twitter.sh
- description: si is a valid country tld
text: https://twitter.si
expected:
- https://twitter.si
- description: sj is a valid country tld
text: https://twitter.sj
expected:
- https://twitter.sj
- description: sk is a valid country tld
text: https://twitter.sk
expected:
- https://twitter.sk
- description: sl is a valid country tld
text: https://twitter.sl
expected:
- https://twitter.sl
- description: sm is a valid country tld
text: https://twitter.sm
expected:
- https://twitter.sm
- description: sn is a valid country tld
text: https://twitter.sn
expected:
- https://twitter.sn
- description: so is a valid country tld
text: https://twitter.so
expected:
- https://twitter.so
- description: sr is a valid country tld
text: https://twitter.sr
expected:
- https://twitter.sr
- description: ss is a valid country tld
text: https://twitter.ss
expected:
- https://twitter.ss
- description: st is a valid country tld
text: https://twitter.st
expected:
- https://twitter.st
- description: su is a valid country tld
text: https://twitter.su
expected:
- https://twitter.su
- description: sv is a valid country tld
text: https://twitter.sv
expected:
- https://twitter.sv
- description: sx is a valid country tld
text: https://twitter.sx
expected:
- https://twitter.sx
- description: sy is a valid country tld
text: https://twitter.sy
expected:
- https://twitter.sy
- description: sz is a valid country tld
text: https://twitter.sz
expected:
- https://twitter.sz
- description: tc is a valid country tld
text: https://twitter.tc
expected:
- https://twitter.tc
- description: td is a valid country tld
text: https://twitter.td
expected:
- https://twitter.td
- description: tf is a valid country tld
text: https://twitter.tf
expected:
- https://twitter.tf
- description: tg is a valid country tld
text: https://twitter.tg
expected:
- https://twitter.tg
- description: th is a valid country tld
text: https://twitter.th
expected:
- https://twitter.th
- description: tj is a valid country tld
text: https://twitter.tj
expected:
- https://twitter.tj
- description: tk is a valid country tld
text: https://twitter.tk
expected:
- https://twitter.tk
- description: tl is a valid country tld
text: https://twitter.tl
expected:
- https://twitter.tl
- description: tm is a valid country tld
text: https://twitter.tm
expected:
- https://twitter.tm
- description: tn is a valid country tld
text: https://twitter.tn
expected:
- https://twitter.tn
- description: to is a valid country tld
text: https://twitter.to
expected:
- https://twitter.to
- description: tp is a valid country tld
text: https://twitter.tp
expected:
- https://twitter.tp
- description: tr is a valid country tld
text: https://twitter.tr
expected:
- https://twitter.tr
- description: tt is a valid country tld
text: https://twitter.tt
expected:
- https://twitter.tt
- description: tv is a valid country tld
text: https://twitter.tv
expected:
- https://twitter.tv
- description: tw is a valid country tld
text: https://twitter.tw
expected:
- https://twitter.tw
- description: tz is a valid country tld
text: https://twitter.tz
expected:
- https://twitter.tz
- description: ua is a valid country tld
text: https://twitter.ua
expected:
- https://twitter.ua
- description: ug is a valid country tld
text: https://twitter.ug
expected:
- https://twitter.ug
- description: uk is a valid country tld
text: https://twitter.uk
expected:
- https://twitter.uk
- description: um is a valid country tld
text: https://twitter.um
expected:
- https://twitter.um
- description: us is a valid country tld
text: https://twitter.us
expected:
- https://twitter.us
- description: uy is a valid country tld
text: https://twitter.uy
expected:
- https://twitter.uy
- description: uz is a valid country tld
text: https://twitter.uz
expected:
- https://twitter.uz
- description: va is a valid country tld
text: https://twitter.va
expected:
- https://twitter.va
- description: vc is a valid country tld
text: https://twitter.vc
expected:
- https://twitter.vc
- description: ve is a valid country tld
text: https://twitter.ve
expected:
- https://twitter.ve
- description: vg is a valid country tld
text: https://twitter.vg
expected:
- https://twitter.vg
- description: vi is a valid country tld
text: https://twitter.vi
expected:
- https://twitter.vi
- description: vn is a valid country tld
text: https://twitter.vn
expected:
- https://twitter.vn
- description: vu is a valid country tld
text: https://twitter.vu
expected:
- https://twitter.vu
- description: wf is a valid country tld
text: https://twitter.wf
expected:
- https://twitter.wf
- description: ws is a valid country tld
text: https://twitter.ws
expected:
- https://twitter.ws
- description: ye is a valid country tld
text: https://twitter.ye
expected:
- https://twitter.ye
- description: yt is a valid country tld
text: https://twitter.yt
expected:
- https://twitter.yt
- description: za is a valid country tld
text: https://twitter.za
expected:
- https://twitter.za
- description: zm is a valid country tld
text: https://twitter.zm
expected:
- https://twitter.zm
- description: zw is a valid country tld
text: https://twitter.zw
expected:
- https://twitter.zw
- description: "ελ is a valid country tld"
text: https://twitter.ελ
expected:
- https://twitter.ελ
- description: "бел is a valid country tld"
text: https://twitter.бел
expected:
- https://twitter.бел
- description: "мкд is a valid country tld"
text: https://twitter.мкд
expected:
- https://twitter.мкд
- description: "мон is a valid country tld"
text: https://twitter.мон
expected:
- https://twitter.мон
- description: "рф is a valid country tld"
text: https://twitter.рф
expected:
- https://twitter.рф
- description: "срб is a valid country tld"
text: https://twitter.срб
expected:
- https://twitter.срб
- description: "укр is a valid country tld"
text: https://twitter.укр
expected:
- https://twitter.укр
- description: "қаз is a valid country tld"
text: https://twitter.қаз
expected:
- https://twitter.қаз
- description: "հայ is a valid country tld"
text: https://twitter.հայ
expected:
- https://twitter.հայ
- description: "الاردن is a valid country tld"
text: https://twitter.الاردن
expected:
- https://twitter.الاردن
- description: "الجزائر is a valid country tld"
text: https://twitter.الجزائر
expected:
- https://twitter.الجزائر
- description: "السعودية is a valid country tld"
text: https://twitter.السعودية
expected:
- https://twitter.السعودية
- description: "المغرب is a valid country tld"
text: https://twitter.المغرب
expected:
- https://twitter.المغرب
- description: "امارات is a valid country tld"
text: https://twitter.امارات
expected:
- https://twitter.امارات
- description: "ایران is a valid country tld"
text: https://twitter.ایران
expected:
- https://twitter.ایران
- description: "بھارت is a valid country tld"
text: https://twitter.بھارت
expected:
- https://twitter.بھارت
- description: "تونس is a valid country tld"
text: https://twitter.تونس
expected:
- https://twitter.تونس
- description: "سودان is a valid country tld"
text: https://twitter.سودان
expected:
- https://twitter.سودان
- description: "سورية is a valid country tld"
text: https://twitter.سورية
expected:
- https://twitter.سورية
- description: "عراق is a valid country tld"
text: https://twitter.عراق
expected:
- https://twitter.عراق
- description: "عمان is a valid country tld"
text: https://twitter.عمان
expected:
- https://twitter.عمان
- description: "فلسطين is a valid country tld"
text: https://twitter.فلسطين
expected:
- https://twitter.فلسطين
- description: "قطر is a valid country tld"
text: https://twitter.قطر
expected:
- https://twitter.قطر
- description: "مصر is a valid country tld"
text: https://twitter.مصر
expected:
- https://twitter.مصر
- description: "مليسيا is a valid country tld"
text: https://twitter.مليسيا
expected:
- https://twitter.مليسيا
- description: "پاکستان is a valid country tld"
text: https://twitter.پاکستان
expected:
- https://twitter.پاکستان
- description: "भारत is a valid country tld"
text: https://twitter.भारत
expected:
- https://twitter.भारत
- description: "বাংলা is a valid country tld"
text: https://twitter.বাংলা
expected:
- https://twitter.বাংলা
- description: "ভারত is a valid country tld"
text: https://twitter.ভারত
expected:
- https://twitter.ভারত
- description: "ਭਾਰਤ is a valid country tld"
text: https://twitter.ਭਾਰਤ
expected:
- https://twitter.ਭਾਰਤ
- description: "ભારત is a valid country tld"
text: https://twitter.ભારત
expected:
- https://twitter.ભારત
- description: "இந்தியா is a valid country tld"
text: https://twitter.இந்தியா
expected:
- https://twitter.இந்தியா
- description: "இலங்கை is a valid country tld"
text: https://twitter.இலங்கை
expected:
- https://twitter.இலங்கை
- description: "சிங்கப்பூர் is a valid country tld"
text: https://twitter.சிங்கப்பூர்
expected:
- https://twitter.சிங்கப்பூர்
- description: "భారత్ is a valid country tld"
text: https://twitter.భారత్
expected:
- https://twitter.భారత్
- description: "ලංකා is a valid country tld"
text: https://twitter.ලංකා
expected:
- https://twitter.ලංකා
- description: "ไทย is a valid country tld"
text: https://twitter.ไทย
expected:
- https://twitter.ไทย
- description: "გე is a valid country tld"
text: https://twitter.გე
expected:
- https://twitter.გე
- description: "中国 is a valid country tld"
text: https://twitter.中国
expected:
- https://twitter.中国
- description: "中國 is a valid country tld"
text: https://twitter.中國
expected:
- https://twitter.中國
- description: "台湾 is a valid country tld"
text: https://twitter.台湾
expected:
- https://twitter.台湾
- description: "台灣 is a valid country tld"
text: https://twitter.台灣
expected:
- https://twitter.台灣
- description: "新加坡 is a valid country tld"
text: https://twitter.新加坡
expected:
- https://twitter.新加坡
- description: "澳門 is a valid country tld"
text: https://twitter.澳門
expected:
- https://twitter.澳門
- description: "香港 is a valid country tld"
text: https://twitter.香港
expected:
- https://twitter.香港
- description: "한국 is a valid country tld"
text: https://twitter.한국
expected:
- https://twitter.한국
generic:
- description: abb is a valid generic tld
text: https://twitter.abb
expected:
- https://twitter.abb
- description: abbott is a valid generic tld
text: https://twitter.abbott
expected:
- https://twitter.abbott
- description: abogado is a valid generic tld
text: https://twitter.abogado
expected:
- https://twitter.abogado
- description: academy is a valid generic tld
text: https://twitter.academy
expected:
- https://twitter.academy
- description: accenture is a valid generic tld
text: https://twitter.accenture
expected:
- https://twitter.accenture
- description: accountant is a valid generic tld
text: https://twitter.accountant
expected:
- https://twitter.accountant
- description: accountants is a valid generic tld
text: https://twitter.accountants
expected:
- https://twitter.accountants
- description: aco is a valid generic tld
text: https://twitter.aco
expected:
- https://twitter.aco
- description: active is a valid generic tld
text: https://twitter.active
expected:
- https://twitter.active
- description: actor is a valid generic tld
text: https://twitter.actor
expected:
- https://twitter.actor
- description: ads is a valid generic tld
text: https://twitter.ads
expected:
- https://twitter.ads
- description: adult is a valid generic tld
text: https://twitter.adult
expected:
- https://twitter.adult
- description: aeg is a valid generic tld
text: https://twitter.aeg
expected:
- https://twitter.aeg
- description: aero is a valid generic tld
text: https://twitter.aero
expected:
- https://twitter.aero
- description: afl is a valid generic tld
text: https://twitter.afl
expected:
- https://twitter.afl
- description: agency is a valid generic tld
text: https://twitter.agency
expected:
- https://twitter.agency
- description: aig is a valid generic tld
text: https://twitter.aig
expected:
- https://twitter.aig
- description: airforce is a valid generic tld
text: https://twitter.airforce
expected:
- https://twitter.airforce
- description: airtel is a valid generic tld
text: https://twitter.airtel
expected:
- https://twitter.airtel
- description: allfinanz is a valid generic tld
text: https://twitter.allfinanz
expected:
- https://twitter.allfinanz
- description: alsace is a valid generic tld
text: https://twitter.alsace
expected:
- https://twitter.alsace
- description: amsterdam is a valid generic tld
text: https://twitter.amsterdam
expected:
- https://twitter.amsterdam
- description: android is a valid generic tld
text: https://twitter.android
expected:
- https://twitter.android
- description: apartments is a valid generic tld
text: https://twitter.apartments
expected:
- https://twitter.apartments
- description: app is a valid generic tld
text: https://twitter.app
expected:
- https://twitter.app
- description: aquarelle is a valid generic tld
text: https://twitter.aquarelle
expected:
- https://twitter.aquarelle
- description: archi is a valid generic tld
text: https://twitter.archi
expected:
- https://twitter.archi
- description: army is a valid generic tld
text: https://twitter.army
expected:
- https://twitter.army
- description: arpa is a valid generic tld
text: https://twitter.arpa
expected:
- https://twitter.arpa
- description: asia is a valid generic tld
text: https://twitter.asia
expected:
- https://twitter.asia
- description: associates is a valid generic tld
text: https://twitter.associates
expected:
- https://twitter.associates
- description: attorney is a valid generic tld
text: https://twitter.attorney
expected:
- https://twitter.attorney
- description: auction is a valid generic tld
text: https://twitter.auction
expected:
- https://twitter.auction
- description: audio is a valid generic tld
text: https://twitter.audio
expected:
- https://twitter.audio
- description: auto is a valid generic tld
text: https://twitter.auto
expected:
- https://twitter.auto
- description: autos is a valid generic tld
text: https://twitter.autos
expected:
- https://twitter.autos
- description: axa is a valid generic tld
text: https://twitter.axa
expected:
- https://twitter.axa
- description: azure is a valid generic tld
text: https://twitter.azure
expected:
- https://twitter.azure
- description: band is a valid generic tld
text: https://twitter.band
expected:
- https://twitter.band
- description: bank is a valid generic tld
text: https://twitter.bank
expected:
- https://twitter.bank
- description: bar is a valid generic tld
text: https://twitter.bar
expected:
- https://twitter.bar
- description: barcelona is a valid generic tld
text: https://twitter.barcelona
expected:
- https://twitter.barcelona
- description: barclaycard is a valid generic tld
text: https://twitter.barclaycard
expected:
- https://twitter.barclaycard
- description: barclays is a valid generic tld
text: https://twitter.barclays
expected:
- https://twitter.barclays
- description: bargains is a valid generic tld
text: https://twitter.bargains
expected:
- https://twitter.bargains
- description: bauhaus is a valid generic tld
text: https://twitter.bauhaus
expected:
- https://twitter.bauhaus
- description: bayern is a valid generic tld
text: https://twitter.bayern
expected:
- https://twitter.bayern
- description: bbc is a valid generic tld
text: https://twitter.bbc
expected:
- https://twitter.bbc
- description: bbva is a valid generic tld
text: https://twitter.bbva
expected:
- https://twitter.bbva
- description: bcn is a valid generic tld
text: https://twitter.bcn
expected:
- https://twitter.bcn
- description: beer is a valid generic tld
text: https://twitter.beer
expected:
- https://twitter.beer
- description: bentley is a valid generic tld
text: https://twitter.bentley
expected:
- https://twitter.bentley
- description: berlin is a valid generic tld
text: https://twitter.berlin
expected:
- https://twitter.berlin
- description: best is a valid generic tld
text: https://twitter.best
expected:
- https://twitter.best
- description: bet is a valid generic tld
text: https://twitter.bet
expected:
- https://twitter.bet
- description: bharti is a valid generic tld
text: https://twitter.bharti
expected:
- https://twitter.bharti
- description: bible is a valid generic tld
text: https://twitter.bible
expected:
- https://twitter.bible
- description: bid is a valid generic tld
text: https://twitter.bid
expected:
- https://twitter.bid
- description: bike is a valid generic tld
text: https://twitter.bike
expected:
- https://twitter.bike
- description: bing is a valid generic tld
text: https://twitter.bing
expected:
- https://twitter.bing
- description: bingo is a valid generic tld
text: https://twitter.bingo
expected:
- https://twitter.bingo
- description: bio is a valid generic tld
text: https://twitter.bio
expected:
- https://twitter.bio
- description: biz is a valid generic tld
text: https://twitter.biz
expected:
- https://twitter.biz
- description: black is a valid generic tld
text: https://twitter.black
expected:
- https://twitter.black
- description: blackfriday is a valid generic tld
text: https://twitter.blackfriday
expected:
- https://twitter.blackfriday
- description: bloomberg is a valid generic tld
text: https://twitter.bloomberg
expected:
- https://twitter.bloomberg
- description: blue is a valid generic tld
text: https://twitter.blue
expected:
- https://twitter.blue
- description: bmw is a valid generic tld
text: https://twitter.bmw
expected:
- https://twitter.bmw
- description: bnl is a valid generic tld
text: https://twitter.bnl
expected:
- https://twitter.bnl
- description: bnpparibas is a valid generic tld
text: https://twitter.bnpparibas
expected:
- https://twitter.bnpparibas
- description: boats is a valid generic tld
text: https://twitter.boats
expected:
- https://twitter.boats
- description: bond is a valid generic tld
text: https://twitter.bond
expected:
- https://twitter.bond
- description: boo is a valid generic tld
text: https://twitter.boo
expected:
- https://twitter.boo
- description: boots is a valid generic tld
text: https://twitter.boots
expected:
- https://twitter.boots
- description: boutique is a valid generic tld
text: https://twitter.boutique
expected:
- https://twitter.boutique
- description: bradesco is a valid generic tld
text: https://twitter.bradesco
expected:
- https://twitter.bradesco
- description: bridgestone is a valid generic tld
text: https://twitter.bridgestone
expected:
- https://twitter.bridgestone
- description: broker is a valid generic tld
text: https://twitter.broker
expected:
- https://twitter.broker
- description: brother is a valid generic tld
text: https://twitter.brother
expected:
- https://twitter.brother
- description: brussels is a valid generic tld
text: https://twitter.brussels
expected:
- https://twitter.brussels
- description: budapest is a valid generic tld
text: https://twitter.budapest
expected:
- https://twitter.budapest
- description: build is a valid generic tld
text: https://twitter.build
expected:
- https://twitter.build
- description: builders is a valid generic tld
text: https://twitter.builders
expected:
- https://twitter.builders
- description: business is a valid generic tld
text: https://twitter.business
expected:
- https://twitter.business
- description: buzz is a valid generic tld
text: https://twitter.buzz
expected:
- https://twitter.buzz
- description: bzh is a valid generic tld
text: https://twitter.bzh
expected:
- https://twitter.bzh
- description: cab is a valid generic tld
text: https://twitter.cab
expected:
- https://twitter.cab
- description: cafe is a valid generic tld
text: https://twitter.cafe
expected:
- https://twitter.cafe
- description: cal is a valid generic tld
text: https://twitter.cal
expected:
- https://twitter.cal
- description: camera is a valid generic tld
text: https://twitter.camera
expected:
- https://twitter.camera
- description: camp is a valid generic tld
text: https://twitter.camp
expected:
- https://twitter.camp
- description: cancerresearch is a valid generic tld
text: https://twitter.cancerresearch
expected:
- https://twitter.cancerresearch
- description: canon is a valid generic tld
text: https://twitter.canon
expected:
- https://twitter.canon
- description: capetown is a valid generic tld
text: https://twitter.capetown
expected:
- https://twitter.capetown
- description: capital is a valid generic tld
text: https://twitter.capital
expected:
- https://twitter.capital
- description: caravan is a valid generic tld
text: https://twitter.caravan
expected:
- https://twitter.caravan
- description: cards is a valid generic tld
text: https://twitter.cards
expected:
- https://twitter.cards
- description: care is a valid generic tld
text: https://twitter.care
expected:
- https://twitter.care
- description: career is a valid generic tld
text: https://twitter.career
expected:
- https://twitter.career
- description: careers is a valid generic tld
text: https://twitter.careers
expected:
- https://twitter.careers
- description: cars is a valid generic tld
text: https://twitter.cars
expected:
- https://twitter.cars
- description: cartier is a valid generic tld
text: https://twitter.cartier
expected:
- https://twitter.cartier
- description: casa is a valid generic tld
text: https://twitter.casa
expected:
- https://twitter.casa
- description: cash is a valid generic tld
text: https://twitter.cash
expected:
- https://twitter.cash
- description: casino is a valid generic tld
text: https://twitter.casino
expected:
- https://twitter.casino
- description: cat is a valid generic tld
text: https://twitter.cat
expected:
- https://twitter.cat
- description: catering is a valid generic tld
text: https://twitter.catering
expected:
- https://twitter.catering
- description: cba is a valid generic tld
text: https://twitter.cba
expected:
- https://twitter.cba
- description: cbn is a valid generic tld
text: https://twitter.cbn
expected:
- https://twitter.cbn
- description: ceb is a valid generic tld
text: https://twitter.ceb
expected:
- https://twitter.ceb
- description: center is a valid generic tld
text: https://twitter.center
expected:
- https://twitter.center
- description: ceo is a valid generic tld
text: https://twitter.ceo
expected:
- https://twitter.ceo
- description: cern is a valid generic tld
text: https://twitter.cern
expected:
- https://twitter.cern
- description: cfa is a valid generic tld
text: https://twitter.cfa
expected:
- https://twitter.cfa
- description: cfd is a valid generic tld
text: https://twitter.cfd
expected:
- https://twitter.cfd
- description: chanel is a valid generic tld
text: https://twitter.chanel
expected:
- https://twitter.chanel
- description: channel is a valid generic tld
text: https://twitter.channel
expected:
- https://twitter.channel
- description: chat is a valid generic tld
text: https://twitter.chat
expected:
- https://twitter.chat
- description: cheap is a valid generic tld
text: https://twitter.cheap
expected:
- https://twitter.cheap
- description: chloe is a valid generic tld
text: https://twitter.chloe
expected:
- https://twitter.chloe
- description: christmas is a valid generic tld
text: https://twitter.christmas
expected:
- https://twitter.christmas
- description: chrome is a valid generic tld
text: https://twitter.chrome
expected:
- https://twitter.chrome
- description: church is a valid generic tld
text: https://twitter.church
expected:
- https://twitter.church
- description: cisco is a valid generic tld
text: https://twitter.cisco
expected:
- https://twitter.cisco
- description: citic is a valid generic tld
text: https://twitter.citic
expected:
- https://twitter.citic
- description: city is a valid generic tld
text: https://twitter.city
expected:
- https://twitter.city
- description: claims is a valid generic tld
text: https://twitter.claims
expected:
- https://twitter.claims
- description: cleaning is a valid generic tld
text: https://twitter.cleaning
expected:
- https://twitter.cleaning
- description: click is a valid generic tld
text: https://twitter.click
expected:
- https://twitter.click
- description: clinic is a valid generic tld
text: https://twitter.clinic
expected:
- https://twitter.clinic
- description: clothing is a valid generic tld
text: https://twitter.clothing
expected:
- https://twitter.clothing
- description: cloud is a valid generic tld
text: https://twitter.cloud
expected:
- https://twitter.cloud
- description: club is a valid generic tld
text: https://twitter.club
expected:
- https://twitter.club
- description: coach is a valid generic tld
text: https://twitter.coach
expected:
- https://twitter.coach
- description: codes is a valid generic tld
text: https://twitter.codes
expected:
- https://twitter.codes
- description: coffee is a valid generic tld
text: https://twitter.coffee
expected:
- https://twitter.coffee
- description: college is a valid generic tld
text: https://twitter.college
expected:
- https://twitter.college
- description: cologne is a valid generic tld
text: https://twitter.cologne
expected:
- https://twitter.cologne
- description: com is a valid generic tld
text: https://twitter.com
expected:
- https://twitter.com
- description: commbank is a valid generic tld
text: https://twitter.commbank
expected:
- https://twitter.commbank
- description: community is a valid generic tld
text: https://twitter.community
expected:
- https://twitter.community
- description: company is a valid generic tld
text: https://twitter.company
expected:
- https://twitter.company
- description: computer is a valid generic tld
text: https://twitter.computer
expected:
- https://twitter.computer
- description: condos is a valid generic tld
text: https://twitter.condos
expected:
- https://twitter.condos
- description: construction is a valid generic tld
text: https://twitter.construction
expected:
- https://twitter.construction
- description: consulting is a valid generic tld
text: https://twitter.consulting
expected:
- https://twitter.consulting
- description: contractors is a valid generic tld
text: https://twitter.contractors
expected:
- https://twitter.contractors
- description: cooking is a valid generic tld
text: https://twitter.cooking
expected:
- https://twitter.cooking
- description: cool is a valid generic tld
text: https://twitter.cool
expected:
- https://twitter.cool
- description: coop is a valid generic tld
text: https://twitter.coop
expected:
- https://twitter.coop
- description: corsica is a valid generic tld
text: https://twitter.corsica
expected:
- https://twitter.corsica
- description: country is a valid generic tld
text: https://twitter.country
expected:
- https://twitter.country
- description: coupons is a valid generic tld
text: https://twitter.coupons
expected:
- https://twitter.coupons
- description: courses is a valid generic tld
text: https://twitter.courses
expected:
- https://twitter.courses
- description: credit is a valid generic tld
text: https://twitter.credit
expected:
- https://twitter.credit
- description: creditcard is a valid generic tld
text: https://twitter.creditcard
expected:
- https://twitter.creditcard
- description: cricket is a valid generic tld
text: https://twitter.cricket
expected:
- https://twitter.cricket
- description: crown is a valid generic tld
text: https://twitter.crown
expected:
- https://twitter.crown
- description: crs is a valid generic tld
text: https://twitter.crs
expected:
- https://twitter.crs
- description: cruises is a valid generic tld
text: https://twitter.cruises
expected:
- https://twitter.cruises
- description: cuisinella is a valid generic tld
text: https://twitter.cuisinella
expected:
- https://twitter.cuisinella
- description: cymru is a valid generic tld
text: https://twitter.cymru
expected:
- https://twitter.cymru
- description: cyou is a valid generic tld
text: https://twitter.cyou
expected:
- https://twitter.cyou
- description: dabur is a valid generic tld
text: https://twitter.dabur
expected:
- https://twitter.dabur
- description: dad is a valid generic tld
text: https://twitter.dad
expected:
- https://twitter.dad
- description: dance is a valid generic tld
text: https://twitter.dance
expected:
- https://twitter.dance
- description: date is a valid generic tld
text: https://twitter.date
expected:
- https://twitter.date
- description: dating is a valid generic tld
text: https://twitter.dating
expected:
- https://twitter.dating
- description: datsun is a valid generic tld
text: https://twitter.datsun
expected:
- https://twitter.datsun
- description: day is a valid generic tld
text: https://twitter.day
expected:
- https://twitter.day
- description: dclk is a valid generic tld
text: https://twitter.dclk
expected:
- https://twitter.dclk
- description: deals is a valid generic tld
text: https://twitter.deals
expected:
- https://twitter.deals
- description: degree is a valid generic tld
text: https://twitter.degree
expected:
- https://twitter.degree
- description: delivery is a valid generic tld
text: https://twitter.delivery
expected:
- https://twitter.delivery
- description: delta is a valid generic tld
text: https://twitter.delta
expected:
- https://twitter.delta
- description: democrat is a valid generic tld
text: https://twitter.democrat
expected:
- https://twitter.democrat
- description: dental is a valid generic tld
text: https://twitter.dental
expected:
- https://twitter.dental
- description: dentist is a valid generic tld
text: https://twitter.dentist
expected:
- https://twitter.dentist
- description: desi is a valid generic tld
text: https://twitter.desi
expected:
- https://twitter.desi
- description: design is a valid generic tld
text: https://twitter.design
expected:
- https://twitter.design
- description: dev is a valid generic tld
text: https://twitter.dev
expected:
- https://twitter.dev
- description: diamonds is a valid generic tld
text: https://twitter.diamonds
expected:
- https://twitter.diamonds
- description: diet is a valid generic tld
text: https://twitter.diet
expected:
- https://twitter.diet
- description: digital is a valid generic tld
text: https://twitter.digital
expected:
- https://twitter.digital
- description: direct is a valid generic tld
text: https://twitter.direct
expected:
- https://twitter.direct
- description: directory is a valid generic tld
text: https://twitter.directory
expected:
- https://twitter.directory
- description: discount is a valid generic tld
text: https://twitter.discount
expected:
- https://twitter.discount
- description: dnp is a valid generic tld
text: https://twitter.dnp
expected:
- https://twitter.dnp
- description: docs is a valid generic tld
text: https://twitter.docs
expected:
- https://twitter.docs
- description: dog is a valid generic tld
text: https://twitter.dog
expected:
- https://twitter.dog
- description: doha is a valid generic tld
text: https://twitter.doha
expected:
- https://twitter.doha
- description: domains is a valid generic tld
text: https://twitter.domains
expected:
- https://twitter.domains
- description: doosan is a valid generic tld
text: https://twitter.doosan
expected:
- https://twitter.doosan
- description: download is a valid generic tld
text: https://twitter.download
expected:
- https://twitter.download
- description: drive is a valid generic tld
text: https://twitter.drive
expected:
- https://twitter.drive
- description: durban is a valid generic tld
text: https://twitter.durban
expected:
- https://twitter.durban
- description: dvag is a valid generic tld
text: https://twitter.dvag
expected:
- https://twitter.dvag
- description: earth is a valid generic tld
text: https://twitter.earth
expected:
- https://twitter.earth
- description: eat is a valid generic tld
text: https://twitter.eat
expected:
- https://twitter.eat
- description: edu is a valid generic tld
text: https://twitter.edu
expected:
- https://twitter.edu
- description: education is a valid generic tld
text: https://twitter.education
expected:
- https://twitter.education
- description: email is a valid generic tld
text: https://twitter.email
expected:
- https://twitter.email
- description: emerck is a valid generic tld
text: https://twitter.emerck
expected:
- https://twitter.emerck
- description: energy is a valid generic tld
text: https://twitter.energy
expected:
- https://twitter.energy
- description: engineer is a valid generic tld
text: https://twitter.engineer
expected:
- https://twitter.engineer
- description: engineering is a valid generic tld
text: https://twitter.engineering
expected:
- https://twitter.engineering
- description: enterprises is a valid generic tld
text: https://twitter.enterprises
expected:
- https://twitter.enterprises
- description: epson is a valid generic tld
text: https://twitter.epson
expected:
- https://twitter.epson
- description: equipment is a valid generic tld
text: https://twitter.equipment
expected:
- https://twitter.equipment
- description: erni is a valid generic tld
text: https://twitter.erni
expected:
- https://twitter.erni
- description: esq is a valid generic tld
text: https://twitter.esq
expected:
- https://twitter.esq
- description: estate is a valid generic tld
text: https://twitter.estate
expected:
- https://twitter.estate
- description: eurovision is a valid generic tld
text: https://twitter.eurovision
expected:
- https://twitter.eurovision
- description: eus is a valid generic tld
text: https://twitter.eus
expected:
- https://twitter.eus
- description: events is a valid generic tld
text: https://twitter.events
expected:
- https://twitter.events
- description: everbank is a valid generic tld
text: https://twitter.everbank
expected:
- https://twitter.everbank
- description: exchange is a valid generic tld
text: https://twitter.exchange
expected:
- https://twitter.exchange
- description: expert is a valid generic tld
text: https://twitter.expert
expected:
- https://twitter.expert
- description: exposed is a valid generic tld
text: https://twitter.exposed
expected:
- https://twitter.exposed
- description: express is a valid generic tld
text: https://twitter.express
expected:
- https://twitter.express
- description: fage is a valid generic tld
text: https://twitter.fage
expected:
- https://twitter.fage
- description: fail is a valid generic tld
text: https://twitter.fail
expected:
- https://twitter.fail
- description: faith is a valid generic tld
text: https://twitter.faith
expected:
- https://twitter.faith
- description: family is a valid generic tld
text: https://twitter.family
expected:
- https://twitter.family
- description: fan is a valid generic tld
text: https://twitter.fan
expected:
- https://twitter.fan
- description: fans is a valid generic tld
text: https://twitter.fans
expected:
- https://twitter.fans
- description: farm is a valid generic tld
text: https://twitter.farm
expected:
- https://twitter.farm
- description: fashion is a valid generic tld
text: https://twitter.fashion
expected:
- https://twitter.fashion
- description: feedback is a valid generic tld
text: https://twitter.feedback
expected:
- https://twitter.feedback
- description: film is a valid generic tld
text: https://twitter.film
expected:
- https://twitter.film
- description: finance is a valid generic tld
text: https://twitter.finance
expected:
- https://twitter.finance
- description: financial is a valid generic tld
text: https://twitter.financial
expected:
- https://twitter.financial
- description: firmdale is a valid generic tld
text: https://twitter.firmdale
expected:
- https://twitter.firmdale
- description: fish is a valid generic tld
text: https://twitter.fish
expected:
- https://twitter.fish
- description: fishing is a valid generic tld
text: https://twitter.fishing
expected:
- https://twitter.fishing
- description: fit is a valid generic tld
text: https://twitter.fit
expected:
- https://twitter.fit
- description: fitness is a valid generic tld
text: https://twitter.fitness
expected:
- https://twitter.fitness
- description: flights is a valid generic tld
text: https://twitter.flights
expected:
- https://twitter.flights
- description: florist is a valid generic tld
text: https://twitter.florist
expected:
- https://twitter.florist
- description: flowers is a valid generic tld
text: https://twitter.flowers
expected:
- https://twitter.flowers
- description: flsmidth is a valid generic tld
text: https://twitter.flsmidth
expected:
- https://twitter.flsmidth
- description: fly is a valid generic tld
text: https://twitter.fly
expected:
- https://twitter.fly
- description: foo is a valid generic tld
text: https://twitter.foo
expected:
- https://twitter.foo
- description: football is a valid generic tld
text: https://twitter.football
expected:
- https://twitter.football
- description: forex is a valid generic tld
text: https://twitter.forex
expected:
- https://twitter.forex
- description: forsale is a valid generic tld
text: https://twitter.forsale
expected:
- https://twitter.forsale
- description: forum is a valid generic tld
text: https://twitter.forum
expected:
- https://twitter.forum
- description: foundation is a valid generic tld
text: https://twitter.foundation
expected:
- https://twitter.foundation
- description: frl is a valid generic tld
text: https://twitter.frl
expected:
- https://twitter.frl
- description: frogans is a valid generic tld
text: https://twitter.frogans
expected:
- https://twitter.frogans
- description: fund is a valid generic tld
text: https://twitter.fund
expected:
- https://twitter.fund
- description: furniture is a valid generic tld
text: https://twitter.furniture
expected:
- https://twitter.furniture
- description: futbol is a valid generic tld
text: https://twitter.futbol
expected:
- https://twitter.futbol
- description: fyi is a valid generic tld
text: https://twitter.fyi
expected:
- https://twitter.fyi
- description: gal is a valid generic tld
text: https://twitter.gal
expected:
- https://twitter.gal
- description: gallery is a valid generic tld
text: https://twitter.gallery
expected:
- https://twitter.gallery
- description: game is a valid generic tld
text: https://twitter.game
expected:
- https://twitter.game
- description: garden is a valid generic tld
text: https://twitter.garden
expected:
- https://twitter.garden
- description: gbiz is a valid generic tld
text: https://twitter.gbiz
expected:
- https://twitter.gbiz
- description: gdn is a valid generic tld
text: https://twitter.gdn
expected:
- https://twitter.gdn
- description: gent is a valid generic tld
text: https://twitter.gent
expected:
- https://twitter.gent
- description: genting is a valid generic tld
text: https://twitter.genting
expected:
- https://twitter.genting
- description: ggee is a valid generic tld
text: https://twitter.ggee
expected:
- https://twitter.ggee
- description: gift is a valid generic tld
text: https://twitter.gift
expected:
- https://twitter.gift
- description: gifts is a valid generic tld
text: https://twitter.gifts
expected:
- https://twitter.gifts
- description: gives is a valid generic tld
text: https://twitter.gives
expected:
- https://twitter.gives
- description: giving is a valid generic tld
text: https://twitter.giving
expected:
- https://twitter.giving
- description: glass is a valid generic tld
text: https://twitter.glass
expected:
- https://twitter.glass
- description: gle is a valid generic tld
text: https://twitter.gle
expected:
- https://twitter.gle
- description: global is a valid generic tld
text: https://twitter.global
expected:
- https://twitter.global
- description: globo is a valid generic tld
text: https://twitter.globo
expected:
- https://twitter.globo
- description: gmail is a valid generic tld
text: https://twitter.gmail
expected:
- https://twitter.gmail
- description: gmo is a valid generic tld
text: https://twitter.gmo
expected:
- https://twitter.gmo
- description: gmx is a valid generic tld
text: https://twitter.gmx
expected:
- https://twitter.gmx
- description: gold is a valid generic tld
text: https://twitter.gold
expected:
- https://twitter.gold
- description: goldpoint is a valid generic tld
text: https://twitter.goldpoint
expected:
- https://twitter.goldpoint
- description: golf is a valid generic tld
text: https://twitter.golf
expected:
- https://twitter.golf
- description: goo is a valid generic tld
text: https://twitter.goo
expected:
- https://twitter.goo
- description: goog is a valid generic tld
text: https://twitter.goog
expected:
- https://twitter.goog
- description: google is a valid generic tld
text: https://twitter.google
expected:
- https://twitter.google
- description: gop is a valid generic tld
text: https://twitter.gop
expected:
- https://twitter.gop
- description: gov is a valid generic tld
text: https://twitter.gov
expected:
- https://twitter.gov
- description: graphics is a valid generic tld
text: https://twitter.graphics
expected:
- https://twitter.graphics
- description: gratis is a valid generic tld
text: https://twitter.gratis
expected:
- https://twitter.gratis
- description: green is a valid generic tld
text: https://twitter.green
expected:
- https://twitter.green
- description: gripe is a valid generic tld
text: https://twitter.gripe
expected:
- https://twitter.gripe
- description: group is a valid generic tld
text: https://twitter.group
expected:
- https://twitter.group
- description: guge is a valid generic tld
text: https://twitter.guge
expected:
- https://twitter.guge
- description: guide is a valid generic tld
text: https://twitter.guide
expected:
- https://twitter.guide
- description: guitars is a valid generic tld
text: https://twitter.guitars
expected:
- https://twitter.guitars
- description: guru is a valid generic tld
text: https://twitter.guru
expected:
- https://twitter.guru
- description: hamburg is a valid generic tld
text: https://twitter.hamburg
expected:
- https://twitter.hamburg
- description: hangout is a valid generic tld
text: https://twitter.hangout
expected:
- https://twitter.hangout
- description: haus is a valid generic tld
text: https://twitter.haus
expected:
- https://twitter.haus
- description: healthcare is a valid generic tld
text: https://twitter.healthcare
expected:
- https://twitter.healthcare
- description: help is a valid generic tld
text: https://twitter.help
expected:
- https://twitter.help
- description: here is a valid generic tld
text: https://twitter.here
expected:
- https://twitter.here
- description: hermes is a valid generic tld
text: https://twitter.hermes
expected:
- https://twitter.hermes
- description: hiphop is a valid generic tld
text: https://twitter.hiphop
expected:
- https://twitter.hiphop
- description: hitachi is a valid generic tld
text: https://twitter.hitachi
expected:
- https://twitter.hitachi
- description: hiv is a valid generic tld
text: https://twitter.hiv
expected:
- https://twitter.hiv
- description: hockey is a valid generic tld
text: https://twitter.hockey
expected:
- https://twitter.hockey
- description: holdings is a valid generic tld
text: https://twitter.holdings
expected:
- https://twitter.holdings
- description: holiday is a valid generic tld
text: https://twitter.holiday
expected:
- https://twitter.holiday
- description: homedepot is a valid generic tld
text: https://twitter.homedepot
expected:
- https://twitter.homedepot
- description: homes is a valid generic tld
text: https://twitter.homes
expected:
- https://twitter.homes
- description: honda is a valid generic tld
text: https://twitter.honda
expected:
- https://twitter.honda
- description: horse is a valid generic tld
text: https://twitter.horse
expected:
- https://twitter.horse
- description: host is a valid generic tld
text: https://twitter.host
expected:
- https://twitter.host
- description: hosting is a valid generic tld
text: https://twitter.hosting
expected:
- https://twitter.hosting
- description: hoteles is a valid generic tld
text: https://twitter.hoteles
expected:
- https://twitter.hoteles
- description: hotmail is a valid generic tld
text: https://twitter.hotmail
expected:
- https://twitter.hotmail
- description: house is a valid generic tld
text: https://twitter.house
expected:
- https://twitter.house
- description: how is a valid generic tld
text: https://twitter.how
expected:
- https://twitter.how
- description: hsbc is a valid generic tld
text: https://twitter.hsbc
expected:
- https://twitter.hsbc
- description: ibm is a valid generic tld
text: https://twitter.ibm
expected:
- https://twitter.ibm
- description: icbc is a valid generic tld
text: https://twitter.icbc
expected:
- https://twitter.icbc
- description: ice is a valid generic tld
text: https://twitter.ice
expected:
- https://twitter.ice
- description: icu is a valid generic tld
text: https://twitter.icu
expected:
- https://twitter.icu
- description: ifm is a valid generic tld
text: https://twitter.ifm
expected:
- https://twitter.ifm
- description: iinet is a valid generic tld
text: https://twitter.iinet
expected:
- https://twitter.iinet
- description: immo is a valid generic tld
text: https://twitter.immo
expected:
- https://twitter.immo
- description: immobilien is a valid generic tld
text: https://twitter.immobilien
expected:
- https://twitter.immobilien
- description: industries is a valid generic tld
text: https://twitter.industries
expected:
- https://twitter.industries
- description: infiniti is a valid generic tld
text: https://twitter.infiniti
expected:
- https://twitter.infiniti
- description: info is a valid generic tld
text: https://twitter.info
expected:
- https://twitter.info
- description: ing is a valid generic tld
text: https://twitter.ing
expected:
- https://twitter.ing
- description: ink is a valid generic tld
text: https://twitter.ink
expected:
- https://twitter.ink
- description: institute is a valid generic tld
text: https://twitter.institute
expected:
- https://twitter.institute
- description: insure is a valid generic tld
text: https://twitter.insure
expected:
- https://twitter.insure
- description: int is a valid generic tld
text: https://twitter.int
expected:
- https://twitter.int
- description: international is a valid generic tld
text: https://twitter.international
expected:
- https://twitter.international
- description: investments is a valid generic tld
text: https://twitter.investments
expected:
- https://twitter.investments
- description: ipiranga is a valid generic tld
text: https://twitter.ipiranga
expected:
- https://twitter.ipiranga
- description: irish is a valid generic tld
text: https://twitter.irish
expected:
- https://twitter.irish
- description: ist is a valid generic tld
text: https://twitter.ist
expected:
- https://twitter.ist
- description: istanbul is a valid generic tld
text: https://twitter.istanbul
expected:
- https://twitter.istanbul
- description: itau is a valid generic tld
text: https://twitter.itau
expected:
- https://twitter.itau
- description: iwc is a valid generic tld
text: https://twitter.iwc
expected:
- https://twitter.iwc
- description: java is a valid generic tld
text: https://twitter.java
expected:
- https://twitter.java
- description: jcb is a valid generic tld
text: https://twitter.jcb
expected:
- https://twitter.jcb
- description: jetzt is a valid generic tld
text: https://twitter.jetzt
expected:
- https://twitter.jetzt
- description: jewelry is a valid generic tld
text: https://twitter.jewelry
expected:
- https://twitter.jewelry
- description: jlc is a valid generic tld
text: https://twitter.jlc
expected:
- https://twitter.jlc
- description: jll is a valid generic tld
text: https://twitter.jll
expected:
- https://twitter.jll
- description: jobs is a valid generic tld
text: https://twitter.jobs
expected:
- https://twitter.jobs
- description: joburg is a valid generic tld
text: https://twitter.joburg
expected:
- https://twitter.joburg
- description: jprs is a valid generic tld
text: https://twitter.jprs
expected:
- https://twitter.jprs
- description: juegos is a valid generic tld
text: https://twitter.juegos
expected:
- https://twitter.juegos
- description: kaufen is a valid generic tld
text: https://twitter.kaufen
expected:
- https://twitter.kaufen
- description: kddi is a valid generic tld
text: https://twitter.kddi
expected:
- https://twitter.kddi
- description: kim is a valid generic tld
text: https://twitter.kim
expected:
- https://twitter.kim
- description: kitchen is a valid generic tld
text: https://twitter.kitchen
expected:
- https://twitter.kitchen
- description: kiwi is a valid generic tld
text: https://twitter.kiwi
expected:
- https://twitter.kiwi
- description: koeln is a valid generic tld
text: https://twitter.koeln
expected:
- https://twitter.koeln
- description: komatsu is a valid generic tld
text: https://twitter.komatsu
expected:
- https://twitter.komatsu
- description: krd is a valid generic tld
text: https://twitter.krd
expected:
- https://twitter.krd
- description: kred is a valid generic tld
text: https://twitter.kred
expected:
- https://twitter.kred
- description: kyoto is a valid generic tld
text: https://twitter.kyoto
expected:
- https://twitter.kyoto
- description: lacaixa is a valid generic tld
text: https://twitter.lacaixa
expected:
- https://twitter.lacaixa
- description: lancaster is a valid generic tld
text: https://twitter.lancaster
expected:
- https://twitter.lancaster
- description: land is a valid generic tld
text: https://twitter.land
expected:
- https://twitter.land
- description: lasalle is a valid generic tld
text: https://twitter.lasalle
expected:
- https://twitter.lasalle
- description: lat is a valid generic tld
text: https://twitter.lat
expected:
- https://twitter.lat
- description: latrobe is a valid generic tld
text: https://twitter.latrobe
expected:
- https://twitter.latrobe
- description: law is a valid generic tld
text: https://twitter.law
expected:
- https://twitter.law
- description: lawyer is a valid generic tld
text: https://twitter.lawyer
expected:
- https://twitter.lawyer
- description: lds is a valid generic tld
text: https://twitter.lds
expected:
- https://twitter.lds
- description: lease is a valid generic tld
text: https://twitter.lease
expected:
- https://twitter.lease
- description: leclerc is a valid generic tld
text: https://twitter.leclerc
expected:
- https://twitter.leclerc
- description: legal is a valid generic tld
text: https://twitter.legal
expected:
- https://twitter.legal
- description: lexus is a valid generic tld
text: https://twitter.lexus
expected:
- https://twitter.lexus
- description: lgbt is a valid generic tld
text: https://twitter.lgbt
expected:
- https://twitter.lgbt
- description: liaison is a valid generic tld
text: https://twitter.liaison
expected:
- https://twitter.liaison
- description: lidl is a valid generic tld
text: https://twitter.lidl
expected:
- https://twitter.lidl
- description: life is a valid generic tld
text: https://twitter.life
expected:
- https://twitter.life
- description: lighting is a valid generic tld
text: https://twitter.lighting
expected:
- https://twitter.lighting
- description: limited is a valid generic tld
text: https://twitter.limited
expected:
- https://twitter.limited
- description: limo is a valid generic tld
text: https://twitter.limo
expected:
- https://twitter.limo
- description: link is a valid generic tld
text: https://twitter.link
expected:
- https://twitter.link
- description: live is a valid generic tld
text: https://twitter.live
expected:
- https://twitter.live
- description: lixil is a valid generic tld
text: https://twitter.lixil
expected:
- https://twitter.lixil
- description: loan is a valid generic tld
text: https://twitter.loan
expected:
- https://twitter.loan
- description: loans is a valid generic tld
text: https://twitter.loans
expected:
- https://twitter.loans
- description: lol is a valid generic tld
text: https://twitter.lol
expected:
- https://twitter.lol
- description: london is a valid generic tld
text: https://twitter.london
expected:
- https://twitter.london
- description: lotte is a valid generic tld
text: https://twitter.lotte
expected:
- https://twitter.lotte
- description: lotto is a valid generic tld
text: https://twitter.lotto
expected:
- https://twitter.lotto
- description: love is a valid generic tld
text: https://twitter.love
expected:
- https://twitter.love
- description: ltda is a valid generic tld
text: https://twitter.ltda
expected:
- https://twitter.ltda
- description: lupin is a valid generic tld
text: https://twitter.lupin
expected:
- https://twitter.lupin
- description: luxe is a valid generic tld
text: https://twitter.luxe
expected:
- https://twitter.luxe
- description: luxury is a valid generic tld
text: https://twitter.luxury
expected:
- https://twitter.luxury
- description: madrid is a valid generic tld
text: https://twitter.madrid
expected:
- https://twitter.madrid
- description: maif is a valid generic tld
text: https://twitter.maif
expected:
- https://twitter.maif
- description: maison is a valid generic tld
text: https://twitter.maison
expected:
- https://twitter.maison
- description: man is a valid generic tld
text: https://twitter.man
expected:
- https://twitter.man
- description: management is a valid generic tld
text: https://twitter.management
expected:
- https://twitter.management
- description: mango is a valid generic tld
text: https://twitter.mango
expected:
- https://twitter.mango
- description: market is a valid generic tld
text: https://twitter.market
expected:
- https://twitter.market
- description: marketing is a valid generic tld
text: https://twitter.marketing
expected:
- https://twitter.marketing
- description: markets is a valid generic tld
text: https://twitter.markets
expected:
- https://twitter.markets
- description: marriott is a valid generic tld
text: https://twitter.marriott
expected:
- https://twitter.marriott
- description: mba is a valid generic tld
text: https://twitter.mba
expected:
- https://twitter.mba
- description: media is a valid generic tld
text: https://twitter.media
expected:
- https://twitter.media
- description: meet is a valid generic tld
text: https://twitter.meet
expected:
- https://twitter.meet
- description: melbourne is a valid generic tld
text: https://twitter.melbourne
expected:
- https://twitter.melbourne
- description: meme is a valid generic tld
text: https://twitter.meme
expected:
- https://twitter.meme
- description: memorial is a valid generic tld
text: https://twitter.memorial
expected:
- https://twitter.memorial
- description: men is a valid generic tld
text: https://twitter.men
expected:
- https://twitter.men
- description: menu is a valid generic tld
text: https://twitter.menu
expected:
- https://twitter.menu
- description: miami is a valid generic tld
text: https://twitter.miami
expected:
- https://twitter.miami
- description: microsoft is a valid generic tld
text: https://twitter.microsoft
expected:
- https://twitter.microsoft
- description: mil is a valid generic tld
text: https://twitter.mil
expected:
- https://twitter.mil
- description: mini is a valid generic tld
text: https://twitter.mini
expected:
- https://twitter.mini
- description: mma is a valid generic tld
text: https://twitter.mma
expected:
- https://twitter.mma
- description: mobi is a valid generic tld
text: https://twitter.mobi
expected:
- https://twitter.mobi
- description: moda is a valid generic tld
text: https://twitter.moda
expected:
- https://twitter.moda
- description: moe is a valid generic tld
text: https://twitter.moe
expected:
- https://twitter.moe
- description: mom is a valid generic tld
text: https://twitter.mom
expected:
- https://twitter.mom
- description: monash is a valid generic tld
text: https://twitter.monash
expected:
- https://twitter.monash
- description: money is a valid generic tld
text: https://twitter.money
expected:
- https://twitter.money
- description: montblanc is a valid generic tld
text: https://twitter.montblanc
expected:
- https://twitter.montblanc
- description: mormon is a valid generic tld
text: https://twitter.mormon
expected:
- https://twitter.mormon
- description: mortgage is a valid generic tld
text: https://twitter.mortgage
expected:
- https://twitter.mortgage
- description: moscow is a valid generic tld
text: https://twitter.moscow
expected:
- https://twitter.moscow
- description: motorcycles is a valid generic tld
text: https://twitter.motorcycles
expected:
- https://twitter.motorcycles
- description: mov is a valid generic tld
text: https://twitter.mov
expected:
- https://twitter.mov
- description: movie is a valid generic tld
text: https://twitter.movie
expected:
- https://twitter.movie
- description: movistar is a valid generic tld
text: https://twitter.movistar
expected:
- https://twitter.movistar
- description: mtn is a valid generic tld
text: https://twitter.mtn
expected:
- https://twitter.mtn
- description: mtpc is a valid generic tld
text: https://twitter.mtpc
expected:
- https://twitter.mtpc
- description: museum is a valid generic tld
text: https://twitter.museum
expected:
- https://twitter.museum
- description: nadex is a valid generic tld
text: https://twitter.nadex
expected:
- https://twitter.nadex
- description: nagoya is a valid generic tld
text: https://twitter.nagoya
expected:
- https://twitter.nagoya
- description: name is a valid generic tld
text: https://twitter.name
expected:
- https://twitter.name
- description: navy is a valid generic tld
text: https://twitter.navy
expected:
- https://twitter.navy
- description: nec is a valid generic tld
text: https://twitter.nec
expected:
- https://twitter.nec
- description: net is a valid generic tld
text: https://twitter.net
expected:
- https://twitter.net
- description: netbank is a valid generic tld
text: https://twitter.netbank
expected:
- https://twitter.netbank
- description: network is a valid generic tld
text: https://twitter.network
expected:
- https://twitter.network
- description: neustar is a valid generic tld
text: https://twitter.neustar
expected:
- https://twitter.neustar
- description: new is a valid generic tld
text: https://twitter.new
expected:
- https://twitter.new
- description: news is a valid generic tld
text: https://twitter.news
expected:
- https://twitter.news
- description: nexus is a valid generic tld
text: https://twitter.nexus
expected:
- https://twitter.nexus
- description: ngo is a valid generic tld
text: https://twitter.ngo
expected:
- https://twitter.ngo
- description: nhk is a valid generic tld
text: https://twitter.nhk
expected:
- https://twitter.nhk
- description: nico is a valid generic tld
text: https://twitter.nico
expected:
- https://twitter.nico
- description: ninja is a valid generic tld
text: https://twitter.ninja
expected:
- https://twitter.ninja
- description: nissan is a valid generic tld
text: https://twitter.nissan
expected:
- https://twitter.nissan
- description: nokia is a valid generic tld
text: https://twitter.nokia
expected:
- https://twitter.nokia
- description: nra is a valid generic tld
text: https://twitter.nra
expected:
- https://twitter.nra
- description: nrw is a valid generic tld
text: https://twitter.nrw
expected:
- https://twitter.nrw
- description: ntt is a valid generic tld
text: https://twitter.ntt
expected:
- https://twitter.ntt
- description: nyc is a valid generic tld
text: https://twitter.nyc
expected:
- https://twitter.nyc
- description: office is a valid generic tld
text: https://twitter.office
expected:
- https://twitter.office
- description: okinawa is a valid generic tld
text: https://twitter.okinawa
expected:
- https://twitter.okinawa
- description: omega is a valid generic tld
text: https://twitter.omega
expected:
- https://twitter.omega
- description: one is a valid generic tld
text: https://twitter.one
expected:
- https://twitter.one
- description: ong is a valid generic tld
text: https://twitter.ong
expected:
- https://twitter.ong
- description: onl is a valid generic tld
text: https://twitter.onl
expected:
- https://twitter.onl
- description: online is a valid generic tld
text: https://twitter.online
expected:
- https://twitter.online
- description: ooo is a valid generic tld
text: https://twitter.ooo
expected:
- https://twitter.ooo
- description: oracle is a valid generic tld
text: https://twitter.oracle
expected:
- https://twitter.oracle
- description: orange is a valid generic tld
text: https://twitter.orange
expected:
- https://twitter.orange
- description: org is a valid generic tld
text: https://twitter.org
expected:
- https://twitter.org
- description: organic is a valid generic tld
text: https://twitter.organic
expected:
- https://twitter.organic
- description: osaka is a valid generic tld
text: https://twitter.osaka
expected:
- https://twitter.osaka
- description: otsuka is a valid generic tld
text: https://twitter.otsuka
expected:
- https://twitter.otsuka
- description: ovh is a valid generic tld
text: https://twitter.ovh
expected:
- https://twitter.ovh
- description: page is a valid generic tld
text: https://twitter.page
expected:
- https://twitter.page
- description: panerai is a valid generic tld
text: https://twitter.panerai
expected:
- https://twitter.panerai
- description: paris is a valid generic tld
text: https://twitter.paris
expected:
- https://twitter.paris
- description: partners is a valid generic tld
text: https://twitter.partners
expected:
- https://twitter.partners
- description: parts is a valid generic tld
text: https://twitter.parts
expected:
- https://twitter.parts
- description: party is a valid generic tld
text: https://twitter.party
expected:
- https://twitter.party
- description: pet is a valid generic tld
text: https://twitter.pet
expected:
- https://twitter.pet
- description: pharmacy is a valid generic tld
text: https://twitter.pharmacy
expected:
- https://twitter.pharmacy
- description: philips is a valid generic tld
text: https://twitter.philips
expected:
- https://twitter.philips
- description: photo is a valid generic tld
text: https://twitter.photo
expected:
- https://twitter.photo
- description: photography is a valid generic tld
text: https://twitter.photography
expected:
- https://twitter.photography
- description: photos is a valid generic tld
text: https://twitter.photos
expected:
- https://twitter.photos
- description: physio is a valid generic tld
text: https://twitter.physio
expected:
- https://twitter.physio
- description: piaget is a valid generic tld
text: https://twitter.piaget
expected:
- https://twitter.piaget
- description: pics is a valid generic tld
text: https://twitter.pics
expected:
- https://twitter.pics
- description: pictet is a valid generic tld
text: https://twitter.pictet
expected:
- https://twitter.pictet
- description: pictures is a valid generic tld
text: https://twitter.pictures
expected:
- https://twitter.pictures
- description: pink is a valid generic tld
text: https://twitter.pink
expected:
- https://twitter.pink
- description: pizza is a valid generic tld
text: https://twitter.pizza
expected:
- https://twitter.pizza
- description: place is a valid generic tld
text: https://twitter.place
expected:
- https://twitter.place
- description: play is a valid generic tld
text: https://twitter.play
expected:
- https://twitter.play
- description: plumbing is a valid generic tld
text: https://twitter.plumbing
expected:
- https://twitter.plumbing
- description: plus is a valid generic tld
text: https://twitter.plus
expected:
- https://twitter.plus
- description: pohl is a valid generic tld
text: https://twitter.pohl
expected:
- https://twitter.pohl
- description: poker is a valid generic tld
text: https://twitter.poker
expected:
- https://twitter.poker
- description: porn is a valid generic tld
text: https://twitter.porn
expected:
- https://twitter.porn
- description: post is a valid generic tld
text: https://twitter.post
expected:
- https://twitter.post
- description: praxi is a valid generic tld
text: https://twitter.praxi
expected:
- https://twitter.praxi
- description: press is a valid generic tld
text: https://twitter.press
expected:
- https://twitter.press
- description: pro is a valid generic tld
text: https://twitter.pro
expected:
- https://twitter.pro
- description: prod is a valid generic tld
text: https://twitter.prod
expected:
- https://twitter.prod
- description: productions is a valid generic tld
text: https://twitter.productions
expected:
- https://twitter.productions
- description: prof is a valid generic tld
text: https://twitter.prof
expected:
- https://twitter.prof
- description: properties is a valid generic tld
text: https://twitter.properties
expected:
- https://twitter.properties
- description: property is a valid generic tld
text: https://twitter.property
expected:
- https://twitter.property
- description: pub is a valid generic tld
text: https://twitter.pub
expected:
- https://twitter.pub
- description: qpon is a valid generic tld
text: https://twitter.qpon
expected:
- https://twitter.qpon
- description: quebec is a valid generic tld
text: https://twitter.quebec
expected:
- https://twitter.quebec
- description: racing is a valid generic tld
text: https://twitter.racing
expected:
- https://twitter.racing
- description: realtor is a valid generic tld
text: https://twitter.realtor
expected:
- https://twitter.realtor
- description: realty is a valid generic tld
text: https://twitter.realty
expected:
- https://twitter.realty
- description: recipes is a valid generic tld
text: https://twitter.recipes
expected:
- https://twitter.recipes
- description: red is a valid generic tld
text: https://twitter.red
expected:
- https://twitter.red
- description: redstone is a valid generic tld
text: https://twitter.redstone
expected:
- https://twitter.redstone
- description: rehab is a valid generic tld
text: https://twitter.rehab
expected:
- https://twitter.rehab
- description: reise is a valid generic tld
text: https://twitter.reise
expected:
- https://twitter.reise
- description: reisen is a valid generic tld
text: https://twitter.reisen
expected:
- https://twitter.reisen
- description: reit is a valid generic tld
text: https://twitter.reit
expected:
- https://twitter.reit
- description: ren is a valid generic tld
text: https://twitter.ren
expected:
- https://twitter.ren
- description: rent is a valid generic tld
text: https://twitter.rent
expected:
- https://twitter.rent
- description: rentals is a valid generic tld
text: https://twitter.rentals
expected:
- https://twitter.rentals
- description: repair is a valid generic tld
text: https://twitter.repair
expected:
- https://twitter.repair
- description: report is a valid generic tld
text: https://twitter.report
expected:
- https://twitter.report
- description: republican is a valid generic tld
text: https://twitter.republican
expected:
- https://twitter.republican
- description: rest is a valid generic tld
text: https://twitter.rest
expected:
- https://twitter.rest
- description: restaurant is a valid generic tld
text: https://twitter.restaurant
expected:
- https://twitter.restaurant
- description: review is a valid generic tld
text: https://twitter.review
expected:
- https://twitter.review
- description: reviews is a valid generic tld
text: https://twitter.reviews
expected:
- https://twitter.reviews
- description: rich is a valid generic tld
text: https://twitter.rich
expected:
- https://twitter.rich
- description: ricoh is a valid generic tld
text: https://twitter.ricoh
expected:
- https://twitter.ricoh
- description: rio is a valid generic tld
text: https://twitter.rio
expected:
- https://twitter.rio
- description: rip is a valid generic tld
text: https://twitter.rip
expected:
- https://twitter.rip
- description: rocks is a valid generic tld
text: https://twitter.rocks
expected:
- https://twitter.rocks
- description: rodeo is a valid generic tld
text: https://twitter.rodeo
expected:
- https://twitter.rodeo
- description: rsvp is a valid generic tld
text: https://twitter.rsvp
expected:
- https://twitter.rsvp
- description: ruhr is a valid generic tld
text: https://twitter.ruhr
expected:
- https://twitter.ruhr
- description: run is a valid generic tld
text: https://twitter.run
expected:
- https://twitter.run
- description: ryukyu is a valid generic tld
text: https://twitter.ryukyu
expected:
- https://twitter.ryukyu
- description: saarland is a valid generic tld
text: https://twitter.saarland
expected:
- https://twitter.saarland
- description: sakura is a valid generic tld
text: https://twitter.sakura
expected:
- https://twitter.sakura
- description: sale is a valid generic tld
text: https://twitter.sale
expected:
- https://twitter.sale
- description: samsung is a valid generic tld
text: https://twitter.samsung
expected:
- https://twitter.samsung
- description: sandvik is a valid generic tld
text: https://twitter.sandvik
expected:
- https://twitter.sandvik
- description: sandvikcoromant is a valid generic tld
text: https://twitter.sandvikcoromant
expected:
- https://twitter.sandvikcoromant
- description: sanofi is a valid generic tld
text: https://twitter.sanofi
expected:
- https://twitter.sanofi
- description: sap is a valid generic tld
text: https://twitter.sap
expected:
- https://twitter.sap
- description: sarl is a valid generic tld
text: https://twitter.sarl
expected:
- https://twitter.sarl
- description: saxo is a valid generic tld
text: https://twitter.saxo
expected:
- https://twitter.saxo
- description: sca is a valid generic tld
text: https://twitter.sca
expected:
- https://twitter.sca
- description: scb is a valid generic tld
text: https://twitter.scb
expected:
- https://twitter.scb
- description: schmidt is a valid generic tld
text: https://twitter.schmidt
expected:
- https://twitter.schmidt
- description: scholarships is a valid generic tld
text: https://twitter.scholarships
expected:
- https://twitter.scholarships
- description: school is a valid generic tld
text: https://twitter.school
expected:
- https://twitter.school
- description: schule is a valid generic tld
text: https://twitter.schule
expected:
- https://twitter.schule
- description: schwarz is a valid generic tld
text: https://twitter.schwarz
expected:
- https://twitter.schwarz
- description: science is a valid generic tld
text: https://twitter.science
expected:
- https://twitter.science
- description: scor is a valid generic tld
text: https://twitter.scor
expected:
- https://twitter.scor
- description: scot is a valid generic tld
text: https://twitter.scot
expected:
- https://twitter.scot
- description: seat is a valid generic tld
text: https://twitter.seat
expected:
- https://twitter.seat
- description: seek is a valid generic tld
text: https://twitter.seek
expected:
- https://twitter.seek
- description: sener is a valid generic tld
text: https://twitter.sener
expected:
- https://twitter.sener
- description: services is a valid generic tld
text: https://twitter.services
expected:
- https://twitter.services
- description: sew is a valid generic tld
text: https://twitter.sew
expected:
- https://twitter.sew
- description: sex is a valid generic tld
text: https://twitter.sex
expected:
- https://twitter.sex
- description: sexy is a valid generic tld
text: https://twitter.sexy
expected:
- https://twitter.sexy
- description: shiksha is a valid generic tld
text: https://twitter.shiksha
expected:
- https://twitter.shiksha
- description: shoes is a valid generic tld
text: https://twitter.shoes
expected:
- https://twitter.shoes
- description: show is a valid generic tld
text: https://twitter.show
expected:
- https://twitter.show
- description: shriram is a valid generic tld
text: https://twitter.shriram
expected:
- https://twitter.shriram
- description: singles is a valid generic tld
text: https://twitter.singles
expected:
- https://twitter.singles
- description: site is a valid generic tld
text: https://twitter.site
expected:
- https://twitter.site
- description: ski is a valid generic tld
text: https://twitter.ski
expected:
- https://twitter.ski
- description: sky is a valid generic tld
text: https://twitter.sky
expected:
- https://twitter.sky
- description: skype is a valid generic tld
text: https://twitter.skype
expected:
- https://twitter.skype
- description: sncf is a valid generic tld
text: https://twitter.sncf
expected:
- https://twitter.sncf
- description: soccer is a valid generic tld
text: https://twitter.soccer
expected:
- https://twitter.soccer
- description: social is a valid generic tld
text: https://twitter.social
expected:
- https://twitter.social
- description: software is a valid generic tld
text: https://twitter.software
expected:
- https://twitter.software
- description: sohu is a valid generic tld
text: https://twitter.sohu
expected:
- https://twitter.sohu
- description: solar is a valid generic tld
text: https://twitter.solar
expected:
- https://twitter.solar
- description: solutions is a valid generic tld
text: https://twitter.solutions
expected:
- https://twitter.solutions
- description: sony is a valid generic tld
text: https://twitter.sony
expected:
- https://twitter.sony
- description: soy is a valid generic tld
text: https://twitter.soy
expected:
- https://twitter.soy
- description: space is a valid generic tld
text: https://twitter.space
expected:
- https://twitter.space
- description: spiegel is a valid generic tld
text: https://twitter.spiegel
expected:
- https://twitter.spiegel
- description: spreadbetting is a valid generic tld
text: https://twitter.spreadbetting
expected:
- https://twitter.spreadbetting
- description: srl is a valid generic tld
text: https://twitter.srl
expected:
- https://twitter.srl
- description: starhub is a valid generic tld
text: https://twitter.starhub
expected:
- https://twitter.starhub
- description: statoil is a valid generic tld
text: https://twitter.statoil
expected:
- https://twitter.statoil
- description: studio is a valid generic tld
text: https://twitter.studio
expected:
- https://twitter.studio
- description: study is a valid generic tld
text: https://twitter.study
expected:
- https://twitter.study
- description: style is a valid generic tld
text: https://twitter.style
expected:
- https://twitter.style
- description: sucks is a valid generic tld
text: https://twitter.sucks
expected:
- https://twitter.sucks
- description: supplies is a valid generic tld
text: https://twitter.supplies
expected:
- https://twitter.supplies
- description: supply is a valid generic tld
text: https://twitter.supply
expected:
- https://twitter.supply
- description: support is a valid generic tld
text: https://twitter.support
expected:
- https://twitter.support
- description: surf is a valid generic tld
text: https://twitter.surf
expected:
- https://twitter.surf
- description: surgery is a valid generic tld
text: https://twitter.surgery
expected:
- https://twitter.surgery
- description: suzuki is a valid generic tld
text: https://twitter.suzuki
expected:
- https://twitter.suzuki
- description: swatch is a valid generic tld
text: https://twitter.swatch
expected:
- https://twitter.swatch
- description: swiss is a valid generic tld
text: https://twitter.swiss
expected:
- https://twitter.swiss
- description: sydney is a valid generic tld
text: https://twitter.sydney
expected:
- https://twitter.sydney
- description: systems is a valid generic tld
text: https://twitter.systems
expected:
- https://twitter.systems
- description: taipei is a valid generic tld
text: https://twitter.taipei
expected:
- https://twitter.taipei
- description: tatamotors is a valid generic tld
text: https://twitter.tatamotors
expected:
- https://twitter.tatamotors
- description: tatar is a valid generic tld
text: https://twitter.tatar
expected:
- https://twitter.tatar
- description: tattoo is a valid generic tld
text: https://twitter.tattoo
expected:
- https://twitter.tattoo
- description: tax is a valid generic tld
text: https://twitter.tax
expected:
- https://twitter.tax
- description: taxi is a valid generic tld
text: https://twitter.taxi
expected:
- https://twitter.taxi
- description: team is a valid generic tld
text: https://twitter.team
expected:
- https://twitter.team
- description: tech is a valid generic tld
text: https://twitter.tech
expected:
- https://twitter.tech
- description: technology is a valid generic tld
text: https://twitter.technology
expected:
- https://twitter.technology
- description: tel is a valid generic tld
text: https://twitter.tel
expected:
- https://twitter.tel
- description: telefonica is a valid generic tld
text: https://twitter.telefonica
expected:
- https://twitter.telefonica
- description: temasek is a valid generic tld
text: https://twitter.temasek
expected:
- https://twitter.temasek
- description: tennis is a valid generic tld
text: https://twitter.tennis
expected:
- https://twitter.tennis
- description: thd is a valid generic tld
text: https://twitter.thd
expected:
- https://twitter.thd
- description: theater is a valid generic tld
text: https://twitter.theater
expected:
- https://twitter.theater
- description: tickets is a valid generic tld
text: https://twitter.tickets
expected:
- https://twitter.tickets
- description: tienda is a valid generic tld
text: https://twitter.tienda
expected:
- https://twitter.tienda
- description: tips is a valid generic tld
text: https://twitter.tips
expected:
- https://twitter.tips
- description: tires is a valid generic tld
text: https://twitter.tires
expected:
- https://twitter.tires
- description: tirol is a valid generic tld
text: https://twitter.tirol
expected:
- https://twitter.tirol
- description: today is a valid generic tld
text: https://twitter.today
expected:
- https://twitter.today
- description: tokyo is a valid generic tld
text: https://twitter.tokyo
expected:
- https://twitter.tokyo
- description: tools is a valid generic tld
text: https://twitter.tools
expected:
- https://twitter.tools
- description: top is a valid generic tld
text: https://twitter.top
expected:
- https://twitter.top
- description: toray is a valid generic tld
text: https://twitter.toray
expected:
- https://twitter.toray
- description: toshiba is a valid generic tld
text: https://twitter.toshiba
expected:
- https://twitter.toshiba
- description: tours is a valid generic tld
text: https://twitter.tours
expected:
- https://twitter.tours
- description: town is a valid generic tld
text: https://twitter.town
expected:
- https://twitter.town
- description: toyota is a valid generic tld
text: https://twitter.toyota
expected:
- https://twitter.toyota
- description: toys is a valid generic tld
text: https://twitter.toys
expected:
- https://twitter.toys
- description: trade is a valid generic tld
text: https://twitter.trade
expected:
- https://twitter.trade
- description: trading is a valid generic tld
text: https://twitter.trading
expected:
- https://twitter.trading
- description: training is a valid generic tld
text: https://twitter.training
expected:
- https://twitter.training
- description: travel is a valid generic tld
text: https://twitter.travel
expected:
- https://twitter.travel
- description: trust is a valid generic tld
text: https://twitter.trust
expected:
- https://twitter.trust
- description: tui is a valid generic tld
text: https://twitter.tui
expected:
- https://twitter.tui
- description: ubs is a valid generic tld
text: https://twitter.ubs
expected:
- https://twitter.ubs
- description: university is a valid generic tld
text: https://twitter.university
expected:
- https://twitter.university
- description: uno is a valid generic tld
text: https://twitter.uno
expected:
- https://twitter.uno
- description: uol is a valid generic tld
text: https://twitter.uol
expected:
- https://twitter.uol
- description: vacations is a valid generic tld
text: https://twitter.vacations
expected:
- https://twitter.vacations
- description: vegas is a valid generic tld
text: https://twitter.vegas
expected:
- https://twitter.vegas
- description: ventures is a valid generic tld
text: https://twitter.ventures
expected:
- https://twitter.ventures
- description: vermögensberater is a valid generic tld
text: https://twitter.vermögensberater
expected:
- https://twitter.vermögensberater
- description: vermögensberatung is a valid generic tld
text: https://twitter.vermögensberatung
expected:
- https://twitter.vermögensberatung
- description: versicherung is a valid generic tld
text: https://twitter.versicherung
expected:
- https://twitter.versicherung
- description: vet is a valid generic tld
text: https://twitter.vet
expected:
- https://twitter.vet
- description: viajes is a valid generic tld
text: https://twitter.viajes
expected:
- https://twitter.viajes
- description: video is a valid generic tld
text: https://twitter.video
expected:
- https://twitter.video
- description: villas is a valid generic tld
text: https://twitter.villas
expected:
- https://twitter.villas
- description: vin is a valid generic tld
text: https://twitter.vin
expected:
- https://twitter.vin
- description: vision is a valid generic tld
text: https://twitter.vision
expected:
- https://twitter.vision
- description: vista is a valid generic tld
text: https://twitter.vista
expected:
- https://twitter.vista
- description: vistaprint is a valid generic tld
text: https://twitter.vistaprint
expected:
- https://twitter.vistaprint
- description: vlaanderen is a valid generic tld
text: https://twitter.vlaanderen
expected:
- https://twitter.vlaanderen
- description: vodka is a valid generic tld
text: https://twitter.vodka
expected:
- https://twitter.vodka
- description: vote is a valid generic tld
text: https://twitter.vote
expected:
- https://twitter.vote
- description: voting is a valid generic tld
text: https://twitter.voting
expected:
- https://twitter.voting
- description: voto is a valid generic tld
text: https://twitter.voto
expected:
- https://twitter.voto
- description: voyage is a valid generic tld
text: https://twitter.voyage
expected:
- https://twitter.voyage
- description: wales is a valid generic tld
text: https://twitter.wales
expected:
- https://twitter.wales
- description: walter is a valid generic tld
text: https://twitter.walter
expected:
- https://twitter.walter
- description: wang is a valid generic tld
text: https://twitter.wang
expected:
- https://twitter.wang
- description: watch is a valid generic tld
text: https://twitter.watch
expected:
- https://twitter.watch
- description: webcam is a valid generic tld
text: https://twitter.webcam
expected:
- https://twitter.webcam
- description: website is a valid generic tld
text: https://twitter.website
expected:
- https://twitter.website
- description: wed is a valid generic tld
text: https://twitter.wed
expected:
- https://twitter.wed
- description: wedding is a valid generic tld
text: https://twitter.wedding
expected:
- https://twitter.wedding
- description: weir is a valid generic tld
text: https://twitter.weir
expected:
- https://twitter.weir
- description: whoswho is a valid generic tld
text: https://twitter.whoswho
expected:
- https://twitter.whoswho
- description: wien is a valid generic tld
text: https://twitter.wien
expected:
- https://twitter.wien
- description: wiki is a valid generic tld
text: https://twitter.wiki
expected:
- https://twitter.wiki
- description: williamhill is a valid generic tld
text: https://twitter.williamhill
expected:
- https://twitter.williamhill
- description: win is a valid generic tld
text: https://twitter.win
expected:
- https://twitter.win
- description: windows is a valid generic tld
text: https://twitter.windows
expected:
- https://twitter.windows
- description: wine is a valid generic tld
text: https://twitter.wine
expected:
- https://twitter.wine
- description: wme is a valid generic tld
text: https://twitter.wme
expected:
- https://twitter.wme
- description: work is a valid generic tld
text: https://twitter.work
expected:
- https://twitter.work
- description: works is a valid generic tld
text: https://twitter.works
expected:
- https://twitter.works
- description: world is a valid generic tld
text: https://twitter.world
expected:
- https://twitter.world
- description: wtc is a valid generic tld
text: https://twitter.wtc
expected:
- https://twitter.wtc
- description: wtf is a valid generic tld
text: https://twitter.wtf
expected:
- https://twitter.wtf
- description: xbox is a valid generic tld
text: https://twitter.xbox
expected:
- https://twitter.xbox
- description: xerox is a valid generic tld
text: https://twitter.xerox
expected:
- https://twitter.xerox
- description: xin is a valid generic tld
text: https://twitter.xin
expected:
- https://twitter.xin
- description: xperia is a valid generic tld
text: https://twitter.xperia
expected:
- https://twitter.xperia
- description: xxx is a valid generic tld
text: https://twitter.xxx
expected:
- https://twitter.xxx
- description: xyz is a valid generic tld
text: https://twitter.xyz
expected:
- https://twitter.xyz
- description: yachts is a valid generic tld
text: https://twitter.yachts
expected:
- https://twitter.yachts
- description: yandex is a valid generic tld
text: https://twitter.yandex
expected:
- https://twitter.yandex
- description: yodobashi is a valid generic tld
text: https://twitter.yodobashi
expected:
- https://twitter.yodobashi
- description: yoga is a valid generic tld
text: https://twitter.yoga
expected:
- https://twitter.yoga
- description: yokohama is a valid generic tld
text: https://twitter.yokohama
expected:
- https://twitter.yokohama
- description: youtube is a valid generic tld
text: https://twitter.youtube
expected:
- https://twitter.youtube
- description: zip is a valid generic tld
text: https://twitter.zip
expected:
- https://twitter.zip
- description: zone is a valid generic tld
text: https://twitter.zone
expected:
- https://twitter.zone
- description: zuerich is a valid generic tld
text: https://twitter.zuerich
expected:
- https://twitter.zuerich
- description: "дети is a valid generic tld"
text: https://twitter.дети
expected:
- https://twitter.дети
- description: "ком is a valid generic tld"
text: https://twitter.ком
expected:
- https://twitter.ком
- description: "москва is a valid generic tld"
text: https://twitter.москва
expected:
- https://twitter.москва
- description: "онлайн is a valid generic tld"
text: https://twitter.онлайн
expected:
- https://twitter.онлайн
- description: "орг is a valid generic tld"
text: https://twitter.орг
expected:
- https://twitter.орг
- description: "рус is a valid generic tld"
text: https://twitter.рус
expected:
- https://twitter.рус
- description: "сайт is a valid generic tld"
text: https://twitter.сайт
expected:
- https://twitter.сайт
- description: "קום is a valid generic tld"
text: https://twitter.קום
expected:
- https://twitter.קום
- description: "بازار is a valid generic tld"
text: https://twitter.بازار
expected:
- https://twitter.بازار
- description: "شبكة is a valid generic tld"
text: https://twitter.شبكة
expected:
- https://twitter.شبكة
- description: "كوم is a valid generic tld"
text: https://twitter.كوم
expected:
- https://twitter.كوم
- description: "موقع is a valid generic tld"
text: https://twitter.موقع
expected:
- https://twitter.موقع
- description: "कॉम is a valid generic tld"
text: https://twitter.कॉम
expected:
- https://twitter.कॉम
- description: "नेट is a valid generic tld"
text: https://twitter.नेट
expected:
- https://twitter.नेट
- description: "संगठन is a valid generic tld"
text: https://twitter.संगठन
expected:
- https://twitter.संगठन
- description: "คอม is a valid generic tld"
text: https://twitter.คอม
expected:
- https://twitter.คอม
- description: "みんな is a valid generic tld"
text: https://twitter.みんな
expected:
- https://twitter.みんな
- description: "グーグル is a valid generic tld"
text: https://twitter.グーグル
expected:
- https://twitter.グーグル
- description: "コム is a valid generic tld"
text: https://twitter.コム
expected:
- https://twitter.コム
- description: "世界 is a valid generic tld"
text: https://twitter.世界
expected:
- https://twitter.世界
- description: "中信 is a valid generic tld"
text: https://twitter.中信
expected:
- https://twitter.中信
- description: "中文网 is a valid generic tld"
text: https://twitter.中文网
expected:
- https://twitter.中文网
- description: "企业 is a valid generic tld"
text: https://twitter.企业
expected:
- https://twitter.企业
- description: "佛山 is a valid generic tld"
text: https://twitter.佛山
expected:
- https://twitter.佛山
- description: "信息 is a valid generic tld"
text: https://twitter.信息
expected:
- https://twitter.信息
- description: "健康 is a valid generic tld"
text: https://twitter.健康
expected:
- https://twitter.健康
- description: "八卦 is a valid generic tld"
text: https://twitter.八卦
expected:
- https://twitter.八卦
- description: "公司 is a valid generic tld"
text: https://twitter.公司
expected:
- https://twitter.公司
- description: "公益 is a valid generic tld"
text: https://twitter.公益
expected:
- https://twitter.公益
- description: "商城 is a valid generic tld"
text: https://twitter.商城
expected:
- https://twitter.商城
- description: "商店 is a valid generic tld"
text: https://twitter.商店
expected:
- https://twitter.商店
- description: "商标 is a valid generic tld"
text: https://twitter.商标
expected:
- https://twitter.商标
- description: "在线 is a valid generic tld"
text: https://twitter.在线
expected:
- https://twitter.在线
- description: "大拿 is a valid generic tld"
text: https://twitter.大拿
expected:
- https://twitter.大拿
- description: "娱乐 is a valid generic tld"
text: https://twitter.娱乐
expected:
- https://twitter.娱乐
- description: "工行 is a valid generic tld"
text: https://twitter.工行
expected:
- https://twitter.工行
- description: "广东 is a valid generic tld"
text: https://twitter.广东
expected:
- https://twitter.广东
- description: "慈善 is a valid generic tld"
text: https://twitter.慈善
expected:
- https://twitter.慈善
- description: "我爱你 is a valid generic tld"
text: https://twitter.我爱你
expected:
- https://twitter.我爱你
- description: "手机 is a valid generic tld"
text: https://twitter.手机
expected:
- https://twitter.手机
- description: "政务 is a valid generic tld"
text: https://twitter.政务
expected:
- https://twitter.政务
- description: "政府 is a valid generic tld"
text: https://twitter.政府
expected:
- https://twitter.政府
- description: "新闻 is a valid generic tld"
text: https://twitter.新闻
expected:
- https://twitter.新闻
- description: "时尚 is a valid generic tld"
text: https://twitter.时尚
expected:
- https://twitter.时尚
- description: "机构 is a valid generic tld"
text: https://twitter.机构
expected:
- https://twitter.机构
- description: "淡马锡 is a valid generic tld"
text: https://twitter.淡马锡
expected:
- https://twitter.淡马锡
- description: "游戏 is a valid generic tld"
text: https://twitter.游戏
expected:
- https://twitter.游戏
- description: "点看 is a valid generic tld"
text: https://twitter.点看
expected:
- https://twitter.点看
- description: "移动 is a valid generic tld"
text: https://twitter.移动
expected:
- https://twitter.移动
- description: "组织机构 is a valid generic tld"
text: https://twitter.组织机构
expected:
- https://twitter.组织机构
- description: "网址 is a valid generic tld"
text: https://twitter.网址
expected:
- https://twitter.网址
- description: "网店 is a valid generic tld"
text: https://twitter.网店
expected:
- https://twitter.网店
- description: "网络 is a valid generic tld"
text: https://twitter.网络
expected:
- https://twitter.网络
- description: "谷歌 is a valid generic tld"
text: https://twitter.谷歌
expected:
- https://twitter.谷歌
- description: "集团 is a valid generic tld"
text: https://twitter.集团
expected:
- https://twitter.集团
- description: "飞利浦 is a valid generic tld"
text: https://twitter.飞利浦
expected:
- https://twitter.飞利浦
- description: "餐厅 is a valid generic tld"
text: https://twitter.餐厅
expected:
- https://twitter.餐厅
- description: "닷넷 is a valid generic tld"
text: https://twitter.닷넷
expected:
- https://twitter.닷넷
- description: "닷컴 is a valid generic tld"
text: https://twitter.닷컴
expected:
- https://twitter.닷컴
- description: "삼성 is a valid generic tld"
text: https://twitter.삼성
expected:
- https://twitter.삼성
- description: onion is a valid generic tld
text: https://twitter.onion
expected:
- https://twitter.onion
twitter-text-1.13.4/test/twitter-text-conformance/hit_highlighting.yml 0000644 0001750 0001750 00000005315 12670063203 026460 0 ustar sudheesh sudheesh
tests:
plain_text:
- description: "Highlight the beginning of a string"
text: "this is a test"
hits: [ [0, 4] ]
expected: "<em>this</em> is a test"
- description: "Highlight the middle of a string"
text: "this is a test"
hits: [ [5, 7] ]
expected: "this <em>is</em> a test"
- description: "Highlight the end of a string"
text: "this is a test"
hits: [ [10, 14] ]
expected: "this is a <em>test</em>"
- description: "Highlight multiple terms"
text: "this is a test"
hits: [ [0, 4], [10, 14] ]
expected: "<em>this</em> is a <em>test</em>"
- description: "DO NOT highlight with empty hits"
text: "this is a test"
hits: []
expected: "this is a test"
- description: "Highlight within Japanese text"
text: "東京の天気"
hits: [ [0, 2] ]
expected: "<em>東京</em>の天気"
with_links:
- description: "Highlight after a link (offset does not include markup)"
text: "@username this is an example"
hits: [ [10, 14] ]
expected: "@usernamethis is an example"
- description: "Highlight anchor text of a link (offset does not include markup)"
text: "@username this is an example"
hits: [ [1, 9] ]
expected: "@username this is an example"
- description: "Highlight around a link (offset does not include markup)"
text: "@username this is an example"
hits: [ [0, 14] ]
expected: "@username this is an example"
- description: "Highlight touching tags"
text: "foofoo"
hits: [ [3, 6] ]
expected: "foofoo"
- description: "Highlight two links"
text: "foo barbaz"
hits: [ [4, 7], [8, 11] ]
expected: "foo barbaz"
- description: "Highlight non-link then link not at end"
text: "foo bar baz something else"
hits: [ [4, 7], [8, 11] ]
expected: "foo barbaz something else"
- description: "Highlight non-link then link at end"
text: "foo bar baz"
hits: [ [4, 7], [8, 11] ]
expected: "foo barbaz"
- description: "Highlight mention at end"
text: "something via @twitter"
hits: [ [14, 22] ]
expected: "something via @twitter"
twitter-text-1.13.4/test/twitter-text-conformance/tld_lib.yml 0000644 0001750 0001750 00000021017 12670063203 024555 0 ustar sudheesh sudheesh ---
country:
- ac
- ad
- ae
- af
- ag
- ai
- al
- am
- an
- ao
- aq
- ar
- as
- at
- au
- aw
- ax
- az
- ba
- bb
- bd
- be
- bf
- bg
- bh
- bi
- bj
- bl
- bm
- bn
- bo
- bq
- br
- bs
- bt
- bv
- bw
- by
- bz
- ca
- cc
- cd
- cf
- cg
- ch
- ci
- ck
- cl
- cm
- cn
- co
- cr
- cu
- cv
- cw
- cx
- cy
- cz
- de
- dj
- dk
- dm
- do
- dz
- ec
- ee
- eg
- eh
- er
- es
- et
- eu
- fi
- fj
- fk
- fm
- fo
- fr
- ga
- gb
- gd
- ge
- gf
- gg
- gh
- gi
- gl
- gm
- gn
- gp
- gq
- gr
- gs
- gt
- gu
- gw
- gy
- hk
- hm
- hn
- hr
- ht
- hu
- id
- ie
- il
- im
- in
- io
- iq
- ir
- is
- it
- je
- jm
- jo
- jp
- ke
- kg
- kh
- ki
- km
- kn
- kp
- kr
- kw
- ky
- kz
- la
- lb
- lc
- li
- lk
- lr
- ls
- lt
- lu
- lv
- ly
- ma
- mc
- md
- me
- mf
- mg
- mh
- mk
- ml
- mm
- mn
- mo
- mp
- mq
- mr
- ms
- mt
- mu
- mv
- mw
- mx
- my
- mz
- na
- nc
- ne
- nf
- ng
- ni
- nl
- 'no'
- np
- nr
- nu
- nz
- om
- pa
- pe
- pf
- pg
- ph
- pk
- pl
- pm
- pn
- pr
- ps
- pt
- pw
- py
- qa
- re
- ro
- rs
- ru
- rw
- sa
- sb
- sc
- sd
- se
- sg
- sh
- si
- sj
- sk
- sl
- sm
- sn
- so
- sr
- ss
- st
- su
- sv
- sx
- sy
- sz
- tc
- td
- tf
- tg
- th
- tj
- tk
- tl
- tm
- tn
- to
- tp
- tr
- tt
- tv
- tw
- tz
- ua
- ug
- uk
- um
- us
- uy
- uz
- va
- vc
- ve
- vg
- vi
- vn
- vu
- wf
- ws
- ye
- yt
- za
- zm
- zw
- "ελ"
- "бел"
- "мкд"
- "мон"
- "рф"
- "срб"
- "укр"
- "қаз"
- "հայ"
- "الاردن"
- "الجزائر"
- "السعودية"
- "المغرب"
- "امارات"
- "ایران"
- "بھارت"
- "تونس"
- "سودان"
- "سورية"
- "عراق"
- "عمان"
- "فلسطين"
- "قطر"
- "مصر"
- "مليسيا"
- "پاکستان"
- "भारत"
- "বাংলা"
- "ভারত"
- "ਭਾਰਤ"
- "ભારત"
- "இந்தியா"
- "இலங்கை"
- "சிங்கப்பூர்"
- "భారత్"
- "ලංකා"
- "ไทย"
- "გე"
- "中国"
- "中國"
- "台湾"
- "台灣"
- "新加坡"
- "澳門"
- "香港"
- "한국"
generic:
- abb
- abbott
- abogado
- academy
- accenture
- accountant
- accountants
- aco
- active
- actor
- ads
- adult
- aeg
- aero
- afl
- agency
- aig
- airforce
- airtel
- allfinanz
- alsace
- amsterdam
- android
- apartments
- app
- aquarelle
- archi
- army
- arpa
- asia
- associates
- attorney
- auction
- audio
- auto
- autos
- axa
- azure
- band
- bank
- bar
- barcelona
- barclaycard
- barclays
- bargains
- bauhaus
- bayern
- bbc
- bbva
- bcn
- beer
- bentley
- berlin
- best
- bet
- bharti
- bible
- bid
- bike
- bing
- bingo
- bio
- biz
- black
- blackfriday
- bloomberg
- blue
- bmw
- bnl
- bnpparibas
- boats
- bond
- boo
- boots
- boutique
- bradesco
- bridgestone
- broker
- brother
- brussels
- budapest
- build
- builders
- business
- buzz
- bzh
- cab
- cafe
- cal
- camera
- camp
- cancerresearch
- canon
- capetown
- capital
- caravan
- cards
- care
- career
- careers
- cars
- cartier
- casa
- cash
- casino
- cat
- catering
- cba
- cbn
- ceb
- center
- ceo
- cern
- cfa
- cfd
- chanel
- channel
- chat
- cheap
- chloe
- christmas
- chrome
- church
- cisco
- citic
- city
- claims
- cleaning
- click
- clinic
- clothing
- cloud
- club
- coach
- codes
- coffee
- college
- cologne
- com
- commbank
- community
- company
- computer
- condos
- construction
- consulting
- contractors
- cooking
- cool
- coop
- corsica
- country
- coupons
- courses
- credit
- creditcard
- cricket
- crown
- crs
- cruises
- cuisinella
- cymru
- cyou
- dabur
- dad
- dance
- date
- dating
- datsun
- day
- dclk
- deals
- degree
- delivery
- delta
- democrat
- dental
- dentist
- desi
- design
- dev
- diamonds
- diet
- digital
- direct
- directory
- discount
- dnp
- docs
- dog
- doha
- domains
- doosan
- download
- drive
- durban
- dvag
- earth
- eat
- edu
- education
- email
- emerck
- energy
- engineer
- engineering
- enterprises
- epson
- equipment
- erni
- esq
- estate
- eurovision
- eus
- events
- everbank
- exchange
- expert
- exposed
- express
- fage
- fail
- faith
- family
- fan
- fans
- farm
- fashion
- feedback
- film
- finance
- financial
- firmdale
- fish
- fishing
- fit
- fitness
- flights
- florist
- flowers
- flsmidth
- fly
- foo
- football
- forex
- forsale
- forum
- foundation
- frl
- frogans
- fund
- furniture
- futbol
- fyi
- gal
- gallery
- game
- garden
- gbiz
- gdn
- gent
- genting
- ggee
- gift
- gifts
- gives
- giving
- glass
- gle
- global
- globo
- gmail
- gmo
- gmx
- gold
- goldpoint
- golf
- goo
- goog
- google
- gop
- gov
- graphics
- gratis
- green
- gripe
- group
- guge
- guide
- guitars
- guru
- hamburg
- hangout
- haus
- healthcare
- help
- here
- hermes
- hiphop
- hitachi
- hiv
- hockey
- holdings
- holiday
- homedepot
- homes
- honda
- horse
- host
- hosting
- hoteles
- hotmail
- house
- how
- hsbc
- ibm
- icbc
- ice
- icu
- ifm
- iinet
- immo
- immobilien
- industries
- infiniti
- info
- ing
- ink
- institute
- insure
- int
- international
- investments
- ipiranga
- irish
- ist
- istanbul
- itau
- iwc
- java
- jcb
- jetzt
- jewelry
- jlc
- jll
- jobs
- joburg
- jprs
- juegos
- kaufen
- kddi
- kim
- kitchen
- kiwi
- koeln
- komatsu
- krd
- kred
- kyoto
- lacaixa
- lancaster
- land
- lasalle
- lat
- latrobe
- law
- lawyer
- lds
- lease
- leclerc
- legal
- lexus
- lgbt
- liaison
- lidl
- life
- lighting
- limited
- limo
- link
- live
- lixil
- loan
- loans
- lol
- london
- lotte
- lotto
- love
- ltda
- lupin
- luxe
- luxury
- madrid
- maif
- maison
- man
- management
- mango
- market
- marketing
- markets
- marriott
- mba
- media
- meet
- melbourne
- meme
- memorial
- men
- menu
- miami
- microsoft
- mil
- mini
- mma
- mobi
- moda
- moe
- mom
- monash
- money
- montblanc
- mormon
- mortgage
- moscow
- motorcycles
- mov
- movie
- movistar
- mtn
- mtpc
- museum
- nadex
- nagoya
- name
- navy
- nec
- net
- netbank
- network
- neustar
- new
- news
- nexus
- ngo
- nhk
- nico
- ninja
- nissan
- nokia
- nra
- nrw
- ntt
- nyc
- office
- okinawa
- omega
- one
- ong
- onl
- online
- ooo
- oracle
- orange
- org
- organic
- osaka
- otsuka
- ovh
- page
- panerai
- paris
- partners
- parts
- party
- pet
- pharmacy
- philips
- photo
- photography
- photos
- physio
- piaget
- pics
- pictet
- pictures
- pink
- pizza
- place
- play
- plumbing
- plus
- pohl
- poker
- porn
- post
- praxi
- press
- pro
- prod
- productions
- prof
- properties
- property
- pub
- qpon
- quebec
- racing
- realtor
- realty
- recipes
- red
- redstone
- rehab
- reise
- reisen
- reit
- ren
- rent
- rentals
- repair
- report
- republican
- rest
- restaurant
- review
- reviews
- rich
- ricoh
- rio
- rip
- rocks
- rodeo
- rsvp
- ruhr
- run
- ryukyu
- saarland
- sakura
- sale
- samsung
- sandvik
- sandvikcoromant
- sanofi
- sap
- sarl
- saxo
- sca
- scb
- schmidt
- scholarships
- school
- schule
- schwarz
- science
- scor
- scot
- seat
- seek
- sener
- services
- sew
- sex
- sexy
- shiksha
- shoes
- show
- shriram
- singles
- site
- ski
- sky
- skype
- sncf
- soccer
- social
- software
- sohu
- solar
- solutions
- sony
- soy
- space
- spiegel
- spreadbetting
- srl
- starhub
- statoil
- studio
- study
- style
- sucks
- supplies
- supply
- support
- surf
- surgery
- suzuki
- swatch
- swiss
- sydney
- systems
- taipei
- tatamotors
- tatar
- tattoo
- tax
- taxi
- team
- tech
- technology
- tel
- telefonica
- temasek
- tennis
- thd
- theater
- tickets
- tienda
- tips
- tires
- tirol
- today
- tokyo
- tools
- top
- toray
- toshiba
- tours
- town
- toyota
- toys
- trade
- trading
- training
- travel
- trust
- tui
- ubs
- university
- uno
- uol
- vacations
- vegas
- ventures
- vermögensberater
- vermögensberatung
- versicherung
- vet
- viajes
- video
- villas
- vin
- vision
- vista
- vistaprint
- vlaanderen
- vodka
- vote
- voting
- voto
- voyage
- wales
- walter
- wang
- watch
- webcam
- website
- wed
- wedding
- weir
- whoswho
- wien
- wiki
- williamhill
- win
- windows
- wine
- wme
- work
- works
- world
- wtc
- wtf
- xbox
- xerox
- xin
- xperia
- xxx
- xyz
- yachts
- yandex
- yodobashi
- yoga
- yokohama
- youtube
- zip
- zone
- zuerich
- "дети"
- "ком"
- "москва"
- "онлайн"
- "орг"
- "рус"
- "сайт"
- "קום"
- "بازار"
- "شبكة"
- "كوم"
- "موقع"
- "कॉम"
- "नेट"
- "संगठन"
- "คอม"
- "みんな"
- "グーグル"
- "コム"
- "世界"
- "中信"
- "中文网"
- "企业"
- "佛山"
- "信息"
- "健康"
- "八卦"
- "公司"
- "公益"
- "商城"
- "商店"
- "商标"
- "在线"
- "大拿"
- "娱乐"
- "工行"
- "广东"
- "慈善"
- "我爱你"
- "手机"
- "政务"
- "政府"
- "新闻"
- "时尚"
- "机构"
- "淡马锡"
- "游戏"
- "点看"
- "移动"
- "组织机构"
- "网址"
- "网店"
- "网络"
- "谷歌"
- "集团"
- "飞利浦"
- "餐厅"
- "닷넷"
- "닷컴"
- "삼성"
- onion
twitter-text-1.13.4/test/twitter-text-conformance/validate.yml 0000644 0001750 0001750 00000022635 12670063203 024744 0 ustar sudheesh sudheesh
tests:
tweets:
- description: "Valid Tweet: < 20 characters"
text: "I am a Tweet"
expected: true
- description: "Valid Tweet: 140 characters"
text: "A lie gets halfway around the world before the truth has a chance to get its pants on. Winston Churchill (1874-1965) http://bit.ly/dJpywL"
expected: true
- description: "Valid Tweet: 140 characters (with accents)"
text: "A lié géts halfway arøünd thé wørld béføré thé truth has a chance tø get its pants øn. Winston Churchill (1874-1965) http://bit.ly/dJpywL"
expected: true
- description: "Valid Tweet: 140 characters (double byte characters)"
text: "のののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののの"
expected: true
- description: "Valid Tweet: 140 characters (double word characters)"
text: "\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431"
expected: true
- description: "Invalid Tweet: no characters (empty)"
text: ""
expected: false
- description: "Invalid Tweet: 141 characters"
text: "A lie gets halfway around the world before the truth has a chance to get its pants on. -- Winston Churchill (1874-1965) http://bit.ly/dJpywL"
expected: false
- description: "Invalid Tweet: 141 characters (due to newline)"
text: "A lie gets halfway around the world before the truth has a chance to get its pants on. \n- Winston Churchill (1874-1965) http://bit.ly/dJpywL"
expected: false
usernames:
- description: "Valid username: a-z < 20 characters"
text: "@username"
expected: true
- description: "All-numeric usernames are allowed"
text: "@12345"
expected: true
- description: "Usernames should allow the _ character"
text: "@example_name"
expected: true
- description: "Usernames SHOULD NOT allow the - character"
text: "@example-name"
expected: false
lists:
- description: "Valid list: a-z < 20 characters"
text: "@username/list"
expected: true
- description: "A username alone SHOULD NOT be considered a valid list"
text: "@username"
expected: false
- description: "A username followed by a slash SHOULD NOT be considered a valid list"
text: "@username/"
expected: false
- description: "Validation SHOULD NOT allow leading spaces"
text: " @username/list"
expected: false
- description: "Validation SHOULD NOT allow trailing spaces"
text: "@username/list "
expected: false
hashtags:
- description: "Valid hashtag: a-z < 20 characters"
text: "#hashtag"
expected: true
- description: "Valid hashtag: number followed by letters"
text: "#1st"
expected: true
- description: "Valid hashtag: letters and numbers mixed"
text: "#that1time"
expected: true
- description: "Valid hashtag: letter followed by numbers"
text: "#easyas123"
expected: true
- description: "Invalid hashtag: all numbers"
text: "#12345"
expected: false
- description: "Valid hashtag: Russian text"
text: "#ашок"
expected: true
- description: "Valid hashtag: Korean text"
text: "#트위터"
expected: true
urls:
- description: "Valid url: protocol + domain"
text: "http://example.com"
expected: true
- description: "Valid url: ssl + domain + path + query"
text: "https://example.com/path/to/resource?search=foo&lang=en"
expected: true
- description: "Valid url: protocol + domain + path + fragment"
text: "http://twitter.com/#!/twitter"
expected: true
- description: "Valid url: cased protocol and domain"
text: "HTTPS://www.ExaMPLE.COM/index.html"
expected: true
- description: "Valid url: port and userinfo"
text: "http://user:PASSW0RD@example.com:8080/login.php"
expected: true
- description: "Valid url: matrix path parameters"
text: "http://sports.yahoo.com/nfl/news;_ylt=Aom0;ylu=XyZ?slug=ap-superbowlnotebook"
expected: true
- description: "Valid url: ipv4"
text: "http://192.168.0.1/index.html?src=asdf"
expected: true
- description: "Valid url: ipv6"
text: "http://[3ffe:1900:4545:3:200:f8ff:fe21:67cf]:80/index.html"
expected: true
- description: "Valid url: underscore in subdomain"
text: "http://test_underscore.twitter.com"
expected: true
- description: "Valid url: sub delims and question marks"
text: "http://example.com?foo=$bar.;baz?BAZ&c=d-#top/?stories+"
expected: true
- description: "Valid unicode url: unicode domain"
text: "http://☃.net/"
expected: true
- description: "Valid url: Cyrillic characters in path"
text: "http://example.com/Русские_слова"
expected: true
- description: "Valid url: trailing hyphen"
text: "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-"
expected: true
- description: "Invalid url: invalid scheme"
text: "ftp://www.example.com/"
expected: false
- description: "Invalid url: invalid userinfo characters"
text: "https://user:pass[word]@www.example.com/"
expected: false
- description: "Invalid url: underscore in domain"
text: "http://domain-dash_2314352345_dfasd.foo-cow_4352.com"
expected: false
- description: "Invalid url: domain beginning dash"
text: "http://www.-domain4352.com/"
expected: false
- description: "Invalid url: domain trailing dash"
text: "http://www.domain4352-.com/"
expected: false
- description: "Invalid url: unicode domain trailing dash"
text: "http://☃-.net/"
expected: false
- description: "Invalid url: improperly encoded unicode domain"
text: "http://%e2%98%83.net/"
expected: false
- description: "Invalid url: invalid IP"
text: "http://256.1.2.3/"
expected: false
- description: "Invalid url: invalid char in path"
text: "http://en.wikipedia.org/wiki/\"#Punctuation"
expected: false
- description: "Invalid url: trailing space"
text: "http://example.com/#anchor "
expected: false
urls_without_protocol:
- description: "Valid url without protocol: domain + gTLD"
text: "example.com"
expected: true
- description: "Valid url without protocol: subdomain + domain + gTLD"
text: "www.example.com"
expected: true
- description: "Valid url without protocol: domain + ccTLD"
text: "t.co"
expected: true
- description: "Valid url without protocol: subdomain + domain + ccTLD"
text: "foo.co.jp"
expected: true
- description: "Valid url without protocol: domain + path + query"
text: "example.com/path/to/resource?search=foo&lang=en"
expected: true
lengths:
- description: "Count the number of characters"
text: "This is a test."
expected: 15
- description: "Count a URL starting with http:// as 23 characters"
text: "http://test.com"
expected: 23
- description: "Count a URL starting with https:// as 23 characters"
text: "https://test.com"
expected: 23
- description: "Count a URL without protocol as 23 characters"
text: "test.com"
expected: 23
- description: "Count multiple URLs correctly"
text: "Test https://test.com test https://test.com test.com test"
expected: 86
- description: "Count unicode chars outside the basic multilingual plane (double word)"
text: "\U00010000\U0010ffff"
expected: 2
- description: "Count unicode chars inside the basic multilingual plane"
text: "저찀쯿쿿"
expected: 4
- description: "Count a mix of single byte single word, and double word unicode characters"
text: "H\U0001f431☺"
expected: 3
twitter-text-1.13.4/test/twitter-text-conformance/LICENSE 0000644 0001750 0001750 00000023610 12670063203 023427 0 ustar sudheesh sudheesh Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this work except in compliance with the License.
You may obtain a copy of the License below, or at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
twitter-text-1.13.4/test/twitter-text-conformance/extract.yml 0000644 0001750 0001750 00000112337 12670063203 024624 0 ustar sudheesh sudheesh tests:
mentions:
- description: "Extract mention at the beginning of a tweet"
text: "@username reply"
expected: ["username"]
- description: "Extract mention at the end of a tweet"
text: "mention @username"
expected: ["username"]
- description: "Extract mention in the middle of a tweet"
text: "mention @username in the middle"
expected: ["username"]
- description: "Extract mention of username with underscore"
text: "mention @user_name"
expected: ["user_name"]
- description: "Extract mention of all numeric username"
text: "mention @12345"
expected: ["12345"]
- description: "Extract mentions of multiple usernames"
text: "mention @username1 @username2"
expected: ["username1", "username2"]
- description: "Extract mention in the middle of a Japanese tweet"
text: "の@usernameに到着を待っている"
expected: ["username"]
- description: "DO NOT extract username ending in @"
text: "Current Status: @_@ (cc: @username)"
expected: ["username"]
- description: "DO NOT extract username followed by accented latin characters"
text: "@aliceìnheiro something something"
expected: []
- description: "Extract lone mention but not @user@user (too close to an email)"
text: "@username email me @test@example.com"
expected: ["username"]
- description: "DO NOT extract 'http' in '@http://' as username"
text: "@http://twitter.com"
expected: []
- description: "Extract mentions before newline"
text: "@username\n@mention"
expected: ["username", "mention"]
- description: "Extract mentions after 'RT'"
text: "RT@username RT:@mention RT @test"
expected: ["username", "mention", "test"]
- description: "Extract mentions after 'rt'"
text: "rt@username rt:@mention rt @test"
expected: ["username", "mention", "test"]
- description: "Extract mentions after 'Rt'"
text: "Rt@username Rt:@mention Rt @test"
expected: ["username", "mention", "test"]
- description: "Extract mentions after 'rT'"
text: "rT@username rT:@mention rT @test"
expected: ["username", "mention", "test"]
- description: "DO NOT extract username preceded by !"
text: "f!@kn"
expected: []
- description: "DO NOT extract username preceded by @"
text: "f@@kn"
expected: []
- description: "DO NOT extract username preceded by #"
text: "f#@kn"
expected: []
- description: "DO NOT extract username preceded by $"
text: "f$@kn"
expected: []
- description: "DO NOT extract username preceded by %"
text: "f%@kn"
expected: []
- description: "DO NOT extract username preceded by &"
text: "f&@kn"
expected: []
- description: "DO NOT extract username preceded by *"
text: "f*@kn"
expected: []
mentions_with_indices:
- description: "Extract a mention at the start"
text: "@username yo!"
expected:
- screen_name: "username"
indices: [0, 9]
- description: "Extract a mention that has the same thing mentioned at the start"
text: "username @username"
expected:
- screen_name: "username"
indices: [9, 18]
- description: "Extract a mention in the middle of a Japanese tweet"
text: "の@usernameに到着を待っている"
expected:
- screen_name: "username"
indices: [1, 10]
mentions_or_lists_with_indices:
- description: "Extract a mention"
text: "@username yo!"
expected:
- screen_name: "username"
list_slug: ""
indices: [0, 9]
- description: "Extract a list"
text: "@username/list-name is a great list!"
expected:
- screen_name: "username"
list_slug: "/list-name"
indices: [0, 19]
- description: "Extract a mention and list"
text: "Hey @username, check out out @otheruser/list_name-01!"
expected:
- screen_name: "username"
list_slug: ""
indices: [4, 13]
- screen_name: "otheruser"
list_slug: "/list_name-01"
indices: [29, 52]
- description: "Extract a list in the middle of a Japanese tweet"
text: "の@username/list_name-01に到着を待っている"
expected:
- screen_name: "username"
list_slug: "/list_name-01"
indices: [1, 23]
- description: "DO NOT extract a list with slug that starts with a number"
text: "@username/7list-name is a great list!"
expected:
- screen_name: "username"
list_slug: ""
indices: [0, 9]
replies:
- description: "Extract reply at the beginning of a tweet"
text: "@username reply"
expected: "username"
- description: "Extract reply preceded by only a space"
text: " @username reply"
expected: "username"
- description: "Extract reply preceded by only a full-width space (U+3000)"
text: " @username reply"
expected: "username"
- description: "DO NOT Extract reply when preceded by text"
text: "a @username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by ."
text: ".@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by /"
text: "/@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by _"
text: "_@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by -"
text: "-@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by +"
text: "+@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by #"
text: "#@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by !"
text: "!@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when preceded by @"
text: "@@username mention, not a reply"
expected:
- description: "DO NOT Extract reply when followed by URL"
text: "@http://twitter.com"
expected:
urls:
- description: "Extract a lone URL"
text: "http://example.com"
expected: ["http://example.com"]
- description: "Extract valid URL: http://google.com"
text: "text http://google.com"
expected: ["http://google.com"]
- description: "Extract valid URL: http://foobar.com/#"
text: "text http://foobar.com/#"
expected: ["http://foobar.com/#"]
- description: "Extract valid URL: http://google.com/#foo"
text: "text http://google.com/#foo"
expected: ["http://google.com/#foo"]
- description: "Extract valid URL: http://google.com/#search?q=iphone%20-filter%3Alinks"
text: "text http://google.com/#search?q=iphone%20-filter%3Alinks"
expected: ["http://google.com/#search?q=iphone%20-filter%3Alinks"]
- description: "Extract valid URL: http://twitter.com/#search?q=iphone%20-filter%3Alinks"
text: "text http://twitter.com/#search?q=iphone%20-filter%3Alinks"
expected: ["http://twitter.com/#search?q=iphone%20-filter%3Alinks"]
- description: "Extract valid URL: http://somedomain.com/index.php?path=/abc/def/"
text: "text http://somedomain.com/index.php?path=/abc/def/"
expected: ["http://somedomain.com/index.php?path=/abc/def/"]
- description: "Extract valid URL: http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"
text: "text http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"
expected: ["http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"]
- description: "Extract valid URL: http://somehost.com:3000"
text: "text http://somehost.com:3000"
expected: ["http://somehost.com:3000"]
- description: "Extract valid URL: http://xo.com/~matthew+%ff-x"
text: "text http://xo.com/~matthew+%ff-x"
expected: ["http://xo.com/~matthew+%ff-x"]
- description: "Extract valid URL: http://xo.com/~matthew+%ff-,.;x"
text: "text http://xo.com/~matthew+%ff-,.;x"
expected: ["http://xo.com/~matthew+%ff-,.;x"]
- description: "Extract valid URL: http://xo.com/,.;x"
text: "text http://xo.com/,.;x"
expected: ["http://xo.com/,.;x"]
- description: "Extract valid URL: http://en.wikipedia.org/wiki/Primer_(film)"
text: "text http://en.wikipedia.org/wiki/Primer_(film)"
expected: ["http://en.wikipedia.org/wiki/Primer_(film)"]
- description: "Extract valid URL: http://www.ams.org/bookstore-getitem/item=mbk-59"
text: "text http://www.ams.org/bookstore-getitem/item=mbk-59"
expected: ["http://www.ams.org/bookstore-getitem/item=mbk-59"]
- description: "Extract valid URL: http://✪df.ws/ejp"
text: "text http://✪df.ws/ejp"
expected: ["http://✪df.ws/ejp"]
- description: "Extract valid URL: http://chilp.it/?77e8fd"
text: "text http://chilp.it/?77e8fd"
expected: ["http://chilp.it/?77e8fd"]
- description: "Extract valid URL: http://x.com/oneletterdomain"
text: "text http://x.com/oneletterdomain"
expected: ["http://x.com/oneletterdomain"]
- description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
expected: ["http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"]
- description: "DO NOT extract invalid URL: http://domain-begin_dash_2314352345_dfasd.foo-cow_4352.com"
text: "text http://domain-dash_2314352345_dfasd.foo-cow_4352.com"
expected: []
- description: "DO NOT extract invalid URL: http://-begin_dash_2314352345_dfasd.foo-cow_4352.com"
text: "text http://-dash_2314352345_dfasd.foo-cow_4352.com"
expected: []
- description: "DO NOT extract invalid URL: http://no-tld"
text: "text http://no-tld"
expected: []
- description: "DO NOT extract invalid URL: http://tld-too-short.x"
text: "text http://tld-too-short.x"
expected: []
- description: "Extract valid URL preceded by an opening parenthesis: (http://twitter.com"
text: "(http://twitter.com"
expected: ["http://twitter.com"]
- description: "Extract a very long hyphenated sub-domain URL (single letter hyphens)"
text: "text http://word-and-a-number-8-ftw.domain.com/"
expected: ["http://word-and-a-number-8-ftw.domain.com/"]
- description: "Extract a hyphenated TLD (usually a typo)"
text: "text http://domain.com-that-you-should-have-put-a-space-after"
expected: ["http://domain.com"]
- description: "Extract URL ending with # value"
text: "text http://foo.com?#foo text"
expected: ["http://foo.com?#foo"]
- description: "Extract URLs without protocol on (com|org|edu|gov|net) domains"
text: "foo.com foo.net foo.org foo.edu foo.gov"
expected: ["foo.com", "foo.net", "foo.org", "foo.edu", "foo.gov"]
- description: "Extract URLs without protocol not on (com|org|edu|gov|net) domains"
text: "foo.baz foo.co.jp www.xxxxxxx.baz www.foo.co.uk wwwww.xxxxxxx foo.comm foo.somecom foo.govedu foo.jp"
expected: ["foo.co.jp", "www.foo.co.uk"]
- description: "Extract URLs without protocol on ccTLD with slash"
text: "t.co/abcde bit.ly/abcde"
expected: ["t.co/abcde", "bit.ly/abcde"]
- description: "Extract URLs with protocol on ccTLD domains"
text: "http://foo.jp http://fooooo.jp"
expected: ["http://foo.jp", "http://fooooo.jp"]
- description: "Extract URLs with a - or + at the end of the path"
text: "Go to http://example.com/a+ or http://example.com/a-"
expected: ["http://example.com/a+", "http://example.com/a-"]
- description: "Extract URLs with longer paths ending in -"
text: "Go to http://example.com/view/slug-url-?foo=bar"
expected: ["http://example.com/view/slug-url-?foo=bar"]
- description: "Extract URLs beginning with a space"
text: "@user Try http:// example.com/path"
expected: ["example.com/path"]
- description: "Extract long URL without protocol surrounded by CJK characters"
text: "これは日本語です。example.com/path/index.html中国語example.com/path한국"
expected: ["example.com/path/index.html", "example.com/path"]
- description: "Extract short URL without protocol surrounded by CJK characters"
text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde"
expected: ["twitter.com", "example.com", "t.co/abcde", "twitter.com", "example2.com", "twitter.com/abcde"]
- description: "Extract URLs with and without protocol surrounded by CJK characters"
text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde"
expected: ["http://twitter.com/", "example.com", "http://t.co/abcde", "twitter.com", "example2.com", "http://twitter.com/abcde"]
- description: "Extract URLs with protocol and path containing Cyrillic characters"
text: "Go to http://twitter.com/Русские_слова"
expected: ["http://twitter.com/Русские_слова"]
- description: "DO NOT extract short URLs without protocol on ccTLD domains without path"
text: "twitter.jp日本語it.so中国語foo.jp it.so foo.jp"
expected: []
- description: "Extract some (tv|co) short URLs without protocol on ccTLD domains without path"
text: "MLB.tv vine.co twitch.tv t.co"
expected: ["MLB.tv", "vine.co", "twitch.tv", "t.co"]
- description: "Extract URLs beginning with a non-breaking space (U+00A0)"
text: "@user Try http:// example.com/path"
expected: ["example.com/path"]
- description: "Extract URLs with underscores and dashes in the subdomain"
text: "test http://sub_domain-dash.twitter.com"
expected: ["http://sub_domain-dash.twitter.com"]
- description: "Extract URL with minimum number of valid characters"
text: "test http://a.b.cd"
expected: ["http://a.b.cd"]
- description: "Extract URLs containing underscores and dashes"
text: "test http://a_b.c-d.com"
expected: ["http://a_b.c-d.com"]
- description: "Extract URLs containing dashes in the subdomain"
text: "test http://a-b.c.com"
expected: ["http://a-b.c.com"]
- description: "Extract URLs with dashes in the domain name"
text: "test http://twitter-dash.com"
expected: ["http://twitter-dash.com"]
- description: "Extract URLs with lots of symbols then a period"
text: "http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"
expected: ["http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"]
- description: "DO NOT extract URLs containing leading dashes in the subdomain"
text: "test http://-leadingdash.twitter.com"
expected: []
- description: "DO NOT extract URLs containing trailing dashes in the subdomain"
text: "test http://trailingdash-.twitter.com"
expected: []
- description: "DO NOT extract URLs containing leading underscores in the subdomain"
text: "test http://_leadingunderscore.twitter.com"
expected: []
- description: "DO NOT extract URLs containing trailing underscores in the subdomain"
text: "test http://trailingunderscore_.twitter.com"
expected: []
- description: "DO NOT extract URLs containing leading dashes in the domain name"
text: "test http://-twitter.com"
expected: []
- description: "DO NOT extract URLs containing trailing dashes in the domain name"
text: "test http://twitter-.com"
expected: []
- description: "DO NOT extract URLs containing underscores in the domain name"
text: "test http://twitter_underscore.com"
expected: []
- description: "DO NOT extract URLs containing underscores in the tld"
text: "test http://twitter.c_o_m"
expected: []
- description: "Extract valid URL http://www.foo.com/foo/path-with-period./"
text: "test http://www.foo.com/foo/path-with-period./"
expected: ["http://www.foo.com/foo/path-with-period./"]
- description: "Extract valid URL http://www.foo.org.za/foo/bar/688.1"
text: "test http://www.foo.org.za/foo/bar/688.1"
expected: ["http://www.foo.org.za/foo/bar/688.1"]
- description: "Extract valid URL http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"
text: "test http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"
expected: ["http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"]
- description: "Extract valid URL http://foo.com/bar/123/foo_&_bar/"
text: "test http://foo.com/bar/123/foo_&_bar/"
expected: ["http://foo.com/bar/123/foo_&_bar/"]
- description: "Extract valid URL http://www.cp.sc.edu/events/65"
text: "test http://www.cp.sc.edu/events/65 test"
expected: ["http://www.cp.sc.edu/events/65"]
- description: "Extract valid URL http://www.andersondaradio.no.comunidades.net/"
text: "http://www.andersondaradio.no.comunidades.net/ test test"
expected: ["http://www.andersondaradio.no.comunidades.net/"]
- description: "Extract valid URL ELPAÍS.com"
text: "test ELPAÍS.com"
expected: ["ELPAÍS.com"]
- description: "DO NOT include period at the end of URL"
text: "test http://twitter.com/."
expected: ["http://twitter.com/"]
- description: "Extract a URL with '?' in fragment"
text: "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"
expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"]
- description: "Extract a URL with '?' in fragment in a text"
text: "text http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata text"
expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"]
# A common cause of runaway regex engines.
- description: "Extract a URL with a ton of trailing periods"
text: "Test a ton of periods http://example.com/path.........................................."
expected: ["http://example.com/path"]
- description: "Extract a URL with a ton of trailing commas"
text: "Test a ton of periods http://example.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"
expected: ["http://example.com/"]
- description: "Extract a URL with a ton of trailing '!'"
text: "Test a ton of periods http://example.com/path/!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
expected: ["http://example.com/path/"]
- description: "DO NOT extract URLs in hashtag or @mention"
text: "#test.com @test.com #http://test.com @http://test.com #t.co/abcde @t.co/abcde"
expected: []
- description: "Extract a t.co URL with a trailing apostrophe"
text: "I really like http://t.co/pbY2NfTZ's website"
expected: ["http://t.co/pbY2NfTZ"]
- description: "Extract a t.co URL with a trailing hyphen"
text: "Check this site out http://t.co/FNkPfmii- it's great"
expected: ["http://t.co/FNkPfmii"]
- description: "Extract a t.co URL with a trailing colon"
text: "According to http://t.co/ulYGBYSo: the internet is cool"
expected: ["http://t.co/ulYGBYSo"]
- description: "Extract URL before newline"
text: "http://twitter.com\nhttp://example.com\nhttp://example.com/path\nexample.com/path\nit.so\nit.so/abcde"
expected: ["http://twitter.com", "http://example.com", "http://example.com/path", "example.com/path", "it.so/abcde"]
- description: "DO NOT extract URL if preceded by $"
text: "$http://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA"
expected: []
- description: "DO NOT extract .bz2 file name as URL"
text: "long.test.tar.bz2 test.tar.bz2 tar.bz2"
expected: []
- description: "DO NOT extract URL with gTLD followed by @ sign"
text: "john.doe.gov@mail.com"
expected: []
- description: "DO NOT extract URL with ccTLD followed by @ sign"
text: "john.doe.jp@mail.com"
expected: []
urls_with_indices:
- description: "Extract a URL"
text: "text http://google.com"
expected:
- url: "http://google.com"
indices: [5, 22]
- description: "Extract a URL from a Japanese tweet"
text: "皆さん見てください! http://google.com"
expected:
- url: "http://google.com"
indices: [11, 28]
- description: "Extract URLs without protocol on ccTLD with slash"
text: "t.co/abcde bit.ly/abcde"
expected:
- url: "t.co/abcde"
indices: [0, 10]
- url: "bit.ly/abcde"
indices: [11, 23]
- description: "Extract URLs without protocol surrounded by CJK characters"
text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde"
expected:
- url: "twitter.com"
indices: [0, 11]
- url: "example.com"
indices: [20, 31]
- url: "t.co/abcde"
indices: [34, 44]
- url: "twitter.com"
indices: [46, 57]
- url: "example2.com"
indices: [58, 70]
- url: "twitter.com/abcde"
indices: [73, 90]
- description: "Extract URLs with and without protocol surrounded by CJK characters"
text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde"
expected:
- url: "http://twitter.com/"
indices: [0, 19]
- url: "example.com"
indices: [28, 39]
- url: "http://t.co/abcde"
indices: [42, 59]
- url: "twitter.com"
indices: [61, 72]
- url: "example2.com"
indices: [75, 87]
- url: "http://twitter.com/abcde"
indices: [90, 114]
- description: "Extract t.co URLs skipping trailing characters and adjusting indices correctly"
text: "http://t.co/pbY2NfTZ's http://t.co/2vYHpAc5; http://t.co/ulYGBYSo: http://t.co/8MkmHU0k+c http://t.co/TKLp64dY.x http://t.co/8t7G3ddS#a http://t.co/FNkPfmii-"
expected:
- url: "http://t.co/pbY2NfTZ"
indices: [0, 20]
- url: "http://t.co/2vYHpAc5"
indices: [23, 43]
- url: "http://t.co/ulYGBYSo"
indices: [45, 65]
- url: "http://t.co/8MkmHU0k"
indices: [67, 87]
- url: "http://t.co/TKLp64dY"
indices: [90, 110]
- url: "http://t.co/8t7G3ddS"
indices: [113, 133]
- url: "http://t.co/FNkPfmii"
indices: [136, 156]
- description: "Extract correct indices for duplicate instances of the same URL"
text: "http://t.co http://t.co"
expected:
- url: "http://t.co"
indices: [0, 11]
- url: "http://t.co"
indices: [12, 23]
- description: "Extract I18N URL"
text: "test http://xn--ls8h.XN--ls8h.la/"
expected:
- url: "http://xn--ls8h.XN--ls8h.la/"
indices: [5, 33]
- description: "Extract URLs with IDN(not encoded)"
text: "test http://foobar.みんな/ http://foobar.中国/ http://foobar.پاکستان/ "
expected:
- url: "http://foobar.みんな/"
indices: [5, 23]
- url: "http://foobar.中国/"
indices: [24, 41]
- url: "http://foobar.پاکستان/"
indices: [42, 64]
hashtags:
- description: "Extract an all-alpha hashtag"
text: "a #hashtag here"
expected: ["hashtag"]
- description: "Extract a letter-then-number hashtag"
text: "this is #hashtag1"
expected: ["hashtag1"]
- description: "Extract a number-then-letter hashtag"
text: "#1hashtag is this"
expected: ["1hashtag"]
- description: "DO NOT Extract an all-numeric hashtag"
text: "On the #16 bus"
expected: []
- description: "DO NOT Extract a single numeric hashtag"
text: "#0"
expected: []
- description: "Extract hashtag after bracket"
text: "(#hashtag1 )#hashtag2 [#hashtag3 ]#hashtag4 ’#hashtag5’#hashtag6"
expected: ["hashtag1", "hashtag2", "hashtag3", "hashtag4", "hashtag5", "hashtag6"]
- description: "Extract a hashtag containing ñ"
text: "I'll write more tests #mañana"
expected: ["mañana"]
- description: "Extract a hashtag containing é"
text: "Working remotely #café"
expected: ["café"]
- description: "Extract a hashtag containing ü"
text: "Getting my Oktoberfest on #münchen"
expected: ["münchen"]
- description: "DO NOT Extract a hashtag containing Japanese"
text: "this is not valid: # 会議中 ハッシュ"
expected: []
- description: "Extract a hashtag in Korean"
text: "What is #트위터 anyway?"
expected: ["트위터"]
- description: "Extract a half-width Hangul hashtag"
text: "Just random half-width Hangul #ᆪᆭᄚ"
expected: ["ᆪᆭᄚ"]
- description: "Extract a hashtag in Russian"
text: "What is #ашок anyway?"
expected: ["ашок"]
- description: "Extract a starting katakana hashtag"
text: "#カタカナ is a hashtag"
expected: ["カタカナ"]
- description: "Extract a starting hiragana hashtag"
text: "#ひらがな FTW!"
expected: ["ひらがな"]
- description: "Extract a starting kanji hashtag"
text: "#漢字 is the future"
expected: ["漢字"]
- description: "Extract a trailing katakana hashtag"
text: "Hashtag #カタカナ"
expected: ["カタカナ"]
- description: "Extract a trailing hiragana hashtag"
text: "Japanese hashtags #ひらがな"
expected: ["ひらがな"]
- description: "Extract a trailing kanji hashtag"
text: "Study time #漢字"
expected: ["漢字"]
- description: "Extract a central katakana hashtag"
text: "See my #カタカナ hashtag?"
expected: ["カタカナ"]
- description: "Extract a central hiragana hashtag"
text: "Study #ひらがな for fun and profit"
expected: ["ひらがな"]
- description: "Extract a central kanji hashtag"
text: "Some say #漢字 is the past. what do they know?"
expected: ["漢字"]
- description: "Extract a Kanji/Katakana mixed hashtag"
text: "日本語ハッシュタグテスト #日本語ハッシュタグ"
expected: ["日本語ハッシュタグ"]
- description: "Extract a hashtag after a punctuation"
text: "日本語ハッシュテスト。#日本語ハッシュタグ"
expected: ["日本語ハッシュタグ"]
- description: "DO NOT include a punctuation in a hashtag"
text: "#日本語ハッシュタグ。"
expected: ["日本語ハッシュタグ"]
- description: "Extract a full-width Alnum hashtag"
text: "全角英数字ハッシュタグ #hashtag123"
expected: ["hashtag123"]
- description: "DO NOT extract a hashtag without a preceding space"
text: "日本語ハッシュタグ#日本語ハッシュタグ"
expected: []
- description: "Hashtag with chouon"
text: "長音ハッシュタグ。#サッカー"
expected: ["サッカー"]
- description: "Hashtag with half-width chouon"
text: "長音ハッシュタグ。#サッカー"
expected: ["サッカー"]
- description: "Hashtag with half-width voiced sound marks"
text: "#ハッシュタグ #パピプペポ"
expected: ["ハッシュタグ", "パピプペポ"]
- description: "Hashtag with half-width # after full-width !"
text: "できましたよー!#日本語ハッシュタグ。"
expected: ["日本語ハッシュタグ"]
- description: "Hashtag with full-width # after full-width !"
text: "できましたよー!#日本語ハッシュタグ。"
expected: ["日本語ハッシュタグ"]
- description: "Hashtag with ideographic iteration mark"
text: "#云々 #学問のすゝめ #いすゞ #各〻 #各〃"
expected: ["云々", "学問のすゝめ", "いすゞ", "各〻", "各〃"]
- description: "Extract hashtag with fullwidth tilde"
text: "#メ~テレ ハッシュタグ内で~が認識されず"
expected: ["メ~テレ"]
- description: "Extract hashtag with wave dash"
text: "#メ〜テレ ハッシュタグ内で~が認識されず"
expected: ["メ〜テレ"]
- description: "Hashtags with ş (U+015F)"
text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş"
expected: ["Ateş", "qrşt", "ştu", "ş"]
- description: "Hashtags with İ (U+0130) and ı (U+0131)"
text: "Here’s a test tweet for you: #İn #ın"
expected: ["İn", "ın"]
- description: "Hashtag before punctuations"
text: "#hashtag: #hashtag; #hashtag, #hashtag. #hashtag! #hashtag?"
expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"]
- description: "Hashtag after punctuations"
text: ":#hashtag ;#hashtag ,#hashtag .#hashtag !#hashtag ?#hashtag"
expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"]
- description: "Hashtag before newline"
text: "#hashtag\ntest\n#hashtag2\ntest\n#hashtag3\n"
expected: ["hashtag", "hashtag2", "hashtag3"]
- description: "DO NOT extract hashtag when # is followed by URL"
text: "#http://twitter.com #https://twitter.com"
expected: []
- description: "DO NOT extract hashtag if it's a part of URL"
text: "http://twitter.com/#hashtag twitter.com/#hashtag"
expected: []
- description: "Extract hashtags with Latin extended characters"
text: "#Azərbaycanca #mûǁae #Čeština #Ċaoiṁín"
expected: ["Azərbaycanca", "mûǁae", "Čeština", "Ċaoiṁín"]
- description: "Extract Arabic hashtags"
text: "#سیاست #ایران #السياسة #السياح #لغات #اتمی #کنفرانس #العربية #الجزيرة #فارسی"
expected: ["سیاست", "ایران", "السياسة", "السياح", "لغات", "اتمی", "کنفرانس", "العربية", "الجزيرة", "فارسی"]
- description: "Extract Arabic hashtags with underscore"
text: "#برنامه_نویسی #رییس_جمهور #رئيس_الوزراء, #ثبت_نام. #لس_آنجلس"
expected: ["برنامه_نویسی", "رییس_جمهور", "رئيس_الوزراء", "ثبت_نام", "لس_آنجلس"]
- description: "Extract Hebrew hashtags"
text: "#עַל־יְדֵי #וכו׳ #מ״כ"
expected: ["עַל־יְדֵי", "וכו׳", "מ״כ"]
- description: "Extract Thai hashtags"
text: "#ผู้เริ่ม #การเมือง #รายละเอียด #นักท่องเที่ยว #ของขวัญ #สนามบิน #เดินทาง #ประธาน"
expected: ["ผู้เริ่ม", "การเมือง", "รายละเอียด", "นักท่องเที่ยว", "ของขวัญ", "สนามบิน", "เดินทาง", "ประธาน"]
- description: "Extract Arabic hashtags with Zero-Width Non-Joiner"
text: "#أيبيإم #میخواهم"
expected: ["أيبيإم", "میخواهم"]
- description: "Extract Amharic hashtag"
text: "የአላህ መልእክተኛ ሰለላሁ ዓለይሂ ወሰለም #ኢትዮሙስሊምስ"
expected: ["ኢትዮሙስሊምስ"]
- description: "Extract Sinhala hashtag with Zero-Width Joiner (U+200D)"
text: "#ශ්රීලංකා"
expected: ["ශ්රීලංකා"]
- description: "Extract Arabic and Persian hashtags with numbers"
text: "#۳۴۵هشتگ #هشتگ۶۷۸ #ســـلام_عليكم_٤٠٦"
expected: ["۳۴۵هشتگ","هشتگ۶۷۸","ســـلام_عليكم_٤٠٦"]
- description: "Extract Hindi hashtags"
text: "#महात्मा #महात्मा_१२३४ #१२३४ गांधी"
expected: ["महात्मा","महात्मा_१२३४"]
- description: "Extract Indic script hashtags"
text: "#বাংলা #ગુજરાતી #ಕನ್ನಡ #മലയാളം #ଓଡ଼ିଆ #ਪੰਜਾਬੀ #සිංහල #தமிழ் #తెలుగు"
expected: ["বাংলা","ગુજરાતી","ಕನ್ನಡ","മലയാളം","ଓଡ଼ିଆ","ਪੰਜਾਬੀ","සිංහල","தமிழ்","తెలుగు"]
- description: "Extract Tibetan hashtags"
text: "#བོད་སྐད་ #བོད་སྐད"
expected: ["བོད་སྐད་","བོད་སྐད"]
- description: "Extract Khmer, Burmese, Laotian hashtags"
text: "#មហាត្មះគន្ធី #မြင့်မြတ်သော #ຊີວະສາດ"
expected: ["មហាត្មះគន្ធី","မြင့်မြတ်သော","ຊີວະສາດ"]
- description: "Extract Greek hashtag"
text: "#Μαχάτμα_Γκάντι ήταν Ινδός πολιτικός"
expected: ["Μαχάτμα_Γκάντι"]
- description: "Extract Armenian and Georgian hashtags"
text: "#Մահաթմա #მაჰათმა"
expected: ["Մահաթմա","მაჰათმა"]
- description: "Extract hashtag with middle dot"
text: "#il·lusió"
expected: ["il·lusió"]
- description: "DO NOT extract hashtags without a letter"
text: "#_ #1_2 #122 #〃"
expected: []
hashtags_with_indices:
- description: "Extract a hashtag at the start"
text: "#hashtag here"
expected:
- hashtag: "hashtag"
indices: [0, 8]
- description: "Extract a hashtag at the end"
text: "test a #hashtag"
expected:
- hashtag: "hashtag"
indices: [7, 15]
- description: "Extract a hashtag in the middle"
text: "test a #hashtag in a string"
expected:
- hashtag: "hashtag"
indices: [7, 15]
- description: "Extract only a valid hashtag"
text: "#123 a #hashtag in a string"
expected:
- hashtag: "hashtag"
indices: [7, 15]
- description: "Extract a hashtag in a string of multi-byte characters"
text: "会議中 #hashtag 会議中"
expected:
- hashtag: "hashtag"
indices: [4, 12]
- description: "Extract multiple valid hashtags"
text: "One #two three #four"
expected:
- hashtag: "two"
indices: [4, 8]
- hashtag: "four"
indices: [15, 20]
- description: "Extract a non-latin hashtag"
text: "Hashtags in #русский!"
expected:
- hashtag: "русский"
indices: [12, 20]
- description: "Extract multiple non-latin hashtags"
text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!"
expected:
- hashtag: "中文"
indices: [12, 15]
- hashtag: "日本語"
indices: [17, 21]
- hashtag: "한국말"
indices: [23, 27]
- hashtag: "русский"
indices: [33, 41]
cashtags:
- description: "Extract cashtags"
text: "Example cashtags: $TEST $Stock $symbol"
expected: ["TEST", "Stock", "symbol"]
- description: "Extract cashtags with . or _"
text: "Example cashtags: $TEST.T $test.tt $Stock_X $symbol_ab"
expected: ["TEST.T", "test.tt", "Stock_X", "symbol_ab"]
- description: "Do not extract cashtags if they contain numbers"
text: "$123 $test123 $TE123ST"
expected: []
- description: "Do not extract cashtags with non-ASCII characters"
text: "$ストック $株"
expected: []
- description: "Do not extract cashtags with punctuations"
text: "$ $. $- $@ $! $() $+"
expected: []
- description: "Do not include trailing . or _"
text: "$TEST. $TEST_"
expected: ["TEST", "TEST"]
- description: "Do not extract cashtags if there is no space before $"
text: "$OK$NG$BAD text$NO .$NG $$NG"
expected: ["OK"]
- description: "Do not extract too long cashtags"
text: "$CashtagMustBeLessThanSixCharacter"
expected: []
cashtags_with_indices:
- description: "Extract cashtags"
text: "Example: $TEST $symbol test"
expected:
- cashtag: "TEST"
indices: [9, 14]
- cashtag: "symbol"
indices: [15, 22]
- description: "Extract cashtags with . or _"
text: "Example: $TEST.T test $symbol_ab end"
expected:
- cashtag: "TEST.T"
indices: [9, 16]
- cashtag: "symbol_ab"
indices: [22, 32]
twitter-text-1.13.4/test/twitter-text-conformance/Rakefile 0000644 0001750 0001750 00000003766 12670063203 024101 0 ustar sudheesh sudheesh require 'open-uri'
require 'nokogiri'
require 'yaml'
namespace :tlds do
  # Picks the entries of +tlds+ whose :type string matches +type+ and returns
  # their :domain values in sorted order.
  #
  # tlds - Array of Hashes with :domain and :type keys (as built by
  #        tlds:iana_update).
  # type - Regexp tested against each entry's :type.
  #
  # Returns a sorted Array of domain Strings.
  def select_tld(tlds, type)
    tlds.select { |entry| entry[:type] =~ type }.map { |entry| entry[:domain] }.sort
  end

  desc 'Grab tlds from iana and save to tld_lib.yml'
  task :iana_update do
    # URI#open (mixed in by open-uri) is used rather than Kernel#open, which no
    # longer accepts URLs on Ruby 3.0+. The block form closes the response body
    # as soon as Nokogiri has parsed it.
    doc = URI.parse('http://www.iana.org/domains/root/db').open do |page|
      Nokogiri::HTML(page)
    end

    # Output key in tld_lib.yml => pattern matched against the table's type column.
    types = {
      'country' => /country-code/,
      'generic' => /generic|sponsored|infrastructure|generic-restricted/,
    }

    tlds = []
    doc.css('table#tld-table tr').each do |tr|
      info = tr.css('td')
      # Skip rows without both a domain cell and a type cell (e.g. rows that
      # contain no <td> at all).
      next if info.length < 2
      tlds << {
        domain: info[0].text.gsub('.', ''),
        type: info[1].text
      }
    end

    yml = {}
    types.each do |name, regex|
      yml[name] = select_tld(tlds, regex)
    end
    # "onion" is added by hand after sorting, so it is written last in
    # tld_lib.yml (presumably because the scraped table does not list it --
    # TODO confirm).
    yml["generic"] << "onion"

    File.open(repo_path('tld_lib.yml'), 'w') do |file|
      file.write(yml.to_yaml)
    end

    # The heredoc body is kept flush-left on purpose: `<<-` only allows the
    # terminator to be indented, any leading whitespace on the content lines
    # would end up in the generated Java source.
    File.open(repo_path("TldLists.java"), 'w') do |file|
      file.write(<<-EOF
// Auto-generated by conformance/Rakefile
package com.twitter;
import java.util.Arrays;
import java.util.List;
public class TldLists {
public static final List GTLDS = Arrays.asList(
#{yml["generic"].sort.map {|el| " \"#{el}\""}.join(",\n")}
);
public static final List CTLDS = Arrays.asList(
#{yml["country"].sort.map {|el| " \"#{el}\""}.join(",\n")}
);
}
EOF
      )
    end
  end

  desc 'Update tests from tld_lib.yml'
  task :generate_tests do
    test_yml = { 'tests' => {} }
    yml = YAML.load_file(repo_path('tld_lib.yml'))

    # One conformance case per TLD, grouped under the TLD's type.
    yml.each do |type, tlds|
      test_yml['tests'][type] = tlds.map do |tld|
        {
          'description' => "#{tld} is a valid #{type} tld",
          'text' => "https://twitter.#{tld}",
          'expected' => ["https://twitter.#{tld}"]
        }
      end
    end

    # Written next to this Rakefile via repo_path, like the other files this
    # namespace reads and writes, instead of depending on the current directory.
    File.open(repo_path('tlds.yml'), 'w') do |file|
      file.write(test_yml.to_yaml)
    end
  end
end
# Builds a path underneath the directory that contains this file.
#
# path - zero or more path components, appended in order to that directory.
#
# Returns the joined path String.
def repo_path(*path)
  segments = [File.dirname(__FILE__)] + path
  File.join(*segments)
end
twitter-text-1.13.4/test/twitter-text-conformance/Gemfile.lock 0000644 0001750 0001750 00000000245 12670063203 024643 0 ustar sudheesh sudheesh GEM
remote: https://rubygems.org/
specs:
mini_portile (0.6.0)
nokogiri (1.6.3.1)
mini_portile (= 0.6.0)
PLATFORMS
ruby
DEPENDENCIES
nokogiri
twitter-text-1.13.4/test/twitter-text-conformance/TldLists.java 0000644 0001750 0001750 00000033352 12670063203 025033 0 ustar sudheesh sudheesh // Auto-generated by conformance/Rakefile
package com.twitter;
import java.util.Arrays;
import java.util.List;
public class TldLists {
public static final List GTLDS = Arrays.asList(
"abb",
"abbott",
"abogado",
"academy",
"accenture",
"accountant",
"accountants",
"aco",
"active",
"actor",
"ads",
"adult",
"aeg",
"aero",
"afl",
"agency",
"aig",
"airforce",
"airtel",
"allfinanz",
"alsace",
"amsterdam",
"android",
"apartments",
"app",
"aquarelle",
"archi",
"army",
"arpa",
"asia",
"associates",
"attorney",
"auction",
"audio",
"auto",
"autos",
"axa",
"azure",
"band",
"bank",
"bar",
"barcelona",
"barclaycard",
"barclays",
"bargains",
"bauhaus",
"bayern",
"bbc",
"bbva",
"bcn",
"beer",
"bentley",
"berlin",
"best",
"bet",
"bharti",
"bible",
"bid",
"bike",
"bing",
"bingo",
"bio",
"biz",
"black",
"blackfriday",
"bloomberg",
"blue",
"bmw",
"bnl",
"bnpparibas",
"boats",
"bond",
"boo",
"boots",
"boutique",
"bradesco",
"bridgestone",
"broker",
"brother",
"brussels",
"budapest",
"build",
"builders",
"business",
"buzz",
"bzh",
"cab",
"cafe",
"cal",
"camera",
"camp",
"cancerresearch",
"canon",
"capetown",
"capital",
"caravan",
"cards",
"care",
"career",
"careers",
"cars",
"cartier",
"casa",
"cash",
"casino",
"cat",
"catering",
"cba",
"cbn",
"ceb",
"center",
"ceo",
"cern",
"cfa",
"cfd",
"chanel",
"channel",
"chat",
"cheap",
"chloe",
"christmas",
"chrome",
"church",
"cisco",
"citic",
"city",
"claims",
"cleaning",
"click",
"clinic",
"clothing",
"cloud",
"club",
"coach",
"codes",
"coffee",
"college",
"cologne",
"com",
"commbank",
"community",
"company",
"computer",
"condos",
"construction",
"consulting",
"contractors",
"cooking",
"cool",
"coop",
"corsica",
"country",
"coupons",
"courses",
"credit",
"creditcard",
"cricket",
"crown",
"crs",
"cruises",
"cuisinella",
"cymru",
"cyou",
"dabur",
"dad",
"dance",
"date",
"dating",
"datsun",
"day",
"dclk",
"deals",
"degree",
"delivery",
"delta",
"democrat",
"dental",
"dentist",
"desi",
"design",
"dev",
"diamonds",
"diet",
"digital",
"direct",
"directory",
"discount",
"dnp",
"docs",
"dog",
"doha",
"domains",
"doosan",
"download",
"drive",
"durban",
"dvag",
"earth",
"eat",
"edu",
"education",
"email",
"emerck",
"energy",
"engineer",
"engineering",
"enterprises",
"epson",
"equipment",
"erni",
"esq",
"estate",
"eurovision",
"eus",
"events",
"everbank",
"exchange",
"expert",
"exposed",
"express",
"fage",
"fail",
"faith",
"family",
"fan",
"fans",
"farm",
"fashion",
"feedback",
"film",
"finance",
"financial",
"firmdale",
"fish",
"fishing",
"fit",
"fitness",
"flights",
"florist",
"flowers",
"flsmidth",
"fly",
"foo",
"football",
"forex",
"forsale",
"forum",
"foundation",
"frl",
"frogans",
"fund",
"furniture",
"futbol",
"fyi",
"gal",
"gallery",
"game",
"garden",
"gbiz",
"gdn",
"gent",
"genting",
"ggee",
"gift",
"gifts",
"gives",
"giving",
"glass",
"gle",
"global",
"globo",
"gmail",
"gmo",
"gmx",
"gold",
"goldpoint",
"golf",
"goo",
"goog",
"google",
"gop",
"gov",
"graphics",
"gratis",
"green",
"gripe",
"group",
"guge",
"guide",
"guitars",
"guru",
"hamburg",
"hangout",
"haus",
"healthcare",
"help",
"here",
"hermes",
"hiphop",
"hitachi",
"hiv",
"hockey",
"holdings",
"holiday",
"homedepot",
"homes",
"honda",
"horse",
"host",
"hosting",
"hoteles",
"hotmail",
"house",
"how",
"hsbc",
"ibm",
"icbc",
"ice",
"icu",
"ifm",
"iinet",
"immo",
"immobilien",
"industries",
"infiniti",
"info",
"ing",
"ink",
"institute",
"insure",
"int",
"international",
"investments",
"ipiranga",
"irish",
"ist",
"istanbul",
"itau",
"iwc",
"java",
"jcb",
"jetzt",
"jewelry",
"jlc",
"jll",
"jobs",
"joburg",
"jprs",
"juegos",
"kaufen",
"kddi",
"kim",
"kitchen",
"kiwi",
"koeln",
"komatsu",
"krd",
"kred",
"kyoto",
"lacaixa",
"lancaster",
"land",
"lasalle",
"lat",
"latrobe",
"law",
"lawyer",
"lds",
"lease",
"leclerc",
"legal",
"lexus",
"lgbt",
"liaison",
"lidl",
"life",
"lighting",
"limited",
"limo",
"link",
"live",
"lixil",
"loan",
"loans",
"lol",
"london",
"lotte",
"lotto",
"love",
"ltda",
"lupin",
"luxe",
"luxury",
"madrid",
"maif",
"maison",
"man",
"management",
"mango",
"market",
"marketing",
"markets",
"marriott",
"mba",
"media",
"meet",
"melbourne",
"meme",
"memorial",
"men",
"menu",
"miami",
"microsoft",
"mil",
"mini",
"mma",
"mobi",
"moda",
"moe",
"mom",
"monash",
"money",
"montblanc",
"mormon",
"mortgage",
"moscow",
"motorcycles",
"mov",
"movie",
"movistar",
"mtn",
"mtpc",
"museum",
"nadex",
"nagoya",
"name",
"navy",
"nec",
"net",
"netbank",
"network",
"neustar",
"new",
"news",
"nexus",
"ngo",
"nhk",
"nico",
"ninja",
"nissan",
"nokia",
"nra",
"nrw",
"ntt",
"nyc",
"office",
"okinawa",
"omega",
"one",
"ong",
"onion",
"onl",
"online",
"ooo",
"oracle",
"orange",
"org",
"organic",
"osaka",
"otsuka",
"ovh",
"page",
"panerai",
"paris",
"partners",
"parts",
"party",
"pet",
"pharmacy",
"philips",
"photo",
"photography",
"photos",
"physio",
"piaget",
"pics",
"pictet",
"pictures",
"pink",
"pizza",
"place",
"play",
"plumbing",
"plus",
"pohl",
"poker",
"porn",
"post",
"praxi",
"press",
"pro",
"prod",
"productions",
"prof",
"properties",
"property",
"pub",
"qpon",
"quebec",
"racing",
"realtor",
"realty",
"recipes",
"red",
"redstone",
"rehab",
"reise",
"reisen",
"reit",
"ren",
"rent",
"rentals",
"repair",
"report",
"republican",
"rest",
"restaurant",
"review",
"reviews",
"rich",
"ricoh",
"rio",
"rip",
"rocks",
"rodeo",
"rsvp",
"ruhr",
"run",
"ryukyu",
"saarland",
"sakura",
"sale",
"samsung",
"sandvik",
"sandvikcoromant",
"sanofi",
"sap",
"sarl",
"saxo",
"sca",
"scb",
"schmidt",
"scholarships",
"school",
"schule",
"schwarz",
"science",
"scor",
"scot",
"seat",
"seek",
"sener",
"services",
"sew",
"sex",
"sexy",
"shiksha",
"shoes",
"show",
"shriram",
"singles",
"site",
"ski",
"sky",
"skype",
"sncf",
"soccer",
"social",
"software",
"sohu",
"solar",
"solutions",
"sony",
"soy",
"space",
"spiegel",
"spreadbetting",
"srl",
"starhub",
"statoil",
"studio",
"study",
"style",
"sucks",
"supplies",
"supply",
"support",
"surf",
"surgery",
"suzuki",
"swatch",
"swiss",
"sydney",
"systems",
"taipei",
"tatamotors",
"tatar",
"tattoo",
"tax",
"taxi",
"team",
"tech",
"technology",
"tel",
"telefonica",
"temasek",
"tennis",
"thd",
"theater",
"tickets",
"tienda",
"tips",
"tires",
"tirol",
"today",
"tokyo",
"tools",
"top",
"toray",
"toshiba",
"tours",
"town",
"toyota",
"toys",
"trade",
"trading",
"training",
"travel",
"trust",
"tui",
"ubs",
"university",
"uno",
"uol",
"vacations",
"vegas",
"ventures",
"vermögensberater",
"vermögensberatung",
"versicherung",
"vet",
"viajes",
"video",
"villas",
"vin",
"vision",
"vista",
"vistaprint",
"vlaanderen",
"vodka",
"vote",
"voting",
"voto",
"voyage",
"wales",
"walter",
"wang",
"watch",
"webcam",
"website",
"wed",
"wedding",
"weir",
"whoswho",
"wien",
"wiki",
"williamhill",
"win",
"windows",
"wine",
"wme",
"work",
"works",
"world",
"wtc",
"wtf",
"xbox",
"xerox",
"xin",
"xperia",
"xxx",
"xyz",
"yachts",
"yandex",
"yodobashi",
"yoga",
"yokohama",
"youtube",
"zip",
"zone",
"zuerich",
"дети",
"ком",
"москва",
"онлайн",
"орг",
"рус",
"сайт",
"קום",
"بازار",
"شبكة",
"كوم",
"موقع",
"कॉम",
"नेट",
"संगठन",
"คอม",
"みんな",
"グーグル",
"コム",
"世界",
"中信",
"中文网",
"企业",
"佛山",
"信息",
"健康",
"八卦",
"公司",
"公益",
"商城",
"商店",
"商标",
"在线",
"大拿",
"娱乐",
"工行",
"广东",
"慈善",
"我爱你",
"手机",
"政务",
"政府",
"新闻",
"时尚",
"机构",
"淡马锡",
"游戏",
"点看",
"移动",
"组织机构",
"网址",
"网店",
"网络",
"谷歌",
"集团",
"飞利浦",
"餐厅",
"닷넷",
"닷컴",
"삼성"
);
public static final List CTLDS = Arrays.asList(
"ac",
"ad",
"ae",
"af",
"ag",
"ai",
"al",
"am",
"an",
"ao",
"aq",
"ar",
"as",
"at",
"au",
"aw",
"ax",
"az",
"ba",
"bb",
"bd",
"be",
"bf",
"bg",
"bh",
"bi",
"bj",
"bl",
"bm",
"bn",
"bo",
"bq",
"br",
"bs",
"bt",
"bv",
"bw",
"by",
"bz",
"ca",
"cc",
"cd",
"cf",
"cg",
"ch",
"ci",
"ck",
"cl",
"cm",
"cn",
"co",
"cr",
"cu",
"cv",
"cw",
"cx",
"cy",
"cz",
"de",
"dj",
"dk",
"dm",
"do",
"dz",
"ec",
"ee",
"eg",
"eh",
"er",
"es",
"et",
"eu",
"fi",
"fj",
"fk",
"fm",
"fo",
"fr",
"ga",
"gb",
"gd",
"ge",
"gf",
"gg",
"gh",
"gi",
"gl",
"gm",
"gn",
"gp",
"gq",
"gr",
"gs",
"gt",
"gu",
"gw",
"gy",
"hk",
"hm",
"hn",
"hr",
"ht",
"hu",
"id",
"ie",
"il",
"im",
"in",
"io",
"iq",
"ir",
"is",
"it",
"je",
"jm",
"jo",
"jp",
"ke",
"kg",
"kh",
"ki",
"km",
"kn",
"kp",
"kr",
"kw",
"ky",
"kz",
"la",
"lb",
"lc",
"li",
"lk",
"lr",
"ls",
"lt",
"lu",
"lv",
"ly",
"ma",
"mc",
"md",
"me",
"mf",
"mg",
"mh",
"mk",
"ml",
"mm",
"mn",
"mo",
"mp",
"mq",
"mr",
"ms",
"mt",
"mu",
"mv",
"mw",
"mx",
"my",
"mz",
"na",
"nc",
"ne",
"nf",
"ng",
"ni",
"nl",
"no",
"np",
"nr",
"nu",
"nz",
"om",
"pa",
"pe",
"pf",
"pg",
"ph",
"pk",
"pl",
"pm",
"pn",
"pr",
"ps",
"pt",
"pw",
"py",
"qa",
"re",
"ro",
"rs",
"ru",
"rw",
"sa",
"sb",
"sc",
"sd",
"se",
"sg",
"sh",
"si",
"sj",
"sk",
"sl",
"sm",
"sn",
"so",
"sr",
"ss",
"st",
"su",
"sv",
"sx",
"sy",
"sz",
"tc",
"td",
"tf",
"tg",
"th",
"tj",
"tk",
"tl",
"tm",
"tn",
"to",
"tp",
"tr",
"tt",
"tv",
"tw",
"tz",
"ua",
"ug",
"uk",
"um",
"us",
"uy",
"uz",
"va",
"vc",
"ve",
"vg",
"vi",
"vn",
"vu",
"wf",
"ws",
"ye",
"yt",
"za",
"zm",
"zw",
"ελ",
"бел",
"мкд",
"мон",
"рф",
"срб",
"укр",
"қаз",
"հայ",
"الاردن",
"الجزائر",
"السعودية",
"المغرب",
"امارات",
"ایران",
"بھارت",
"تونس",
"سودان",
"سورية",
"عراق",
"عمان",
"فلسطين",
"قطر",
"مصر",
"مليسيا",
"پاکستان",
"भारत",
"বাংলা",
"ভারত",
"ਭਾਰਤ",
"ભારત",
"இந்தியா",
"இலங்கை",
"சிங்கப்பூர்",
"భారత్",
"ලංකා",
"ไทย",
"გე",
"中国",
"中國",
"台湾",
"台灣",
"新加坡",
"澳門",
"香港",
"한국"
);
}
twitter-text-1.13.4/test/conformance_test.rb 0000644 0001750 0001750 00000015276 12667350232 021344 0 ustar sudheesh sudheesh require 'multi_json'
require 'nokogiri'
require 'test/unit'
require 'yaml'
# Flag interpreters older than Ruby 1.9 so the encoding workarounds below can
# be applied; on those interpreters $KCODE is switched to 'u'.
version_parts = RUBY_VERSION.split('.')
OLD_RUBY = version_parts[0].to_i == 1 && version_parts[1].to_i < 9
$KCODE='u' if OLD_RUBY
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'twitter-text'
# Runs the YAML-driven conformance fixtures against the Ruby implementation.
#
# Each call to def_conformance_test loads one fixture file from
# CONFORMANCE_DIR and defines one test method per fixture entry. While such a
# test runs, the entry is available through the private readers below
# (description, expected, json, hits, text).
class ConformanceTest < Test::Unit::TestCase
  include Twitter::Extractor
  include Twitter::Autolink
  include Twitter::HitHighlighter
  include Twitter::Validation

  private

  # Readers for the fields of the fixture entry currently under test.
  %w(description expected json hits).each do |key|
    define_method key.to_sym do
      @test_info[key]
    end
  end

  if OLD_RUBY
    # On Ruby < 1.9, expand literal "\uXXXXXXXX" sequences (eight hex digits)
    # found in the fixture text into the corresponding UTF-8 characters.
    def text
      @test_info['text'].gsub(/\\u([0-9a-f]{8})/i) do
        [$1.to_i(16)].pack('U*')
      end
    end
  else
    # The fixture text, used as-is.
    def text
      @test_info['text']
    end
  end

  # The expected value (an Array of Hashes with String keys) converted so
  # that every Hash is keyed by Symbols instead.
  def expected_with_symbol_keys
    expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
  end

  # Asserts that two HTML strings parse to equal node trees, ignoring the
  # order in which attributes appear on each element.
  def assert_equal_without_attribute_order(expected, actual, failure_message = nil)
    # Each "?" in the template is a placeholder filled from the trailing arguments.
    assert_block(build_message(failure_message, "<?> expected but was\n<?>", expected, actual)) do
      equal_nodes?(Nokogiri::HTML(expected).root, Nokogiri::HTML(actual).root)
    end
  end

  # Recursively compares two nodes: name, attributes (order-insensitive),
  # text content and children must all agree.
  def equal_nodes?(expected, actual)
    # A side may be nil (for example a document without a root element);
    # two missing nodes are equal, a missing and a present node are not.
    return expected.nil? && actual.nil? if expected.nil? || actual.nil?
    return false unless expected.name == actual.name
    return false unless ordered_attributes(expected) == ordered_attributes(actual)
    return false if expected.text? && actual.text? && expected.content != actual.content
    # Differing child counts mean the trees differ; checking up front also
    # keeps the positional lookup below from ever returning nil.
    return false unless expected.children.size == actual.children.size

    expected.children.each_with_index do |child, index|
      return false unless equal_nodes?(child, actual.children[index])
    end

    true
  end

  # Attribute [name, value] pairs of an element in a canonical (sorted) order.
  def ordered_attributes(element)
    element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort
  end

  # Directory holding the conformance fixtures; may be overridden through the
  # CONFORMANCE_DIR environment variable.
  CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../../../conformance", __FILE__)

  # Defines one test method per entry of the +test_type+ suite in +file+.
  #
  # file      - fixture file name, relative to CONFORMANCE_DIR.
  # test_type - name of the suite under the fixture's "tests" key.
  # block     - assertion body, evaluated in the context of the test instance.
  #
  # Raises RuntimeError when the fixture file has no such suite.
  def self.def_conformance_test(file, test_type, &block)
    yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file))
    raise "No such test suite: #{test_type.to_s}" unless yaml["tests"][test_type.to_s]

    file_name = file.split('.').first

    yaml["tests"][test_type.to_s].each do |test_info|
      name = :"test_#{file_name}_#{test_type} #{test_info['description']}"
      define_method name do
        @test_info = test_info
        instance_eval(&block)
      end
    end
  end

  public

  # Extractor Conformance
  def_conformance_test("extract.yml", :replies) do
    assert_equal expected, extract_reply_screen_name(text), description
  end

  def_conformance_test("extract.yml", :mentions) do
    assert_equal expected, extract_mentioned_screen_names(text), description
  end

  def_conformance_test("extract.yml", :mentions_with_indices) do
    assert_equal expected_with_symbol_keys, extract_mentioned_screen_names_with_indices(text), description
  end

  def_conformance_test("extract.yml", :mentions_or_lists_with_indices) do
    assert_equal expected_with_symbol_keys, extract_mentions_or_lists_with_indices(text), description
  end

  def_conformance_test("extract.yml", :urls) do
    assert_equal expected, extract_urls(text), description
    expected.each do |expected_url|
      assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
    end
  end

  def_conformance_test("tlds.yml", :generic) do
    assert_equal expected, extract_urls(text), description
  end

  def_conformance_test("tlds.yml", :country) do
    assert_equal expected, extract_urls(text), description
  end

  def_conformance_test("extract.yml", :urls_with_indices) do
    assert_equal expected_with_symbol_keys, extract_urls_with_indices(text), description
  end

  def_conformance_test("extract.yml", :hashtags) do
    assert_equal expected, extract_hashtags(text), description
  end

  def_conformance_test("extract.yml", :hashtags_with_indices) do
    assert_equal expected_with_symbol_keys, extract_hashtags_with_indices(text), description
  end

  def_conformance_test("extract.yml", :cashtags) do
    assert_equal expected, extract_cashtags(text), description
  end

  def_conformance_test("extract.yml", :cashtags_with_indices) do
    assert_equal expected_with_symbol_keys, extract_cashtags_with_indices(text), description
  end

  # Autolink Conformance
  def_conformance_test("autolink.yml", :usernames) do
    assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :lists) do
    assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :urls) do
    assert_equal_without_attribute_order expected, auto_link_urls(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :hashtags) do
    assert_equal_without_attribute_order expected, auto_link_hashtags(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :cashtags) do
    assert_equal_without_attribute_order expected, auto_link_cashtags(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :all) do
    assert_equal_without_attribute_order expected, auto_link(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :json) do
    assert_equal_without_attribute_order expected, auto_link_with_json(text, MultiJson.load(json), :suppress_no_follow => true), description
  end

  # HitHighlighter Conformance
  def_conformance_test("hit_highlighting.yml", :plain_text) do
    assert_equal expected, hit_highlight(text, hits), description
  end

  def_conformance_test("hit_highlighting.yml", :with_links) do
    assert_equal expected, hit_highlight(text, hits), description
  end

  # Validation Conformance
  def_conformance_test("validate.yml", :tweets) do
    assert_equal expected, valid_tweet_text?(text), description
  end

  def_conformance_test("validate.yml", :usernames) do
    assert_equal expected, valid_username?(text), description
  end

  def_conformance_test("validate.yml", :lists) do
    assert_equal expected, valid_list?(text), description
  end

  def_conformance_test("validate.yml", :urls) do
    assert_equal expected, valid_url?(text), description
  end

  def_conformance_test("validate.yml", :urls_without_protocol) do
    assert_equal expected, valid_url?(text, true, false), description
  end

  def_conformance_test("validate.yml", :hashtags) do
    assert_equal expected, valid_hashtag?(text), description
  end

  def_conformance_test("validate.yml", :lengths) do
    assert_equal expected, tweet_length(text), description
  end
end
twitter-text-1.13.4/script/ 0000755 0001750 0001750 00000000000 12667350232 016000 5 ustar sudheesh sudheesh twitter-text-1.13.4/script/generate 0000755 0001750 0001750 00000000562 12667350232 017523 0 ustar sudheesh sudheesh #!/usr/bin/env ruby
# Application root: the parent of the directory that holds this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try to load rubigen directly; if that fails, load RubyGems and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before the arguments reach the generator.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)
twitter-text-1.13.4/script/destroy 0000755 0001750 0001750 00000000560 12667350232 017420 0 ustar sudheesh sudheesh #!/usr/bin/env ruby
# Application root: the parent of the directory that holds this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try to load rubigen directly; if that fails, load RubyGems and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before the arguments reach the destroyer.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)
twitter-text-1.13.4/Rakefile 0000644 0001750 0001750 00000001353 12667350232 016143 0 ustar sudheesh sudheesh require 'bundler'
# Mix the Rake DSL into the top level so the task/namespace/desc calls below resolve.
include Rake::DSL
# Register the gem tasks provided by Bundler's GemHelper.
Bundler::GemHelper.install_tasks
# `rake` with no arguments runs the specs and then the conformance suite.
task :default => ['spec', 'test:conformance']
# `rake test` depends on (and therefore runs) the spec task.
task :test => :spec
require 'rubygems'
require 'rspec/core/rake_task'
# Create the RSpec rake task under the name :spec.
RSpec::Core::RakeTask.new(:spec)
namespace :test do
  namespace :conformance do
    desc "Run conformance test suite"
    task :run do
      # Use `-rrubygems` (an explicit `-r rubygems`) rather than `-rubygems`:
      # the latter loads the `ubygems` shim, which newer Rubies no longer
      # ship, while the former works on old and new interpreters alike.
      ruby '-rrubygems', "test/conformance_test.rb"
    end
  end

  desc "Run conformance test suite"
  # Convenience alias: test:conformance only triggers test:conformance:run.
  task :conformance => ['conformance:run'] do
  end
end
require 'rdoc/task'
namespace :doc do
  # RDoc task: README.rdoc is the main page, output goes to doc/, and the
  # README plus every Ruby file under lib/ is documented.
  RDoc::Task.new do |rdoc_task|
    rdoc_task.main = "README.rdoc"
    rdoc_task.rdoc_dir = 'doc'
    documented_files = ["README.rdoc", "lib/**/*.rb"]
    rdoc_task.rdoc_files.include(*documented_files)
  end
end
desc "Run cruise control build"
# The cruise build has no work of its own; it only runs its prerequisites.
cruise_prerequisites = [:spec, 'test:conformance']
task :cruise => cruise_prerequisites do
end
twitter-text-1.13.4/twitter-text.gemspec 0000644 0001750 0001750 00000002510 12667350232 020523 0 ustar sudheesh sudheesh # encoding: utf-8
# Gem specification for twitter-text.
Gem::Specification.new do |s|
  s.name = "twitter-text"
  s.version = "1.13.4"
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
               "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
             "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
  s.homepage = "http://twitter.com"
  # Summary and description are assigned once each. Previously the summary
  # was set through a chained assignment and then overwritten further down.
  s.summary = "Twitter text handling library"
  s.description = "A gem that provides text handling for Twitter"
  # SPDX identifier for the Apache License 2.0.
  s.license = "Apache-2.0"
  s.platform = Gem::Platform::RUBY
  # has_rdoc= is deprecated in newer RubyGems; only set it where the
  # running RubyGems still provides the writer.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)

  s.add_development_dependency "multi_json", "~> 1.3"
  s.add_development_dependency "nokogiri", "~> 1.5.10"
  s.add_development_dependency "rake"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "rspec", "~> 2.14.0"
  s.add_development_dependency "simplecov", "~> 0.8.0"
  s.add_runtime_dependency "unf", "~> 0.1.0"

  # Packaged files come from git, plus the TLD list, which is appended explicitly.
  s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml']
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
twitter-text-1.13.4/.gitmodules 0000644 0001750 0001750 00000000217 12667350232 016651 0 ustar sudheesh sudheesh [submodule "test/twitter-text-conformance"]
path = test/twitter-text-conformance
url = git://github.com/twitter/twitter-text-conformance.git
twitter-text-1.13.4/.gitignore 0000644 0001750 0001750 00000000563 12667350232 016470 0 ustar sudheesh sudheesh *.gem
*.rbc
*.sw[a-p]
*.tmproj
*.tmproject
*.un~
*~
.DS_Store
.Spotlight-V100
.Trashes
._*
.bundle
.config
.directory
.elc
.emacs.desktop
.emacs.desktop.lock
.redcar
.yardoc
Desktop.ini
Gemfile.lock
Icon?
InstalledFiles
Session.vim
Thumbs.db
\#*\#
_yardoc
auto-save-list
coverage
doc
lib/bundler/man
pkg
pkg/*
rdoc
spec/reports
test/tmp
test/version_tmp
tmp
tmtags
tramp
twitter-text-1.13.4/.gemtest 0000644 0001750 0001750 00000000000 12667350232 016133 0 ustar sudheesh sudheesh