twitter-text-1.13.4/0000755000175000017500000000000012667350232014474 5ustar sudheeshsudheeshtwitter-text-1.13.4/spec/0000755000175000017500000000000012667350232015426 5ustar sudheeshsudheeshtwitter-text-1.13.4/spec/twitter_text_spec.rb0000644000175000017500000000063112667350232021533 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' major, minor, patch = RUBY_VERSION.split('.') if major.to_i == 1 && minor.to_i < 9 describe "base" do before do $KCODE = 'NONE' end after do $KCODE = 'u' end it "should raise with invalid KCODE on Ruby < 1.9" do lambda do require 'twitter-text' end.should raise_error end end end twitter-text-1.13.4/spec/spec_helper.rb0000644000175000017500000000757112667350232020256 0ustar sudheeshsudheesh$TESTING=true # Ruby 1.8 encoding check major, minor, patch = RUBY_VERSION.split('.') if major.to_i == 1 && minor.to_i < 9 $KCODE='u' end $:.push File.join(File.dirname(__FILE__), '..', 'lib') require 'nokogiri' require 'json' require 'simplecov' SimpleCov.start do add_group 'Libraries', 'lib' end require File.expand_path('../../lib/twitter-text', __FILE__) require File.expand_path('../test_urls', __FILE__) RSpec.configure do |config| config.include TestUrls end RSpec::Matchers.define :match_autolink_expression do match do |string| !Twitter::Extractor.extract_urls(string).empty? 
end end RSpec::Matchers.define :match_autolink_expression_in do |text| match do |url| @match_data = Twitter::Regex[:valid_url].match(text) @match_data && @match_data.to_s.strip == url end failure_message_for_should do |url| "Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'" end end RSpec::Matchers.define :have_autolinked_url do |url, inner_text| match do |text| @link = Nokogiri::HTML(text).search("a[@href='#{url}']") @link && @link.inner_text && (inner_text && @link.inner_text == inner_text) || (!inner_text && @link.inner_text == url) end failure_message_for_should do |text| "Expected url '#{url}'#{", inner_text '#{inner_text}'" if inner_text} to be autolinked in '#{text}'" end end RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text| expected = inner_text ? inner_text : screen_name match do |text| @link = Nokogiri::HTML(text).search("a.username") @link && @link.inner_text == expected && "https://twitter.com/#{screen_name}".should == @link.first['href'] end failure_message_for_should do |text| if @link.first "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not." else "Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does." end description do "contain a link with the name and href pointing to the expected screen_name" end end RSpec::Matchers.define :link_to_list_path do |list_path, inner_text| expected = inner_text ? 
inner_text : list_path match do |text| @link = Nokogiri::HTML(text).search("a.list-slug") @link && @link.inner_text == expected && "https://twitter.com/#{list_path}".downcase.should == @link.first['href'] end failure_message_for_should do |text| if @link.first "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not." else "Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does." end description do "contain a link with the list title and an href pointing to the list path" end end RSpec::Matchers.define :have_autolinked_hashtag do |hashtag| match do |text| @link = Nokogiri::HTML(text).search("a[@href='https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']") @link && @link.inner_text && @link.inner_text == hashtag end failure_message_for_should do |text| if @link.first "Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}" else "Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does." 
end end twitter-text-1.13.4/spec/rewriter_spec.rb0000644000175000017500000004000212667350232020624 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' describe Twitter::Rewriter do def original_text; end def url; end def block(*args) if Array === @block_args unless Array === @block_args.first @block_args = [@block_args] end @block_args << args else @block_args = args end "[rewritten]" end describe "rewrite usernames" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) end context "username preceded by a space" do def original_text; "hello @jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "hello [rewritten]" end end context "username at beginning of line" do def original_text; "@jacob you're cool"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten] you're cool" end end context "username preceded by word character" do def original_text; "meet@the beach"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "meet@the beach" end end context "username preceded by non-word character" do def original_text; "great.@jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "great.[rewritten]" end end context "username containing non-word characters" do def original_text; "@jacob&^$%^"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]&^$%^" end end context "username over twenty characters" do def original_text @twenty_character_username = "zach" * 5 "@" + @twenty_character_username + "1" end it "should be rewritten" do @block_args.should == ["@", @twenty_character_username, nil] @rewritten_text.should == "[rewritten]1" end end context "username followed by japanese" do def original_text; "@jacobの"; end it "should 
be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]の" end end context "username preceded by japanese" do def original_text; "あ@jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "あ[rewritten]" end end context "username surrounded by japanese" do def original_text; "あ@jacobの"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "あ[rewritten]の" end end context "username using full-width at-sign" do def original_text "#{[0xFF20].pack('U')}jacob" end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]" end end end #}}} describe "rewrite lists" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) end context "slug preceded by a space" do def original_text; "hello @jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "hello [rewritten]" end end context "username followed by a slash but no list" do def original_text; "hello @jacob/ my-list"; end it "should not be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "hello [rewritten]/ my-list" end end context "empty username followed by a list" do def original_text; "hello @/my-list"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "hello @/my-list" end end context "list slug at beginning of line" do def original_text; "@jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "[rewritten]" end end context "username preceded by alpha-numeric character" do def original_text; "meet@jacob/my-list"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "meet@jacob/my-list" end end context "username preceded by non-word 
character" do def original_text; "great.@jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "great.[rewritten]" end end context "username containing non-word characters" do def original_text; "@jacob/my-list&^$%^"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "[rewritten]&^$%^" end end context "username over twenty characters" do def original_text @twentyfive_character_list = "a" * 25 "@jacob/#{@twentyfive_character_list}12345" end it "should be rewritten" do @block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"] @rewritten_text.should == "[rewritten]12345" end end end #}}} describe "rewrite hashtags" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block)) end context "with an all numeric hashtag" do def original_text; "#123"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "#123" end end context "with a hashtag with alphanumeric characters" do def original_text; "#ab1d"; end it "should be rewritten" do @block_args.should == ["#", "ab1d"] @rewritten_text.should == "[rewritten]" end end context "with a hashtag with underscores" do def original_text; "#a_b_c_d"; end it "should be rewritten" do @block_args.should == ["#", "a_b_c_d"] @rewritten_text.should == "[rewritten]" end end context "with a hashtag that is preceded by a word character" do def original_text; "ab#cd"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "ab#cd" end end context "with a hashtag that starts with a number but has word characters" do def original_text; "#2ab"; end it "should be rewritten" do @block_args.should == ["#", "2ab"] @rewritten_text.should == "[rewritten]" end end context "with multiple valid hashtags" do def original_text; "I'm frickin' awesome #ab #cd #ef"; end it "rewrites each hashtag" do @block_args.should 
== [["#", "ab"], ["#", "cd"], ["#", "ef"]] @rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]" end end context "with a hashtag preceded by a ." do def original_text; "ok, great.#abc"; end it "should be rewritten" do @block_args.should == ["#", "abc"] @rewritten_text.should == "ok, great.[rewritten]" end end context "with a hashtag preceded by a &" do def original_text; "&#nbsp;"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "&#nbsp;" end end context "with a hashtag that ends in an !" do def original_text; "#great!"; end it "should be rewritten, but should not include the !" do @block_args.should == ["#", "great"]; @rewritten_text.should == "[rewritten]!" end end context "with a hashtag followed by Japanese" do def original_text; "#twj_devの"; end it "should be rewritten" do @block_args.should == ["#", "twj_devの"]; @rewritten_text.should == "[rewritten]" end end context "with a hashtag preceded by a full-width space" do def original_text; "#{[0x3000].pack('U')}#twj_dev"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == " [rewritten]" end end context "with a hashtag followed by a full-width space" do def original_text; "#twj_dev#{[0x3000].pack('U')}"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == "[rewritten] " end end context "with a hashtag using full-width hash" do def original_text; "#{[0xFF03].pack('U')}twj_dev"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == "[rewritten]" end end context "with a hashtag containing an accented latin character" do def original_text # the hashtag is #éhashtag "##{[0x00e9].pack('U')}hashtag" end it "should be rewritten" do @block_args.should == ["#", "éhashtag"]; @rewritten_text.should == "[rewritten]" end end end #}}} describe "rewrite urls" do #{{{ def url; "http://www.google.com"; end before do 
@rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block)) end context "when embedded in plain text" do def original_text; "On my search engine #{url} I found good links."; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "On my search engine [rewritten] I found good links." end end context "when surrounded by Japanese;" do def original_text; "いまなにしてる#{url}いまなにしてる"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる" end end context "with a path surrounded by parentheses;" do def original_text; "I found a neatness (#{url})"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end context "when the URL ends with a slash;" do def url; "http://www.google.com/"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "when the URL has a path;" do def url; "http://www.google.com/fsdfasdf"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end end context "when path contains parens" do def original_text; "I found a neatness (#{url})"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end context "wikipedia" do def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "IIS session" do def url; "http://msdn.com/S(deadbeef)/page.htm"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "unbalanced parens" do def url; "http://example.com/i_has_a_("; end it "should be rewritten" do @block_args.should == ["http://example.com/i_has_a_"]; 
@rewritten_text.should == "I found a neatness ([rewritten]()" end end context "balanced parens with a double quote inside" do def url; "http://foo.bar.com/foo_(\")_bar" end it "should be rewritten" do @block_args.should == ["http://foo.bar.com/foo_"]; @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)" end end context "balanced parens hiding XSS" do def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end it "should be rewritten" do @block_args.should == ["http://x.xx.com/"]; @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))' end end end context "when preceded by a :" do def original_text; "Check this out @hoverbird:#{url}"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "Check this out @hoverbird:[rewritten]" end end context "with a URL ending in allowed punctuation" do it "does not consume ending punctuation" do %w| ? ! , . : ; ] ) } = \ ' |.each do |char| Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |url| url.should == url; "[rewritten]" end.should == "[rewritten]#{char}" end end end context "with a URL preceded in forbidden characters" do it "should be rewritten" do %w| \ ' / ! = |.each do |char| Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |url| "[rewritten]" # should not be called here. 
end.should == "#{char}[rewritten]" end end end context "when embedded in a link tag" do def original_text; "#{url}"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "[rewritten]" end end context "with multiple URLs" do def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end it "should autolink each one" do @block_args.should == [["http://www.links.org"], ["http://www.foo.org"]]; @rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]" end end context "with multiple URLs in different formats" do def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end it "should autolink each one, in the proper order" do @block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]; @rewritten_text.should == "[rewritten] [rewritten] [rewritten]" end end context "with a URL having a long TLD" do def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end it "should autolink it" do @block_args.should == ["http://golem.mobi/0912/71607.html"] @rewritten_text.should == "Yahoo integriert Facebook [rewritten]" end end context "with a url lacking the protocol" do def original_text; "I like www.foobar.com dudes"; end it "does not link at all" do @block_args.should be_nil @rewritten_text.should == "I like www.foobar.com dudes" end end context "with a @ in a URL" do context "with XSS attack" do def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end it "should not allow XSS follwing @" do @block_args.should == ["http://x.xx.com/"] @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//' end end context "with a username not followed by a /" do def original_text; "http://example.com/@foobar"; end it "should link url" do @block_args.should == ["http://example.com/@foobar"] @rewritten_text.should == "[rewritten]" end end context "with a username followed 
by a /" do def original_text; "http://example.com/@foobar/"; end it "should not link the username but link full url" do @block_args.should == ["http://example.com/@foobar/"] @rewritten_text.should == "[rewritten]" end end end end #}}} end # vim: foldmethod=marker twitter-text-1.13.4/spec/validation_spec.rb0000644000175000017500000000302212667350232021114 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestValidation include Twitter::Validation end describe Twitter::Validation do it "should disallow invalid BOM character" do TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters end it "should disallow invalid U+FFFF character" do TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters end it "should disallow direction change characters" do [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char| TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters end end it "should disallow non-Unicode" do TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters end it "should allow <= 140 combined accent characters" do char = [0x65, 0x0301].pack('U') TestValidation.new.tweet_invalid?(char * 139).should == false TestValidation.new.tweet_invalid?(char * 140).should == false TestValidation.new.tweet_invalid?(char * 141).should == :too_long end it "should allow <= 140 multi-byte characters" do char = [ 0x1d106 ].pack('U') TestValidation.new.tweet_invalid?(char * 139).should == false TestValidation.new.tweet_invalid?(char * 140).should == false TestValidation.new.tweet_invalid?(char * 141).should == :too_long end end twitter-text-1.13.4/spec/autolinking_spec.rb0000644000175000017500000007442012667350232021320 0ustar sudheeshsudheesh# encoding: utf-8 require 
File.dirname(__FILE__) + '/spec_helper' class TestAutolink include Twitter::Autolink end describe Twitter::Autolink do def original_text; end def url; end describe "auto_link_custom" do before do @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text end describe "username autolinking" do context "username preceded by a space" do def original_text; "hello @jacob"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username in camelCase" do def original_text() "@jaCob iS cOoL" end it "should be linked" do @autolinked_text.should link_to_screen_name('jaCob') end end context "username at beginning of line" do def original_text; "@jacob you're cool"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username preceded by word character" do def original_text; "meet@the beach"; end it "should not be linked" do Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "username preceded by non-word character" do def original_text; "great.@jacob"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username containing non-word characters" do def original_text; "@zach&^$%^"; end it "should not be linked" do @autolinked_text.should link_to_screen_name('zach') end end context "username over twenty characters" do def original_text @twenty_character_username = "zach" * 5 "@" + @twenty_character_username + "1" end it "should not be linked" do @autolinked_text.should link_to_screen_name(@twenty_character_username) end end context "username followed by japanese" do def original_text; "@jacobの"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username preceded by japanese" do def original_text; "あ@matz"; end it "should be linked" do @autolinked_text.should link_to_screen_name('matz') end end context "username surrounded by japanese" do def 
original_text; "あ@yoshimiの"; end it "should be linked" do @autolinked_text.should link_to_screen_name('yoshimi') end end context "username using full-width at-sign" do def original_text "#{[0xFF20].pack('U')}jacob" end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end end describe "list path autolinking" do context "when List is not available" do it "should not be linked" do @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true) @autolinked_text.should_not link_to_list_path('jacob/my-list') @autolinked_text.should include('my-list') end end context "slug preceded by a space" do def original_text; "hello @jacob/my-list"; end it "should be linked" do @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username followed by a slash but no list" do def original_text; "hello @jacob/ my-list"; end it "should NOT be linked" do @autolinked_text.should_not link_to_list_path('jacob/my-list') @autolinked_text.should link_to_screen_name('jacob') end end context "empty username followed by a list" do def original_text; "hello @/my-list"; end it "should NOT be linked" do Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "list slug at beginning of line" do def original_text; "@jacob/my-list"; end it "should be linked" do @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username preceded by alpha-numeric character" do def original_text; "meet@the/beach"; end it "should not be linked" do Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "username preceded by non-word character" do def original_text; "great.@jacob/my-list"; end it "should be linked" do @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list") @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username containing non-word characters" do def original_text; "@zach/test&^$%^"; end it 
"should be linked" do @autolinked_text.should link_to_list_path('zach/test') end end context "username over twenty characters" do def original_text @twentyfive_character_list = "jack/" + ("a" * 25) "@#{@twentyfive_character_list}12345" end it "should be linked" do @autolinked_text.should link_to_list_path(@twentyfive_character_list) end end end describe "hashtag autolinking" do context "with an all numeric hashtag" do def original_text; "#123"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag('#123') end end context "with a hashtag with alphanumeric characters" do def original_text; "#ab1d"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#ab1d') end end context "with a hashtag with underscores" do def original_text; "#a_b_c_d"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag(original_text) end end context "with a hashtag that is preceded by a word character" do def original_text; "ab#cd"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag(original_text) end end context "with a page anchor in a url" do def original_text; "Here's my url: http://foobar.com/#home"; end it "should not link the hashtag" do @autolinked_text.should_not have_autolinked_hashtag('#home') end it "should link the url" do @autolinked_text.should have_autolinked_url('http://foobar.com/#home') end end context "with a hashtag that starts with a number but has word characters" do def original_text; "#2ab"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag(original_text) end end context "with multiple valid hashtags" do def original_text; "I'm frickin' awesome #ab #cd #ef"; end it "links each hashtag" do @autolinked_text.should have_autolinked_hashtag('#ab') @autolinked_text.should have_autolinked_hashtag('#cd') @autolinked_text.should have_autolinked_hashtag('#ef') end end context "with a hashtag preceded by a ." 
do def original_text; "ok, great.#abc"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#abc') end end context "with a hashtag preceded by a &" do def original_text; "&#nbsp;"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag('#nbsp;') end end context "with a hashtag that ends in an !" do def original_text; "#great!"; end it "should be linked, but should not include the !" do @autolinked_text.should have_autolinked_hashtag('#great') end end context "with a hashtag followed by Japanese" do def original_text; "#twj_devの"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_devの') end end context "with a hashtag preceded by a full-width space" do def original_text; "#{[0x3000].pack('U')}#twj_dev"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_dev') end end context "with a hashtag followed by a full-width space" do def original_text; "#twj_dev#{[0x3000].pack('U')}"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_dev') end end context "with a hashtag using full-width hash" do def original_text; "#{[0xFF03].pack('U')}twj_dev"; end it "should be linked" do link = Nokogiri::HTML(@autolinked_text).search('a') (link.inner_text.respond_to?(:force_encoding) ? 
link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev" link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev' end end context "with a hashtag containing an accented latin character" do def original_text # the hashtag is #éhashtag "##{[0x00e9].pack('U')}hashtag" end it "should be linked" do @autolinked_text.should == "#éhashtag" end end end describe "URL autolinking" do def url; "http://www.google.com"; end context "when embedded in plain text" do def original_text; "On my search engine #{url} I found good links."; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "when surrounded by Japanese;" do def original_text; "いまなにしてる#{url}いまなにしてる"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with a path surrounded by parentheses;" do def original_text; "I found a neatness (#{url})"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end context "when the URL ends with a slash;" do def url; "http://www.google.com/"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "when the URL has a path;" do def url; "http://www.google.com/fsdfasdf"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end end context "when path contains parens" do def original_text; "I found a neatness (#{url})"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end context "wikipedia" do def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "IIS session" do def url; "http://msdn.com/S(deadbeef)/page.htm"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "unbalanced parens" do def url; "http://example.com/i_has_a_("; end it "should be linked" do @autolinked_text.should 
have_autolinked_url("http://example.com/i_has_a_") end end context "balanced parens with a double quote inside" do def url; "http://foo.com/foo_(\")_bar" end it "should be linked" do @autolinked_text.should have_autolinked_url("http://foo.com/foo_") end end context "balanced parens hiding XSS" do def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end it "should be linked" do @autolinked_text.should have_autolinked_url("http://x.xx.com/") end end end context "when preceded by a :" do def original_text; "Check this out @hoverbird:#{url}"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with a URL ending in allowed punctuation" do it "does not consume ending punctuation" do matcher = TestAutolink.new %w| ? ! , . : ; ] ) } = \ ' |.each do |char| matcher.auto_link("#{url}#{char}").should have_autolinked_url(url) end end end context "with a URL preceded in forbidden characters" do it "should be linked" do matcher = TestAutolink.new %w| \ ' / ! 
= |.each do |char| matcher.auto_link("#{char}#{url}").should have_autolinked_url(url) end end end context "when embedded in a link tag" do def original_text; "#{url}"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with multiple URLs" do def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end it "should autolink each one" do @autolinked_text.should have_autolinked_url('http://www.links.org') @autolinked_text.should have_autolinked_url('http://www.foo.org') end end context "with multiple URLs in different formats" do def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end it "should autolink each one, in the proper order" do @autolinked_text.should have_autolinked_url('http://foo.com') @autolinked_text.should have_autolinked_url('https://bar.com') @autolinked_text.should have_autolinked_url('http://mail.foobar.org') end end context "with a URL having a long TLD" do def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end it "should autolink it" do @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html') end end context "with a url lacking the protocol" do def original_text; "I like www.foobar.com dudes"; end it "does not link at all" do link = Nokogiri::HTML(@autolinked_text).search('a') link.should be_empty end end context "with a @ in a URL" do context "with XSS attack" do def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end it "should not allow XSS follwing @" do @autolinked_text.should have_autolinked_url('http://x.xx.com/') end end context "with a username not followed by a /" do def original_text; 'http://example.com/@foobar'; end it "should link url" do @autolinked_text.should have_autolinked_url('http://example.com/@foobar') end end context "with a username followed by a /" do def original_text; 'http://example.com/@foobar/'; end it "should not link the 
username but link full url" do @autolinked_text.should have_autolinked_url('http://example.com/@foobar/') @autolinked_text.should_not link_to_screen_name('foobar') end end end context "regex engine quirks" do context "does not spiral out of control on repeated periods" do def original_text; "Test a ton of periods http://example.com/path.........................................."; end it "should autolink" do @autolinked_text.should have_autolinked_url('http://example.com/path') end end context "does not spiral out of control on repeated dashes" do def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end it "should autolink" do @autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188') end end end end describe "Autolink all" do before do @linker = TestAutolink.new end it "should allow url/hashtag overlap" do auto_linked = @linker.auto_link("https://twitter.com/#search") auto_linked.should have_autolinked_url('https://twitter.com/#search') end it "should not add invalid option in HTML tags" do auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname') auto_linked.should have_autolinked_url('https://twitter.com/') auto_linked.should_not include('hashtag_class') auto_linked.should_not include('hashtag_classname') end it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do auto_linked = @linker.auto_link("#{[0x10400].pack('U')} #hashtag #{[0x10400].pack('U')} @mention #{[0x10400].pack('U')} http://twitter.com/") auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://twitter.com/') end end end describe "autolinking options" do before do @linker = TestAutolink.new end it "should show display_url 
when :url_entities provided" do linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{ "url" => "http://t.co/0JG5Mcq", "display_url" => "blog.twitter.com/2011/05/twitte…", "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", "indices" => [ 84, 103 ] }]) html = Nokogiri::HTML(linked) html.search('a').should_not be_empty html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte" html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …" html.search('span[@style="position:absolute;left:-9999px;"]').size.should == 4 end it "should accept invisible_tag_attrs option" do linked = @linker.auto_link("http://t.co/0JG5Mcq", { :url_entities => [{ "url" => "http://t.co/0JG5Mcq", "display_url" => "blog.twitter.com/2011/05/twitte…", "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", "indices" => [ 0, 19 ] }], :invisible_tag_attrs => "style='dummy;'" }) html = Nokogiri::HTML(linked) html.search('span[@style="dummy;"]').size.should == 4 end it "should show display_url if available in entity" do linked = @linker.auto_link_entities("http://t.co/0JG5Mcq", [{ :url => "http://t.co/0JG5Mcq", :display_url => "blog.twitter.com/2011/05/twitte…", :expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", :indices => [0, 19] }] ) html = Nokogiri::HTML(linked) html.search('a').should_not be_empty html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte" html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …" end it "should apply :class as a CSS class" do linked = @linker.auto_link("http://example.com/", :class => 'myclass') linked.should have_autolinked_url('http://example.com/') linked.should match(/myclass/) end it "should 
apply :url_class only on URL" do linked = @linker.auto_link("http://twitter.com") linked.should have_autolinked_url('http://twitter.com') linked.should_not match(/class/) linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass') linked.should have_autolinked_url('http://twitter.com') linked.should match(/class=\"testClass\"/) linked = @linker.auto_link("#hash @tw", :url_class => 'testClass') linked.should match(/class=\"tweet-url hashtag\"/) linked.should match(/class=\"tweet-url username\"/) linked.should_not match(/class=\"testClass\"/) end it "should add rel=nofollow by default" do linked = @linker.auto_link("http://example.com/") linked.should have_autolinked_url('http://example.com/') linked.should match(/nofollow/) end it "should include the '@' symbol in a username when passed :username_include_symbol" do linked = @linker.auto_link("@user", :username_include_symbol => true) linked.should link_to_screen_name('user', '@user') end it "should include the '@' symbol in a list when passed :username_include_symbol" do linked = @linker.auto_link("@user/list", :username_include_symbol => true) linked.should link_to_list_path('user/list', '@user/list') end it "should not add rel=nofollow when passed :suppress_no_follow" do linked = @linker.auto_link("http://example.com/", :suppress_no_follow => true) linked.should have_autolinked_url('http://example.com/') linked.should_not match(/nofollow/) end it "should not add a target attribute by default" do linked = @linker.auto_link("http://example.com/") linked.should have_autolinked_url('http://example.com/') linked.should_not match(/target=/) end it "should respect the :target option" do linked = @linker.auto_link("http://example.com/", :target => 'mywindow') linked.should have_autolinked_url('http://example.com/') linked.should match(/target="mywindow"/) end it "should customize href by username_url_block option" do linked = @linker.auto_link("@test", :username_url_block => lambda{|a| "dummy"}) 
linked.should have_autolinked_url('dummy', 'test') end it "should customize href by list_url_block option" do linked = @linker.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', 'test/list') end it "should customize href by hashtag_url_block option" do linked = @linker.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', '#hashtag') end it "should customize href by cashtag_url_block option" do linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', '$CASH') end it "should customize href by link_url_block option" do linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', 'http://example.com/') end it "should modify link attributes by link_attribute_block" do linked = @linker.auto_link("#hash @mention", :link_attribute_block => lambda{|entity, attributes| attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag] } ) linked.should match(/]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/) linked.should_not match(/]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/) linked.should_not match(/link_attribute_block/i) linked = @linker.auto_link("@mention http://twitter.com/", :link_attribute_block => lambda{|entity, attributes| attributes["dummy-url-attr"] = entity[:url] if entity[:url] } ) linked.should_not match(/]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/) linked.should match(/]+dummy-url-attr=\"http:\/\/twitter.com\/\"/) end it "should modify link text by link_text_block" do linked = @linker.auto_link("#hash @mention", :link_text_block => lambda{|entity, text| entity[:hashtag] ? 
"#replaced" : "pre_#{text}_post" } ) linked.should match(/]+>#replaced<\/a>/) linked.should match(/]+>pre_mention_post<\/a>/) linked = @linker.auto_link("#hash @mention", { :link_text_block => lambda{|entity, text| "pre_#{text}_post" }, :symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true }) linked.should match(/]+>pre_#<\/s>hash<\/b>_post<\/a>/) linked.should match(/]+>pre_@<\/s>mention<\/b>_post<\/a>/) end it "should apply :url_target only to auto-linked URLs" do auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'}) auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://test.com/') auto_linked.should_not match(/]+hashtag[^>]+target[^>]+>/) auto_linked.should_not match(/]+username[^>]+target[^>]+>/) auto_linked.should match(/]+test.com[^>]+target=\"_blank\"[^>]*>/) end it "should apply target='_blank' only to auto-linked URLs when :target_blank is set to true" do auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:target_blank => true}) auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://test.com/') auto_linked.should match(/]+hashtag[^>]+target=\"_blank\"[^>]*>/) auto_linked.should match(/]+username[^>]+target=\"_blank\"[^>]*>/) auto_linked.should match(/]+test.com[^>]+target=\"_blank\"[^>]*>/) end end describe "link_url_with_entity" do before do @linker = TestAutolink.new end it "should use display_url and expanded_url" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "twitter.com", :expanded_url => "http://twitter.com/"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "twitter.com"; end it "should correctly handle display_url ending with '…'" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", 
:display_url => "twitter.com…", :expanded_url => "http://twitter.com/abcdefg"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "twitter.com"; end it "should correctly handle display_url starting with '…'" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "…tter.com/abcdefg", :expanded_url => "http://twitter.com/abcdefg"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "tter.com/abcdefg"; end it "should not create spans if display_url and expanded_url are on different domains" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "pic.twitter.com/xyz", :expanded_url => "http://twitter.com/foo/statuses/123/photo/1"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "pic.twitter.com/xyz" end end describe "symbol_tag" do before do @linker = TestAutolink.new end it "should put :symbol_tag around symbol" do @linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true}).should match(/@<\/s>mention/) @linker.auto_link("#hash", {:symbol_tag => 's'}).should match(/#<\/s>hash/) result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true}) result.should match(/@<\/b>mention/) result.should match(/#<\/b>hash/) result.should match(/\$<\/b>CASH/) end it "should put :text_with_symbol_tag around text" do result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'}) result.should match(/mention<\/b>/) result.should match(/hash<\/b>/) result.should match(/CASH<\/b>/) end it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true}) result.should match(/@<\/s>mention<\/b>/) result.should match(/#<\/s>hash<\/b>/) result.should match(/\$<\/s>CASH<\/b>/) end end describe "html_escape" do before do @linker = 
TestAutolink.new end it "should escape html entities properly" do @linker.html_escape("&").should == "&" @linker.html_escape(">").should == ">" @linker.html_escape("<").should == "<" @linker.html_escape("\"").should == """ @linker.html_escape("'").should == "'" @linker.html_escape("&<>\"").should == "&<>"" @linker.html_escape("
").should == "<div>" @linker.html_escape("a&b").should == "a&b" @linker.html_escape("twitter & friends").should == "<a href="https://twitter.com" target="_blank">twitter & friends</a>" @linker.html_escape("&").should == "&amp;" @linker.html_escape(nil).should == nil end end end twitter-text-1.13.4/spec/extractor_spec.rb0000644000175000017500000003222712667350232021006 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestExtractor include Twitter::Extractor end describe Twitter::Extractor do before do @extractor = TestExtractor.new end describe "mentions" do context "single screen name alone " do it "should be linked" do @extractor.extract_mentioned_screen_names("@alice").should == ["alice"] end it "should be linked with _" do @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"] end it "should be linked if numeric" do @extractor.extract_mentioned_screen_names("@1234").should == ["1234"] end end context "multiple screen names" do it "should both be linked" do @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"] end end context "screen names embedded in text" do it "should be linked in Latin text" do @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"] end it "should be linked in Japanese text" do @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"] end it "should ignore mentions preceded by !, @, #, $, %, & or *" do invalid_chars = ['!', '@', '#', '$', '%', '&', '*'] invalid_chars.each do |c| @extractor.extract_mentioned_screen_names("f#{c}@kn").should == [] end end end it "should accept a block arugment and call it in order" do needed = ["alice", "bob"] @extractor.extract_mentioned_screen_names("@alice @bob") do |sn| sn.should == needed.shift end needed.should == [] end end describe "mentions with indices" do context "single screen name alone " do it "should be linked and the correct 
indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}] end it "should be linked with _ and the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}] end it "should be linked if numeric and the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}] end end context "multiple screen names" do it "should both be linked with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should == [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}] end it "should be linked with the correct indices even when repeated" do @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should == [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "alice", :indices => [7, 13]}, {:screen_name => "bob", :indices => [14, 18]}] end end context "screen names embedded in text" do it "should be linked in Latin text with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}] end it "should be linked in Japanese text with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}] end end it "should accept a block arugment and call it in order" do needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}] @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index| data = needed.shift sn.should == data[:screen_name] start_index.should == data[:indices].first end_index.should == data[:indices].last end needed.should == [] end it 
"should extract screen name in text with supplementary character" do @extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").should == [{:screen_name => "alice", :indices => [2, 8]}] end end describe "replies" do context "should be extracted from" do it "should extract from lone name" do @extractor.extract_reply_screen_name("@alice").should == "alice" end it "should extract from the start" do @extractor.extract_reply_screen_name("@alice reply text").should == "alice" end it "should extract preceded by a space" do @extractor.extract_reply_screen_name(" @alice reply text").should == "alice" end it "should extract preceded by a full-width space" do @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice" end end context "should not be extracted from" do it "should not be extracted when preceded by text" do @extractor.extract_reply_screen_name("reply @alice text").should == nil end it "should not be extracted when preceded by puctuation" do %w(. / _ - + # ! @).each do |punct| @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil end end end context "should accept a block arugment" do it "should call the block on match" do @extractor.extract_reply_screen_name("@alice") do |sn| sn.should == "alice" end end it "should not call the block on no match" do calls = 0 @extractor.extract_reply_screen_name("not a reply") do |sn| calls += 1 end calls.should == 0 end end end describe "urls" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should extract the URL #{url} and prefix it with a protocol if missing" do @extractor.extract_urls(url).first.should include(url) end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" @extractor.extract_urls(text).first.should include(url) end end end describe "invalid URLS" do it "does not link urls with invalid domains" do @extractor.extract_urls("http://tld-too-short.x").should == [] end end describe "t.co URLS" do TestUrls::TCO.each do |url| it "should only extract the t.co URL from the URL #{url}" do extracted_urls = @extractor.extract_urls(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url.should_not == url extracted_url.should == url[0...20] end it "should match the t.co URL from the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. #awesome" extracted_urls = @extractor.extract_urls(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url.should_not == url extracted_url.should == url[0...20] end end end end describe "urls with indices" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should extract the URL #{url} and prefix it with a protocol if missing" do extracted_urls = @extractor.extract_urls_with_indices(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should include(url) extracted_url[:indices].first.should == 0 extracted_url[:indices].last.should == url.chars.to_a.size end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" extracted_urls = @extractor.extract_urls_with_indices(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should include(url) extracted_url[:indices].first.should == 11 extracted_url[:indices].last.should == 11 + url.chars.to_a.size end end it "should extract URL in text with supplementary character" do @extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").should == [{:url => "http://twitter.com", :indices => [2, 20]}] end end describe "invalid URLS" do it "does not link urls with invalid domains" do @extractor.extract_urls_with_indices("http://tld-too-short.x").should == [] end end describe "t.co URLS" do TestUrls::TCO.each do |url| it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do extracted_urls = @extractor.extract_urls_with_indices(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should_not include(url) extracted_url[:url].should include(url[0...20]) extracted_url[:indices].first.should == 0 extracted_url[:indices].last.should == 20 end it "should match the t.co URL from the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" extracted_urls = @extractor.extract_urls_with_indices(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should_not include(url) extracted_url[:url].should include(url[0...20]) extracted_url[:indices].first.should == 11 extracted_url[:indices].last.should == 31 end end end end describe "hashtags" do context "extracts latin/numeric hashtags" do %w(text text123 123text).each do |hashtag| it "should extract ##{hashtag}" do @extractor.extract_hashtags("##{hashtag}").should == [hashtag] end it "should extract ##{hashtag} within text" do @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] end end end context "international hashtags" do context "should allow accents" do %w(mañana café münchen).each do |hashtag| it "should extract ##{hashtag}" do @extractor.extract_hashtags("##{hashtag}").should == [hashtag] end it "should extract ##{hashtag} within text" do @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] end end it "should not allow the multiplication character" do @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == ["pre"] end it "should not allow the division character" do @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == ["pre"] end end end it "should not extract numeric hashtags" do @extractor.extract_hashtags("#1234").should == [] end it "should extract hashtag followed by punctuations" do @extractor.extract_hashtags("#test1: #test2; #test3\"").should == ["test1", "test2" ,"test3"] end end describe "hashtags with indices" do def match_hashtag_in_text(hashtag, text, offset = 0) extracted_hashtags = @extractor.extract_hashtags_with_indices(text) extracted_hashtags.size.should == 1 extracted_hashtag = extracted_hashtags.first extracted_hashtag[:hashtag].should == hashtag extracted_hashtag[:indices].first.should == offset extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1 end 
def not_match_hashtag_in_text(text) extracted_hashtags = @extractor.extract_hashtags_with_indices(text) extracted_hashtags.size.should == 0 end context "extracts latin/numeric hashtags" do %w(text text123 123text).each do |hashtag| it "should extract ##{hashtag}" do match_hashtag_in_text(hashtag, "##{hashtag}") end it "should extract ##{hashtag} within text" do match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) end end end context "international hashtags" do context "should allow accents" do %w(mañana café münchen).each do |hashtag| it "should extract ##{hashtag}" do match_hashtag_in_text(hashtag, "##{hashtag}") end it "should extract ##{hashtag} within text" do match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) end end it "should not allow the multiplication character" do match_hashtag_in_text("pre", "#pre#{[0xd7].pack('U')}post", 0) end it "should not allow the division character" do match_hashtag_in_text("pre", "#pre#{[0xf7].pack('U')}post", 0) end end end it "should not extract numeric hashtags" do not_match_hashtag_in_text("#1234") end it "should extract hashtag in text with supplementary character" do match_hashtag_in_text("hashtag", "#{[0x10400].pack('U')} #hashtag", 2) end end end twitter-text-1.13.4/spec/hithighlighter_spec.rb0000644000175000017500000000605512667350232021776 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestHitHighlighter include Twitter::HitHighlighter end describe Twitter::HitHighlighter do describe "highlight" do before do @highlighter = TestHitHighlighter.new end context "with options" do before do @original = "Testing this hit highliter" @hits = [[13,16]] end it "should default to tags" do @highlighter.hit_highlight(@original, @hits).should == "Testing this hit highliter" end it "should allow tag override" do @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this hit highliter" end end context "without links" do before do 
@original = "Hey! this is a test tweet" end it "should return original when no hits are provided" do @highlighter.hit_highlight(@original).should == @original end it "should highlight one hit" do @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! this is a test tweet" end it "should highlight two hits" do @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! this is a test tweet" end it "should correctly highlight first-word hits" do @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "Hey! this is a test tweet" end it "should correctly highlight last-word hits" do @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test tweet" end end context "with links" do it "should highlight with a single link" do @highlighter.hit_highlight("@bcherry this was a test tweet", [[9, 13]]).should == "@bcherry this was a test tweet" end it "should highlight with link at the end" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight with a link at the beginning" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight an entire link" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight within a link" do @highlighter.hit_highlight("test test test", [[6, 8]]).should == "test test test" end it "should highlight around a link" do @highlighter.hit_highlight("test test test", [[3, 11]]).should == "test test test" end it "should fail gracefully with bad hits" do @highlighter.hit_highlight("test test", [[5, 20]]).should == "test test" end it "should not mess up with touching tags" do @highlighter.hit_highlight("foofoo", [[3,6]]).should == "foofoo" end end end end twitter-text-1.13.4/spec/regex_spec.rb0000644000175000017500000000213212667350232020075 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' 
describe "Twitter::Regex regular expressions" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should match the URL #{url}" do url.should match_autolink_expression end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. #awesome" url.should match_autolink_expression_in(text) end end end describe "invalid URLS" do it "does not link urls with invalid characters" do TestUrls::INVALID.each {|url| url.should_not match_autolink_expression} end end describe "matching List names" do it "should match if less than 25 characters" do name = "Shuffleboard Community" name.length.should < 25 name.should match(Twitter::Regex::REGEXEN[:list_name]) end it "should not match if greater than 25 characters" do name = "Most Glorious Shady Meadows Shuffleboard Community" name.length.should > 25 name.should match(Twitter::Regex[:list_name]) end end end twitter-text-1.13.4/spec/unicode_spec.rb0000644000175000017500000000166512667350232020423 0ustar sudheeshsudheesh# encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' describe Twitter::Unicode do it "should lazy-init constants" do Twitter::Unicode.const_defined?(:UFEB6).should == false Twitter::Unicode::UFEB6.should_not be_nil Twitter::Unicode::UFEB6.should be_kind_of(String) Twitter::Unicode.const_defined?(:UFEB6).should == true end it "should return corresponding character" do Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U') end it "should allow lowercase notation" do Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6 Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6 end it "should allow underscore notation" do Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6 Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6 end it "should raise on invalid codepoints" do lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError) end end twitter-text-1.13.4/spec/test_urls.rb0000644000175000017500000000526512667350232020007 0ustar 
sudheeshsudheesh# encoding: utf-8 module TestUrls VALID = [ "http://google.com", "http://foobar.com/#", "http://google.com/#foo", "http://google.com/#search?q=iphone%20-filter%3Alinks", "http://twitter.com/#search?q=iphone%20-filter%3Alinks", "http://somedomain.com/index.php?path=/abc/def/", "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html", "http://somehost.com:3000", "http://xo.com/~matthew+%-x", "http://en.wikipedia.org/wiki/Primer_(film)", "http://www.ams.org/bookstore-getitem/item=mbk-59", "http://chilp.it/?77e8fd", "http://tell.me/why", "http://longtlds.info", "http://✪df.ws/ejp", "http://日本.com", "http://search.twitter.com/search?q=avro&lang=en", "http://mrs.domain-dash.biz", "http://x.com/has/one/char/domain", "http://t.co/nwcLTFF", "http://sub_domain-dash.twitter.com", "http://a.b.cd", "http://a_b.c-d.com", "http://a-b.b.com", "http://twitter-dash.com", "http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx", "www.foobar.com", "WWW.FOOBAR.COM", "www.foobar.co.jp", "http://t.co", "t.co/nwcLTFF", "http://foobar.みんな", "http://foobar.中国", "http://foobar.پاکستان", "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-" ] unless defined?(TestUrls::VALID) INVALID = [ "http://no-tld", "http://tld-too-short.x", "http://-doman_dash.com", "http://_leadingunderscore.twitter.com", "http://trailingunderscore_.twitter.com", "http://-leadingdash.twitter.com", "http://trailingdash-.twitter.com", "http://-leadingdash.com", "http://trailingdash-.com", "http://no_underscores.com", "http://test.c_o_m", "http://test.c-o-m", "http://twitt#{[0x202A].pack('U')}er.com", "http://twitt#{[0x202B].pack('U')}er.com", "http://twitt#{[0x202C].pack('U')}er.com", "http://twitt#{[0x202D].pack('U')}er.com", "http://twitt#{[0x202E].pack('U')}er.com" ] unless defined?(TestUrls::INVALID) TCO = [ "http://t.co/P53cv5yO!", "http://t.co/fQJmiPGg***", "http://t.co/pbY2NfTZ's", "http://t.co/2vYHpAc5;", "http://t.co/ulYGBYSo:", 
"http://t.co/GeT4bSiw=win", "http://t.co/8MkmHU0k+fun", "http://t.co/TKLp64dY.yes,", "http://t.co/8vuO27cI$$", "http://t.co/rPYTvdA8/", "http://t.co/WvtMw5ku%", "http://t.co/8t7G3ddS#", "http://t.co/nfHNJDV2/#!", "http://t.co/gK6NOXHs[good]", "http://t.co/dMrT0o1Y]bad", "http://t.co/FNkPfmii-", "http://t.co/sMgS3pjI_oh", "http://t.co/F8Dq3Plb~", "http://t.co/ivvH58vC&help", "http://t.co/iUBL15zD|NZ5KYLQ8" ] unless defined?(TestUrls::TCO) end twitter-text-1.13.4/Gemfile0000644000175000017500000000013712667350232015770 0ustar sudheeshsudheeshsource "http://rubygems.org" # Specify the gem's dependencies in twitter-text.gemspec gemspec twitter-text-1.13.4/lib/0000755000175000017500000000000012667350232015242 5ustar sudheeshsudheeshtwitter-text-1.13.4/lib/assets/0000755000175000017500000000000012667350232016544 5ustar sudheeshsudheeshtwitter-text-1.13.4/lib/assets/tld_lib.yml0000644000175000017500000002101712667350232020701 0ustar sudheeshsudheesh--- country: - ac - ad - ae - af - ag - ai - al - am - an - ao - aq - ar - as - at - au - aw - ax - az - ba - bb - bd - be - bf - bg - bh - bi - bj - bl - bm - bn - bo - bq - br - bs - bt - bv - bw - by - bz - ca - cc - cd - cf - cg - ch - ci - ck - cl - cm - cn - co - cr - cu - cv - cw - cx - cy - cz - de - dj - dk - dm - do - dz - ec - ee - eg - eh - er - es - et - eu - fi - fj - fk - fm - fo - fr - ga - gb - gd - ge - gf - gg - gh - gi - gl - gm - gn - gp - gq - gr - gs - gt - gu - gw - gy - hk - hm - hn - hr - ht - hu - id - ie - il - im - in - io - iq - ir - is - it - je - jm - jo - jp - ke - kg - kh - ki - km - kn - kp - kr - kw - ky - kz - la - lb - lc - li - lk - lr - ls - lt - lu - lv - ly - ma - mc - md - me - mf - mg - mh - mk - ml - mm - mn - mo - mp - mq - mr - ms - mt - mu - mv - mw - mx - my - mz - na - nc - ne - nf - ng - ni - nl - 'no' - np - nr - nu - nz - om - pa - pe - pf - pg - ph - pk - pl - pm - pn - pr - ps - pt - pw - py - qa - re - ro - rs - ru - rw - sa - sb - sc - sd - se - sg - sh - si - sj - sk - 
sl - sm - sn - so - sr - ss - st - su - sv - sx - sy - sz - tc - td - tf - tg - th - tj - tk - tl - tm - tn - to - tp - tr - tt - tv - tw - tz - ua - ug - uk - um - us - uy - uz - va - vc - ve - vg - vi - vn - vu - wf - ws - ye - yt - za - zm - zw - "ελ" - "бел" - "мкд" - "мон" - "рф" - "срб" - "укр" - "қаз" - "հայ" - "الاردن" - "الجزائر" - "السعودية" - "المغرب" - "امارات" - "ایران" - "بھارت" - "تونس" - "سودان" - "سورية" - "عراق" - "عمان" - "فلسطين" - "قطر" - "مصر" - "مليسيا" - "پاکستان" - "भारत" - "বাংলা" - "ভারত" - "ਭਾਰਤ" - "ભારત" - "இந்தியா" - "இலங்கை" - "சிங்கப்பூர்" - "భారత్" - "ලංකා" - "ไทย" - "გე" - "中国" - "中國" - "台湾" - "台灣" - "新加坡" - "澳門" - "香港" - "한국" generic: - abb - abbott - abogado - academy - accenture - accountant - accountants - aco - active - actor - ads - adult - aeg - aero - afl - agency - aig - airforce - airtel - allfinanz - alsace - amsterdam - android - apartments - app - aquarelle - archi - army - arpa - asia - associates - attorney - auction - audio - auto - autos - axa - azure - band - bank - bar - barcelona - barclaycard - barclays - bargains - bauhaus - bayern - bbc - bbva - bcn - beer - bentley - berlin - best - bet - bharti - bible - bid - bike - bing - bingo - bio - biz - black - blackfriday - bloomberg - blue - bmw - bnl - bnpparibas - boats - bond - boo - boots - boutique - bradesco - bridgestone - broker - brother - brussels - budapest - build - builders - business - buzz - bzh - cab - cafe - cal - camera - camp - cancerresearch - canon - capetown - capital - caravan - cards - care - career - careers - cars - cartier - casa - cash - casino - cat - catering - cba - cbn - ceb - center - ceo - cern - cfa - cfd - chanel - channel - chat - cheap - chloe - christmas - chrome - church - cisco - citic - city - claims - cleaning - click - clinic - clothing - cloud - club - coach - codes - coffee - college - cologne - com - commbank - community - company - computer - condos - construction - consulting - contractors - cooking - cool - coop - 
corsica - country - coupons - courses - credit - creditcard - cricket - crown - crs - cruises - cuisinella - cymru - cyou - dabur - dad - dance - date - dating - datsun - day - dclk - deals - degree - delivery - delta - democrat - dental - dentist - desi - design - dev - diamonds - diet - digital - direct - directory - discount - dnp - docs - dog - doha - domains - doosan - download - drive - durban - dvag - earth - eat - edu - education - email - emerck - energy - engineer - engineering - enterprises - epson - equipment - erni - esq - estate - eurovision - eus - events - everbank - exchange - expert - exposed - express - fage - fail - faith - family - fan - fans - farm - fashion - feedback - film - finance - financial - firmdale - fish - fishing - fit - fitness - flights - florist - flowers - flsmidth - fly - foo - football - forex - forsale - forum - foundation - frl - frogans - fund - furniture - futbol - fyi - gal - gallery - game - garden - gbiz - gdn - gent - genting - ggee - gift - gifts - gives - giving - glass - gle - global - globo - gmail - gmo - gmx - gold - goldpoint - golf - goo - goog - google - gop - gov - graphics - gratis - green - gripe - group - guge - guide - guitars - guru - hamburg - hangout - haus - healthcare - help - here - hermes - hiphop - hitachi - hiv - hockey - holdings - holiday - homedepot - homes - honda - horse - host - hosting - hoteles - hotmail - house - how - hsbc - ibm - icbc - ice - icu - ifm - iinet - immo - immobilien - industries - infiniti - info - ing - ink - institute - insure - int - international - investments - ipiranga - irish - ist - istanbul - itau - iwc - java - jcb - jetzt - jewelry - jlc - jll - jobs - joburg - jprs - juegos - kaufen - kddi - kim - kitchen - kiwi - koeln - komatsu - krd - kred - kyoto - lacaixa - lancaster - land - lasalle - lat - latrobe - law - lawyer - lds - lease - leclerc - legal - lexus - lgbt - liaison - lidl - life - lighting - limited - limo - link - live - lixil - loan - loans - lol 
- london - lotte - lotto - love - ltda - lupin - luxe - luxury - madrid - maif - maison - man - management - mango - market - marketing - markets - marriott - mba - media - meet - melbourne - meme - memorial - men - menu - miami - microsoft - mil - mini - mma - mobi - moda - moe - mom - monash - money - montblanc - mormon - mortgage - moscow - motorcycles - mov - movie - movistar - mtn - mtpc - museum - nadex - nagoya - name - navy - nec - net - netbank - network - neustar - new - news - nexus - ngo - nhk - nico - ninja - nissan - nokia - nra - nrw - ntt - nyc - office - okinawa - omega - one - ong - onl - online - ooo - oracle - orange - org - organic - osaka - otsuka - ovh - page - panerai - paris - partners - parts - party - pet - pharmacy - philips - photo - photography - photos - physio - piaget - pics - pictet - pictures - pink - pizza - place - play - plumbing - plus - pohl - poker - porn - post - praxi - press - pro - prod - productions - prof - properties - property - pub - qpon - quebec - racing - realtor - realty - recipes - red - redstone - rehab - reise - reisen - reit - ren - rent - rentals - repair - report - republican - rest - restaurant - review - reviews - rich - ricoh - rio - rip - rocks - rodeo - rsvp - ruhr - run - ryukyu - saarland - sakura - sale - samsung - sandvik - sandvikcoromant - sanofi - sap - sarl - saxo - sca - scb - schmidt - scholarships - school - schule - schwarz - science - scor - scot - seat - seek - sener - services - sew - sex - sexy - shiksha - shoes - show - shriram - singles - site - ski - sky - skype - sncf - soccer - social - software - sohu - solar - solutions - sony - soy - space - spiegel - spreadbetting - srl - starhub - statoil - studio - study - style - sucks - supplies - supply - support - surf - surgery - suzuki - swatch - swiss - sydney - systems - taipei - tatamotors - tatar - tattoo - tax - taxi - team - tech - technology - tel - telefonica - temasek - tennis - thd - theater - tickets - tienda - tips - tires 
- tirol - today - tokyo - tools - top - toray - toshiba - tours - town - toyota - toys - trade - trading - training - travel - trust - tui - ubs - university - uno - uol - vacations - vegas - ventures - vermögensberater - vermögensberatung - versicherung - vet - viajes - video - villas - vin - vision - vista - vistaprint - vlaanderen - vodka - vote - voting - voto - voyage - wales - walter - wang - watch - webcam - website - wed - wedding - weir - whoswho - wien - wiki - williamhill - win - windows - wine - wme - work - works - world - wtc - wtf - xbox - xerox - xin - xperia - xxx - xyz - yachts - yandex - yodobashi - yoga - yokohama - youtube - zip - zone - zuerich - "дети" - "ком" - "москва" - "онлайн" - "орг" - "рус" - "сайт" - "קום" - "بازار" - "شبكة" - "كوم" - "موقع" - "कॉम" - "नेट" - "संगठन" - "คอม" - "みんな" - "グーグル" - "コム" - "世界" - "中信" - "中文网" - "企业" - "佛山" - "信息" - "健康" - "八卦" - "公司" - "公益" - "商城" - "商店" - "商标" - "在线" - "大拿" - "娱乐" - "工行" - "广东" - "慈善" - "我爱你" - "手机" - "政务" - "政府" - "新闻" - "时尚" - "机构" - "淡马锡" - "游戏" - "点看" - "移动" - "组织机构" - "网址" - "网店" - "网络" - "谷歌" - "集团" - "飞利浦" - "餐厅" - "닷넷" - "닷컴" - "삼성" - onion twitter-text-1.13.4/lib/twitter-text.rb0000644000175000017500000000066612667350232020263 0ustar sudheeshsudheeshmajor, minor, _patch = RUBY_VERSION.split('.') $RUBY_1_9 = if major.to_i == 1 && minor.to_i < 9 # Ruby 1.8 KCODE check. Not needed on 1.9 and later. 
raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'") unless $KCODE[0].chr =~ /u/i false else true end %w( deprecation regex rewriter autolink extractor unicode validation hit_highlighter ).each do |name| require "twitter-text/#{name}" end twitter-text-1.13.4/lib/twitter-text/0000755000175000017500000000000012667350232017726 5ustar sudheeshsudheeshtwitter-text-1.13.4/lib/twitter-text/hash_helper.rb0000644000175000017500000000117112667350232022535 0ustar sudheeshsudheeshmodule Twitter module HashHelper # Return a new hash with all keys converted to symbols, as long as # they respond to +to_sym+. # # { 'name' => 'Rob', 'years' => '28' }.symbolize_keys # #=> { :name => "Rob", :years => "28" } def self.symbolize_keys(hash) symbolize_keys!(hash.dup) end # Destructively convert all keys to symbols, as long as they respond # to +to_sym+. Same as +symbolize_keys+, but modifies +self+. def self.symbolize_keys!(hash) hash.keys.each do |key| hash[(key.to_sym rescue key) || key] = hash.delete(key) end hash end end end twitter-text-1.13.4/lib/twitter-text/extractor.rb0000644000175000017500000003011412667350232022265 0ustar sudheeshsudheesh# encoding: UTF-8 class String # Helper function to count the character length by first converting to an # array. This is needed because with unicode strings, the return value # of length may be incorrect def char_length if respond_to? :codepoints length else chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size end end # Helper function to convert this string into an array of unicode characters. def to_char_a @to_char_a ||= if chars.kind_of?(Enumerable) chars.to_a else char_array = [] 0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') } char_array end end end # Helper functions to return character offsets instead of byte offsets. class MatchData def char_begin(n) if string.respond_to? :codepoints self.begin(n) else string[0, self.begin(n)].char_length end end def char_end(n) if string.respond_to? 
:codepoints self.end(n) else string[0, self.end(n)].char_length end end end module Twitter # A module for including Tweet parsing in a class. This module provides function for the extraction and processing # of usernames, lists, URLs and hashtags. module Extractor extend self # Remove overlapping entities. # This returns a new array with no overlapping entities. def remove_overlapping_entities(entities) # sort by start index entities = entities.sort_by{|entity| entity[:indices].first} # remove duplicates prev = nil entities.reject!{|entity| (prev && prev[:indices].last > entity[:indices].first) || (prev = entity) && false} entities end # Extracts all usernames, lists, hashtags and URLs in the Tweet text # along with the indices for where the entity ocurred # If the text is nil or contains no entity an empty array # will be returned. # # If a block is given then it will be called for each entity. def extract_entities_with_indices(text, options = {}, &block) # extract all entities entities = extract_urls_with_indices(text, options) + extract_hashtags_with_indices(text, :check_url_overlap => false) + extract_mentions_or_lists_with_indices(text) + extract_cashtags_with_indices(text) return [] if entities.empty? entities = remove_overlapping_entities(entities) entities.each(&block) if block_given? entities end # Extracts a list of all usernames mentioned in the Tweet text. If the # text is nil or contains no username mentions an empty array # will be returned. # # If a block is given then it will be called for each username. def extract_mentioned_screen_names(text, &block) # :yields: username screen_names = extract_mentioned_screen_names_with_indices(text).map{|m| m[:screen_name]} screen_names.each(&block) if block_given? screen_names end # Extracts a list of all usernames mentioned in the Tweet text # along with the indices for where the mention ocurred. If the # text is nil or contains no username mentions, an empty array # will be returned. 
# # If a block is given, then it will be called with each username, the start # index, and the end index in the text. def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end return [] unless text possible_screen_names = [] extract_mentions_or_lists_with_indices(text) do |screen_name, list_slug, start_position, end_position| next unless list_slug.empty? possible_screen_names << { :screen_name => screen_name, :indices => [start_position, end_position] } end if block_given? possible_screen_names.each do |mention| yield mention[:screen_name], mention[:indices].first, mention[:indices].last end end possible_screen_names end # Extracts a list of all usernames or lists mentioned in the Tweet text # along with the indices for where the mention ocurred. If the # text is nil or contains no username or list mentions, an empty array # will be returned. # # If a block is given, then it will be called with each username, list slug, the start # index, and the end index in the text. The list_slug will be an empty stirng # if this is a username mention. def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end return [] unless text =~ /[@@]/ possible_entries = [] text.to_s.scan(Twitter::Regex[:valid_mention_or_list]) do |before, at, screen_name, list_slug| match_data = $~ after = $' unless after =~ Twitter::Regex[:end_mention_match] start_position = match_data.char_begin(3) - 1 end_position = match_data.char_end(list_slug.nil? ? 3 : 4) possible_entries << { :screen_name => screen_name, :list_slug => list_slug || "", :indices => [start_position, end_position] } end end if block_given? possible_entries.each do |mention| yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last end end possible_entries end # Extracts the username username replied to in the Tweet text. If the # text is nil or is not a reply nil will be returned. 
# # If a block is given then it will be called with the username replied to (if any) def extract_reply_screen_name(text) # :yields: username return nil unless text possible_screen_name = text.match(Twitter::Regex[:valid_reply]) return unless possible_screen_name.respond_to?(:captures) return if $' =~ Twitter::Regex[:end_mention_match] screen_name = possible_screen_name.captures.first yield screen_name if block_given? screen_name end # Extracts a list of all URLs included in the Tweet text. If the # text is nil or contains no URLs an empty array # will be returned. # # If a block is given then it will be called for each URL. def extract_urls(text, &block) # :yields: url urls = extract_urls_with_indices(text).map{|u| u[:url]} urls.each(&block) if block_given? urls end # Extracts a list of all URLs included in the Tweet text along # with the indices. If the text is nil or contains no # URLs an empty array will be returned. # # If a block is given then it will be called for each URL. def extract_urls_with_indices(text, options = {:extract_url_without_protocol => true}) # :yields: url, start, end return [] unless text && (options[:extract_url_without_protocol] ? text.index(".") : text.index(":")) urls = [] text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query| valid_url_match_data = $~ start_position = valid_url_match_data.char_begin(3) end_position = valid_url_match_data.char_end(3) # If protocol is missing and domain contains non-ASCII characters, # extract ASCII-only domains. 
if !protocol next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars] last_url = nil domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain| last_url = { :url => ascii_domain, :indices => [start_position + $~.char_begin(0), start_position + $~.char_end(0)] } if path || ascii_domain =~ Twitter::Regex[:valid_special_short_domain] || ascii_domain !~ Twitter::Regex[:invalid_short_domain] urls << last_url end end # no ASCII-only domain found. Skip the entire URL next unless last_url # last_url only contains domain. Need to add path and query if they exist. if path # last_url was not added. Add it to urls here. last_url[:url] = url.sub(domain, last_url[:url]) last_url[:indices][1] = end_position end else # In the case of t.co URLs, don't allow additional path characters if url =~ Twitter::Regex[:valid_tco_url] url = $& end_position = start_position + url.char_length end urls << { :url => url, :indices => [start_position, end_position] } end end urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given? urls end # Extracts a list of all hashtags included in the Tweet text. If the # text is nil or contains no hashtags an empty array # will be returned. The array returned will not include the leading # # character. # # If a block is given then it will be called for each hashtag. def extract_hashtags(text, &block) # :yields: hashtag_text hashtags = extract_hashtags_with_indices(text).map{|h| h[:hashtag]} hashtags.each(&block) if block_given? hashtags end # Extracts a list of all hashtags included in the Tweet text. If the # text is nil or contains no hashtags an empty array # will be returned. The array returned will not include the leading # # character. # # If a block is given then it will be called for each hashtag. 
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end return [] unless text =~ /[##]/ tags = [] text.scan(Twitter::Regex[:valid_hashtag]) do |before, hash, hash_text| match_data = $~ start_position = match_data.char_begin(2) end_position = match_data.char_end(3) after = $' unless after =~ Twitter::Regex[:end_hashtag_match] tags << { :hashtag => hash_text, :indices => [start_position, end_position] } end end if options[:check_url_overlap] # extract URLs urls = extract_urls_with_indices(text) unless urls.empty? tags.concat(urls) # remove duplicates tags = remove_overlapping_entities(tags) # remove URL entities tags.reject!{|entity| !entity[:hashtag] } end end tags.each{|tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last} if block_given? tags end # Extracts a list of all cashtags included in the Tweet text. If the # text is nil or contains no cashtags an empty array # will be returned. The array returned will not include the leading $ # character. # # If a block is given then it will be called for each cashtag. def extract_cashtags(text, &block) # :yields: cashtag_text cashtags = extract_cashtags_with_indices(text).map{|h| h[:cashtag]} cashtags.each(&block) if block_given? cashtags end # Extracts a list of all cashtags included in the Tweet text. If the # text is nil or contains no cashtags an empty array # will be returned. The array returned will not include the leading $ # character. # # If a block is given then it will be called for each cashtag. 
def extract_cashtags_with_indices(text) # :yields: cashtag_text, start, end return [] unless text =~ /\$/ tags = [] text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text| match_data = $~ start_position = match_data.char_begin(2) end_position = match_data.char_end(3) tags << { :cashtag => cash_text, :indices => [start_position, end_position] } end tags.each{|tag| yield tag[:cashtag], tag[:indices].first, tag[:indices].last} if block_given? tags end end end twitter-text-1.13.4/lib/twitter-text/rewriter.rb0000644000175000017500000000405212667350232022117 0ustar sudheeshsudheeshmodule Twitter # A module provides base methods to rewrite usernames, lists, hashtags and URLs. module Rewriter extend self def rewrite_entities(text, entities) chars = text.to_s.to_char_a # sort by start index entities = entities.sort_by do |entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] indices.first end result = [] last_index = entities.inject(0) do |index, entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] result << chars[index...indices.first] result << yield(entity, chars) indices.last end result << chars[last_index..-1] result.flatten.join end # These methods are deprecated, will be removed in future. extend Deprecation def rewrite(text, options = {}) [:hashtags, :urls, :usernames_or_lists].inject(text) do |key| options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text end end deprecate :rewrite, :rewrite_entities def rewrite_usernames_or_lists(text) entities = Extractor.extract_mentions_or_lists_with_indices(text) rewrite_entities(text, entities) do |entity, chars| at = chars[entity[:indices].first] list_slug = entity[:list_slug] list_slug = nil if list_slug.empty? 
yield(at, entity[:screen_name], list_slug) end end deprecate :rewrite_usernames_or_lists, :rewrite_entities def rewrite_hashtags(text) entities = Extractor.extract_hashtags_with_indices(text) rewrite_entities(text, entities) do |entity, chars| hash = chars[entity[:indices].first] yield(hash, entity[:hashtag]) end end deprecate :rewrite_hashtags, :rewrite_entities def rewrite_urls(text) entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false) rewrite_entities(text, entities) do |entity, chars| yield(entity[:url]) end end deprecate :rewrite_urls, :rewrite_entities end end twitter-text-1.13.4/lib/twitter-text/unicode.rb0000644000175000017500000000162412667350232021704 0ustar sudheeshsudheeshmodule Twitter # This module lazily defines constants of the form Uxxxx for all Unicode # codepoints from U0000 to U10FFFF. The value of each constant is the # UTF-8 string for the codepoint. # Examples: # copyright = Unicode::U00A9 # euro = Unicode::U20AC # infinity = Unicode::U221E # module Unicode CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/ def self.const_missing(name) # Check that the constant name is of the right form: U0000 to U10FFFF if name.to_s =~ CODEPOINT_REGEX # Convert the codepoint to an immutable UTF-8 string, # define a real constant for that value and return the value #p name, name.class const_set(name, [$1.to_i(16)].pack("U").freeze) else # Raise an error for constants that are not Unicode. raise NameError, "Uninitialized constant: Unicode::#{name}" end end end end twitter-text-1.13.4/lib/twitter-text/regex.rb0000644000175000017500000003443112667350232021372 0ustar sudheeshsudheesh# encoding: UTF-8 module Twitter # A collection of regular expressions for parsing Tweet text. The regular expression # list is frozen at load time to ensure immutability. These regular expressions are # used throughout the Twitter classes. 
Special care has been taken to make # sure these reular expressions work with Tweets in all languages. class Regex require 'yaml' REGEXEN = {} # :nodoc: def self.regex_range(from, to = nil) # :nodoc: if $RUBY_1_9 if to "\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}" else "\\u{#{from.to_s(16).rjust(4, '0')}}" end else if to [from].pack('U') + '-' + [to].pack('U') else [from].pack('U') end end end TLDS = YAML.load_file( File.join( File.expand_path('../../..', __FILE__), # project root 'lib', 'assets', 'tld_lib.yml' ) ) # Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand # to access both the list of characters and a pattern suitible for use with String#split # Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE UNICODE_SPACES = [ (0x0009..0x000D).to_a, # White_Space # Cc [5] .. 0x0020, # White_Space # Zs SPACE 0x0085, # White_Space # Cc 0x00A0, # White_Space # Zs NO-BREAK SPACE 0x1680, # White_Space # Zs OGHAM SPACE MARK 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE 0x2028, # White_Space # Zl LINE SEPARATOR 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE ].flatten.map{|c| [c].pack('U*')}.freeze REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o # Character not allowed in Tweets INVALID_CHARACTERS = [ 0xFFFE, 0xFEFF, # BOM 0xFFFF, # Special 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change ].map{|cp| [cp].pack('U') }.freeze REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o major, minor, _patch = RUBY_VERSION.split('.') if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE)) REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/ else # 
This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius. REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/") end # Latin accented characters # Excludes 0xd7 from the range (the multiplication sign, confusable with "x"). # Also excludes 0xf7, the division sign LATIN_ACCENTS = [ regex_range(0xc0, 0xd6), regex_range(0xd8, 0xf6), regex_range(0xf8, 0xff), regex_range(0x0100, 0x024f), regex_range(0x0253, 0x0254), regex_range(0x0256, 0x0257), regex_range(0x0259), regex_range(0x025b), regex_range(0x0263), regex_range(0x0268), regex_range(0x026f), regex_range(0x0272), regex_range(0x0289), regex_range(0x028b), regex_range(0x02bb), regex_range(0x0300, 0x036f), regex_range(0x1e00, 0x1eff) ].join('').freeze REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o RTL_CHARACTERS = [ regex_range(0x0600,0x06FF), regex_range(0x0750,0x077F), regex_range(0x0590,0x05FF), regex_range(0xFE70,0xFEFF) ].join('').freeze PUNCTUATION_CHARS = '!"#$%&\'()*+,-./:;<=>?@\[\]^_\`{|}~' SPACE_CHARS = " \t\n\x0B\f\r" CTRL_CHARS = "\x00-\x1F\x7F" # A hashtag must contain at least one unicode letter or mark, as well as numbers, underscores, and select special characters. 
HASHTAG_ALPHA = /[\p{L}\p{M}]/ HASHTAG_ALPHANUMERIC = /[\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/ HASHTAG_BOUNDARY = /\A|\z|[^&\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/ HASHTAG = /(#{HASHTAG_BOUNDARY})(#|#)(?!\ufe0f|\u20e3)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io REGEXEN[:valid_hashtag] = /#{HASHTAG}/io # Used in Extractor for final filtering REGEXEN[:end_hashtag_match] = /\A(?:[##]|:\/\/)/o REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_!#\$%&*@@]|^|(?:^|[^a-zA-Z0-9_+~.-])[rR][tT]:?)/o REGEXEN[:at_signs] = /[@@]/ REGEXEN[:valid_mention_or_list] = / (#{REGEXEN[:valid_mention_preceding_chars]}) # $1: Preceeding character (#{REGEXEN[:at_signs]}) # $2: At mark ([a-zA-Z0-9_]{1,20}) # $3: Screen name (\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})? # $4: List (optional) /ox REGEXEN[:valid_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o # Used in Extractor for final filtering REGEXEN[:end_mention_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o # URL related hash regex collection REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@@$###{INVALID_CHARACTERS.join('')}]|^)/io REGEXEN[:invalid_url_without_protocol_preceding_chars] = /[-_.\/]$/ DOMAIN_VALID_CHARS = "[^#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]" REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io REGEXEN[:valid_gTLD] = %r{ (?: (?:#{TLDS['generic'].join('|')}) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_ccTLD] = %r{ (?: (?:#{TLDS['country'].join('|')}) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i 
REGEXEN[:valid_special_cctld] = %r{ (?: (?:co|tv) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_domain] = /(?: #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]} (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]}) )/iox # This is used in Extractor REGEXEN[:valid_ascii_domain] = / (?:(?:[A-Za-z0-9\-_]|#{REGEXEN[:latin_accents]})+\.)+ (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]}) /iox # This is used in Extractor for stricter t.co URL extraction REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/[a-z0-9]+/i # This is used in Extractor to filter out unwanted URLs. REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io REGEXEN[:valid_port_number] = /[0-9]+/ REGEXEN[:valid_general_url_path_chars] = /[a-z\p{Cyrillic}0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&\|@#{LATIN_ACCENTS}]/io # Allow URL paths to contain up to two nested levels of balanced parens # 1. Used in Wikipedia URLs like /Primer_(film) # 2. Used in IIS sessions like /S(dfd346)/ # 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/ REGEXEN[:valid_url_balanced_parens] = / \( (?: #{REGEXEN[:valid_general_url_path_chars]}+ | # allow one nested level of balanced parentheses (?: #{REGEXEN[:valid_general_url_path_chars]}* \( #{REGEXEN[:valid_general_url_path_chars]}+ \) #{REGEXEN[:valid_general_url_path_chars]}* ) ) \) /iox # Valid end-of-path chracters (so /foo. does not gobble the period). # 1. 
Allow =&# for empty URL parameters and other URL-join artifacts REGEXEN[:valid_url_path_ending_chars] = /[a-z\p{Cyrillic}0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io REGEXEN[:valid_url_path] = /(?: (?: #{REGEXEN[:valid_general_url_path_chars]}* (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)* #{REGEXEN[:valid_url_path_ending_chars]} )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/) )/iox REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/\-]/i REGEXEN[:valid_url] = %r{ ( # $1 total match (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter ( # $3 URL (https?:\/\/)? # $4 Protocol (optional) (#{REGEXEN[:valid_domain]}) # $5 Domain(s) (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? 
# $8 Query String ) ) }iox REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i # These URL validation pattern strings are based on the ABNF from RFC 3986 REGEXEN[:validate_url_unreserved] = /[a-z\p{Cyrillic}0-9\-._~]/i REGEXEN[:validate_url_pct_encoded] = /(?:%[0-9a-f]{2})/i REGEXEN[:validate_url_sub_delims] = /[!$&'()*+,;=]/i REGEXEN[:validate_url_pchar] = /(?: #{REGEXEN[:validate_url_unreserved]}| #{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_sub_delims]}| [:\|@] )/iox REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i REGEXEN[:validate_url_userinfo] = /(?: #{REGEXEN[:validate_url_unreserved]}| #{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_sub_delims]}| : )*/iox REGEXEN[:validate_url_dec_octet] = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i REGEXEN[:validate_url_ipv4] = /(?:#{REGEXEN[:validate_url_dec_octet]}(?:\.#{REGEXEN[:validate_url_dec_octet]}){3})/iox # Punting on real IPv6 validation for now REGEXEN[:validate_url_ipv6] = /(?:\[[a-f0-9:\.]+\])/i # Also punting on IPvFuture for now REGEXEN[:validate_url_ip] = /(?: #{REGEXEN[:validate_url_ipv4]}| #{REGEXEN[:validate_url_ipv6]} )/iox # This is more strict than the rfc specifies REGEXEN[:validate_url_subdomain_segment] = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain_segment] = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain_tld] = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain] = /(?:(?:#{REGEXEN[:validate_url_subdomain_segment]}\.)* (?:#{REGEXEN[:validate_url_domain_segment]}\.) 
#{REGEXEN[:validate_url_domain_tld]})/iox REGEXEN[:validate_url_host] = /(?: #{REGEXEN[:validate_url_ip]}| #{REGEXEN[:validate_url_domain]} )/iox # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences REGEXEN[:validate_url_unicode_subdomain_segment] = /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain_segment] = /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain_tld] = /(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain] = /(?:(?:#{REGEXEN[:validate_url_unicode_subdomain_segment]}\.)* (?:#{REGEXEN[:validate_url_unicode_domain_segment]}\.) #{REGEXEN[:validate_url_unicode_domain_tld]})/iox REGEXEN[:validate_url_unicode_host] = /(?: #{REGEXEN[:validate_url_ip]}| #{REGEXEN[:validate_url_unicode_domain]} )/iox REGEXEN[:validate_url_port] = /[0-9]{1,5}/ REGEXEN[:validate_url_unicode_authority] = %r{ (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo (#{REGEXEN[:validate_url_unicode_host]}) # $2 host (?::(#{REGEXEN[:validate_url_port]}))? # $3 port }iox REGEXEN[:validate_url_authority] = %r{ (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo (#{REGEXEN[:validate_url_host]}) # $2 host (?::(#{REGEXEN[:validate_url_port]}))? # $3 port }iox REGEXEN[:validate_url_path] = %r{(/#{REGEXEN[:validate_url_pchar]}*)*}i REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i # Modified version of RFC 3986 Appendix B REGEXEN[:validate_url_unencoded] = %r{ \A # Full URL (?: ([^:/?#]+):// # $1 Scheme )? ([^/?#]*) # $2 Authority ([^?#]*) # $3 Path (?: \?([^#]*) # $4 Query )? 
(?: \#(.*) # $5 Fragment )?\Z }ix REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io REGEXEN.each_pair{|k,v| v.freeze } # Return the regular expression for a given key. If the key # is not a known symbol a nil will be returned. def self.[](key) REGEXEN[key] end end end twitter-text-1.13.4/lib/twitter-text/deprecation.rb0000644000175000017500000000067712667350232022562 0ustar sudheeshsudheeshmodule Twitter module Deprecation def deprecate(method, new_method = nil) deprecated_method = :"deprecated_#{method}" message = "Deprecation: `#{method}` is deprecated." message << " Please use `#{new_method}` instead." if new_method alias_method(deprecated_method, method) define_method method do |*args, &block| warn message send(deprecated_method, *args, &block) end end end end twitter-text-1.13.4/lib/twitter-text/autolink.rb0000644000175000017500000005437012667350232022112 0ustar sudheeshsudheesh# encoding: UTF-8 require 'set' require 'twitter-text/hash_helper' module Twitter # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link # usernames, lists, hashtags and URLs. 
module Autolink extend self # Default CSS class for auto-linked lists DEFAULT_LIST_CLASS = "tweet-url list-slug".freeze # Default CSS class for auto-linked usernames DEFAULT_USERNAME_CLASS = "tweet-url username".freeze # Default CSS class for auto-linked hashtags DEFAULT_HASHTAG_CLASS = "tweet-url hashtag".freeze # Default CSS class for auto-linked cashtags DEFAULT_CASHTAG_CLASS = "tweet-url cashtag".freeze # Default URL base for auto-linked usernames DEFAULT_USERNAME_URL_BASE = "https://twitter.com/".freeze # Default URL base for auto-linked lists DEFAULT_LIST_URL_BASE = "https://twitter.com/".freeze # Default URL base for auto-linked hashtags DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%23".freeze # Default URL base for auto-linked cashtags DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%24".freeze # Default attributes for invisible span tag DEFAULT_INVISIBLE_TAG_ATTRS = "style='position:absolute;left:-9999px;'".freeze DEFAULT_OPTIONS = { :list_class => DEFAULT_LIST_CLASS, :username_class => DEFAULT_USERNAME_CLASS, :hashtag_class => DEFAULT_HASHTAG_CLASS, :cashtag_class => DEFAULT_CASHTAG_CLASS, :username_url_base => DEFAULT_USERNAME_URL_BASE, :list_url_base => DEFAULT_LIST_URL_BASE, :hashtag_url_base => DEFAULT_HASHTAG_URL_BASE, :cashtag_url_base => DEFAULT_CASHTAG_URL_BASE, :invisible_tag_attrs => DEFAULT_INVISIBLE_TAG_ATTRS }.freeze def auto_link_with_json(text, json, options = {}) # concatenate entities entities = json.values().flatten() # map JSON entity to twitter-text entity # be careful not to alter arguments received entities.map! do |entity| entity = HashHelper.symbolize_keys(entity) # hashtag entity[:hashtag] = entity[:text] if entity[:text] entity end auto_link_entities(text, entities, options) end def auto_link_entities(text, entities, options = {}, &block) return text if entities.empty? 
# NOTE deprecate these attributes not options keys in options hash, then use html_attrs options = DEFAULT_OPTIONS.merge(options) options[:html_attrs] = extract_html_attrs_from_options!(options) options[:html_attrs][:rel] ||= "nofollow" unless options[:suppress_no_follow] options[:html_attrs][:target] = "_blank" if options[:target_blank] == true Twitter::Rewriter.rewrite_entities(text.dup, entities) do |entity, chars| if entity[:url] link_to_url(entity, chars, options, &block) elsif entity[:hashtag] link_to_hashtag(entity, chars, options, &block) elsif entity[:screen_name] link_to_screen_name(entity, chars, options, &block) elsif entity[:cashtag] link_to_cashtag(entity, chars, options, &block) end end end # Add tags around the usernames, lists, hashtags and URLs in the provided text. # The tags can be controlled with the following entries in the options hash: # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :url_class:: class to add to url tags # :list_class:: class to add to list tags # :username_class:: class to add to username tags # :hashtag_class:: class to add to hashtag tags # :cashtag_class:: class to add to cashtag tags # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. # :hashtag_url_base:: the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this. # :cashtag_url_base:: the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this. 
# :invisible_tag_attrs:: HTML attribute to add to invisible span tags # :username_include_symbol:: place the @ symbol within username and list links # :suppress_lists:: disable auto-linking to lists # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :url_target:: the value for target attribute on URL links. # :target_blank:: adds target="_blank" to all auto_linked items username / hashtag / cashtag links / urls # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link(text, options = {}, &block) auto_link_entities(text, Extractor.extract_entities_with_indices(text, :extract_url_without_protocol => false), options, &block) end # Add tags around the usernames and lists in the provided text. The # tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :list_class:: class to add to list tags # :username_class:: class to add to username tags # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. 
# :username_include_symbol:: place the @ symbol within username and list links # :suppress_lists:: disable auto-linking to lists # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_usernames_or_lists(text, options = {}, &block) # :yields: list_or_username auto_link_entities(text, Extractor.extract_mentions_or_lists_with_indices(text), options, &block) end # Add tags around the hashtags in the provided text. # The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :hashtag_class:: class to add to hashtag tags # :hashtag_url_base:: the value for href attribute. The hashtag text (minus the #) will be appended at the end of this. # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. 
def auto_link_hashtags(text, options = {}, &block) # :yields: hashtag_text auto_link_entities(text, Extractor.extract_hashtags_with_indices(text), options, &block) end # Add tags around the cashtags in the provided text. # The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :cashtag_class:: class to add to cashtag tags # :cashtag_url_base:: the value for href attribute. The cashtag text (minus the $) will be appended at the end of this. # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_cashtags(text, options = {}, &block) # :yields: cashtag_text auto_link_entities(text, Extractor.extract_cashtags_with_indices(text), options, &block) end # Add tags around the URLs in the provided text. # The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :url_class:: class to add to url tags # :invisible_tag_attrs:: HTML attribute to add to invisible span tags # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :url_target:: the value for target attribute on URL links. 
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_urls(text, options = {}, &block) auto_link_entities(text, Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false), options, &block) end # These methods are deprecated, will be removed in future. extend Deprecation # Deprecated: Please use auto_link_urls instead. # Add tags around the URLs in the provided text. # Any elements in the href_options hash will be converted to HTML attributes # and place in the tag. # Unless href_options contains :suppress_no_follow # the rel="nofollow" attribute will be added. alias :auto_link_urls_custom :auto_link_urls deprecate :auto_link_urls_custom, :auto_link_urls private HTML_ENTITIES = { '&' => '&', '>' => '>', '<' => '<', '"' => '"', "'" => ''' } def html_escape(text) text && text.to_s.gsub(/[&"'><]/) do |character| HTML_ENTITIES[character] end end # NOTE We will make this private in future. public :html_escape # Options which should not be passed as HTML attributes OPTIONS_NOT_ATTRIBUTES = Set.new([ :url_class, :list_class, :username_class, :hashtag_class, :cashtag_class, :username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base, :username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block, :username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities, :invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target, :target_blank, :link_attribute_block, :link_text_block ]).freeze def extract_html_attrs_from_options!(options) html_attrs = {} options.reject! 
do |key, value| unless OPTIONS_NOT_ATTRIBUTES.include?(key) html_attrs[key] = value true end end html_attrs end def url_entities_hash(url_entities) (url_entities || {}).inject({}) do |entities, entity| # be careful not to alter arguments received _entity = HashHelper.symbolize_keys(entity) entities[_entity[:url]] = _entity entities end end def link_to_url(entity, chars, options = {}) url = entity[:url] href = if options[:link_url_block] options[:link_url_block].call(url) else url end # NOTE auto link to urls do not use any default values and options # like url_class but use suppress_no_follow. html_attrs = options[:html_attrs].dup html_attrs[:class] = options[:url_class] if options.key?(:url_class) # add target attribute only if :url_target is specified html_attrs[:target] = options[:url_target] if options.key?(:url_target) url_entities = url_entities_hash(options[:url_entities]) # use entity from urlEntities if available url_entity = url_entities[url] || entity link_text = if url_entity[:display_url] html_attrs[:title] ||= url_entity[:expanded_url] link_url_with_entity(url_entity, options) else html_escape(url) end link_to_text(entity, link_text, href, html_attrs, options) end def link_url_with_entity(entity, options) display_url = entity[:display_url] expanded_url = entity[:expanded_url] invisible_tag_attrs = options[:invisible_tag_attrs] || DEFAULT_INVISIBLE_TAG_ATTRS # Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste # should contain the full original URL (expanded_url), not the display URL. # # Method: Whenever possible, we actually emit HTML that contains expanded_url, and use # font-size:0 to hide those parts that should not be displayed (because they are not part of display_url). # Elements with font-size:0 get copied even though they are not visible. # Note that display:none doesn't work here. Elements with display:none don't get copied. # # Additionally, we want to *display* ellipses, but we don't want them copied. 
To make this happen we # wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on # everything with the tco-ellipsis class. # # Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1 # For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts. # For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine. display_url_sans_ellipses = display_url.gsub("…", "") if expanded_url.include?(display_url_sans_ellipses) before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2) preceding_ellipsis = /\A…/.match(display_url).to_s following_ellipsis = /…\z/.match(display_url).to_s # As an example: The user tweets "hi http://longdomainname.com/foo" # This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo" # This will get rendered as: # # … # # http://longdomai # # # nname.com/foo # # #   # … # %(#{preceding_ellipsis} ) << %(#{html_escape(before_display_url)}) << %(#{html_escape(display_url_sans_ellipses)}) << %(#{html_escape(after_display_url)}) << %( #{following_ellipsis}) else html_escape(display_url) end end def link_to_hashtag(entity, chars, options = {}) hash = chars[entity[:indices].first] hashtag = entity[:hashtag] hashtag = yield(hashtag) if block_given? hashtag_class = options[:hashtag_class].to_s if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars] hashtag_class += ' rtl' end href = if options[:hashtag_url_block] options[:hashtag_url_block].call(hashtag) else "#{options[:hashtag_url_base]}#{hashtag}" end html_attrs = { :class => hashtag_class, # FIXME As our conformance test, hash in title should be half-width, # this should be bug of conformance data. 
:title => "##{hashtag}" }.merge(options[:html_attrs]) link_to_text_with_symbol(entity, hash, hashtag, href, html_attrs, options) end def link_to_cashtag(entity, chars, options = {}) dollar = chars[entity[:indices].first] cashtag = entity[:cashtag] cashtag = yield(cashtag) if block_given? href = if options[:cashtag_url_block] options[:cashtag_url_block].call(cashtag) else "#{options[:cashtag_url_base]}#{cashtag}" end html_attrs = { :class => "#{options[:cashtag_class]}", :title => "$#{cashtag}" }.merge(options[:html_attrs]) link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options) end def link_to_screen_name(entity, chars, options = {}) name = "#{entity[:screen_name]}#{entity[:list_slug]}" chunk = name.dup chunk = yield(chunk) if block_given? at = chars[entity[:indices].first] html_attrs = options[:html_attrs].dup if entity[:list_slug] && !entity[:list_slug].empty? && !options[:suppress_lists] href = if options[:list_url_block] options[:list_url_block].call(name) else "#{options[:list_url_base]}#{name}" end html_attrs[:class] ||= "#{options[:list_class]}" else href = if options[:username_url_block] options[:username_url_block].call(chunk) else "#{options[:username_url_base]}#{name}" end html_attrs[:class] ||= "#{options[:username_class]}" end link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options) end def link_to_text_with_symbol(entity, symbol, text, href, attributes = {}, options = {}) tagged_symbol = options[:symbol_tag] ? "<#{options[:symbol_tag]}>#{symbol}" : symbol text = html_escape(text) tagged_text = options[:text_with_symbol_tag] ? 
"<#{options[:text_with_symbol_tag]}>#{text}" : text if options[:username_include_symbol] || symbol !~ Twitter::Regex::REGEXEN[:at_signs] "#{link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)}" else "#{tagged_symbol}#{link_to_text(entity, tagged_text, href, attributes, options)}" end end def link_to_text(entity, text, href, attributes = {}, options = {}) attributes[:href] = href options[:link_attribute_block].call(entity, attributes) if options[:link_attribute_block] text = options[:link_text_block].call(entity, text) if options[:link_text_block] %(#{text}) end BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze def tag_attrs(attributes) attributes.keys.sort_by{|k| k.to_s}.inject("") do |attrs, key| value = attributes[key] if BOOLEAN_ATTRIBUTES.include?(key) value = value ? key : nil end unless value.nil? value = case value when Array value.compact.join(" ") else value end attrs << %( #{html_escape(key)}="#{html_escape(value)}") end attrs end end end end twitter-text-1.13.4/lib/twitter-text/validation.rb0000644000175000017500000001076412667350232022415 0ustar sudheeshsudheeshrequire 'unf' module Twitter module Validation extend self MAX_LENGTH = 140 DEFAULT_TCO_URL_LENGTHS = { :short_url_length => 23, :short_url_length_https => 23, :characters_reserved_per_media => 23 }.freeze # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a # string no matter which actual form was transmitted. For example: # # U+0065 Latin Small Letter E # + U+0301 Combining Acute Accent # ---------- # = 2 bytes, 2 characters, displayed as é (1 visual glyph) # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1 # # The string could also contain U+00E9 already, in which case the canonicalization will not change the value. 
# def tweet_length(text, options = {}) options = DEFAULT_TCO_URL_LENGTHS.merge(options) length = text.to_nfc.unpack("U*").length Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position| length += start_position - end_position length += url.downcase =~ /^https:\/\// ? options[:short_url_length_https] : options[:short_url_length] end length end # Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation # will allow quicker feedback. # # Returns false if this text is valid. Otherwise one of the following Symbols will be returned: # # :too_long:: if the text is too long # :empty:: if the text is nil or empty # :invalid_characters:: if the text contains non-Unicode or any of the disallowed Unicode characters def tweet_invalid?(text) return :empty if !text || text.empty? begin return :too_long if tweet_length(text) > MAX_LENGTH return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) } rescue ArgumentError # non-Unicode value. return :invalid_characters end return false end def valid_tweet_text?(text) !tweet_invalid?(text) end def valid_username?(username) return false if !username || username.empty? extracted = Twitter::Extractor.extract_mentioned_screen_names(username) # Should extract the username minus the @ sign, hence the [1..-1] extracted.size == 1 && extracted.first == username[1..-1] end VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o def valid_list?(username_list) match = username_list.match(VALID_LIST_RE) # Must have matched and had nothing before or after !!(match && match[1] == "" && match[4] && !match[4].empty?) end def valid_hashtag?(hashtag) return false if !hashtag || hashtag.empty? 
extracted = Twitter::Extractor.extract_hashtags(hashtag) # Should extract the hashtag minus the # sign, hence the [1..-1] extracted.size == 1 && extracted.first == hashtag[1..-1] end def valid_url?(url, unicode_domains=true, require_protocol=true) return false if !url || url.empty? url_parts = url.match(Twitter::Regex[:validate_url_unencoded]) return false unless (url_parts && url_parts.to_s == url) scheme, authority, path, query, fragment = url_parts.captures return false unless ((!require_protocol || (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) && valid_match?(path, Twitter::Regex[:validate_url_path]) && valid_match?(query, Twitter::Regex[:validate_url_query], true) && valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true)) return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) || (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority])) end private def valid_match?(string, regex, optional=false) return (string && string.match(regex) && $~.to_s == string) unless optional !(string && (!string.match(regex) || $~.to_s != string)) end end end twitter-text-1.13.4/lib/twitter-text/hit_highlighter.rb0000644000175000017500000000532012667350232023415 0ustar sudheeshsudheeshmodule Twitter # Module for doing "hit highlighting" on tweets that have been auto-linked already. # Useful with the results returned from the Search API. module HitHighlighter extend self # Default Tag used for hit highlighting DEFAULT_HIGHLIGHT_TAG = "em" # Add tags around the hits provided in the text. The # hits should be an array of (start, end) index pairs, relative to the original # text, before auto-linking (but the text may already be auto-linked if desired) # # The tags can be overridden using the :tag option. 
For example: # # irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong') # => "test hit here" def hit_highlight(text, hits = [], options = {}) if hits.empty? return text end tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG tags = ["<" + tag_name + ">", ""] chunks = text.split(/[<>]/) result = [] chunk_index, chunk = 0, chunks[0] chunk_chars = chunk.to_s.to_char_a prev_chunks_len = 0 chunk_cursor = 0 start_in_chunk = false for hit, index in hits.flatten.each_with_index do tag = tags[index % 2] placed = false until chunk.nil? || hit < prev_chunks_len + chunk.length do result << chunk_chars[chunk_cursor..-1] if start_in_chunk && hit == prev_chunks_len + chunk_chars.length result << tag placed = true end # correctly handle highlights that end on the final character. if tag_text = chunks[chunk_index+1] result << "<#{tag_text}>" end prev_chunks_len += chunk_chars.length chunk_cursor = 0 chunk_index += 2 chunk = chunks[chunk_index] chunk_chars = chunk.to_s.to_char_a start_in_chunk = false end if !placed && !chunk.nil? hit_spot = hit - prev_chunks_len result << chunk_chars[chunk_cursor...hit_spot] << tag chunk_cursor = hit_spot if index % 2 == 0 start_in_chunk = true else start_in_chunk = false end placed = true end # ultimate fallback, hits that run off the end get a closing tag if !placed result << tag end end if chunk if chunk_cursor < chunk_chars.length result << chunk_chars[chunk_cursor..-1] end (chunk_index+1).upto(chunks.length-1).each do |i| result << (i.even? ? chunks[i] : "<#{chunks[i]}>") end end result.flatten.join end end end twitter-text-1.13.4/.rspec0000644000175000017500000000003012667350232015602 0ustar sudheeshsudheesh--color --format=nested twitter-text-1.13.4/README.rdoc0000644000175000017500000000625412667350232016311 0ustar sudheeshsudheesh{rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text] == twitter-text A gem that provides text processing routines for Twitter Tweets. 
The major reason for this is to unify the various auto-linking and extraction of usernames, lists, hashtags and URLs. == Extraction Examples # Extraction class MyClass include Twitter::Extractor usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack") # usernames = ["twitter", "jack"] end # Extraction with a block argument class MyClass include Twitter::Extractor extract_reply_screen_name("@twitter are you hiring?") do |username| # username = "twitter" end end == Auto-linking Examples # Auto-link class MyClass include Twitter::Autolink html = auto_link("link @user, please #request") end # For Ruby on Rails you want to add this to app/helpers/application_helper.rb module ApplicationHelper include Twitter::Autolink end # Now the auto_link function is available in every view. So in index.html.erb: <%= auto_link("link @user, please #request") %> === Usernames Username extraction and linking matches all valid Twitter usernames but does not verify that the username is a valid Twitter account. === Lists Auto-link and extract list names when they are written in @user/list-name format. === Hashtags Auto-link and extract hashtags, where a hashtag can contain most letters or numbers but cannot be solely numbers and cannot contain punctuation. === URLs Asian languages like Chinese, Japanese or Korean may not use a delimiter such as a space to separate normal text from URLs making it difficult to identify where the URL ends and the text starts. For this reason twitter-text currently does not support extracting or auto-linking of URLs immediately followed by non-Latin characters. Example: "http://twitter.com/は素晴らしい". The normal text is "は素晴らしい" and is not part of the URL even though it isn't space separated. === International Special care has been taken to be sure that auto-linking and extraction work in Tweets of all languages. This means that languages without spaces between words should work equally well.
=== Hit Highlighting Use to provide emphasis around the "hits" returned from the Search API, built to work against text that has been auto-linked already. === Thanks Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of: * At Twitter … * Matt Sanford - http://github.com/mzsanford * Raffi Krikorian - http://github.com/r * Ben Cherry - http://github.com/bcherry * Patrick Ewing - http://github.com/hoverbird * Jeff Smick - http://github.com/sprsquish * Kenneth Kufluk - https://github.com/kennethkufluk * Keita Fujii - https://github.com/keitaf * Yoshimasa Niwa - https://github.com/niw * Patches from the community … * Jean-Philippe Bougie - http://github.com/jpbougie * Erik Michaels-Ober - https://github.com/sferik * Anyone who has filed an issue. It helps. Really. === Copyright and License Copyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 twitter-text-1.13.4/LICENSE0000644000175000017500000002361012667350232015503 0ustar sudheeshsudheeshCopyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the License below, or at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
twitter-text-1.13.4/test/0000755000175000017500000000000012670063203015444 5ustar sudheeshsudheeshtwitter-text-1.13.4/test/twitter-text-conformance/0000755000175000017500000000000012670063203022420 5ustar sudheeshsudheeshtwitter-text-1.13.4/test/twitter-text-conformance/README.md0000644000175000017500000001172212670063203023702 0ustar sudheeshsudheesh ## Purpose This conformance package provides a cross-platform definition of the test cases for auto linking, extracting and hit highlighting of Tweets. The primary use for this is the twitter-text-* libraries; both those managed by Twitter and those created by the community. The reason for this conformance suite is to provide a way to keep the various implementations of Twitter text handling working in a consistent and interoperable way. While anyone can feel free to implement this logic however they choose the recommendation to developers is to use libraries which pass this conformance suite. ## Format The test cases are stored in YAML files. There is one YAML file for each major operation type, and within those files there is one section for each publicly accessible API. Each test case is defined by: * description: This provides a meaningful name for the test case, for use as an error message if a test fails. * text: The input text of the Tweet. * expected: What results are expected for this input text ## Guidelines for use If you are creating a new twitter-text library in a different programming language please follow these few guidelines: 1. Create a test which reads these files and executes the test cases. 1.a. Do not convert these files to test cases statically. These test cases will change over time. 2. Be sure to implement all of the publicly accessible APIs (the keys to the YAML file) 3. Only expose the public API method and not the underlying regular expressions 3.a. If your language or environment does not allow for this please make a comment to the effect 3.b. 
This prevents breakage when regular expressions need to change in fundamental ways ## Submitting new conformance tests * You can [fork the github repository](https://github.com/twitter/twitter-text) to add tests and send a pull request * You can [open an issue on github](https://github.com/twitter/twitter-text/issues) * Please be sure to provide example input and output as well as a brief description of the problem. ## Changelog * v1.4.9 - 2011-12-01 [ Git tag v1.4.9 ] * [FIX] Apply stricter parsing of t.co URLs * [FIX] Extract @mention and hashtag before newline * [FIX] Extract URLs without protocol on ccTLD domain with slash * v1.4.8 - 2011-11-02 [ Git tag v1.4.8 ] * [FIX] Extract URLs without protocol in CJK text * [FIX] Do not extract URL in hashtag * [FIX] Extract hashtag after bracket * [FIX] Extract URL with '?' in fragment * v1.4.7 - 2011-10-04 [ Git tag v1.4.7 ] * [FIX] Extract URLs followed by punctuations * [FIX] Extract URLs without protocol in CJK text * [FIX] Extract URLs with '.' 
* v1.4.5 - 2011-09-20 [ Git tag v1.4.5 ] * [FIX] Extract URLs without protocol * [FIX] Extract URLs with '.', '|' and '&' * v1.4.4 - 2011-08-05 [ Git tag v1.4.4 ] * [FIX] Support ş (U+015F) in hashtags * [FIX] Support latin accents in URL paths * [FIX] Add a test for a common cause for runaway regex engines * v1.4.3 - 2011-07-13 [ Git tag v1.4.3 ] * [FIX] Japanese autolink including long vowel mark (chouon) * [FIX] Japanese autolink after a full-width exclamation point * [FIX] Japanese autolink including ideographic iteration mark * [FIX] Add hashtag extraction with indices test for new language hashtags * [FIX] Add hashtag extraction with indices test for multiple latin hashtags * v1.4.2 - 2011-07-08 [ Git tag v1.4.2 ] * [FIX] Additional Japanese hashtag autolinking tests * v1.4.1 - 2011-05-18 [ Git tag v1.4.1 ] * [FIX] Add support for Russian hashtags * [FIX] Add support for Korean hashtags * [FIX] Add support for Japanese hashtags (Katakana, Hiragana and Kanji) * [FIX] Add support for autolinking punycode domain names and TLDs (via punycode) * [DOC] Update README and License * v1.3.1 - 2010-12-03 - [ Git tag v1.3.1 ] * [DOC] Updated README with Changelog section * [FIX] Autolink URLs with paths ending in + and - * [FIX] Extract URLs with paths ending in + and - * v1.3.0 - 2010-12-03 - [ Git tag v1.3.0 ] * [NOTE] First tagged version (sorry) * [DOC] Updated README file with guidelines for use and format information * [FIX] Do not autolink URLs without protocols * [FIX] Do not extract URLs without protocols * v1.0.0 - 2010-01-21 - [ Git tag v1.0.0 (retroactively) ] * Initial version ## Copyright and License Copyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. 
You may obtain a copy of the License in the LICENSE file, or at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. twitter-text-1.13.4/test/twitter-text-conformance/Gemfile0000644000175000017500000000005612670063203023714 0ustar sudheeshsudheeshsource "https://rubygems.org" gem 'nokogiri' twitter-text-1.13.4/test/twitter-text-conformance/autolink.yml0000644000175000017500000015220712670063203025000 0ustar sudheeshsudheeshtests: usernames: - description: "Autolink trailing username" text: "text @username" expected: "text @username" - description: "Autolink username at the beginning" text: "@username text" expected: "@username text" - description: "DO NOT Autolink username preceded by a letter" text: "meet@the beach" expected: "meet@the beach" - description: "Autolink username preceded by puctuation" text: "great.@username" expected: "great.@username" - description: "Autolink username followed by puctuation" text: "@username&^$%^" expected: "@username&^$%^" - description: "Autolink username followed by Japanese" text: "@usernameの" expected: "@usernameの" - description: "Autolink username preceded by Japanese" text: "あ@username" expected: "あ@username" - description: "Autolink username surrounded by Japanese" text: "あ@usernameの" expected: "あ@usernameの" - description: "Autolink username in compressed RT" text: "RT@username: long Tweet is loooong" expected: "RT@username: long Tweet is loooong" - description: "Autolink alternate RT format in middle of text" text: "Check out RT:@username yas" expected: "Check out RT:@username yas" - description: "DO NOT Autolink domain of email address ending in RT like support@example.com" text: "Email support@example.com" expected: "Email 
support@example.com" - description: "DO NOT Autolink username followed by accented latin characters" text: "@aliceìnheiro something something" expected: "@aliceìnheiro something something" - description: "DO NOT Autolink username @_ in @_@" text: "oh, snap! @_@" expected: "oh, snap! @_@" - description: "Autolink username with full-width at sign (U+FF20)" text: "@username" expected: "@username" - description: "DO NOT Autolink username over 20 characters" text: "@username9012345678901" expected: "@username9012345678901" - description: "Autolink two usernames" text: "@foo @bar" expected: "@foo @bar" - description: "Autolink usernames followed by :" text: "@foo: @bar" expected: "@foo: @bar" - description: "Autolink usernames that are followed by international characters" text: "@foo îs in the house" expected: "@foo îs in the house" - description: "Preserve case when linking a username" text: "@MixedCase" expected: "@MixedCase" lists: - description: "Autolink list preceded by a space" text: "text @username/list" expected: "text @username/list" - description: "DO NOT Autolink list when space follows slash" text: "text @username/ list" expected: "text @username/ list" - description: "DO NOT Autolink list with empty username" text: "text @/list" expected: "text @/list" - description: "Autolink list at the beginning" text: "@username/list" expected: "@username/list" - description: "DO NOT Autolink list preceded by letter" text: "meet@the/beach" expected: "meet@the/beach" - description: "Autolink list preceded by punctuation" text: "great.@username/list" expected: "great.@username/list" - description: "Autolink list followed by punctuation" text: "@username/list&^$%^" expected: "@username/list&^$%^" - description: "Autolink list name over 25 characters (truncated to 25)" text: "@username/list567890123456789012345A" expected: "@username/list567890123456789012345A" - description: "Autolink list that contains an _" text: "text @username/list_name" expected: "text 
@username/list_name" - description: "Autolink list that contains a -" text: "text @username/list-name" expected: "text @username/list-name" - description: "Autolink list that contains a number" text: "text @username/list123" expected: "text @username/list123" - description: "DO NOT Autolink list starting with a number" text: "@username/1list" expected: "@username/1list" hashtags: - description: "Autolink trailing hashtag" text: "text #hashtag" expected: "text #hashtag" - description: "Autolink alphanumeric hashtag (letter-number-letter)" text: "text #hash0tag" expected: "text #hash0tag" - description: "Autolink alphanumeric hashtag (number-letter)" text: "text #1tag" expected: "text #1tag" - description: "Autolink hashtag with underscore" text: "text #hash_tag" expected: "text #hash_tag" - description: "DO NOT Autolink all-numeric hashtags" text: "text #1234" expected: "text #1234" - description: "DO NOT Autolink hashtag preceded by a letter" text: "text#hashtag" expected: "text#hashtag" - description: "DO NOT Autolink hashtag that begins with \ufe0f (Emoji style hash sign)" text: "#️hashtag" expected: "#️hashtag" - description: "DO NOT Autolink hashtag that begins with \ufe0f (Keycap style hash sign)" text: "#⃣hashtag" expected: "#⃣hashtag" - description: "Autolink multiple hashtags" text: "text #hashtag1 #hashtag2" expected: "text #hashtag1 #hashtag2" - description: "Autolink hashtag preceded by a period" text: "text.#hashtag" expected: "text.#hashtag" - description: "DO NOT Autolink hashtag preceded by &" text: "&#nbsp;" expected: "&#nbsp;" - description: "Autolink hashtag followed by ! (! not included)" text: "text #hashtag!" expected: "text #hashtag!" 
- description: "Autolink two hashtags separated by a slash" text: "text #dodge/#answer" expected: "text #dodge/#answer" - description: "Autolink hashtag before a slash" text: "text #dodge/answer" expected: "text #dodge/answer" - description: "Autolink hashtag after a slash" text: "text dodge/#answer" expected: "text dodge/#answer" - description: "Autolink hashtag followed by Japanese" text: "text #hashtagの" expected: "text #hashtagの" - description: "Autolink hashtag preceded by full-width space (U+3000)" text: "text #hashtag" expected: "text #hashtag" - description: "Autolink hashtag followed by full-width space (U+3000)" text: "#hashtag text" expected: "#hashtag text" - description: "Autolink hashtag with full-width hash (U+FF03)" text: "#hashtag" expected: "#hashtag" - description: "Autolink hashtag with accented character at the start" text: "#éhashtag" expected: "#éhashtag" - description: "Autolink hashtag with accented character at the end" text: "#hashtagé" expected: "#hashtagé" - description: "Autolink hashtag with accented character in the middle" text: "#hashétag" expected: "#hashétag" - description: "Autolink hashtags in Korean" text: "What is #트위터 anyway?" expected: "What is #트위터 anyway?" - description: "Autolink hashtags in Russian" text: "What is #ашок anyway?" expected: "What is #ашок anyway?" 
- description: "Autolink a katakana hashtag preceded by a space and followed by a space" text: "カタカナ #カタカナ カタカナ" expected: "カタカナ #カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a space and followed by a bracket" text: "カタカナ #カタカナ」カタカナ" expected: "カタカナ #カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a space and followed by a edge" text: "カタカナ #カタカナ" expected: "カタカナ #カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a space" text: "カタカナ「#カタカナ カタカナ" expected: "カタカナ「#カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a bracket" text: "カタカナ「#カタカナ」カタカナ" expected: "カタカナ「#カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a edge" text: "カタカナ「#カタカナ" expected: "カタカナ「#カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a space" text: "#カタカナ カタカナ" expected: "#カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a bracket" text: "#カタカナ」カタカナ" expected: "#カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a edge" text: "#カタカナ" expected: "#カタカナ" - description: "Autolink a katakana hashtag with a voiced sounds mark followed by a space" text: "#ハッシュタグ テスト" expected: "#ハッシュタグ テスト" - description: "Autolink a katakana hashtag with a voiced sounds mark followed by numbers" text: "#ハッシュタグ123" expected: "#ハッシュタグ123" - description: "Autolink a katakana hashtag with another voiced sounds mark" text: "#パピプペポ" expected: "#パピプペポ" - description: "Autolink a kanji hashtag preceded by a space and followed by a space" text: "漢字 #漢字 漢字" expected: "漢字 #漢字 漢字" - description: "Autolink a kanji hashtag preceded by a space and followed by a bracket" text: "漢字 #漢字」漢字" expected: "漢字 #漢字」漢字" - description: "Autolink a kanji hashtag preceded by a space and followed by a edge" text: "漢字 #漢字" expected: "漢字 #漢字" - description: "Autolink a kanji 
hashtag preceded by a bracket and followed by a space" text: "漢字「#漢字 漢字" expected: "漢字「#漢字 漢字" - description: "Autolink a kanji hashtag preceded by a bracket and followed by a bracket" text: "漢字「#漢字」漢字" expected: "漢字「#漢字」漢字" - description: "Autolink a kanji hashtag preceded by a bracket and followed by a edge" text: "漢字「#漢字" expected: "漢字「#漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a space" text: "#漢字 漢字" expected: "#漢字 漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a bracket" text: "#漢字」漢字" expected: "#漢字」漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a edge" text: "#漢字" expected: "#漢字" - description: "Autolink a kanji hashtag preceded by an ideographic comma, followed by an ideographic period" text: "これは、#大丈夫。" expected: "これは、#大丈夫。" - description: "Autolink a hiragana hashtag preceded by a space and followed by a space" text: "ひらがな #ひらがな ひらがな" expected: "ひらがな #ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a space and followed by a bracket" text: "ひらがな #ひらがな」ひらがな" expected: "ひらがな #ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by a space and followed by a edge" text: "ひらがな #ひらがな" expected: "ひらがな #ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a space" text: "ひらがな「#ひらがな ひらがな" expected: "ひらがな「#ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a bracket" text: "ひらがな「#ひらがな」ひらがな" expected: "ひらがな「#ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a edge" text: "ひらがな「#ひらがな" expected: "ひらがな「#ひらがな" - description: "Autolink a hiragana hashtag preceded by a edge and followed by a space" text: "#ひらがな ひらがな" expected: "#ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a edge and followed by a bracket" text: "#ひらがな」ひらがな" expected: "#ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by 
a edge and followed by a edge" text: "#ひらがな" expected: "#ひらがな" - description: "Autolink a Kanji/Katakana mix hashtag" text: "日本語ハッシュタグ #日本語ハッシュタグ" expected: "日本語ハッシュタグ #日本語ハッシュタグ" - description: "DO NOT autolink a hashtag without a preceding space" text: "日本語ハッシュタグ#日本語ハッシュタグ" expected: "日本語ハッシュタグ#日本語ハッシュタグ" - description: "DO NOT include a punctuation in a hashtag" text: "#日本語ハッシュタグ。" expected: "#日本語ハッシュタグ。" - description: "Autolink a hashtag after a punctuation" text: "日本語ハッシュタグ。#日本語ハッシュタグ" expected: "日本語ハッシュタグ。#日本語ハッシュタグ" - description: "Autolink a hashtag with chouon" text: "長音ハッシュタグ。#サッカー" expected: "長音ハッシュタグ。#サッカー" - description: "Autolink a hashtag with half-width chouon" text: "長音ハッシュタグ。#サッカー" expected: "長音ハッシュタグ。#サッカー" - description: "Autolink a hashtag with half-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: "できましたよー!#日本語ハッシュタグ。" - description: "Autolink a hashtag with full-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: "できましたよー!#日本語ハッシュタグ。" - description: "Autolink a hashtag containing ideographic iteration mark" text: "#云々" expected: "#云々" - description: "Autolink multiple hashtags in multiple languages" text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" expected: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" - description: "Autolink should allow for ş (U+015F) in a hashtag" text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş" expected: "Here’s a test tweet for you: #Ateş #qrşt #ştu " - description: "Autolink a hashtag with Latin extended character" text: "#mûǁae" expected: "#mûǁae" # Please be careful with changes to this test case - what looks like "á" is really a + U+0301, and many editors will silently convert this to U+00E1. 
- description: "Autolink hashtags with combining diacritics" text: "#táim #hag̃ua" expected: "#táim #hag̃ua" - description: "Autolink Arabic hashtag" text: "Arabic hashtag: #فارسی #لس_آنجلس" expected: "Arabic hashtag: #فارسی #لس_آنجلس" - description: "Autolink Thai hashtag" text: "Thai hashtag: #รายละเอียด" expected: "Thai hashtag: #รายละเอียด" urls: - description: "Autolink URL with pipe character" text: "text http://example.com/pipe|character?yes|pipe|character" expected: "text http://example.com/pipe|character?yes|pipe|character" - description: "Autolink trailing url" text: "text http://example.com" expected: "text http://example.com" - description: "Autolink url in mid-text" text: "text http://example.com more text" expected: "text http://example.com more text" - description: "Autolink url in Japanese text" text: "いまなにしてるhttp://example.comいまなにしてる" expected: "いまなにしてるhttp://example.comいまなにしてる" - description: "Autolink url surrounded by parentheses does not capture them" text: "text (http://example.com)" expected: "text (http://example.com)" - description: "Autolink url with path surrounded by parentheses does not capture them" text: "text (http://example.com/test)" expected: "text (http://example.com/test)" - description: "Autolink url with embedded parentheses" text: "text http://msdn.com/S(deadbeef)/page.htm" expected: "text http://msdn.com/S(deadbeef)/page.htm" - description: "Autolink url with embedded parentheses without linking surrounding parentheses" text: "text (URL in parentheses http://msdn.com/S(deadbeef))" expected: "text (URL in parentheses http://msdn.com/S(deadbeef))" - description: "Autolink Rdio #music url with double balanced nested parentheses" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/" - description: "Autolink Rdio #music url with double balanced nested parentheses without linking surrounding 
parentheses" text: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)" expected: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)" - description: "Autolink url followed by nested parentheses (without them)" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))" - description: "Autolink url followed by completely unbalanced nested parentheses (without them)" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited" - description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" expected: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" - description: "Autolink url with balanced parens hiding XSS" text: 'text http://foo.com/("onclick="alert(1)")' expected: 'text http://foo.com/("onclick="alert(1)")' - description: "Autolink url should NOT capture unbalanced parens" text: "Parenthetically bad http://example.com/i_has_a_) thing" expected: "Parenthetically bad http://example.com/i_has_a_) thing" - description: "Autolink url containing unicode characters" text: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp" expected: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp" - description: "Autolink url with .co. 
under TLD" text: "test http://www.example.co.jp" expected: "test http://www.example.co.jp" - description: "Autolink url with .sx TLD" text: "test http://www.example.sx" expected: "test http://www.example.sx" - description: "DO NOT Autolink url containing ! character in the domain" text: "badly formatted http://foo!bar.com" expected: "badly formatted http://foo!bar.com" - description: "DO NOT Autolink url containing _ character in the domain" text: "badly formatted http://foo_bar.com" expected: "badly formatted http://foo_bar.com" - description: "Autolink url preceded by :" text: "text:http://example.com" expected: "text:http://example.com" - description: "Autolink url followed by ? (without it)" text: "text http://example.com?" expected: "text http://example.com?" - description: "Autolink url followed by ! (without it)" text: "text http://example.com!" expected: "text http://example.com!" - description: "Autolink url followed by , (without it)" text: "text http://example.com," expected: "text http://example.com," - description: "Autolink url with path followed by a comma (without the comma)" text: "In http://example.com/test, Douglas explains 42." expected: "In http://example.com/test, Douglas explains 42." - description: "Autolink url followed by . (without it)" text: "text http://example.com." expected: "text http://example.com." 
- description: "Autolink url followed by : (without it)" text: "text http://example.com:" expected: "text http://example.com:" - description: "Autolink url followed by ; (without it)" text: "text http://example.com;" expected: "text http://example.com;" - description: "Autolink url followed by ] (without it)" text: "text http://example.com]" expected: "text http://example.com]" - description: "Autolink url followed by ) (without it)" text: "text http://example.com)" expected: "text http://example.com)" - description: "Autolink url followed by } (without it)" text: "text http://example.com}" expected: "text http://example.com}" - description: "Autolink url followed by = (without it)" text: "text http://example.com=" expected: "text http://example.com=" - description: "Autolink url followed by ' (without it)" text: "text http://example.com'" expected: "text http://example.com'" - description: "Autolink url preceded by /" text: "text /http://example.com" expected: "text /http://example.com" - description: "Autolink url preceded by !" 
text: "text !http://example.com" expected: "text !http://example.com" - description: "DO NOT Autolink url preceded by =" text: "text =http://example.com" expected: "text =http://example.com" - description: "Autolink url surrounded by double quotes" text: "text \"http://example.com\"" expected: "text \"http://example.com\"" - description: "DO NOT Autolink url preceded by @" text: "@http://example.com" expected: "@http://example.com" - description: "DO NOT Autolink domain in email address" text: "foo@bar.com" expected: "foo@bar.com" - description: "Autolink url embedded in link tag" text: "http://example.com" expected: "http://example.com" - description: "Autolink multiple urls" text: "http://example.com https://sslexample.com http://sub.example.com" expected: "http://example.com https://sslexample.com http://sub.example.com" - description: "Autolink url with long TLD" text: "http://example.mobi/path" expected: "http://example.mobi/path" - description: "Autolink url containing ending with #value (not as url + hashtag)" text: "http://foo.com/?#foo" expected: "http://foo.com/?#foo" - description: "DO NOT Autolink url without protocol (with www)" text: "www.example.biz" expected: "www.example.biz" - description: "DO NOT Autolink url without protocol (with WWW)" text: "WWW.EXAMPLE.BIZ" expected: "WWW.EXAMPLE.BIZ" - description: "DO NOT Autolink URL without protocol and without www (ending in .com)" text: "foo.com" expected: "foo.com" - description: "DO NOT Autolink URL without protocol and without www (ending in .org)" text: "foo.org" expected: "foo.org" - description: "DO NOT Autolink URL without protocol and without www (ending in .net)" text: "foo.net" expected: "foo.net" - description: "DO NOT Autolink URL without protocol and without www (ending in .gov)" text: "foo.gov" expected: "foo.gov" - description: "DO NOT Autolink URL without protocol and without www (ending in .edu)" text: "foo.edu" expected: "foo.edu" - description: "DO NOT Autolink URL without protocol 
and without www not ending in /.(edu|com|gov|org|net)/" text: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk" expected: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk" - description: "Multiple URLs with different protocols but not without a protocol" text: "http://foo.com AND https://bar.com AND www.foobar.com" expected: "http://foo.com AND https://bar.com AND www.foobar.com" - description: "Autolink raw domain followed by domain only links the first" text: "See http://example.com example.com" expected: "See http://example.com example.com" - description: "Autolink url that includes @-sign and numeric dir under it" text: "http://www.flickr.com/photos/29674651@N00/4382024406" expected: "http://www.flickr.com/photos/29674651@N00/4382024406" - description: "Autolink url that includes @-sign and non-numeric dir under it" text: "http://www.flickr.com/photos/29674651@N00/foobar" expected: "http://www.flickr.com/photos/29674651@N00/foobar" - description: "Autolink url with a hashtag-looking fragment" text: "http://www.example.com/#answer" expected: "http://www.example.com/#answer" - description: "Autolink URL with only a domain followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL." expected: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL." - description: "Autolink URL with a path followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL." expected: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL." - description: "Autolink URL with a query followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL." 
expected: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL." - description: "Autolink URL with a hyphen in the domain name" text: "Czech out sweet deals at http://mrs.domain-dash.biz ok?" expected: "Czech out sweet deals at http://mrs.domain-dash.biz ok?" - description: "Autolink an IDN (punycode) domain and TLD" text: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/" expected: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/" - description: "Autolink URL should NOT autolink www...foo" text: "Is www...foo a valid URL?" expected: "Is www...foo a valid URL?" - description: "Autolink URL should NOT autolink www.-foo.com" text: "Is www.-foo.com a valid URL?" expected: "Is www.-foo.com a valid URL?" - description: "Autolink URL should NOT autolink a domain with a valid dash but no protocol" text: "Is www.foo-bar.com a valid URL?" expected: "Is www.foo-bar.com a valid URL?" - description: "Autolink URL should autolink a domain with a valid dash and a protocol" text: "Is http://www.foo-bar.com a valid URL?" expected: "Is http://www.foo-bar.com a valid URL?" 
- description: "Autolink URL should link search urls (with &lang=, not ⟨)" text: "Check out http://search.twitter.com/#!/search?q=avro&lang=en" expected: "Check out http://search.twitter.com/#!/search?q=avro&lang=en" - description: "Autolink URL should link urls with very long paths" text: "Check out http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" expected: "Check out 
http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - description: "Autolink URL should HTML escape the URL" text: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/" expected: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/" - description: "Autolink URL should autolink a URL with a - or + at the end of the path" text: "Go to http://example.com/a+ or http://example.com/a-" expected: "Go to 
http://example.com/a+ or http://example.com/a-" - description: "Autolink URL should autolink a URL with a - or + at the end of the path and query parameters" text: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that" expected: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that" - description: "Autolink URL should autolink URLs with longer paths ending in -" text: "Go to http://example.com/view/slug-url-?foo=bar" expected: "Go to http://example.com/view/slug-url-?foo=bar" - description: "Autolink URL should NOT link URLs with domains beginning in a space" text: "@user Try http:// example.com/path" expected: "@user Try http:// example.com/path" - description: "Autolink URL should NOT link URLs with domains beginning in a non-breaking space (U+00A0)" text: "@user Try http:// example.com/path" expected: "@user Try http:// example.com/path" - description: "Autolink URL should link paths containing accented characters" text: "See: http://example.com/café" expected: "See: http://example.com/café" - description: "Autolink URL should link paths containing Cyrillic characters" text: "Go to http://example.com/Русские_слова maybe?" expected: "Go to http://example.com/Русские_слова maybe?" 
- description: "Autolink URL should not link URL without protocol" text: "See: www.twitter.com or twitter.com/twitter" expected: "See: www.twitter.com or twitter.com/twitter" - description: "Autolink t.co URL followed by punctuation" text: "See: http://t.co/abcde's page" expected: "See: http://t.co/abcde's page" - description: "DO NOT autolink URL if preceded by $" text: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" expected: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" cashtags: - description: "Autolink a cashtag" text: "$STOCK" expected: "$STOCK" - description: "Autolink a cashtag in text" text: "Text $STOCK text $symbol text" expected: "Text $STOCK text $symbol text" all: - description: "Autolink all does not break on URL with @" text: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you." expected: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you." 
- description: "Correctly handles URL followed directly by @user" text: "See: http://example.com/@user" expected: "See: http://example.com/@user" - description: "Correctly handles URL params containing @user" text: "See: http://example.com/?@user=@user" expected: "See: http://example.com/?@user=@user" - description: "Correctly handles URL with an @user followed by trailing /" text: "See: http://example.com/@user/" expected: "See: http://example.com/@user/" - description: "Does not allow an XSS after an @" text: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//" expected: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//" - description: "DO NOT autolink URLs if preceded by # or @" text: "#https://twitter.com @https://twitter.com" expected: "#https://twitter.com @https://twitter.com" - description: "Autolink url with a hashtag-looking fragment" text: "http://www.example.com/#answer" expected: "http://www.example.com/#answer" - description: "Autolink hashtag if followed by . and TLD" text: "#twitter.com #twitter.co.jp" expected: "#twitter.com #twitter.co.jp" - description: "Autolink @mention if followed by . and TLD" text: "@twitter.com @twitter.co.jp" expected: "@twitter.com @twitter.co.jp" - description: "Autolink a cashtag" text: "$STOCK" expected: "$STOCK" json: - description: "Do not autolink if JSON is empty." text: "This is a tweet with no entity." json: '{"hashtags":[], "urls":[], "user_mentions":[]}' expected: "This is a tweet with no entity." 
- description: "Autolink username" text: "text @username" json: '{"hashtags":[], "urls":[], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [5, 14]}]}' expected: "text @username" - description: "Autolink hashtag" text: "text #hashtag" json: '{"hashtags":[{"text":"hashtag", "indices":[5,13]}], "urls":[], "user_mentions":[]}' expected: "text #hashtag" - description: "Autolink URL" text: "text http://t.co/gksG6xlq" json: '{"hashtags":[], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[]}' expected: "text  http://twitter.com/ " - description: "Autolink all" text: "text http://t.co/gksG6xlq text #hashtag text @username" json: '{"hashtags":[{"text":"hashtag", "indices":[31,39]}], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [45, 54]}]}' expected: "text  http://twitter.com/  text #hashtag text @username" twitter-text-1.13.4/test/twitter-text-conformance/tlds.yml0000644000175000017500000037123612670063203024125 0ustar sudheeshsudheesh--- tests: country: - description: ac is a valid country tld text: https://twitter.ac expected: - https://twitter.ac - description: ad is a valid country tld text: https://twitter.ad expected: - https://twitter.ad - description: ae is a valid country tld text: https://twitter.ae expected: - https://twitter.ae - description: af is a valid country tld text: https://twitter.af expected: - https://twitter.af - description: ag is a valid country tld text: https://twitter.ag expected: - https://twitter.ag - description: ai is a valid country tld text: https://twitter.ai expected: - https://twitter.ai - description: al is a valid country tld text: https://twitter.al expected: - 
https://twitter.al - description: am is a valid country tld text: https://twitter.am expected: - https://twitter.am - description: an is a valid country tld text: https://twitter.an expected: - https://twitter.an - description: ao is a valid country tld text: https://twitter.ao expected: - https://twitter.ao - description: aq is a valid country tld text: https://twitter.aq expected: - https://twitter.aq - description: ar is a valid country tld text: https://twitter.ar expected: - https://twitter.ar - description: as is a valid country tld text: https://twitter.as expected: - https://twitter.as - description: at is a valid country tld text: https://twitter.at expected: - https://twitter.at - description: au is a valid country tld text: https://twitter.au expected: - https://twitter.au - description: aw is a valid country tld text: https://twitter.aw expected: - https://twitter.aw - description: ax is a valid country tld text: https://twitter.ax expected: - https://twitter.ax - description: az is a valid country tld text: https://twitter.az expected: - https://twitter.az - description: ba is a valid country tld text: https://twitter.ba expected: - https://twitter.ba - description: bb is a valid country tld text: https://twitter.bb expected: - https://twitter.bb - description: bd is a valid country tld text: https://twitter.bd expected: - https://twitter.bd - description: be is a valid country tld text: https://twitter.be expected: - https://twitter.be - description: bf is a valid country tld text: https://twitter.bf expected: - https://twitter.bf - description: bg is a valid country tld text: https://twitter.bg expected: - https://twitter.bg - description: bh is a valid country tld text: https://twitter.bh expected: - https://twitter.bh - description: bi is a valid country tld text: https://twitter.bi expected: - https://twitter.bi - description: bj is a valid country tld text: https://twitter.bj expected: - https://twitter.bj - description: bl is a valid country tld 
text: https://twitter.bl expected: - https://twitter.bl - description: bm is a valid country tld text: https://twitter.bm expected: - https://twitter.bm - description: bn is a valid country tld text: https://twitter.bn expected: - https://twitter.bn - description: bo is a valid country tld text: https://twitter.bo expected: - https://twitter.bo - description: bq is a valid country tld text: https://twitter.bq expected: - https://twitter.bq - description: br is a valid country tld text: https://twitter.br expected: - https://twitter.br - description: bs is a valid country tld text: https://twitter.bs expected: - https://twitter.bs - description: bt is a valid country tld text: https://twitter.bt expected: - https://twitter.bt - description: bv is a valid country tld text: https://twitter.bv expected: - https://twitter.bv - description: bw is a valid country tld text: https://twitter.bw expected: - https://twitter.bw - description: by is a valid country tld text: https://twitter.by expected: - https://twitter.by - description: bz is a valid country tld text: https://twitter.bz expected: - https://twitter.bz - description: ca is a valid country tld text: https://twitter.ca expected: - https://twitter.ca - description: cc is a valid country tld text: https://twitter.cc expected: - https://twitter.cc - description: cd is a valid country tld text: https://twitter.cd expected: - https://twitter.cd - description: cf is a valid country tld text: https://twitter.cf expected: - https://twitter.cf - description: cg is a valid country tld text: https://twitter.cg expected: - https://twitter.cg - description: ch is a valid country tld text: https://twitter.ch expected: - https://twitter.ch - description: ci is a valid country tld text: https://twitter.ci expected: - https://twitter.ci - description: ck is a valid country tld text: https://twitter.ck expected: - https://twitter.ck - description: cl is a valid country tld text: https://twitter.cl expected: - https://twitter.cl - 
description: cm is a valid country tld text: https://twitter.cm expected: - https://twitter.cm - description: cn is a valid country tld text: https://twitter.cn expected: - https://twitter.cn - description: co is a valid country tld text: https://twitter.co expected: - https://twitter.co - description: cr is a valid country tld text: https://twitter.cr expected: - https://twitter.cr - description: cu is a valid country tld text: https://twitter.cu expected: - https://twitter.cu - description: cv is a valid country tld text: https://twitter.cv expected: - https://twitter.cv - description: cw is a valid country tld text: https://twitter.cw expected: - https://twitter.cw - description: cx is a valid country tld text: https://twitter.cx expected: - https://twitter.cx - description: cy is a valid country tld text: https://twitter.cy expected: - https://twitter.cy - description: cz is a valid country tld text: https://twitter.cz expected: - https://twitter.cz - description: de is a valid country tld text: https://twitter.de expected: - https://twitter.de - description: dj is a valid country tld text: https://twitter.dj expected: - https://twitter.dj - description: dk is a valid country tld text: https://twitter.dk expected: - https://twitter.dk - description: dm is a valid country tld text: https://twitter.dm expected: - https://twitter.dm - description: do is a valid country tld text: https://twitter.do expected: - https://twitter.do - description: dz is a valid country tld text: https://twitter.dz expected: - https://twitter.dz - description: ec is a valid country tld text: https://twitter.ec expected: - https://twitter.ec - description: ee is a valid country tld text: https://twitter.ee expected: - https://twitter.ee - description: eg is a valid country tld text: https://twitter.eg expected: - https://twitter.eg - description: eh is a valid country tld text: https://twitter.eh expected: - https://twitter.eh - description: er is a valid country tld text: 
https://twitter.er expected: - https://twitter.er - description: es is a valid country tld text: https://twitter.es expected: - https://twitter.es - description: et is a valid country tld text: https://twitter.et expected: - https://twitter.et - description: eu is a valid country tld text: https://twitter.eu expected: - https://twitter.eu - description: fi is a valid country tld text: https://twitter.fi expected: - https://twitter.fi - description: fj is a valid country tld text: https://twitter.fj expected: - https://twitter.fj - description: fk is a valid country tld text: https://twitter.fk expected: - https://twitter.fk - description: fm is a valid country tld text: https://twitter.fm expected: - https://twitter.fm - description: fo is a valid country tld text: https://twitter.fo expected: - https://twitter.fo - description: fr is a valid country tld text: https://twitter.fr expected: - https://twitter.fr - description: ga is a valid country tld text: https://twitter.ga expected: - https://twitter.ga - description: gb is a valid country tld text: https://twitter.gb expected: - https://twitter.gb - description: gd is a valid country tld text: https://twitter.gd expected: - https://twitter.gd - description: ge is a valid country tld text: https://twitter.ge expected: - https://twitter.ge - description: gf is a valid country tld text: https://twitter.gf expected: - https://twitter.gf - description: gg is a valid country tld text: https://twitter.gg expected: - https://twitter.gg - description: gh is a valid country tld text: https://twitter.gh expected: - https://twitter.gh - description: gi is a valid country tld text: https://twitter.gi expected: - https://twitter.gi - description: gl is a valid country tld text: https://twitter.gl expected: - https://twitter.gl - description: gm is a valid country tld text: https://twitter.gm expected: - https://twitter.gm - description: gn is a valid country tld text: https://twitter.gn expected: - https://twitter.gn - 
description: gp is a valid country tld text: https://twitter.gp expected: - https://twitter.gp - description: gq is a valid country tld text: https://twitter.gq expected: - https://twitter.gq - description: gr is a valid country tld text: https://twitter.gr expected: - https://twitter.gr - description: gs is a valid country tld text: https://twitter.gs expected: - https://twitter.gs - description: gt is a valid country tld text: https://twitter.gt expected: - https://twitter.gt - description: gu is a valid country tld text: https://twitter.gu expected: - https://twitter.gu - description: gw is a valid country tld text: https://twitter.gw expected: - https://twitter.gw - description: gy is a valid country tld text: https://twitter.gy expected: - https://twitter.gy - description: hk is a valid country tld text: https://twitter.hk expected: - https://twitter.hk - description: hm is a valid country tld text: https://twitter.hm expected: - https://twitter.hm - description: hn is a valid country tld text: https://twitter.hn expected: - https://twitter.hn - description: hr is a valid country tld text: https://twitter.hr expected: - https://twitter.hr - description: ht is a valid country tld text: https://twitter.ht expected: - https://twitter.ht - description: hu is a valid country tld text: https://twitter.hu expected: - https://twitter.hu - description: id is a valid country tld text: https://twitter.id expected: - https://twitter.id - description: ie is a valid country tld text: https://twitter.ie expected: - https://twitter.ie - description: il is a valid country tld text: https://twitter.il expected: - https://twitter.il - description: im is a valid country tld text: https://twitter.im expected: - https://twitter.im - description: in is a valid country tld text: https://twitter.in expected: - https://twitter.in - description: io is a valid country tld text: https://twitter.io expected: - https://twitter.io - description: iq is a valid country tld text: 
https://twitter.iq expected: - https://twitter.iq - description: ir is a valid country tld text: https://twitter.ir expected: - https://twitter.ir - description: is is a valid country tld text: https://twitter.is expected: - https://twitter.is - description: it is a valid country tld text: https://twitter.it expected: - https://twitter.it - description: je is a valid country tld text: https://twitter.je expected: - https://twitter.je - description: jm is a valid country tld text: https://twitter.jm expected: - https://twitter.jm - description: jo is a valid country tld text: https://twitter.jo expected: - https://twitter.jo - description: jp is a valid country tld text: https://twitter.jp expected: - https://twitter.jp - description: ke is a valid country tld text: https://twitter.ke expected: - https://twitter.ke - description: kg is a valid country tld text: https://twitter.kg expected: - https://twitter.kg - description: kh is a valid country tld text: https://twitter.kh expected: - https://twitter.kh - description: ki is a valid country tld text: https://twitter.ki expected: - https://twitter.ki - description: km is a valid country tld text: https://twitter.km expected: - https://twitter.km - description: kn is a valid country tld text: https://twitter.kn expected: - https://twitter.kn - description: kp is a valid country tld text: https://twitter.kp expected: - https://twitter.kp - description: kr is a valid country tld text: https://twitter.kr expected: - https://twitter.kr - description: kw is a valid country tld text: https://twitter.kw expected: - https://twitter.kw - description: ky is a valid country tld text: https://twitter.ky expected: - https://twitter.ky - description: kz is a valid country tld text: https://twitter.kz expected: - https://twitter.kz - description: la is a valid country tld text: https://twitter.la expected: - https://twitter.la - description: lb is a valid country tld text: https://twitter.lb expected: - https://twitter.lb - 
description: lc is a valid country tld text: https://twitter.lc expected: - https://twitter.lc - description: li is a valid country tld text: https://twitter.li expected: - https://twitter.li - description: lk is a valid country tld text: https://twitter.lk expected: - https://twitter.lk - description: lr is a valid country tld text: https://twitter.lr expected: - https://twitter.lr - description: ls is a valid country tld text: https://twitter.ls expected: - https://twitter.ls - description: lt is a valid country tld text: https://twitter.lt expected: - https://twitter.lt - description: lu is a valid country tld text: https://twitter.lu expected: - https://twitter.lu - description: lv is a valid country tld text: https://twitter.lv expected: - https://twitter.lv - description: ly is a valid country tld text: https://twitter.ly expected: - https://twitter.ly - description: ma is a valid country tld text: https://twitter.ma expected: - https://twitter.ma - description: mc is a valid country tld text: https://twitter.mc expected: - https://twitter.mc - description: md is a valid country tld text: https://twitter.md expected: - https://twitter.md - description: me is a valid country tld text: https://twitter.me expected: - https://twitter.me - description: mf is a valid country tld text: https://twitter.mf expected: - https://twitter.mf - description: mg is a valid country tld text: https://twitter.mg expected: - https://twitter.mg - description: mh is a valid country tld text: https://twitter.mh expected: - https://twitter.mh - description: mk is a valid country tld text: https://twitter.mk expected: - https://twitter.mk - description: ml is a valid country tld text: https://twitter.ml expected: - https://twitter.ml - description: mm is a valid country tld text: https://twitter.mm expected: - https://twitter.mm - description: mn is a valid country tld text: https://twitter.mn expected: - https://twitter.mn - description: mo is a valid country tld text: 
https://twitter.mo expected: - https://twitter.mo - description: mp is a valid country tld text: https://twitter.mp expected: - https://twitter.mp - description: mq is a valid country tld text: https://twitter.mq expected: - https://twitter.mq - description: mr is a valid country tld text: https://twitter.mr expected: - https://twitter.mr - description: ms is a valid country tld text: https://twitter.ms expected: - https://twitter.ms - description: mt is a valid country tld text: https://twitter.mt expected: - https://twitter.mt - description: mu is a valid country tld text: https://twitter.mu expected: - https://twitter.mu - description: mv is a valid country tld text: https://twitter.mv expected: - https://twitter.mv - description: mw is a valid country tld text: https://twitter.mw expected: - https://twitter.mw - description: mx is a valid country tld text: https://twitter.mx expected: - https://twitter.mx - description: my is a valid country tld text: https://twitter.my expected: - https://twitter.my - description: mz is a valid country tld text: https://twitter.mz expected: - https://twitter.mz - description: na is a valid country tld text: https://twitter.na expected: - https://twitter.na - description: nc is a valid country tld text: https://twitter.nc expected: - https://twitter.nc - description: ne is a valid country tld text: https://twitter.ne expected: - https://twitter.ne - description: nf is a valid country tld text: https://twitter.nf expected: - https://twitter.nf - description: ng is a valid country tld text: https://twitter.ng expected: - https://twitter.ng - description: ni is a valid country tld text: https://twitter.ni expected: - https://twitter.ni - description: nl is a valid country tld text: https://twitter.nl expected: - https://twitter.nl - description: no is a valid country tld text: https://twitter.no expected: - https://twitter.no - description: np is a valid country tld text: https://twitter.np expected: - https://twitter.np - 
description: nr is a valid country tld text: https://twitter.nr expected: - https://twitter.nr - description: nu is a valid country tld text: https://twitter.nu expected: - https://twitter.nu - description: nz is a valid country tld text: https://twitter.nz expected: - https://twitter.nz - description: om is a valid country tld text: https://twitter.om expected: - https://twitter.om - description: pa is a valid country tld text: https://twitter.pa expected: - https://twitter.pa - description: pe is a valid country tld text: https://twitter.pe expected: - https://twitter.pe - description: pf is a valid country tld text: https://twitter.pf expected: - https://twitter.pf - description: pg is a valid country tld text: https://twitter.pg expected: - https://twitter.pg - description: ph is a valid country tld text: https://twitter.ph expected: - https://twitter.ph - description: pk is a valid country tld text: https://twitter.pk expected: - https://twitter.pk - description: pl is a valid country tld text: https://twitter.pl expected: - https://twitter.pl - description: pm is a valid country tld text: https://twitter.pm expected: - https://twitter.pm - description: pn is a valid country tld text: https://twitter.pn expected: - https://twitter.pn - description: pr is a valid country tld text: https://twitter.pr expected: - https://twitter.pr - description: ps is a valid country tld text: https://twitter.ps expected: - https://twitter.ps - description: pt is a valid country tld text: https://twitter.pt expected: - https://twitter.pt - description: pw is a valid country tld text: https://twitter.pw expected: - https://twitter.pw - description: py is a valid country tld text: https://twitter.py expected: - https://twitter.py - description: qa is a valid country tld text: https://twitter.qa expected: - https://twitter.qa - description: re is a valid country tld text: https://twitter.re expected: - https://twitter.re - description: ro is a valid country tld text: 
https://twitter.ro expected: - https://twitter.ro - description: rs is a valid country tld text: https://twitter.rs expected: - https://twitter.rs - description: ru is a valid country tld text: https://twitter.ru expected: - https://twitter.ru - description: rw is a valid country tld text: https://twitter.rw expected: - https://twitter.rw - description: sa is a valid country tld text: https://twitter.sa expected: - https://twitter.sa - description: sb is a valid country tld text: https://twitter.sb expected: - https://twitter.sb - description: sc is a valid country tld text: https://twitter.sc expected: - https://twitter.sc - description: sd is a valid country tld text: https://twitter.sd expected: - https://twitter.sd - description: se is a valid country tld text: https://twitter.se expected: - https://twitter.se - description: sg is a valid country tld text: https://twitter.sg expected: - https://twitter.sg - description: sh is a valid country tld text: https://twitter.sh expected: - https://twitter.sh - description: si is a valid country tld text: https://twitter.si expected: - https://twitter.si - description: sj is a valid country tld text: https://twitter.sj expected: - https://twitter.sj - description: sk is a valid country tld text: https://twitter.sk expected: - https://twitter.sk - description: sl is a valid country tld text: https://twitter.sl expected: - https://twitter.sl - description: sm is a valid country tld text: https://twitter.sm expected: - https://twitter.sm - description: sn is a valid country tld text: https://twitter.sn expected: - https://twitter.sn - description: so is a valid country tld text: https://twitter.so expected: - https://twitter.so - description: sr is a valid country tld text: https://twitter.sr expected: - https://twitter.sr - description: ss is a valid country tld text: https://twitter.ss expected: - https://twitter.ss - description: st is a valid country tld text: https://twitter.st expected: - https://twitter.st - 
description: su is a valid country tld text: https://twitter.su expected: - https://twitter.su - description: sv is a valid country tld text: https://twitter.sv expected: - https://twitter.sv - description: sx is a valid country tld text: https://twitter.sx expected: - https://twitter.sx - description: sy is a valid country tld text: https://twitter.sy expected: - https://twitter.sy - description: sz is a valid country tld text: https://twitter.sz expected: - https://twitter.sz - description: tc is a valid country tld text: https://twitter.tc expected: - https://twitter.tc - description: td is a valid country tld text: https://twitter.td expected: - https://twitter.td - description: tf is a valid country tld text: https://twitter.tf expected: - https://twitter.tf - description: tg is a valid country tld text: https://twitter.tg expected: - https://twitter.tg - description: th is a valid country tld text: https://twitter.th expected: - https://twitter.th - description: tj is a valid country tld text: https://twitter.tj expected: - https://twitter.tj - description: tk is a valid country tld text: https://twitter.tk expected: - https://twitter.tk - description: tl is a valid country tld text: https://twitter.tl expected: - https://twitter.tl - description: tm is a valid country tld text: https://twitter.tm expected: - https://twitter.tm - description: tn is a valid country tld text: https://twitter.tn expected: - https://twitter.tn - description: to is a valid country tld text: https://twitter.to expected: - https://twitter.to - description: tp is a valid country tld text: https://twitter.tp expected: - https://twitter.tp - description: tr is a valid country tld text: https://twitter.tr expected: - https://twitter.tr - description: tt is a valid country tld text: https://twitter.tt expected: - https://twitter.tt - description: tv is a valid country tld text: https://twitter.tv expected: - https://twitter.tv - description: tw is a valid country tld text: 
https://twitter.tw expected: - https://twitter.tw - description: tz is a valid country tld text: https://twitter.tz expected: - https://twitter.tz - description: ua is a valid country tld text: https://twitter.ua expected: - https://twitter.ua - description: ug is a valid country tld text: https://twitter.ug expected: - https://twitter.ug - description: uk is a valid country tld text: https://twitter.uk expected: - https://twitter.uk - description: um is a valid country tld text: https://twitter.um expected: - https://twitter.um - description: us is a valid country tld text: https://twitter.us expected: - https://twitter.us - description: uy is a valid country tld text: https://twitter.uy expected: - https://twitter.uy - description: uz is a valid country tld text: https://twitter.uz expected: - https://twitter.uz - description: va is a valid country tld text: https://twitter.va expected: - https://twitter.va - description: vc is a valid country tld text: https://twitter.vc expected: - https://twitter.vc - description: ve is a valid country tld text: https://twitter.ve expected: - https://twitter.ve - description: vg is a valid country tld text: https://twitter.vg expected: - https://twitter.vg - description: vi is a valid country tld text: https://twitter.vi expected: - https://twitter.vi - description: vn is a valid country tld text: https://twitter.vn expected: - https://twitter.vn - description: vu is a valid country tld text: https://twitter.vu expected: - https://twitter.vu - description: wf is a valid country tld text: https://twitter.wf expected: - https://twitter.wf - description: ws is a valid country tld text: https://twitter.ws expected: - https://twitter.ws - description: ye is a valid country tld text: https://twitter.ye expected: - https://twitter.ye - description: yt is a valid country tld text: https://twitter.yt expected: - https://twitter.yt - description: za is a valid country tld text: https://twitter.za expected: - https://twitter.za - 
description: zm is a valid country tld text: https://twitter.zm expected: - https://twitter.zm - description: zw is a valid country tld text: https://twitter.zw expected: - https://twitter.zw - description: "ελ is a valid country tld" text: https://twitter.ελ expected: - https://twitter.ελ - description: "бел is a valid country tld" text: https://twitter.бел expected: - https://twitter.бел - description: "мкд is a valid country tld" text: https://twitter.мкд expected: - https://twitter.мкд - description: "мон is a valid country tld" text: https://twitter.мон expected: - https://twitter.мон - description: "рф is a valid country tld" text: https://twitter.рф expected: - https://twitter.рф - description: "срб is a valid country tld" text: https://twitter.срб expected: - https://twitter.срб - description: "укр is a valid country tld" text: https://twitter.укр expected: - https://twitter.укр - description: "қаз is a valid country tld" text: https://twitter.қаз expected: - https://twitter.қаз - description: "հայ is a valid country tld" text: https://twitter.հայ expected: - https://twitter.հայ - description: "الاردن is a valid country tld" text: https://twitter.الاردن expected: - https://twitter.الاردن - description: "الجزائر is a valid country tld" text: https://twitter.الجزائر expected: - https://twitter.الجزائر - description: "السعودية is a valid country tld" text: https://twitter.السعودية expected: - https://twitter.السعودية - description: "المغرب is a valid country tld" text: https://twitter.المغرب expected: - https://twitter.المغرب - description: "امارات is a valid country tld" text: https://twitter.امارات expected: - https://twitter.امارات - description: "ایران is a valid country tld" text: https://twitter.ایران expected: - https://twitter.ایران - description: "بھارت is a valid country tld" text: https://twitter.بھارت expected: - https://twitter.بھارت - description: "تونس is a valid country tld" text: https://twitter.تونس expected: - https://twitter.تونس - 
description: "سودان is a valid country tld" text: https://twitter.سودان expected: - https://twitter.سودان - description: "سورية is a valid country tld" text: https://twitter.سورية expected: - https://twitter.سورية - description: "عراق is a valid country tld" text: https://twitter.عراق expected: - https://twitter.عراق - description: "عمان is a valid country tld" text: https://twitter.عمان expected: - https://twitter.عمان - description: "فلسطين is a valid country tld" text: https://twitter.فلسطين expected: - https://twitter.فلسطين - description: "قطر is a valid country tld" text: https://twitter.قطر expected: - https://twitter.قطر - description: "مصر is a valid country tld" text: https://twitter.مصر expected: - https://twitter.مصر - description: "مليسيا is a valid country tld" text: https://twitter.مليسيا expected: - https://twitter.مليسيا - description: "پاکستان is a valid country tld" text: https://twitter.پاکستان expected: - https://twitter.پاکستان - description: "भारत is a valid country tld" text: https://twitter.भारत expected: - https://twitter.भारत - description: "বাংলা is a valid country tld" text: https://twitter.বাংলা expected: - https://twitter.বাংলা - description: "ভারত is a valid country tld" text: https://twitter.ভারত expected: - https://twitter.ভারত - description: "ਭਾਰਤ is a valid country tld" text: https://twitter.ਭਾਰਤ expected: - https://twitter.ਭਾਰਤ - description: "ભારત is a valid country tld" text: https://twitter.ભારત expected: - https://twitter.ભારત - description: "இந்தியா is a valid country tld" text: https://twitter.இந்தியா expected: - https://twitter.இந்தியா - description: "இலங்கை is a valid country tld" text: https://twitter.இலங்கை expected: - https://twitter.இலங்கை - description: "சிங்கப்பூர் is a valid country tld" text: https://twitter.சிங்கப்பூர் expected: - https://twitter.சிங்கப்பூர் - description: "భారత్ is a valid country tld" text: https://twitter.భారత్ expected: - https://twitter.భారత్ - description: "ලංකා is a valid country tld" 
text: https://twitter.ලංකා expected: - https://twitter.ලංකා - description: "ไทย is a valid country tld" text: https://twitter.ไทย expected: - https://twitter.ไทย - description: "გე is a valid country tld" text: https://twitter.გე expected: - https://twitter.გე - description: "中国 is a valid country tld" text: https://twitter.中国 expected: - https://twitter.中国 - description: "中國 is a valid country tld" text: https://twitter.中國 expected: - https://twitter.中國 - description: "台湾 is a valid country tld" text: https://twitter.台湾 expected: - https://twitter.台湾 - description: "台灣 is a valid country tld" text: https://twitter.台灣 expected: - https://twitter.台灣 - description: "新加坡 is a valid country tld" text: https://twitter.新加坡 expected: - https://twitter.新加坡 - description: "澳門 is a valid country tld" text: https://twitter.澳門 expected: - https://twitter.澳門 - description: "香港 is a valid country tld" text: https://twitter.香港 expected: - https://twitter.香港 - description: "한국 is a valid country tld" text: https://twitter.한국 expected: - https://twitter.한국 generic: - description: abb is a valid generic tld text: https://twitter.abb expected: - https://twitter.abb - description: abbott is a valid generic tld text: https://twitter.abbott expected: - https://twitter.abbott - description: abogado is a valid generic tld text: https://twitter.abogado expected: - https://twitter.abogado - description: academy is a valid generic tld text: https://twitter.academy expected: - https://twitter.academy - description: accenture is a valid generic tld text: https://twitter.accenture expected: - https://twitter.accenture - description: accountant is a valid generic tld text: https://twitter.accountant expected: - https://twitter.accountant - description: accountants is a valid generic tld text: https://twitter.accountants expected: - https://twitter.accountants - description: aco is a valid generic tld text: https://twitter.aco expected: - https://twitter.aco - description: active is a valid 
generic tld text: https://twitter.active expected: - https://twitter.active - description: actor is a valid generic tld text: https://twitter.actor expected: - https://twitter.actor - description: ads is a valid generic tld text: https://twitter.ads expected: - https://twitter.ads - description: adult is a valid generic tld text: https://twitter.adult expected: - https://twitter.adult - description: aeg is a valid generic tld text: https://twitter.aeg expected: - https://twitter.aeg - description: aero is a valid generic tld text: https://twitter.aero expected: - https://twitter.aero - description: afl is a valid generic tld text: https://twitter.afl expected: - https://twitter.afl - description: agency is a valid generic tld text: https://twitter.agency expected: - https://twitter.agency - description: aig is a valid generic tld text: https://twitter.aig expected: - https://twitter.aig - description: airforce is a valid generic tld text: https://twitter.airforce expected: - https://twitter.airforce - description: airtel is a valid generic tld text: https://twitter.airtel expected: - https://twitter.airtel - description: allfinanz is a valid generic tld text: https://twitter.allfinanz expected: - https://twitter.allfinanz - description: alsace is a valid generic tld text: https://twitter.alsace expected: - https://twitter.alsace - description: amsterdam is a valid generic tld text: https://twitter.amsterdam expected: - https://twitter.amsterdam - description: android is a valid generic tld text: https://twitter.android expected: - https://twitter.android - description: apartments is a valid generic tld text: https://twitter.apartments expected: - https://twitter.apartments - description: app is a valid generic tld text: https://twitter.app expected: - https://twitter.app - description: aquarelle is a valid generic tld text: https://twitter.aquarelle expected: - https://twitter.aquarelle - description: archi is a valid generic tld text: https://twitter.archi 
expected: - https://twitter.archi - description: army is a valid generic tld text: https://twitter.army expected: - https://twitter.army - description: arpa is a valid generic tld text: https://twitter.arpa expected: - https://twitter.arpa - description: asia is a valid generic tld text: https://twitter.asia expected: - https://twitter.asia - description: associates is a valid generic tld text: https://twitter.associates expected: - https://twitter.associates - description: attorney is a valid generic tld text: https://twitter.attorney expected: - https://twitter.attorney - description: auction is a valid generic tld text: https://twitter.auction expected: - https://twitter.auction - description: audio is a valid generic tld text: https://twitter.audio expected: - https://twitter.audio - description: auto is a valid generic tld text: https://twitter.auto expected: - https://twitter.auto - description: autos is a valid generic tld text: https://twitter.autos expected: - https://twitter.autos - description: axa is a valid generic tld text: https://twitter.axa expected: - https://twitter.axa - description: azure is a valid generic tld text: https://twitter.azure expected: - https://twitter.azure - description: band is a valid generic tld text: https://twitter.band expected: - https://twitter.band - description: bank is a valid generic tld text: https://twitter.bank expected: - https://twitter.bank - description: bar is a valid generic tld text: https://twitter.bar expected: - https://twitter.bar - description: barcelona is a valid generic tld text: https://twitter.barcelona expected: - https://twitter.barcelona - description: barclaycard is a valid generic tld text: https://twitter.barclaycard expected: - https://twitter.barclaycard - description: barclays is a valid generic tld text: https://twitter.barclays expected: - https://twitter.barclays - description: bargains is a valid generic tld text: https://twitter.bargains expected: - https://twitter.bargains - 
description: bauhaus is a valid generic tld text: https://twitter.bauhaus expected: - https://twitter.bauhaus - description: bayern is a valid generic tld text: https://twitter.bayern expected: - https://twitter.bayern - description: bbc is a valid generic tld text: https://twitter.bbc expected: - https://twitter.bbc - description: bbva is a valid generic tld text: https://twitter.bbva expected: - https://twitter.bbva - description: bcn is a valid generic tld text: https://twitter.bcn expected: - https://twitter.bcn - description: beer is a valid generic tld text: https://twitter.beer expected: - https://twitter.beer - description: bentley is a valid generic tld text: https://twitter.bentley expected: - https://twitter.bentley - description: berlin is a valid generic tld text: https://twitter.berlin expected: - https://twitter.berlin - description: best is a valid generic tld text: https://twitter.best expected: - https://twitter.best - description: bet is a valid generic tld text: https://twitter.bet expected: - https://twitter.bet - description: bharti is a valid generic tld text: https://twitter.bharti expected: - https://twitter.bharti - description: bible is a valid generic tld text: https://twitter.bible expected: - https://twitter.bible - description: bid is a valid generic tld text: https://twitter.bid expected: - https://twitter.bid - description: bike is a valid generic tld text: https://twitter.bike expected: - https://twitter.bike - description: bing is a valid generic tld text: https://twitter.bing expected: - https://twitter.bing - description: bingo is a valid generic tld text: https://twitter.bingo expected: - https://twitter.bingo - description: bio is a valid generic tld text: https://twitter.bio expected: - https://twitter.bio - description: biz is a valid generic tld text: https://twitter.biz expected: - https://twitter.biz - description: black is a valid generic tld text: https://twitter.black expected: - https://twitter.black - description: 
blackfriday is a valid generic tld text: https://twitter.blackfriday expected: - https://twitter.blackfriday - description: bloomberg is a valid generic tld text: https://twitter.bloomberg expected: - https://twitter.bloomberg - description: blue is a valid generic tld text: https://twitter.blue expected: - https://twitter.blue - description: bmw is a valid generic tld text: https://twitter.bmw expected: - https://twitter.bmw - description: bnl is a valid generic tld text: https://twitter.bnl expected: - https://twitter.bnl - description: bnpparibas is a valid generic tld text: https://twitter.bnpparibas expected: - https://twitter.bnpparibas - description: boats is a valid generic tld text: https://twitter.boats expected: - https://twitter.boats - description: bond is a valid generic tld text: https://twitter.bond expected: - https://twitter.bond - description: boo is a valid generic tld text: https://twitter.boo expected: - https://twitter.boo - description: boots is a valid generic tld text: https://twitter.boots expected: - https://twitter.boots - description: boutique is a valid generic tld text: https://twitter.boutique expected: - https://twitter.boutique - description: bradesco is a valid generic tld text: https://twitter.bradesco expected: - https://twitter.bradesco - description: bridgestone is a valid generic tld text: https://twitter.bridgestone expected: - https://twitter.bridgestone - description: broker is a valid generic tld text: https://twitter.broker expected: - https://twitter.broker - description: brother is a valid generic tld text: https://twitter.brother expected: - https://twitter.brother - description: brussels is a valid generic tld text: https://twitter.brussels expected: - https://twitter.brussels - description: budapest is a valid generic tld text: https://twitter.budapest expected: - https://twitter.budapest - description: build is a valid generic tld text: https://twitter.build expected: - https://twitter.build - description: 
builders is a valid generic tld text: https://twitter.builders expected: - https://twitter.builders - description: business is a valid generic tld text: https://twitter.business expected: - https://twitter.business - description: buzz is a valid generic tld text: https://twitter.buzz expected: - https://twitter.buzz - description: bzh is a valid generic tld text: https://twitter.bzh expected: - https://twitter.bzh - description: cab is a valid generic tld text: https://twitter.cab expected: - https://twitter.cab - description: cafe is a valid generic tld text: https://twitter.cafe expected: - https://twitter.cafe - description: cal is a valid generic tld text: https://twitter.cal expected: - https://twitter.cal - description: camera is a valid generic tld text: https://twitter.camera expected: - https://twitter.camera - description: camp is a valid generic tld text: https://twitter.camp expected: - https://twitter.camp - description: cancerresearch is a valid generic tld text: https://twitter.cancerresearch expected: - https://twitter.cancerresearch - description: canon is a valid generic tld text: https://twitter.canon expected: - https://twitter.canon - description: capetown is a valid generic tld text: https://twitter.capetown expected: - https://twitter.capetown - description: capital is a valid generic tld text: https://twitter.capital expected: - https://twitter.capital - description: caravan is a valid generic tld text: https://twitter.caravan expected: - https://twitter.caravan - description: cards is a valid generic tld text: https://twitter.cards expected: - https://twitter.cards - description: care is a valid generic tld text: https://twitter.care expected: - https://twitter.care - description: career is a valid generic tld text: https://twitter.career expected: - https://twitter.career - description: careers is a valid generic tld text: https://twitter.careers expected: - https://twitter.careers - description: cars is a valid generic tld text: 
https://twitter.cars expected: - https://twitter.cars - description: cartier is a valid generic tld text: https://twitter.cartier expected: - https://twitter.cartier - description: casa is a valid generic tld text: https://twitter.casa expected: - https://twitter.casa - description: cash is a valid generic tld text: https://twitter.cash expected: - https://twitter.cash - description: casino is a valid generic tld text: https://twitter.casino expected: - https://twitter.casino - description: cat is a valid generic tld text: https://twitter.cat expected: - https://twitter.cat - description: catering is a valid generic tld text: https://twitter.catering expected: - https://twitter.catering - description: cba is a valid generic tld text: https://twitter.cba expected: - https://twitter.cba - description: cbn is a valid generic tld text: https://twitter.cbn expected: - https://twitter.cbn - description: ceb is a valid generic tld text: https://twitter.ceb expected: - https://twitter.ceb - description: center is a valid generic tld text: https://twitter.center expected: - https://twitter.center - description: ceo is a valid generic tld text: https://twitter.ceo expected: - https://twitter.ceo - description: cern is a valid generic tld text: https://twitter.cern expected: - https://twitter.cern - description: cfa is a valid generic tld text: https://twitter.cfa expected: - https://twitter.cfa - description: cfd is a valid generic tld text: https://twitter.cfd expected: - https://twitter.cfd - description: chanel is a valid generic tld text: https://twitter.chanel expected: - https://twitter.chanel - description: channel is a valid generic tld text: https://twitter.channel expected: - https://twitter.channel - description: chat is a valid generic tld text: https://twitter.chat expected: - https://twitter.chat - description: cheap is a valid generic tld text: https://twitter.cheap expected: - https://twitter.cheap - description: chloe is a valid generic tld text: 
https://twitter.chloe expected: - https://twitter.chloe - description: christmas is a valid generic tld text: https://twitter.christmas expected: - https://twitter.christmas - description: chrome is a valid generic tld text: https://twitter.chrome expected: - https://twitter.chrome - description: church is a valid generic tld text: https://twitter.church expected: - https://twitter.church - description: cisco is a valid generic tld text: https://twitter.cisco expected: - https://twitter.cisco - description: citic is a valid generic tld text: https://twitter.citic expected: - https://twitter.citic - description: city is a valid generic tld text: https://twitter.city expected: - https://twitter.city - description: claims is a valid generic tld text: https://twitter.claims expected: - https://twitter.claims - description: cleaning is a valid generic tld text: https://twitter.cleaning expected: - https://twitter.cleaning - description: click is a valid generic tld text: https://twitter.click expected: - https://twitter.click - description: clinic is a valid generic tld text: https://twitter.clinic expected: - https://twitter.clinic - description: clothing is a valid generic tld text: https://twitter.clothing expected: - https://twitter.clothing - description: cloud is a valid generic tld text: https://twitter.cloud expected: - https://twitter.cloud - description: club is a valid generic tld text: https://twitter.club expected: - https://twitter.club - description: coach is a valid generic tld text: https://twitter.coach expected: - https://twitter.coach - description: codes is a valid generic tld text: https://twitter.codes expected: - https://twitter.codes - description: coffee is a valid generic tld text: https://twitter.coffee expected: - https://twitter.coffee - description: college is a valid generic tld text: https://twitter.college expected: - https://twitter.college - description: cologne is a valid generic tld text: https://twitter.cologne expected: - 
https://twitter.cologne - description: com is a valid generic tld text: https://twitter.com expected: - https://twitter.com - description: commbank is a valid generic tld text: https://twitter.commbank expected: - https://twitter.commbank - description: community is a valid generic tld text: https://twitter.community expected: - https://twitter.community - description: company is a valid generic tld text: https://twitter.company expected: - https://twitter.company - description: computer is a valid generic tld text: https://twitter.computer expected: - https://twitter.computer - description: condos is a valid generic tld text: https://twitter.condos expected: - https://twitter.condos - description: construction is a valid generic tld text: https://twitter.construction expected: - https://twitter.construction - description: consulting is a valid generic tld text: https://twitter.consulting expected: - https://twitter.consulting - description: contractors is a valid generic tld text: https://twitter.contractors expected: - https://twitter.contractors - description: cooking is a valid generic tld text: https://twitter.cooking expected: - https://twitter.cooking - description: cool is a valid generic tld text: https://twitter.cool expected: - https://twitter.cool - description: coop is a valid generic tld text: https://twitter.coop expected: - https://twitter.coop - description: corsica is a valid generic tld text: https://twitter.corsica expected: - https://twitter.corsica - description: country is a valid generic tld text: https://twitter.country expected: - https://twitter.country - description: coupons is a valid generic tld text: https://twitter.coupons expected: - https://twitter.coupons - description: courses is a valid generic tld text: https://twitter.courses expected: - https://twitter.courses - description: credit is a valid generic tld text: https://twitter.credit expected: - https://twitter.credit - description: creditcard is a valid generic tld text: 
https://twitter.creditcard expected: - https://twitter.creditcard - description: cricket is a valid generic tld text: https://twitter.cricket expected: - https://twitter.cricket - description: crown is a valid generic tld text: https://twitter.crown expected: - https://twitter.crown - description: crs is a valid generic tld text: https://twitter.crs expected: - https://twitter.crs - description: cruises is a valid generic tld text: https://twitter.cruises expected: - https://twitter.cruises - description: cuisinella is a valid generic tld text: https://twitter.cuisinella expected: - https://twitter.cuisinella - description: cymru is a valid generic tld text: https://twitter.cymru expected: - https://twitter.cymru - description: cyou is a valid generic tld text: https://twitter.cyou expected: - https://twitter.cyou - description: dabur is a valid generic tld text: https://twitter.dabur expected: - https://twitter.dabur - description: dad is a valid generic tld text: https://twitter.dad expected: - https://twitter.dad - description: dance is a valid generic tld text: https://twitter.dance expected: - https://twitter.dance - description: date is a valid generic tld text: https://twitter.date expected: - https://twitter.date - description: dating is a valid generic tld text: https://twitter.dating expected: - https://twitter.dating - description: datsun is a valid generic tld text: https://twitter.datsun expected: - https://twitter.datsun - description: day is a valid generic tld text: https://twitter.day expected: - https://twitter.day - description: dclk is a valid generic tld text: https://twitter.dclk expected: - https://twitter.dclk - description: deals is a valid generic tld text: https://twitter.deals expected: - https://twitter.deals - description: degree is a valid generic tld text: https://twitter.degree expected: - https://twitter.degree - description: delivery is a valid generic tld text: https://twitter.delivery expected: - https://twitter.delivery - 
description: delta is a valid generic tld text: https://twitter.delta expected: - https://twitter.delta - description: democrat is a valid generic tld text: https://twitter.democrat expected: - https://twitter.democrat - description: dental is a valid generic tld text: https://twitter.dental expected: - https://twitter.dental - description: dentist is a valid generic tld text: https://twitter.dentist expected: - https://twitter.dentist - description: desi is a valid generic tld text: https://twitter.desi expected: - https://twitter.desi - description: design is a valid generic tld text: https://twitter.design expected: - https://twitter.design - description: dev is a valid generic tld text: https://twitter.dev expected: - https://twitter.dev - description: diamonds is a valid generic tld text: https://twitter.diamonds expected: - https://twitter.diamonds - description: diet is a valid generic tld text: https://twitter.diet expected: - https://twitter.diet - description: digital is a valid generic tld text: https://twitter.digital expected: - https://twitter.digital - description: direct is a valid generic tld text: https://twitter.direct expected: - https://twitter.direct - description: directory is a valid generic tld text: https://twitter.directory expected: - https://twitter.directory - description: discount is a valid generic tld text: https://twitter.discount expected: - https://twitter.discount - description: dnp is a valid generic tld text: https://twitter.dnp expected: - https://twitter.dnp - description: docs is a valid generic tld text: https://twitter.docs expected: - https://twitter.docs - description: dog is a valid generic tld text: https://twitter.dog expected: - https://twitter.dog - description: doha is a valid generic tld text: https://twitter.doha expected: - https://twitter.doha - description: domains is a valid generic tld text: https://twitter.domains expected: - https://twitter.domains - description: doosan is a valid generic tld text: 
https://twitter.doosan expected: - https://twitter.doosan - description: download is a valid generic tld text: https://twitter.download expected: - https://twitter.download - description: drive is a valid generic tld text: https://twitter.drive expected: - https://twitter.drive - description: durban is a valid generic tld text: https://twitter.durban expected: - https://twitter.durban - description: dvag is a valid generic tld text: https://twitter.dvag expected: - https://twitter.dvag - description: earth is a valid generic tld text: https://twitter.earth expected: - https://twitter.earth - description: eat is a valid generic tld text: https://twitter.eat expected: - https://twitter.eat - description: edu is a valid generic tld text: https://twitter.edu expected: - https://twitter.edu - description: education is a valid generic tld text: https://twitter.education expected: - https://twitter.education - description: email is a valid generic tld text: https://twitter.email expected: - https://twitter.email - description: emerck is a valid generic tld text: https://twitter.emerck expected: - https://twitter.emerck - description: energy is a valid generic tld text: https://twitter.energy expected: - https://twitter.energy - description: engineer is a valid generic tld text: https://twitter.engineer expected: - https://twitter.engineer - description: engineering is a valid generic tld text: https://twitter.engineering expected: - https://twitter.engineering - description: enterprises is a valid generic tld text: https://twitter.enterprises expected: - https://twitter.enterprises - description: epson is a valid generic tld text: https://twitter.epson expected: - https://twitter.epson - description: equipment is a valid generic tld text: https://twitter.equipment expected: - https://twitter.equipment - description: erni is a valid generic tld text: https://twitter.erni expected: - https://twitter.erni - description: esq is a valid generic tld text: https://twitter.esq 
expected: - https://twitter.esq - description: estate is a valid generic tld text: https://twitter.estate expected: - https://twitter.estate - description: eurovision is a valid generic tld text: https://twitter.eurovision expected: - https://twitter.eurovision - description: eus is a valid generic tld text: https://twitter.eus expected: - https://twitter.eus - description: events is a valid generic tld text: https://twitter.events expected: - https://twitter.events - description: everbank is a valid generic tld text: https://twitter.everbank expected: - https://twitter.everbank - description: exchange is a valid generic tld text: https://twitter.exchange expected: - https://twitter.exchange - description: expert is a valid generic tld text: https://twitter.expert expected: - https://twitter.expert - description: exposed is a valid generic tld text: https://twitter.exposed expected: - https://twitter.exposed - description: express is a valid generic tld text: https://twitter.express expected: - https://twitter.express - description: fage is a valid generic tld text: https://twitter.fage expected: - https://twitter.fage - description: fail is a valid generic tld text: https://twitter.fail expected: - https://twitter.fail - description: faith is a valid generic tld text: https://twitter.faith expected: - https://twitter.faith - description: family is a valid generic tld text: https://twitter.family expected: - https://twitter.family - description: fan is a valid generic tld text: https://twitter.fan expected: - https://twitter.fan - description: fans is a valid generic tld text: https://twitter.fans expected: - https://twitter.fans - description: farm is a valid generic tld text: https://twitter.farm expected: - https://twitter.farm - description: fashion is a valid generic tld text: https://twitter.fashion expected: - https://twitter.fashion - description: feedback is a valid generic tld text: https://twitter.feedback expected: - https://twitter.feedback - 
description: film is a valid generic tld text: https://twitter.film expected: - https://twitter.film - description: finance is a valid generic tld text: https://twitter.finance expected: - https://twitter.finance - description: financial is a valid generic tld text: https://twitter.financial expected: - https://twitter.financial - description: firmdale is a valid generic tld text: https://twitter.firmdale expected: - https://twitter.firmdale - description: fish is a valid generic tld text: https://twitter.fish expected: - https://twitter.fish - description: fishing is a valid generic tld text: https://twitter.fishing expected: - https://twitter.fishing - description: fit is a valid generic tld text: https://twitter.fit expected: - https://twitter.fit - description: fitness is a valid generic tld text: https://twitter.fitness expected: - https://twitter.fitness - description: flights is a valid generic tld text: https://twitter.flights expected: - https://twitter.flights - description: florist is a valid generic tld text: https://twitter.florist expected: - https://twitter.florist - description: flowers is a valid generic tld text: https://twitter.flowers expected: - https://twitter.flowers - description: flsmidth is a valid generic tld text: https://twitter.flsmidth expected: - https://twitter.flsmidth - description: fly is a valid generic tld text: https://twitter.fly expected: - https://twitter.fly - description: foo is a valid generic tld text: https://twitter.foo expected: - https://twitter.foo - description: football is a valid generic tld text: https://twitter.football expected: - https://twitter.football - description: forex is a valid generic tld text: https://twitter.forex expected: - https://twitter.forex - description: forsale is a valid generic tld text: https://twitter.forsale expected: - https://twitter.forsale - description: forum is a valid generic tld text: https://twitter.forum expected: - https://twitter.forum - description: foundation is a valid 
generic tld text: https://twitter.foundation expected: - https://twitter.foundation - description: frl is a valid generic tld text: https://twitter.frl expected: - https://twitter.frl - description: frogans is a valid generic tld text: https://twitter.frogans expected: - https://twitter.frogans - description: fund is a valid generic tld text: https://twitter.fund expected: - https://twitter.fund - description: furniture is a valid generic tld text: https://twitter.furniture expected: - https://twitter.furniture - description: futbol is a valid generic tld text: https://twitter.futbol expected: - https://twitter.futbol - description: fyi is a valid generic tld text: https://twitter.fyi expected: - https://twitter.fyi - description: gal is a valid generic tld text: https://twitter.gal expected: - https://twitter.gal - description: gallery is a valid generic tld text: https://twitter.gallery expected: - https://twitter.gallery - description: game is a valid generic tld text: https://twitter.game expected: - https://twitter.game - description: garden is a valid generic tld text: https://twitter.garden expected: - https://twitter.garden - description: gbiz is a valid generic tld text: https://twitter.gbiz expected: - https://twitter.gbiz - description: gdn is a valid generic tld text: https://twitter.gdn expected: - https://twitter.gdn - description: gent is a valid generic tld text: https://twitter.gent expected: - https://twitter.gent - description: genting is a valid generic tld text: https://twitter.genting expected: - https://twitter.genting - description: ggee is a valid generic tld text: https://twitter.ggee expected: - https://twitter.ggee - description: gift is a valid generic tld text: https://twitter.gift expected: - https://twitter.gift - description: gifts is a valid generic tld text: https://twitter.gifts expected: - https://twitter.gifts - description: gives is a valid generic tld text: https://twitter.gives expected: - https://twitter.gives - 
description: giving is a valid generic tld text: https://twitter.giving expected: - https://twitter.giving - description: glass is a valid generic tld text: https://twitter.glass expected: - https://twitter.glass - description: gle is a valid generic tld text: https://twitter.gle expected: - https://twitter.gle - description: global is a valid generic tld text: https://twitter.global expected: - https://twitter.global - description: globo is a valid generic tld text: https://twitter.globo expected: - https://twitter.globo - description: gmail is a valid generic tld text: https://twitter.gmail expected: - https://twitter.gmail - description: gmo is a valid generic tld text: https://twitter.gmo expected: - https://twitter.gmo - description: gmx is a valid generic tld text: https://twitter.gmx expected: - https://twitter.gmx - description: gold is a valid generic tld text: https://twitter.gold expected: - https://twitter.gold - description: goldpoint is a valid generic tld text: https://twitter.goldpoint expected: - https://twitter.goldpoint - description: golf is a valid generic tld text: https://twitter.golf expected: - https://twitter.golf - description: goo is a valid generic tld text: https://twitter.goo expected: - https://twitter.goo - description: goog is a valid generic tld text: https://twitter.goog expected: - https://twitter.goog - description: google is a valid generic tld text: https://twitter.google expected: - https://twitter.google - description: gop is a valid generic tld text: https://twitter.gop expected: - https://twitter.gop - description: gov is a valid generic tld text: https://twitter.gov expected: - https://twitter.gov - description: graphics is a valid generic tld text: https://twitter.graphics expected: - https://twitter.graphics - description: gratis is a valid generic tld text: https://twitter.gratis expected: - https://twitter.gratis - description: green is a valid generic tld text: https://twitter.green expected: - https://twitter.green 
- description: gripe is a valid generic tld text: https://twitter.gripe expected: - https://twitter.gripe - description: group is a valid generic tld text: https://twitter.group expected: - https://twitter.group - description: guge is a valid generic tld text: https://twitter.guge expected: - https://twitter.guge - description: guide is a valid generic tld text: https://twitter.guide expected: - https://twitter.guide - description: guitars is a valid generic tld text: https://twitter.guitars expected: - https://twitter.guitars - description: guru is a valid generic tld text: https://twitter.guru expected: - https://twitter.guru - description: hamburg is a valid generic tld text: https://twitter.hamburg expected: - https://twitter.hamburg - description: hangout is a valid generic tld text: https://twitter.hangout expected: - https://twitter.hangout - description: haus is a valid generic tld text: https://twitter.haus expected: - https://twitter.haus - description: healthcare is a valid generic tld text: https://twitter.healthcare expected: - https://twitter.healthcare - description: help is a valid generic tld text: https://twitter.help expected: - https://twitter.help - description: here is a valid generic tld text: https://twitter.here expected: - https://twitter.here - description: hermes is a valid generic tld text: https://twitter.hermes expected: - https://twitter.hermes - description: hiphop is a valid generic tld text: https://twitter.hiphop expected: - https://twitter.hiphop - description: hitachi is a valid generic tld text: https://twitter.hitachi expected: - https://twitter.hitachi - description: hiv is a valid generic tld text: https://twitter.hiv expected: - https://twitter.hiv - description: hockey is a valid generic tld text: https://twitter.hockey expected: - https://twitter.hockey - description: holdings is a valid generic tld text: https://twitter.holdings expected: - https://twitter.holdings - description: holiday is a valid generic tld text: 
https://twitter.holiday expected: - https://twitter.holiday - description: homedepot is a valid generic tld text: https://twitter.homedepot expected: - https://twitter.homedepot - description: homes is a valid generic tld text: https://twitter.homes expected: - https://twitter.homes - description: honda is a valid generic tld text: https://twitter.honda expected: - https://twitter.honda - description: horse is a valid generic tld text: https://twitter.horse expected: - https://twitter.horse - description: host is a valid generic tld text: https://twitter.host expected: - https://twitter.host - description: hosting is a valid generic tld text: https://twitter.hosting expected: - https://twitter.hosting - description: hoteles is a valid generic tld text: https://twitter.hoteles expected: - https://twitter.hoteles - description: hotmail is a valid generic tld text: https://twitter.hotmail expected: - https://twitter.hotmail - description: house is a valid generic tld text: https://twitter.house expected: - https://twitter.house - description: how is a valid generic tld text: https://twitter.how expected: - https://twitter.how - description: hsbc is a valid generic tld text: https://twitter.hsbc expected: - https://twitter.hsbc - description: ibm is a valid generic tld text: https://twitter.ibm expected: - https://twitter.ibm - description: icbc is a valid generic tld text: https://twitter.icbc expected: - https://twitter.icbc - description: ice is a valid generic tld text: https://twitter.ice expected: - https://twitter.ice - description: icu is a valid generic tld text: https://twitter.icu expected: - https://twitter.icu - description: ifm is a valid generic tld text: https://twitter.ifm expected: - https://twitter.ifm - description: iinet is a valid generic tld text: https://twitter.iinet expected: - https://twitter.iinet - description: immo is a valid generic tld text: https://twitter.immo expected: - https://twitter.immo - description: immobilien is a valid 
generic tld text: https://twitter.immobilien expected: - https://twitter.immobilien - description: industries is a valid generic tld text: https://twitter.industries expected: - https://twitter.industries - description: infiniti is a valid generic tld text: https://twitter.infiniti expected: - https://twitter.infiniti - description: info is a valid generic tld text: https://twitter.info expected: - https://twitter.info - description: ing is a valid generic tld text: https://twitter.ing expected: - https://twitter.ing - description: ink is a valid generic tld text: https://twitter.ink expected: - https://twitter.ink - description: institute is a valid generic tld text: https://twitter.institute expected: - https://twitter.institute - description: insure is a valid generic tld text: https://twitter.insure expected: - https://twitter.insure - description: int is a valid generic tld text: https://twitter.int expected: - https://twitter.int - description: international is a valid generic tld text: https://twitter.international expected: - https://twitter.international - description: investments is a valid generic tld text: https://twitter.investments expected: - https://twitter.investments - description: ipiranga is a valid generic tld text: https://twitter.ipiranga expected: - https://twitter.ipiranga - description: irish is a valid generic tld text: https://twitter.irish expected: - https://twitter.irish - description: ist is a valid generic tld text: https://twitter.ist expected: - https://twitter.ist - description: istanbul is a valid generic tld text: https://twitter.istanbul expected: - https://twitter.istanbul - description: itau is a valid generic tld text: https://twitter.itau expected: - https://twitter.itau - description: iwc is a valid generic tld text: https://twitter.iwc expected: - https://twitter.iwc - description: java is a valid generic tld text: https://twitter.java expected: - https://twitter.java - description: jcb is a valid generic tld text: 
https://twitter.jcb expected: - https://twitter.jcb - description: jetzt is a valid generic tld text: https://twitter.jetzt expected: - https://twitter.jetzt - description: jewelry is a valid generic tld text: https://twitter.jewelry expected: - https://twitter.jewelry - description: jlc is a valid generic tld text: https://twitter.jlc expected: - https://twitter.jlc - description: jll is a valid generic tld text: https://twitter.jll expected: - https://twitter.jll - description: jobs is a valid generic tld text: https://twitter.jobs expected: - https://twitter.jobs - description: joburg is a valid generic tld text: https://twitter.joburg expected: - https://twitter.joburg - description: jprs is a valid generic tld text: https://twitter.jprs expected: - https://twitter.jprs - description: juegos is a valid generic tld text: https://twitter.juegos expected: - https://twitter.juegos - description: kaufen is a valid generic tld text: https://twitter.kaufen expected: - https://twitter.kaufen - description: kddi is a valid generic tld text: https://twitter.kddi expected: - https://twitter.kddi - description: kim is a valid generic tld text: https://twitter.kim expected: - https://twitter.kim - description: kitchen is a valid generic tld text: https://twitter.kitchen expected: - https://twitter.kitchen - description: kiwi is a valid generic tld text: https://twitter.kiwi expected: - https://twitter.kiwi - description: koeln is a valid generic tld text: https://twitter.koeln expected: - https://twitter.koeln - description: komatsu is a valid generic tld text: https://twitter.komatsu expected: - https://twitter.komatsu - description: krd is a valid generic tld text: https://twitter.krd expected: - https://twitter.krd - description: kred is a valid generic tld text: https://twitter.kred expected: - https://twitter.kred - description: kyoto is a valid generic tld text: https://twitter.kyoto expected: - https://twitter.kyoto - description: lacaixa is a valid generic tld text: 
https://twitter.lacaixa expected: - https://twitter.lacaixa - description: lancaster is a valid generic tld text: https://twitter.lancaster expected: - https://twitter.lancaster - description: land is a valid generic tld text: https://twitter.land expected: - https://twitter.land - description: lasalle is a valid generic tld text: https://twitter.lasalle expected: - https://twitter.lasalle - description: lat is a valid generic tld text: https://twitter.lat expected: - https://twitter.lat - description: latrobe is a valid generic tld text: https://twitter.latrobe expected: - https://twitter.latrobe - description: law is a valid generic tld text: https://twitter.law expected: - https://twitter.law - description: lawyer is a valid generic tld text: https://twitter.lawyer expected: - https://twitter.lawyer - description: lds is a valid generic tld text: https://twitter.lds expected: - https://twitter.lds - description: lease is a valid generic tld text: https://twitter.lease expected: - https://twitter.lease - description: leclerc is a valid generic tld text: https://twitter.leclerc expected: - https://twitter.leclerc - description: legal is a valid generic tld text: https://twitter.legal expected: - https://twitter.legal - description: lexus is a valid generic tld text: https://twitter.lexus expected: - https://twitter.lexus - description: lgbt is a valid generic tld text: https://twitter.lgbt expected: - https://twitter.lgbt - description: liaison is a valid generic tld text: https://twitter.liaison expected: - https://twitter.liaison - description: lidl is a valid generic tld text: https://twitter.lidl expected: - https://twitter.lidl - description: life is a valid generic tld text: https://twitter.life expected: - https://twitter.life - description: lighting is a valid generic tld text: https://twitter.lighting expected: - https://twitter.lighting - description: limited is a valid generic tld text: https://twitter.limited expected: - https://twitter.limited - 
description: limo is a valid generic tld text: https://twitter.limo expected: - https://twitter.limo - description: link is a valid generic tld text: https://twitter.link expected: - https://twitter.link - description: live is a valid generic tld text: https://twitter.live expected: - https://twitter.live - description: lixil is a valid generic tld text: https://twitter.lixil expected: - https://twitter.lixil - description: loan is a valid generic tld text: https://twitter.loan expected: - https://twitter.loan - description: loans is a valid generic tld text: https://twitter.loans expected: - https://twitter.loans - description: lol is a valid generic tld text: https://twitter.lol expected: - https://twitter.lol - description: london is a valid generic tld text: https://twitter.london expected: - https://twitter.london - description: lotte is a valid generic tld text: https://twitter.lotte expected: - https://twitter.lotte - description: lotto is a valid generic tld text: https://twitter.lotto expected: - https://twitter.lotto - description: love is a valid generic tld text: https://twitter.love expected: - https://twitter.love - description: ltda is a valid generic tld text: https://twitter.ltda expected: - https://twitter.ltda - description: lupin is a valid generic tld text: https://twitter.lupin expected: - https://twitter.lupin - description: luxe is a valid generic tld text: https://twitter.luxe expected: - https://twitter.luxe - description: luxury is a valid generic tld text: https://twitter.luxury expected: - https://twitter.luxury - description: madrid is a valid generic tld text: https://twitter.madrid expected: - https://twitter.madrid - description: maif is a valid generic tld text: https://twitter.maif expected: - https://twitter.maif - description: maison is a valid generic tld text: https://twitter.maison expected: - https://twitter.maison - description: man is a valid generic tld text: https://twitter.man expected: - https://twitter.man - 
description: management is a valid generic tld text: https://twitter.management expected: - https://twitter.management - description: mango is a valid generic tld text: https://twitter.mango expected: - https://twitter.mango - description: market is a valid generic tld text: https://twitter.market expected: - https://twitter.market - description: marketing is a valid generic tld text: https://twitter.marketing expected: - https://twitter.marketing - description: markets is a valid generic tld text: https://twitter.markets expected: - https://twitter.markets - description: marriott is a valid generic tld text: https://twitter.marriott expected: - https://twitter.marriott - description: mba is a valid generic tld text: https://twitter.mba expected: - https://twitter.mba - description: media is a valid generic tld text: https://twitter.media expected: - https://twitter.media - description: meet is a valid generic tld text: https://twitter.meet expected: - https://twitter.meet - description: melbourne is a valid generic tld text: https://twitter.melbourne expected: - https://twitter.melbourne - description: meme is a valid generic tld text: https://twitter.meme expected: - https://twitter.meme - description: memorial is a valid generic tld text: https://twitter.memorial expected: - https://twitter.memorial - description: men is a valid generic tld text: https://twitter.men expected: - https://twitter.men - description: menu is a valid generic tld text: https://twitter.menu expected: - https://twitter.menu - description: miami is a valid generic tld text: https://twitter.miami expected: - https://twitter.miami - description: microsoft is a valid generic tld text: https://twitter.microsoft expected: - https://twitter.microsoft - description: mil is a valid generic tld text: https://twitter.mil expected: - https://twitter.mil - description: mini is a valid generic tld text: https://twitter.mini expected: - https://twitter.mini - description: mma is a valid generic tld 
text: https://twitter.mma expected: - https://twitter.mma - description: mobi is a valid generic tld text: https://twitter.mobi expected: - https://twitter.mobi - description: moda is a valid generic tld text: https://twitter.moda expected: - https://twitter.moda - description: moe is a valid generic tld text: https://twitter.moe expected: - https://twitter.moe - description: mom is a valid generic tld text: https://twitter.mom expected: - https://twitter.mom - description: monash is a valid generic tld text: https://twitter.monash expected: - https://twitter.monash - description: money is a valid generic tld text: https://twitter.money expected: - https://twitter.money - description: montblanc is a valid generic tld text: https://twitter.montblanc expected: - https://twitter.montblanc - description: mormon is a valid generic tld text: https://twitter.mormon expected: - https://twitter.mormon - description: mortgage is a valid generic tld text: https://twitter.mortgage expected: - https://twitter.mortgage - description: moscow is a valid generic tld text: https://twitter.moscow expected: - https://twitter.moscow - description: motorcycles is a valid generic tld text: https://twitter.motorcycles expected: - https://twitter.motorcycles - description: mov is a valid generic tld text: https://twitter.mov expected: - https://twitter.mov - description: movie is a valid generic tld text: https://twitter.movie expected: - https://twitter.movie - description: movistar is a valid generic tld text: https://twitter.movistar expected: - https://twitter.movistar - description: mtn is a valid generic tld text: https://twitter.mtn expected: - https://twitter.mtn - description: mtpc is a valid generic tld text: https://twitter.mtpc expected: - https://twitter.mtpc - description: museum is a valid generic tld text: https://twitter.museum expected: - https://twitter.museum - description: nadex is a valid generic tld text: https://twitter.nadex expected: - https://twitter.nadex - 
description: nagoya is a valid generic tld text: https://twitter.nagoya expected: - https://twitter.nagoya - description: name is a valid generic tld text: https://twitter.name expected: - https://twitter.name - description: navy is a valid generic tld text: https://twitter.navy expected: - https://twitter.navy - description: nec is a valid generic tld text: https://twitter.nec expected: - https://twitter.nec - description: net is a valid generic tld text: https://twitter.net expected: - https://twitter.net - description: netbank is a valid generic tld text: https://twitter.netbank expected: - https://twitter.netbank - description: network is a valid generic tld text: https://twitter.network expected: - https://twitter.network - description: neustar is a valid generic tld text: https://twitter.neustar expected: - https://twitter.neustar - description: new is a valid generic tld text: https://twitter.new expected: - https://twitter.new - description: news is a valid generic tld text: https://twitter.news expected: - https://twitter.news - description: nexus is a valid generic tld text: https://twitter.nexus expected: - https://twitter.nexus - description: ngo is a valid generic tld text: https://twitter.ngo expected: - https://twitter.ngo - description: nhk is a valid generic tld text: https://twitter.nhk expected: - https://twitter.nhk - description: nico is a valid generic tld text: https://twitter.nico expected: - https://twitter.nico - description: ninja is a valid generic tld text: https://twitter.ninja expected: - https://twitter.ninja - description: nissan is a valid generic tld text: https://twitter.nissan expected: - https://twitter.nissan - description: nokia is a valid generic tld text: https://twitter.nokia expected: - https://twitter.nokia - description: nra is a valid generic tld text: https://twitter.nra expected: - https://twitter.nra - description: nrw is a valid generic tld text: https://twitter.nrw expected: - https://twitter.nrw - description: 
ntt is a valid generic tld text: https://twitter.ntt expected: - https://twitter.ntt - description: nyc is a valid generic tld text: https://twitter.nyc expected: - https://twitter.nyc - description: office is a valid generic tld text: https://twitter.office expected: - https://twitter.office - description: okinawa is a valid generic tld text: https://twitter.okinawa expected: - https://twitter.okinawa - description: omega is a valid generic tld text: https://twitter.omega expected: - https://twitter.omega - description: one is a valid generic tld text: https://twitter.one expected: - https://twitter.one - description: ong is a valid generic tld text: https://twitter.ong expected: - https://twitter.ong - description: onl is a valid generic tld text: https://twitter.onl expected: - https://twitter.onl - description: online is a valid generic tld text: https://twitter.online expected: - https://twitter.online - description: ooo is a valid generic tld text: https://twitter.ooo expected: - https://twitter.ooo - description: oracle is a valid generic tld text: https://twitter.oracle expected: - https://twitter.oracle - description: orange is a valid generic tld text: https://twitter.orange expected: - https://twitter.orange - description: org is a valid generic tld text: https://twitter.org expected: - https://twitter.org - description: organic is a valid generic tld text: https://twitter.organic expected: - https://twitter.organic - description: osaka is a valid generic tld text: https://twitter.osaka expected: - https://twitter.osaka - description: otsuka is a valid generic tld text: https://twitter.otsuka expected: - https://twitter.otsuka - description: ovh is a valid generic tld text: https://twitter.ovh expected: - https://twitter.ovh - description: page is a valid generic tld text: https://twitter.page expected: - https://twitter.page - description: panerai is a valid generic tld text: https://twitter.panerai expected: - https://twitter.panerai - description: 
paris is a valid generic tld text: https://twitter.paris expected: - https://twitter.paris - description: partners is a valid generic tld text: https://twitter.partners expected: - https://twitter.partners - description: parts is a valid generic tld text: https://twitter.parts expected: - https://twitter.parts - description: party is a valid generic tld text: https://twitter.party expected: - https://twitter.party - description: pet is a valid generic tld text: https://twitter.pet expected: - https://twitter.pet - description: pharmacy is a valid generic tld text: https://twitter.pharmacy expected: - https://twitter.pharmacy - description: philips is a valid generic tld text: https://twitter.philips expected: - https://twitter.philips - description: photo is a valid generic tld text: https://twitter.photo expected: - https://twitter.photo - description: photography is a valid generic tld text: https://twitter.photography expected: - https://twitter.photography - description: photos is a valid generic tld text: https://twitter.photos expected: - https://twitter.photos - description: physio is a valid generic tld text: https://twitter.physio expected: - https://twitter.physio - description: piaget is a valid generic tld text: https://twitter.piaget expected: - https://twitter.piaget - description: pics is a valid generic tld text: https://twitter.pics expected: - https://twitter.pics - description: pictet is a valid generic tld text: https://twitter.pictet expected: - https://twitter.pictet - description: pictures is a valid generic tld text: https://twitter.pictures expected: - https://twitter.pictures - description: pink is a valid generic tld text: https://twitter.pink expected: - https://twitter.pink - description: pizza is a valid generic tld text: https://twitter.pizza expected: - https://twitter.pizza - description: place is a valid generic tld text: https://twitter.place expected: - https://twitter.place - description: play is a valid generic tld text: 
https://twitter.play expected: - https://twitter.play - description: plumbing is a valid generic tld text: https://twitter.plumbing expected: - https://twitter.plumbing - description: plus is a valid generic tld text: https://twitter.plus expected: - https://twitter.plus - description: pohl is a valid generic tld text: https://twitter.pohl expected: - https://twitter.pohl - description: poker is a valid generic tld text: https://twitter.poker expected: - https://twitter.poker - description: porn is a valid generic tld text: https://twitter.porn expected: - https://twitter.porn - description: post is a valid generic tld text: https://twitter.post expected: - https://twitter.post - description: praxi is a valid generic tld text: https://twitter.praxi expected: - https://twitter.praxi - description: press is a valid generic tld text: https://twitter.press expected: - https://twitter.press - description: pro is a valid generic tld text: https://twitter.pro expected: - https://twitter.pro - description: prod is a valid generic tld text: https://twitter.prod expected: - https://twitter.prod - description: productions is a valid generic tld text: https://twitter.productions expected: - https://twitter.productions - description: prof is a valid generic tld text: https://twitter.prof expected: - https://twitter.prof - description: properties is a valid generic tld text: https://twitter.properties expected: - https://twitter.properties - description: property is a valid generic tld text: https://twitter.property expected: - https://twitter.property - description: pub is a valid generic tld text: https://twitter.pub expected: - https://twitter.pub - description: qpon is a valid generic tld text: https://twitter.qpon expected: - https://twitter.qpon - description: quebec is a valid generic tld text: https://twitter.quebec expected: - https://twitter.quebec - description: racing is a valid generic tld text: https://twitter.racing expected: - https://twitter.racing - 
description: realtor is a valid generic tld text: https://twitter.realtor expected: - https://twitter.realtor - description: realty is a valid generic tld text: https://twitter.realty expected: - https://twitter.realty - description: recipes is a valid generic tld text: https://twitter.recipes expected: - https://twitter.recipes - description: red is a valid generic tld text: https://twitter.red expected: - https://twitter.red - description: redstone is a valid generic tld text: https://twitter.redstone expected: - https://twitter.redstone - description: rehab is a valid generic tld text: https://twitter.rehab expected: - https://twitter.rehab - description: reise is a valid generic tld text: https://twitter.reise expected: - https://twitter.reise - description: reisen is a valid generic tld text: https://twitter.reisen expected: - https://twitter.reisen - description: reit is a valid generic tld text: https://twitter.reit expected: - https://twitter.reit - description: ren is a valid generic tld text: https://twitter.ren expected: - https://twitter.ren - description: rent is a valid generic tld text: https://twitter.rent expected: - https://twitter.rent - description: rentals is a valid generic tld text: https://twitter.rentals expected: - https://twitter.rentals - description: repair is a valid generic tld text: https://twitter.repair expected: - https://twitter.repair - description: report is a valid generic tld text: https://twitter.report expected: - https://twitter.report - description: republican is a valid generic tld text: https://twitter.republican expected: - https://twitter.republican - description: rest is a valid generic tld text: https://twitter.rest expected: - https://twitter.rest - description: restaurant is a valid generic tld text: https://twitter.restaurant expected: - https://twitter.restaurant - description: review is a valid generic tld text: https://twitter.review expected: - https://twitter.review - description: reviews is a valid generic 
tld text: https://twitter.reviews expected: - https://twitter.reviews - description: rich is a valid generic tld text: https://twitter.rich expected: - https://twitter.rich - description: ricoh is a valid generic tld text: https://twitter.ricoh expected: - https://twitter.ricoh - description: rio is a valid generic tld text: https://twitter.rio expected: - https://twitter.rio - description: rip is a valid generic tld text: https://twitter.rip expected: - https://twitter.rip - description: rocks is a valid generic tld text: https://twitter.rocks expected: - https://twitter.rocks - description: rodeo is a valid generic tld text: https://twitter.rodeo expected: - https://twitter.rodeo - description: rsvp is a valid generic tld text: https://twitter.rsvp expected: - https://twitter.rsvp - description: ruhr is a valid generic tld text: https://twitter.ruhr expected: - https://twitter.ruhr - description: run is a valid generic tld text: https://twitter.run expected: - https://twitter.run - description: ryukyu is a valid generic tld text: https://twitter.ryukyu expected: - https://twitter.ryukyu - description: saarland is a valid generic tld text: https://twitter.saarland expected: - https://twitter.saarland - description: sakura is a valid generic tld text: https://twitter.sakura expected: - https://twitter.sakura - description: sale is a valid generic tld text: https://twitter.sale expected: - https://twitter.sale - description: samsung is a valid generic tld text: https://twitter.samsung expected: - https://twitter.samsung - description: sandvik is a valid generic tld text: https://twitter.sandvik expected: - https://twitter.sandvik - description: sandvikcoromant is a valid generic tld text: https://twitter.sandvikcoromant expected: - https://twitter.sandvikcoromant - description: sanofi is a valid generic tld text: https://twitter.sanofi expected: - https://twitter.sanofi - description: sap is a valid generic tld text: https://twitter.sap expected: - 
https://twitter.sap - description: sarl is a valid generic tld text: https://twitter.sarl expected: - https://twitter.sarl - description: saxo is a valid generic tld text: https://twitter.saxo expected: - https://twitter.saxo - description: sca is a valid generic tld text: https://twitter.sca expected: - https://twitter.sca - description: scb is a valid generic tld text: https://twitter.scb expected: - https://twitter.scb - description: schmidt is a valid generic tld text: https://twitter.schmidt expected: - https://twitter.schmidt - description: scholarships is a valid generic tld text: https://twitter.scholarships expected: - https://twitter.scholarships - description: school is a valid generic tld text: https://twitter.school expected: - https://twitter.school - description: schule is a valid generic tld text: https://twitter.schule expected: - https://twitter.schule - description: schwarz is a valid generic tld text: https://twitter.schwarz expected: - https://twitter.schwarz - description: science is a valid generic tld text: https://twitter.science expected: - https://twitter.science - description: scor is a valid generic tld text: https://twitter.scor expected: - https://twitter.scor - description: scot is a valid generic tld text: https://twitter.scot expected: - https://twitter.scot - description: seat is a valid generic tld text: https://twitter.seat expected: - https://twitter.seat - description: seek is a valid generic tld text: https://twitter.seek expected: - https://twitter.seek - description: sener is a valid generic tld text: https://twitter.sener expected: - https://twitter.sener - description: services is a valid generic tld text: https://twitter.services expected: - https://twitter.services - description: sew is a valid generic tld text: https://twitter.sew expected: - https://twitter.sew - description: sex is a valid generic tld text: https://twitter.sex expected: - https://twitter.sex - description: sexy is a valid generic tld text: 
https://twitter.sexy expected: - https://twitter.sexy - description: shiksha is a valid generic tld text: https://twitter.shiksha expected: - https://twitter.shiksha - description: shoes is a valid generic tld text: https://twitter.shoes expected: - https://twitter.shoes - description: show is a valid generic tld text: https://twitter.show expected: - https://twitter.show - description: shriram is a valid generic tld text: https://twitter.shriram expected: - https://twitter.shriram - description: singles is a valid generic tld text: https://twitter.singles expected: - https://twitter.singles - description: site is a valid generic tld text: https://twitter.site expected: - https://twitter.site - description: ski is a valid generic tld text: https://twitter.ski expected: - https://twitter.ski - description: sky is a valid generic tld text: https://twitter.sky expected: - https://twitter.sky - description: skype is a valid generic tld text: https://twitter.skype expected: - https://twitter.skype - description: sncf is a valid generic tld text: https://twitter.sncf expected: - https://twitter.sncf - description: soccer is a valid generic tld text: https://twitter.soccer expected: - https://twitter.soccer - description: social is a valid generic tld text: https://twitter.social expected: - https://twitter.social - description: software is a valid generic tld text: https://twitter.software expected: - https://twitter.software - description: sohu is a valid generic tld text: https://twitter.sohu expected: - https://twitter.sohu - description: solar is a valid generic tld text: https://twitter.solar expected: - https://twitter.solar - description: solutions is a valid generic tld text: https://twitter.solutions expected: - https://twitter.solutions - description: sony is a valid generic tld text: https://twitter.sony expected: - https://twitter.sony - description: soy is a valid generic tld text: https://twitter.soy expected: - https://twitter.soy - description: space is a 
valid generic tld text: https://twitter.space expected: - https://twitter.space - description: spiegel is a valid generic tld text: https://twitter.spiegel expected: - https://twitter.spiegel - description: spreadbetting is a valid generic tld text: https://twitter.spreadbetting expected: - https://twitter.spreadbetting - description: srl is a valid generic tld text: https://twitter.srl expected: - https://twitter.srl - description: starhub is a valid generic tld text: https://twitter.starhub expected: - https://twitter.starhub - description: statoil is a valid generic tld text: https://twitter.statoil expected: - https://twitter.statoil - description: studio is a valid generic tld text: https://twitter.studio expected: - https://twitter.studio - description: study is a valid generic tld text: https://twitter.study expected: - https://twitter.study - description: style is a valid generic tld text: https://twitter.style expected: - https://twitter.style - description: sucks is a valid generic tld text: https://twitter.sucks expected: - https://twitter.sucks - description: supplies is a valid generic tld text: https://twitter.supplies expected: - https://twitter.supplies - description: supply is a valid generic tld text: https://twitter.supply expected: - https://twitter.supply - description: support is a valid generic tld text: https://twitter.support expected: - https://twitter.support - description: surf is a valid generic tld text: https://twitter.surf expected: - https://twitter.surf - description: surgery is a valid generic tld text: https://twitter.surgery expected: - https://twitter.surgery - description: suzuki is a valid generic tld text: https://twitter.suzuki expected: - https://twitter.suzuki - description: swatch is a valid generic tld text: https://twitter.swatch expected: - https://twitter.swatch - description: swiss is a valid generic tld text: https://twitter.swiss expected: - https://twitter.swiss - description: sydney is a valid generic tld text: 
https://twitter.sydney expected: - https://twitter.sydney - description: systems is a valid generic tld text: https://twitter.systems expected: - https://twitter.systems - description: taipei is a valid generic tld text: https://twitter.taipei expected: - https://twitter.taipei - description: tatamotors is a valid generic tld text: https://twitter.tatamotors expected: - https://twitter.tatamotors - description: tatar is a valid generic tld text: https://twitter.tatar expected: - https://twitter.tatar - description: tattoo is a valid generic tld text: https://twitter.tattoo expected: - https://twitter.tattoo - description: tax is a valid generic tld text: https://twitter.tax expected: - https://twitter.tax - description: taxi is a valid generic tld text: https://twitter.taxi expected: - https://twitter.taxi - description: team is a valid generic tld text: https://twitter.team expected: - https://twitter.team - description: tech is a valid generic tld text: https://twitter.tech expected: - https://twitter.tech - description: technology is a valid generic tld text: https://twitter.technology expected: - https://twitter.technology - description: tel is a valid generic tld text: https://twitter.tel expected: - https://twitter.tel - description: telefonica is a valid generic tld text: https://twitter.telefonica expected: - https://twitter.telefonica - description: temasek is a valid generic tld text: https://twitter.temasek expected: - https://twitter.temasek - description: tennis is a valid generic tld text: https://twitter.tennis expected: - https://twitter.tennis - description: thd is a valid generic tld text: https://twitter.thd expected: - https://twitter.thd - description: theater is a valid generic tld text: https://twitter.theater expected: - https://twitter.theater - description: tickets is a valid generic tld text: https://twitter.tickets expected: - https://twitter.tickets - description: tienda is a valid generic tld text: https://twitter.tienda expected: - 
https://twitter.tienda - description: tips is a valid generic tld text: https://twitter.tips expected: - https://twitter.tips - description: tires is a valid generic tld text: https://twitter.tires expected: - https://twitter.tires - description: tirol is a valid generic tld text: https://twitter.tirol expected: - https://twitter.tirol - description: today is a valid generic tld text: https://twitter.today expected: - https://twitter.today - description: tokyo is a valid generic tld text: https://twitter.tokyo expected: - https://twitter.tokyo - description: tools is a valid generic tld text: https://twitter.tools expected: - https://twitter.tools - description: top is a valid generic tld text: https://twitter.top expected: - https://twitter.top - description: toray is a valid generic tld text: https://twitter.toray expected: - https://twitter.toray - description: toshiba is a valid generic tld text: https://twitter.toshiba expected: - https://twitter.toshiba - description: tours is a valid generic tld text: https://twitter.tours expected: - https://twitter.tours - description: town is a valid generic tld text: https://twitter.town expected: - https://twitter.town - description: toyota is a valid generic tld text: https://twitter.toyota expected: - https://twitter.toyota - description: toys is a valid generic tld text: https://twitter.toys expected: - https://twitter.toys - description: trade is a valid generic tld text: https://twitter.trade expected: - https://twitter.trade - description: trading is a valid generic tld text: https://twitter.trading expected: - https://twitter.trading - description: training is a valid generic tld text: https://twitter.training expected: - https://twitter.training - description: travel is a valid generic tld text: https://twitter.travel expected: - https://twitter.travel - description: trust is a valid generic tld text: https://twitter.trust expected: - https://twitter.trust - description: tui is a valid generic tld text: 
https://twitter.tui expected: - https://twitter.tui - description: ubs is a valid generic tld text: https://twitter.ubs expected: - https://twitter.ubs - description: university is a valid generic tld text: https://twitter.university expected: - https://twitter.university - description: uno is a valid generic tld text: https://twitter.uno expected: - https://twitter.uno - description: uol is a valid generic tld text: https://twitter.uol expected: - https://twitter.uol - description: vacations is a valid generic tld text: https://twitter.vacations expected: - https://twitter.vacations - description: vegas is a valid generic tld text: https://twitter.vegas expected: - https://twitter.vegas - description: ventures is a valid generic tld text: https://twitter.ventures expected: - https://twitter.ventures - description: vermögensberater is a valid generic tld text: https://twitter.vermögensberater expected: - https://twitter.vermögensberater - description: vermögensberatung is a valid generic tld text: https://twitter.vermögensberatung expected: - https://twitter.vermögensberatung - description: versicherung is a valid generic tld text: https://twitter.versicherung expected: - https://twitter.versicherung - description: vet is a valid generic tld text: https://twitter.vet expected: - https://twitter.vet - description: viajes is a valid generic tld text: https://twitter.viajes expected: - https://twitter.viajes - description: video is a valid generic tld text: https://twitter.video expected: - https://twitter.video - description: villas is a valid generic tld text: https://twitter.villas expected: - https://twitter.villas - description: vin is a valid generic tld text: https://twitter.vin expected: - https://twitter.vin - description: vision is a valid generic tld text: https://twitter.vision expected: - https://twitter.vision - description: vista is a valid generic tld text: https://twitter.vista expected: - https://twitter.vista - description: vistaprint is a valid 
generic tld text: https://twitter.vistaprint expected: - https://twitter.vistaprint - description: vlaanderen is a valid generic tld text: https://twitter.vlaanderen expected: - https://twitter.vlaanderen - description: vodka is a valid generic tld text: https://twitter.vodka expected: - https://twitter.vodka - description: vote is a valid generic tld text: https://twitter.vote expected: - https://twitter.vote - description: voting is a valid generic tld text: https://twitter.voting expected: - https://twitter.voting - description: voto is a valid generic tld text: https://twitter.voto expected: - https://twitter.voto - description: voyage is a valid generic tld text: https://twitter.voyage expected: - https://twitter.voyage - description: wales is a valid generic tld text: https://twitter.wales expected: - https://twitter.wales - description: walter is a valid generic tld text: https://twitter.walter expected: - https://twitter.walter - description: wang is a valid generic tld text: https://twitter.wang expected: - https://twitter.wang - description: watch is a valid generic tld text: https://twitter.watch expected: - https://twitter.watch - description: webcam is a valid generic tld text: https://twitter.webcam expected: - https://twitter.webcam - description: website is a valid generic tld text: https://twitter.website expected: - https://twitter.website - description: wed is a valid generic tld text: https://twitter.wed expected: - https://twitter.wed - description: wedding is a valid generic tld text: https://twitter.wedding expected: - https://twitter.wedding - description: weir is a valid generic tld text: https://twitter.weir expected: - https://twitter.weir - description: whoswho is a valid generic tld text: https://twitter.whoswho expected: - https://twitter.whoswho - description: wien is a valid generic tld text: https://twitter.wien expected: - https://twitter.wien - description: wiki is a valid generic tld text: https://twitter.wiki expected: - 
https://twitter.wiki - description: williamhill is a valid generic tld text: https://twitter.williamhill expected: - https://twitter.williamhill - description: win is a valid generic tld text: https://twitter.win expected: - https://twitter.win - description: windows is a valid generic tld text: https://twitter.windows expected: - https://twitter.windows - description: wine is a valid generic tld text: https://twitter.wine expected: - https://twitter.wine - description: wme is a valid generic tld text: https://twitter.wme expected: - https://twitter.wme - description: work is a valid generic tld text: https://twitter.work expected: - https://twitter.work - description: works is a valid generic tld text: https://twitter.works expected: - https://twitter.works - description: world is a valid generic tld text: https://twitter.world expected: - https://twitter.world - description: wtc is a valid generic tld text: https://twitter.wtc expected: - https://twitter.wtc - description: wtf is a valid generic tld text: https://twitter.wtf expected: - https://twitter.wtf - description: xbox is a valid generic tld text: https://twitter.xbox expected: - https://twitter.xbox - description: xerox is a valid generic tld text: https://twitter.xerox expected: - https://twitter.xerox - description: xin is a valid generic tld text: https://twitter.xin expected: - https://twitter.xin - description: xperia is a valid generic tld text: https://twitter.xperia expected: - https://twitter.xperia - description: xxx is a valid generic tld text: https://twitter.xxx expected: - https://twitter.xxx - description: xyz is a valid generic tld text: https://twitter.xyz expected: - https://twitter.xyz - description: yachts is a valid generic tld text: https://twitter.yachts expected: - https://twitter.yachts - description: yandex is a valid generic tld text: https://twitter.yandex expected: - https://twitter.yandex - description: yodobashi is a valid generic tld text: https://twitter.yodobashi 
expected: - https://twitter.yodobashi - description: yoga is a valid generic tld text: https://twitter.yoga expected: - https://twitter.yoga - description: yokohama is a valid generic tld text: https://twitter.yokohama expected: - https://twitter.yokohama - description: youtube is a valid generic tld text: https://twitter.youtube expected: - https://twitter.youtube - description: zip is a valid generic tld text: https://twitter.zip expected: - https://twitter.zip - description: zone is a valid generic tld text: https://twitter.zone expected: - https://twitter.zone - description: zuerich is a valid generic tld text: https://twitter.zuerich expected: - https://twitter.zuerich - description: "дети is a valid generic tld" text: https://twitter.дети expected: - https://twitter.дети - description: "ком is a valid generic tld" text: https://twitter.ком expected: - https://twitter.ком - description: "москва is a valid generic tld" text: https://twitter.москва expected: - https://twitter.москва - description: "онлайн is a valid generic tld" text: https://twitter.онлайн expected: - https://twitter.онлайн - description: "орг is a valid generic tld" text: https://twitter.орг expected: - https://twitter.орг - description: "рус is a valid generic tld" text: https://twitter.рус expected: - https://twitter.рус - description: "сайт is a valid generic tld" text: https://twitter.сайт expected: - https://twitter.сайт - description: "קום is a valid generic tld" text: https://twitter.קום expected: - https://twitter.קום - description: "بازار is a valid generic tld" text: https://twitter.بازار expected: - https://twitter.بازار - description: "شبكة is a valid generic tld" text: https://twitter.شبكة expected: - https://twitter.شبكة - description: "كوم is a valid generic tld" text: https://twitter.كوم expected: - https://twitter.كوم - description: "موقع is a valid generic tld" text: https://twitter.موقع expected: - https://twitter.موقع - description: "कॉम is a valid generic tld" text: 
https://twitter.कॉम expected: - https://twitter.कॉम - description: "नेट is a valid generic tld" text: https://twitter.नेट expected: - https://twitter.नेट - description: "संगठन is a valid generic tld" text: https://twitter.संगठन expected: - https://twitter.संगठन - description: "คอม is a valid generic tld" text: https://twitter.คอม expected: - https://twitter.คอม - description: "みんな is a valid generic tld" text: https://twitter.みんな expected: - https://twitter.みんな - description: "グーグル is a valid generic tld" text: https://twitter.グーグル expected: - https://twitter.グーグル - description: "コム is a valid generic tld" text: https://twitter.コム expected: - https://twitter.コム - description: "世界 is a valid generic tld" text: https://twitter.世界 expected: - https://twitter.世界 - description: "中信 is a valid generic tld" text: https://twitter.中信 expected: - https://twitter.中信 - description: "中文网 is a valid generic tld" text: https://twitter.中文网 expected: - https://twitter.中文网 - description: "企业 is a valid generic tld" text: https://twitter.企业 expected: - https://twitter.企业 - description: "佛山 is a valid generic tld" text: https://twitter.佛山 expected: - https://twitter.佛山 - description: "信息 is a valid generic tld" text: https://twitter.信息 expected: - https://twitter.信息 - description: "健康 is a valid generic tld" text: https://twitter.健康 expected: - https://twitter.健康 - description: "八卦 is a valid generic tld" text: https://twitter.八卦 expected: - https://twitter.八卦 - description: "公司 is a valid generic tld" text: https://twitter.公司 expected: - https://twitter.公司 - description: "公益 is a valid generic tld" text: https://twitter.公益 expected: - https://twitter.公益 - description: "商城 is a valid generic tld" text: https://twitter.商城 expected: - https://twitter.商城 - description: "商店 is a valid generic tld" text: https://twitter.商店 expected: - https://twitter.商店 - description: "商标 is a valid generic tld" text: https://twitter.商标 expected: - https://twitter.商标 - description: "在线 is a valid generic 
tld" text: https://twitter.在线 expected: - https://twitter.在线 - description: "大拿 is a valid generic tld" text: https://twitter.大拿 expected: - https://twitter.大拿 - description: "娱乐 is a valid generic tld" text: https://twitter.娱乐 expected: - https://twitter.娱乐 - description: "工行 is a valid generic tld" text: https://twitter.工行 expected: - https://twitter.工行 - description: "广东 is a valid generic tld" text: https://twitter.广东 expected: - https://twitter.广东 - description: "慈善 is a valid generic tld" text: https://twitter.慈善 expected: - https://twitter.慈善 - description: "我爱你 is a valid generic tld" text: https://twitter.我爱你 expected: - https://twitter.我爱你 - description: "手机 is a valid generic tld" text: https://twitter.手机 expected: - https://twitter.手机 - description: "政务 is a valid generic tld" text: https://twitter.政务 expected: - https://twitter.政务 - description: "政府 is a valid generic tld" text: https://twitter.政府 expected: - https://twitter.政府 - description: "新闻 is a valid generic tld" text: https://twitter.新闻 expected: - https://twitter.新闻 - description: "时尚 is a valid generic tld" text: https://twitter.时尚 expected: - https://twitter.时尚 - description: "机构 is a valid generic tld" text: https://twitter.机构 expected: - https://twitter.机构 - description: "淡马锡 is a valid generic tld" text: https://twitter.淡马锡 expected: - https://twitter.淡马锡 - description: "游戏 is a valid generic tld" text: https://twitter.游戏 expected: - https://twitter.游戏 - description: "点看 is a valid generic tld" text: https://twitter.点看 expected: - https://twitter.点看 - description: "移动 is a valid generic tld" text: https://twitter.移动 expected: - https://twitter.移动 - description: "组织机构 is a valid generic tld" text: https://twitter.组织机构 expected: - https://twitter.组织机构 - description: "网址 is a valid generic tld" text: https://twitter.网址 expected: - https://twitter.网址 - description: "网店 is a valid generic tld" text: https://twitter.网店 expected: - https://twitter.网店 - description: "网络 is a valid generic tld" 
text: https://twitter.网络 expected: - https://twitter.网络 - description: "谷歌 is a valid generic tld" text: https://twitter.谷歌 expected: - https://twitter.谷歌 - description: "集团 is a valid generic tld" text: https://twitter.集团 expected: - https://twitter.集团 - description: "飞利浦 is a valid generic tld" text: https://twitter.飞利浦 expected: - https://twitter.飞利浦 - description: "餐厅 is a valid generic tld" text: https://twitter.餐厅 expected: - https://twitter.餐厅 - description: "닷넷 is a valid generic tld" text: https://twitter.닷넷 expected: - https://twitter.닷넷 - description: "닷컴 is a valid generic tld" text: https://twitter.닷컴 expected: - https://twitter.닷컴 - description: "삼성 is a valid generic tld" text: https://twitter.삼성 expected: - https://twitter.삼성 - description: onion is a valid generic tld text: https://twitter.onion expected: - https://twitter.onion twitter-text-1.13.4/test/twitter-text-conformance/hit_highlighting.yml0000644000175000017500000000531512670063203026460 0ustar sudheeshsudheesh tests: plain_text: - description: "Highlight the beginning of a string" text: "this is a test" hits: [ [0, 4] ] expected: "this is a test" - description: "Highlight the middle of a string" text: "this is a test" hits: [ [5, 7] ] expected: "this is a test" - description: "Highlight the end of a string" text: "this is a test" hits: [ [10, 14] ] expected: "this is a test" - description: "Highlight multiple terms" text: "this is a test" hits: [ [0, 4], [10, 14] ] expected: "this is a test" - description: "DO NOT highlight with empty hits" text: "this is a test" hits: [] expected: "this is a test" - description: "Highlight within Japanese text" text: "東京の天気" hits: [ [0, 2] ] expected: "東京の天気" with_links: - description: "Highlight after a link (offset does not include markup)" text: "@username this is an example" hits: [ [10, 14] ] expected: "@username this is an example" - description: "Highlight anchor text of a link (offset does not include markup)" text: "@username this is an example" 
hits: [ [1, 9] ] expected: "@username this is an example" - description: "Highlight around a link (offset does not include markup)" text: "@username this is an example" hits: [ [0, 14] ] expected: "@username this is an example" - description: "Highlight touching tags" text: "foofoo" hits: [ [3, 6] ] expected: "foofoo" - description: "Highlight two links" text: "foo bar baz" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz" - description: "Highlight non-link then link not at end" text: "foo bar baz something else" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz something else" - description: "Highlight non-link then link at end" text: "foo bar baz" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz" - description: "Highlight mention at end" text: "something via @twitter" hits: [ [14, 22] ] expected: "something via @twitter" twitter-text-1.13.4/test/twitter-text-conformance/tld_lib.yml0000644000175000017500000002101712670063203024555 0ustar sudheeshsudheesh--- country: - ac - ad - ae - af - ag - ai - al - am - an - ao - aq - ar - as - at - au - aw - ax - az - ba - bb - bd - be - bf - bg - bh - bi - bj - bl - bm - bn - bo - bq - br - bs - bt - bv - bw - by - bz - ca - cc - cd - cf - cg - ch - ci - ck - cl - cm - cn - co - cr - cu - cv - cw - cx - cy - cz - de - dj - dk - dm - do - dz - ec - ee - eg - eh - er - es - et - eu - fi - fj - fk - fm - fo - fr - ga - gb - gd - ge - gf - gg - gh - gi - gl - gm - gn - gp - gq - gr - gs - gt - gu - gw - gy - hk - hm - hn - hr - ht - hu - id - ie - il - im - in - io - iq - ir - is - it - je - jm - jo - jp - ke - kg - kh - ki - km - kn - kp - kr - kw - ky - kz - la - lb - lc - li - lk - lr - ls - lt - lu - lv - ly - ma - mc - md - me - mf - mg - mh - mk - ml - mm - mn - mo - mp - mq - mr - ms - mt - mu - mv - mw - mx - my - mz - na - nc - ne - nf - ng - ni - nl - 'no' - np - nr - nu - nz - om - pa - pe - pf - pg - ph - pk - pl - pm - pn - pr - ps - pt - pw - py - qa - re - ro - rs - ru - rw - sa - sb - sc - sd - se - sg - sh - si - 
sj - sk - sl - sm - sn - so - sr - ss - st - su - sv - sx - sy - sz - tc - td - tf - tg - th - tj - tk - tl - tm - tn - to - tp - tr - tt - tv - tw - tz - ua - ug - uk - um - us - uy - uz - va - vc - ve - vg - vi - vn - vu - wf - ws - ye - yt - za - zm - zw - "ελ" - "бел" - "мкд" - "мон" - "рф" - "срб" - "укр" - "қаз" - "հայ" - "الاردن" - "الجزائر" - "السعودية" - "المغرب" - "امارات" - "ایران" - "بھارت" - "تونس" - "سودان" - "سورية" - "عراق" - "عمان" - "فلسطين" - "قطر" - "مصر" - "مليسيا" - "پاکستان" - "भारत" - "বাংলা" - "ভারত" - "ਭਾਰਤ" - "ભારત" - "இந்தியா" - "இலங்கை" - "சிங்கப்பூர்" - "భారత్" - "ලංකා" - "ไทย" - "გე" - "中国" - "中國" - "台湾" - "台灣" - "新加坡" - "澳門" - "香港" - "한국" generic: - abb - abbott - abogado - academy - accenture - accountant - accountants - aco - active - actor - ads - adult - aeg - aero - afl - agency - aig - airforce - airtel - allfinanz - alsace - amsterdam - android - apartments - app - aquarelle - archi - army - arpa - asia - associates - attorney - auction - audio - auto - autos - axa - azure - band - bank - bar - barcelona - barclaycard - barclays - bargains - bauhaus - bayern - bbc - bbva - bcn - beer - bentley - berlin - best - bet - bharti - bible - bid - bike - bing - bingo - bio - biz - black - blackfriday - bloomberg - blue - bmw - bnl - bnpparibas - boats - bond - boo - boots - boutique - bradesco - bridgestone - broker - brother - brussels - budapest - build - builders - business - buzz - bzh - cab - cafe - cal - camera - camp - cancerresearch - canon - capetown - capital - caravan - cards - care - career - careers - cars - cartier - casa - cash - casino - cat - catering - cba - cbn - ceb - center - ceo - cern - cfa - cfd - chanel - channel - chat - cheap - chloe - christmas - chrome - church - cisco - citic - city - claims - cleaning - click - clinic - clothing - cloud - club - coach - codes - coffee - college - cologne - com - commbank - community - company - computer - condos - construction - consulting - contractors - cooking - cool 
- coop - corsica - country - coupons - courses - credit - creditcard - cricket - crown - crs - cruises - cuisinella - cymru - cyou - dabur - dad - dance - date - dating - datsun - day - dclk - deals - degree - delivery - delta - democrat - dental - dentist - desi - design - dev - diamonds - diet - digital - direct - directory - discount - dnp - docs - dog - doha - domains - doosan - download - drive - durban - dvag - earth - eat - edu - education - email - emerck - energy - engineer - engineering - enterprises - epson - equipment - erni - esq - estate - eurovision - eus - events - everbank - exchange - expert - exposed - express - fage - fail - faith - family - fan - fans - farm - fashion - feedback - film - finance - financial - firmdale - fish - fishing - fit - fitness - flights - florist - flowers - flsmidth - fly - foo - football - forex - forsale - forum - foundation - frl - frogans - fund - furniture - futbol - fyi - gal - gallery - game - garden - gbiz - gdn - gent - genting - ggee - gift - gifts - gives - giving - glass - gle - global - globo - gmail - gmo - gmx - gold - goldpoint - golf - goo - goog - google - gop - gov - graphics - gratis - green - gripe - group - guge - guide - guitars - guru - hamburg - hangout - haus - healthcare - help - here - hermes - hiphop - hitachi - hiv - hockey - holdings - holiday - homedepot - homes - honda - horse - host - hosting - hoteles - hotmail - house - how - hsbc - ibm - icbc - ice - icu - ifm - iinet - immo - immobilien - industries - infiniti - info - ing - ink - institute - insure - int - international - investments - ipiranga - irish - ist - istanbul - itau - iwc - java - jcb - jetzt - jewelry - jlc - jll - jobs - joburg - jprs - juegos - kaufen - kddi - kim - kitchen - kiwi - koeln - komatsu - krd - kred - kyoto - lacaixa - lancaster - land - lasalle - lat - latrobe - law - lawyer - lds - lease - leclerc - legal - lexus - lgbt - liaison - lidl - life - lighting - limited - limo - link - live - lixil - loan - 
loans - lol - london - lotte - lotto - love - ltda - lupin - luxe - luxury - madrid - maif - maison - man - management - mango - market - marketing - markets - marriott - mba - media - meet - melbourne - meme - memorial - men - menu - miami - microsoft - mil - mini - mma - mobi - moda - moe - mom - monash - money - montblanc - mormon - mortgage - moscow - motorcycles - mov - movie - movistar - mtn - mtpc - museum - nadex - nagoya - name - navy - nec - net - netbank - network - neustar - new - news - nexus - ngo - nhk - nico - ninja - nissan - nokia - nra - nrw - ntt - nyc - office - okinawa - omega - one - ong - onl - online - ooo - oracle - orange - org - organic - osaka - otsuka - ovh - page - panerai - paris - partners - parts - party - pet - pharmacy - philips - photo - photography - photos - physio - piaget - pics - pictet - pictures - pink - pizza - place - play - plumbing - plus - pohl - poker - porn - post - praxi - press - pro - prod - productions - prof - properties - property - pub - qpon - quebec - racing - realtor - realty - recipes - red - redstone - rehab - reise - reisen - reit - ren - rent - rentals - repair - report - republican - rest - restaurant - review - reviews - rich - ricoh - rio - rip - rocks - rodeo - rsvp - ruhr - run - ryukyu - saarland - sakura - sale - samsung - sandvik - sandvikcoromant - sanofi - sap - sarl - saxo - sca - scb - schmidt - scholarships - school - schule - schwarz - science - scor - scot - seat - seek - sener - services - sew - sex - sexy - shiksha - shoes - show - shriram - singles - site - ski - sky - skype - sncf - soccer - social - software - sohu - solar - solutions - sony - soy - space - spiegel - spreadbetting - srl - starhub - statoil - studio - study - style - sucks - supplies - supply - support - surf - surgery - suzuki - swatch - swiss - sydney - systems - taipei - tatamotors - tatar - tattoo - tax - taxi - team - tech - technology - tel - telefonica - temasek - tennis - thd - theater - tickets - tienda - 
tips - tires - tirol - today - tokyo - tools - top - toray - toshiba - tours - town - toyota - toys - trade - trading - training - travel - trust - tui - ubs - university - uno - uol - vacations - vegas - ventures - vermögensberater - vermögensberatung - versicherung - vet - viajes - video - villas - vin - vision - vista - vistaprint - vlaanderen - vodka - vote - voting - voto - voyage - wales - walter - wang - watch - webcam - website - wed - wedding - weir - whoswho - wien - wiki - williamhill - win - windows - wine - wme - work - works - world - wtc - wtf - xbox - xerox - xin - xperia - xxx - xyz - yachts - yandex - yodobashi - yoga - yokohama - youtube - zip - zone - zuerich - "дети" - "ком" - "москва" - "онлайн" - "орг" - "рус" - "сайт" - "קום" - "بازار" - "شبكة" - "كوم" - "موقع" - "कॉम" - "नेट" - "संगठन" - "คอม" - "みんな" - "グーグル" - "コム" - "世界" - "中信" - "中文网" - "企业" - "佛山" - "信息" - "健康" - "八卦" - "公司" - "公益" - "商城" - "商店" - "商标" - "在线" - "大拿" - "娱乐" - "工行" - "广东" - "慈善" - "我爱你" - "手机" - "政务" - "政府" - "新闻" - "时尚" - "机构" - "淡马锡" - "游戏" - "点看" - "移动" - "组织机构" - "网址" - "网店" - "网络" - "谷歌" - "集团" - "飞利浦" - "餐厅" - "닷넷" - "닷컴" - "삼성" - onion twitter-text-1.13.4/test/twitter-text-conformance/validate.yml0000644000175000017500000002263512670063203024744 0ustar sudheeshsudheesh tests: tweets: - description: "Valid Tweet: < 20 characters" text: "I am a Tweet" expected: true - description: "Valid Tweet: 140 characters" text: "A lie gets halfway around the world before the truth has a chance to get its pants on. Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: true - description: "Valid Tweet: 140 characters (with accents)" text: "A lié géts halfway arøünd thé wørld béføré thé truth has a chance tø get its pants øn. 
Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: true - description: "Valid Tweet: 140 characters (double byte characters)" text: "のののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののの" expected: true - description: "Valid Tweet: 140 characters (double word characters)" text: "\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431" expected: true - description: "Invalid Tweet: no characters (empty)" text: "" expected: false - description: "Invalid Tweet: 141 characters" text: "A lie gets halfway around the world before the truth has a chance to 
get its pants on. -- Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: false - description: "Invalid Tweet: 141 characters (due to newline)" text: "A lie gets halfway around the world before the truth has a chance to get its pants on. \n- Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: false usernames: - description: "Valid username: a-z < 20 characters" text: "@username" expected: true - description: "All numeric username are allowed" text: "@12345" expected: true - description: "Usernames should allow the _ character" text: "@example_name" expected: true - description: "Usernames SHOULD NOT allow the - character" text: "@example-name" expected: false lists: - description: "Valid list: a-z < 20 characters" text: "@username/list" expected: true - description: "A username alone SHOULD NOT be considered a valid list" text: "@username" expected: false - description: "A username followed by a slash SHOULD NOT be considered a valid list" text: "@username/" expected: false - description: "Validation SHOULD NOT allow leading spaces" text: " @username/list" expected: false - description: "Validation SHOULD NOT allow trailing spaces" text: "@username/list " expected: false hashtags: - description: "Valid hashtag: a-z < 20 characters" text: "#hashtag" expected: true - description: "Valid hashtag: number followed by letters" text: "#1st" expected: true - description: "Valid hashtag: letters and numbers mixed" text: "#that1time" expected: true - description: "Valid hashtag: letter followed by numbers" text: "#easyas123" expected: true - description: "Invalid hashtag: all numbers" text: "#12345" expected: false - description: "Valid hashtag: Russian text" text: "#ашок" expected: true - description: "Valid hashtag: Korean text" text: "#트위터" expected: true urls: - description: "Valid url: protocol + domain" text: "http://example.com" expected: true - description: "Valid url: ssl + domain + path + query" text: 
"https://example.com/path/to/resource?search=foo&lang=en" expected: true - description: "Valid url: protocol + domain + path + fragment" text: "http://twitter.com/#!/twitter" expected: true - description: "Valid url: cased protocol and domain" text: "HTTPS://www.ExaMPLE.COM/index.html" expected: true - description: "Valid url: port and userinfo" text: "http://user:PASSW0RD@example.com:8080/login.php" expected: true - description: "Valid url: matrix path parameters" text: "http://sports.yahoo.com/nfl/news;_ylt=Aom0;ylu=XyZ?slug=ap-superbowlnotebook" expected: true - description: "Valid url: ipv4" text: "http://192.168.0.1/index.html?src=asdf" expected: true - description: "Valid url: ipv6" text: "http://[3ffe:1900:4545:3:200:f8ff:fe21:67cf]:80/index.html" expected: true - description: "Valid url: underscore in subdomain" text: "http://test_underscore.twitter.com" expected: true - description: "Valid url: sub delims and question marks" text: "http://example.com?foo=$bar.;baz?BAZ&c=d-#top/?stories+" expected: true - description: "Valid unicode url: unicode domain" text: "http://☃.net/" expected: true - description: "Valid url: Cyrillic characters in path" text: "http://example.com/Русские_слова" expected: true - description: "Valid url: trailing hyphen" text: "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-" expected: true - description: "Invalid url: invalid scheme" text: "ftp://www.example.com/" expected: false - description: "Invalid url: invalid userinfo characters" text: "https://user:pass[word]@www.example.com/" expected: false - description: "Invalid url: underscore in domain" text: "http://domain-dash_2314352345_dfasd.foo-cow_4352.com" expected: false - description: "Invalid url: domain beginning dash" text: "http://www.-domain4352.com/" expected: false - description: "Invalid url: domain trailing dash" text: "http://www.domain4352-.com/" expected: false - description: "Invalid url: unicode domain trailing dash" text: "http://☃-.net/" 
expected: false - description: "Invalid url: improperly encoded unicode domain" text: "http://%e2%98%83.net/" expected: false - description: "Invalid url: invalid IP" text: "http://256.1.2.3/" expected: false - description: "Invalid url: invalid char in path" text: "http://en.wikipedia.org/wiki/\"#Punctuation" expected: false - description: "Invalid url: trailing space" text: "http://example.com/#anchor " expected: false urls_without_protocol: - description: "Valid url without protocol: domain + gTLD" text: "example.com" expected: true - description: "Valid url without protocol: subdomain + domain + gTLD" text: "www.example.com" expected: true - description: "Valid url without protocol: domain + ccTLD" text: "t.co" expected: true - description: "Valid url without protocol: subdomain + domain + ccTLD" text: "foo.co.jp" expected: true - description: "Valid url without protocol: domain + path + query" text: "example.com/path/to/resource?search=foo&lang=en" expected: true lengths: - description: "Count the number of characters" text: "This is a test." 
expected: 15 - description: "Count a URL starting with http:// as 23 characters" text: "http://test.com" expected: 23 - description: "Count a URL starting with https:// as 23 characters" text: "https://test.com" expected: 23 - description: "Count a URL without protocol as 23 characters" text: "test.com" expected: 23 - description: "Count multiple URLs correctly" text: "Test https://test.com test https://test.com test.com test" expected: 86 - description: "Count unicode chars outside the basic multilingual plane (double word)" text: "\U00010000\U0010ffff" expected: 2 - description: "Count unicode chars inside the basic multilingual plane" text: "저찀쯿쿿" expected: 4 - description: "Count a mix of single byte single word, and double word unicode characters" text: "H\U0001f431☺" expected: 3 twitter-text-1.13.4/test/twitter-text-conformance/LICENSE0000644000175000017500000002361012670063203023427 0ustar sudheeshsudheeshCopyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the License below, or at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
twitter-text-1.13.4/test/twitter-text-conformance/extract.yml0000644000175000017500000011233712670063203024624 0ustar sudheeshsudheeshtests: mentions: - description: "Extract mention at the begining of a tweet" text: "@username reply" expected: ["username"] - description: "Extract mention at the end of a tweet" text: "mention @username" expected: ["username"] - description: "Extract mention in the middle of a tweet" text: "mention @username in the middle" expected: ["username"] - description: "Extract mention of username with underscore" text: "mention @user_name" expected: ["user_name"] - description: "Extract mention of all numeric username" text: "mention @12345" expected: ["12345"] - description: "Extract mention or multiple usernames" text: "mention @username1 @username2" expected: ["username1", "username2"] - description: "Extract mention in the middle of a Japanese tweet" text: "の@usernameに到着を待っている" expected: ["username"] - description: "DO NOT extract username ending in @" text: "Current Status: @_@ (cc: @username)" expected: ["username"] - description: "DO NOT extract username followed by accented latin characters" text: "@aliceìnheiro something something" expected: [] - description: "Extract lone metion but not @user@user (too close to an email)" text: "@username email me @test@example.com" expected: ["username"] - description: "DO NOT extract 'http' in '@http://' as username" text: "@http://twitter.com" expected: [] - description: "Extract mentions before newline" text: "@username\n@mention" expected: ["username", "mention"] - description: "Extract mentions after 'RT'" text: "RT@username RT:@mention RT @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'rt'" text: "rt@username rt:@mention rt @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'Rt'" text: "Rt@username Rt:@mention Rt @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'rT'" text: 
"rT@username rT:@mention rT @test" expected: ["username", "mention", "test"] - description: "DO NOT extract username preceded by !" text: "f!@kn" expected: [] - description: "DO NOT extract username preceded by @" text: "f@@kn" expected: [] - description: "DO NOT extract username preceded by #" text: "f#@kn" expected: [] - description: "DO NOT extract username preceded by $" text: "f$@kn" expected: [] - description: "DO NOT extract username preceded by %" text: "f%@kn" expected: [] - description: "DO NOT extract username preceded by &" text: "f&@kn" expected: [] - description: "DO NOT extract username preceded by *" text: "f*@kn" expected: [] mentions_with_indices: - description: "Extract a mention at the start" text: "@username yo!" expected: - screen_name: "username" indices: [0, 9] - description: "Extract a mention that has the same thing mentioned at the start" text: "username @username" expected: - screen_name: "username" indices: [9, 18] - description: "Extract a mention in the middle of a Japanese tweet" text: "の@usernameに到着を待っている" expected: - screen_name: "username" indices: [1, 10] mentions_or_lists_with_indices: - description: "Extract a mention" text: "@username yo!" expected: - screen_name: "username" list_slug: "" indices: [0, 9] - description: "Extract a list" text: "@username/list-name is a great list!" expected: - screen_name: "username" list_slug: "/list-name" indices: [0, 19] - description: "Extract a mention and list" text: "Hey @username, check out out @otheruser/list_name-01!" expected: - screen_name: "username" list_slug: "" indices: [4, 13] - screen_name: "otheruser" list_slug: "/list_name-01" indices: [29, 52] - description: "Extract a list in the middle of a Japanese tweet" text: "の@username/list_name-01に到着を待っている" expected: - screen_name: "username" list_slug: "/list_name-01" indices: [1, 23] - description: "DO NOT extract a list with slug that starts with a number" text: "@username/7list-name is a great list!" 
expected: - screen_name: "username" list_slug: "" indices: [0, 9] replies: - description: "Extract reply at the begining of a tweet" text: "@username reply" expected: "username" - description: "Extract reply preceded by only a space" text: " @username reply" expected: "username" - description: "Extract reply preceded by only a full-width space (U+3000)" text: " @username reply" expected: "username" - description: "DO NOT Extract reply when preceded by text" text: "a @username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by ." text: ".@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by /" text: "/@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by _" text: "_@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by -" text: "-@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by +" text: "+@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by #" text: "#@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by !" 
text: "!@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by @" text: "@@username mention, not a reply" expected: - description: "DO NOT Extract reply when followed by URL" text: "@http://twitter.com" expected: urls: - description: "Extract a lone URL" text: "http://example.com" expected: ["http://example.com"] - description: "Extract valid URL: http://google.com" text: "text http://google.com" expected: ["http://google.com"] - description: "Extract valid URL: http://foobar.com/#" text: "text http://foobar.com/#" expected: ["http://foobar.com/#"] - description: "Extract valid URL: http://google.com/#foo" text: "text http://google.com/#foo" expected: ["http://google.com/#foo"] - description: "Extract valid URL: http://google.com/#search?q=iphone%20-filter%3Alinks" text: "text http://google.com/#search?q=iphone%20-filter%3Alinks" expected: ["http://google.com/#search?q=iphone%20-filter%3Alinks"] - description: "Extract valid URL: http://twitter.com/#search?q=iphone%20-filter%3Alinks" text: "text http://twitter.com/#search?q=iphone%20-filter%3Alinks" expected: ["http://twitter.com/#search?q=iphone%20-filter%3Alinks"] - description: "Extract valid URL: http://somedomain.com/index.php?path=/abc/def/" text: "text http://somedomain.com/index.php?path=/abc/def/" expected: ["http://somedomain.com/index.php?path=/abc/def/"] - description: "Extract valid URL: http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html" text: "text http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html" expected: ["http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"] - description: "Extract valid URL: http://somehost.com:3000" text: "text http://somehost.com:3000" expected: ["http://somehost.com:3000"] - description: "Extract valid URL: http://xo.com/~matthew+%ff-x" text: "text http://xo.com/~matthew+%ff-x" expected: ["http://xo.com/~matthew+%ff-x"] - description: "Extract valid URL: http://xo.com/~matthew+%ff-,.;x" text: "text 
http://xo.com/~matthew+%ff-,.;x" expected: ["http://xo.com/~matthew+%ff-,.;x"] - description: "Extract valid URL: http://xo.com/,.;x" text: "text http://xo.com/,.;x" expected: ["http://xo.com/,.;x"] - description: "Extract valid URL: http://en.wikipedia.org/wiki/Primer_(film)" text: "text http://en.wikipedia.org/wiki/Primer_(film)" expected: ["http://en.wikipedia.org/wiki/Primer_(film)"] - description: "Extract valid URL: http://www.ams.org/bookstore-getitem/item=mbk-59" text: "text http://www.ams.org/bookstore-getitem/item=mbk-59" expected: ["http://www.ams.org/bookstore-getitem/item=mbk-59"] - description: "Extract valid URL: http://✪df.ws/ejp" text: "text http://✪df.ws/ejp" expected: ["http://✪df.ws/ejp"] - description: "Extract valid URL: http://chilp.it/?77e8fd" text: "text http://chilp.it/?77e8fd" expected: ["http://chilp.it/?77e8fd"] - description: "Extract valid URL: http://x.com/oneletterdomain" text: "text http://x.com/oneletterdomain" expected: ["http://x.com/oneletterdomain"] - description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" expected: ["http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"] - description: "DO NOT extract invalid URL: http://domain-begin_dash_2314352345_dfasd.foo-cow_4352.com" text: "text http://domain-dash_2314352345_dfasd.foo-cow_4352.com" expected: [] - description: "DO NOT extract invalid URL: http://-begin_dash_2314352345_dfasd.foo-cow_4352.com" text: "text http://-dash_2314352345_dfasd.foo-cow_4352.com" expected: [] - description: "DO NOT extract invalid URL: http://no-tld" text: "text http://no-tld" expected: [] - description: "DO NOT extract invalid URL: http://tld-too-short.x" text: "text http://tld-too-short.x" expected: [] - description: "DO NOT extract invalid URL with invalid preceding character: (http://twitter.com" text: 
"(http://twitter.com" expected: ["http://twitter.com"] - description: "Extract a very long hyphenated sub-domain URL (single letter hyphens)" text: "text http://word-and-a-number-8-ftw.domain.com/" expected: ["http://word-and-a-number-8-ftw.domain.com/"] - description: "Extract a hyphenated TLD (usually a typo)" text: "text http://domain.com-that-you-should-have-put-a-space-after" expected: ["http://domain.com"] - description: "Extract URL ending with # value" text: "text http://foo.com?#foo text" expected: ["http://foo.com?#foo"] - description: "Extract URLs without protocol on (com|org|edu|gov|net) domains" text: "foo.com foo.net foo.org foo.edu foo.gov" expected: ["foo.com", "foo.net", "foo.org", "foo.edu", "foo.gov"] - description: "Extract URLs without protocol not on (com|org|edu|gov|net) domains" text: "foo.baz foo.co.jp www.xxxxxxx.baz www.foo.co.uk wwwww.xxxxxxx foo.comm foo.somecom foo.govedu foo.jp" expected: ["foo.co.jp", "www.foo.co.uk"] - description: "Extract URLs without protocol on ccTLD with slash" text: "t.co/abcde bit.ly/abcde" expected: ["t.co/abcde", "bit.ly/abcde"] - description: "Extract URLs with protocol on ccTLD domains" text: "http://foo.jp http://fooooo.jp" expected: ["http://foo.jp", "http://fooooo.jp"] - description: "Extract URLs with a - or + at the end of the path" text: "Go to http://example.com/a+ or http://example.com/a-" expected: ["http://example.com/a+", "http://example.com/a-"] - description: "Extract URLs with longer paths ending in -" text: "Go to http://example.com/view/slug-url-?foo=bar" expected: ["http://example.com/view/slug-url-?foo=bar"] - description: "Extract URLs beginning with a space" text: "@user Try http:// example.com/path" expected: ["example.com/path"] - description: "Extract long URL without protocol surrounded by CJK characters" text: "これは日本語です。example.com/path/index.html中国語example.com/path한국" expected: ["example.com/path/index.html", "example.com/path"] - description: "Extract short URL without protocol 
surrounded by CJK characters" text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde" expected: ["twitter.com", "example.com", "t.co/abcde", "twitter.com", "example2.com", "twitter.com/abcde"] - description: "Extract URLs with and without protocol surrounded by CJK characters" text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde" expected: ["http://twitter.com/", "example.com", "http://t.co/abcde", "twitter.com", "example2.com", "http://twitter.com/abcde"] - description: "Extract URLs with protocol and path containing Cyrillic characters" text: "Go to http://twitter.com/Русские_слова" expected: ["http://twitter.com/Русские_слова"] - description: "DO NOT extract short URLs without protocol on ccTLD domains without path" text: "twitter.jp日本語it.so中国語foo.jp it.so foo.jp" expected: [] - description: "Extract some (tv|co) short URLs without protocol on ccTLD domains without path" text: "MLB.tv vine.co twitch.tv t.co" expected: ["MLB.tv", "vine.co", "twitch.tv", "t.co"] - description: "Extract URLs beginning with a non-breaking space (U+00A0)" text: "@user Try http:// example.com/path" expected: ["example.com/path"] - description: "Extract URLs with underscores and dashes in the subdomain" text: "test http://sub_domain-dash.twitter.com" expected: ["http://sub_domain-dash.twitter.com"] - description: "Extract URL with minimum number of valid characters" text: "test http://a.b.cd" expected: ["http://a.b.cd"] - description: "Extract URLs containing underscores and dashes" text: "test http://a_b.c-d.com" expected: ["http://a_b.c-d.com"] - description: "Extract URLs containing dashes in the subdomain" text: "test http://a-b.c.com" expected: ["http://a-b.c.com"] - description: "Extract URLs with dashes in the domain name" text: "test http://twitter-dash.com" expected: ["http://twitter-dash.com"] - description: "Extract URLs with lots of symbols then a period" text: 
"http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188" expected: ["http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"] - description: "DO NOT extract URLs containing leading dashes in the subdomain" text: "test http://-leadingdash.twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing dashes in the subdomain" text: "test http://trailingdash-.twitter.com" expected: [] - description: "DO NOT extract URLs containing leading underscores in the subdomain" text: "test http://_leadingunderscore.twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing underscores in the subdomain" text: "test http://trailingunderscore_.twitter.com" expected: [] - description: "DO NOT extract URLs containing leading dashes in the domain name" text: "test http://-twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing dashes in the domain name" text: "test http://twitter-.com" expected: [] - description: "DO NOT extract URLs containing underscores in the domain name" text: "test http://twitter_underscore.com" expected: [] - description: "DO NOT extract URLs containing underscores in the tld" text: "test http://twitter.c_o_m" expected: [] - description: "Extract valid URL http://www.foo.com/foo/path-with-period./" text: "test http://www.foo.com/foo/path-with-period./" expected: ["http://www.foo.com/foo/path-with-period./"] - description: "Extract valid URL http://www.foo.org.za/foo/bar/688.1" text: "test http://www.foo.org.za/foo/bar/688.1" expected: ["http://www.foo.org.za/foo/bar/688.1"] - description: "Extract valid URL http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0" text: "test http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0" expected: ["http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"] - description: "Extract valid URL 
http://foo.com/bar/123/foo_&_bar/" text: "test http://foo.com/bar/123/foo_&_bar/" expected: ["http://foo.com/bar/123/foo_&_bar/"] - description: "Extract valid URL http://www.cp.sc.edu/events/65" text: "test http://www.cp.sc.edu/events/65 test" expected: ["http://www.cp.sc.edu/events/65"] - description: "Extract valid URL http://www.andersondaradio.no.comunidades.net/" text: "http://www.andersondaradio.no.comunidades.net/ test test" expected: ["http://www.andersondaradio.no.comunidades.net/"] - description: "Extract valid URL ELPAÍS.com" text: "test ELPAÍS.com" expected: ["ELPAÍS.com"] - description: "DO NOT include period at the end of URL" text: "test http://twitter.com/." expected: ["http://twitter.com/"] - description: "Extract a URL with '?' in fragment" text: "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata" expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"] - description: "Extract a URL with '?' in fragment in a text" text: "text http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata text" expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"] # A common cause of runaway regex engines. - description: "Extract a URL with a ton of trailing periods" text: "Test a ton of periods http://example.com/path.........................................." 
expected: ["http://example.com/path"] - description: "Extract a URL with a ton of trailing commas" text: "Test a ton of periods http://example.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,," expected: ["http://example.com/"] - description: "Extract a URL with a ton of trailing '!'" text: "Test a ton of periods http://example.com/path/!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" expected: ["http://example.com/path/"] - description: "DO NOT extract URLs in hashtag or @mention" text: "#test.com @test.com #http://test.com @http://test.com #t.co/abcde @t.co/abcde" expected: [] - description: "Extract a t.co URL with a trailing apostrophe" text: "I really like http://t.co/pbY2NfTZ's website" expected: ["http://t.co/pbY2NfTZ"] - description: "Extract a t.co URL with a trailing hyphen" text: "Check this site out http://t.co/FNkPfmii- it's great" expected: ["http://t.co/FNkPfmii"] - description: "Extract a t.co URL with a trailing colon" text: "According to http://t.co/ulYGBYSo: the internet is cool" expected: ["http://t.co/ulYGBYSo"] - description: "Extract URL before newline" text: "http://twitter.com\nhttp://example.com\nhttp://example.com/path\nexample.com/path\nit.so\nit.so/abcde" expected: ["http://twitter.com", "http://example.com", "http://example.com/path", "example.com/path", "it.so/abcde"] - description: "DO NOT extract URL if preceded by $" text: "$http://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" expected: [] - description: "DO NOT extract .bz2 file name as URL" text: "long.test.tar.bz2 test.tar.bz2 tar.bz2" expected: [] - description: "DO NOT extract URL with gTLD followed by @ sign" text: "john.doe.gov@mail.com" expected: [] - description: "DO NOT extract URL with ccTLD followed by @ sign" text: "john.doe.jp@mail.com" expected: [] urls_with_indices: - description: "Extract a URL" text: "text http://google.com" expected: - url: "http://google.com" indices: [5, 22] - description: "Extract a URL from a Japanese tweet" text: 
"皆さん見てください! http://google.com" expected: - url: "http://google.com" indices: [11, 28] - description: "Extract URLs without protocol on ccTLD with slash" text: "t.co/abcde bit.ly/abcde" expected: - url: "t.co/abcde" indices: [0, 10] - url: "bit.ly/abcde" indices: [11, 23] - description: "Extract URLs without protocol surrounded by CJK characters" text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde" expected: - url: "twitter.com" indices: [0, 11] - url: "example.com" indices: [20, 31] - url: "t.co/abcde" indices: [34, 44] - url: "twitter.com" indices: [46, 57] - url: "example2.com" indices: [58, 70] - url: "twitter.com/abcde" indices: [73, 90] - description: "Extract URLs with and without protocol surrounded by CJK characters" text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde" expected: - url: "http://twitter.com/" indices: [0, 19] - url: "example.com" indices: [28, 39] - url: "http://t.co/abcde" indices: [42, 59] - url: "twitter.com" indices: [61, 72] - url: "example2.com" indices: [75, 87] - url: "http://twitter.com/abcde" indices: [90, 114] - description: "Extract t.co URLs skipping trailing characters and adjusting indices correctly" text: "http://t.co/pbY2NfTZ's http://t.co/2vYHpAc5; http://t.co/ulYGBYSo: http://t.co/8MkmHU0k+c http://t.co/TKLp64dY.x http://t.co/8t7G3ddS#a http://t.co/FNkPfmii-" expected: - url: "http://t.co/pbY2NfTZ" indices: [0, 20] - url: "http://t.co/2vYHpAc5" indices: [23, 43] - url: "http://t.co/ulYGBYSo" indices: [45, 65] - url: "http://t.co/8MkmHU0k" indices: [67, 87] - url: "http://t.co/TKLp64dY" indices: [90, 110] - url: "http://t.co/8t7G3ddS" indices: [113, 133] - url: "http://t.co/FNkPfmii" indices: [136, 156] - description: "Extract correct indices for duplicate instances of the same URL" text: "http://t.co http://t.co" expected: - url: "http://t.co" indices: [0, 11] - url: "http://t.co" indices: [12, 23] - 
description: "Extract I18N URL" text: "test http://xn--ls8h.XN--ls8h.la/" expected: - url: "http://xn--ls8h.XN--ls8h.la/" indices: [5, 33] - description: "Extract URLs with IDN(not encoded)" text: "test http://foobar.みんな/ http://foobar.中国/ http://foobar.پاکستان/ " expected: - url: "http://foobar.みんな/" indices: [5, 23] - url: "http://foobar.中国/" indices: [24, 41] - url: "http://foobar.پاکستان/" indices: [42, 64] hashtags: - description: "Extract an all-alpha hashtag" text: "a #hashtag here" expected: ["hashtag"] - description: "Extract a letter-then-number hashtag" text: "this is #hashtag1" expected: ["hashtag1"] - description: "Extract a number-then-letter hashtag" text: "#1hashtag is this" expected: ["1hashtag"] - description: "DO NOT Extract an all-numeric hashtag" text: "On the #16 bus" expected: [] - description: "DO NOT Extract a single numeric hashtag" text: "#0" expected: [] - description: "Extract hashtag after bracket" text: "(#hashtag1 )#hashtag2 [#hashtag3 ]#hashtag4 ’#hashtag5’#hashtag6" expected: ["hashtag1", "hashtag2", "hashtag3", "hashtag4", "hashtag5", "hashtag6"] - description: "Extract a hashtag containing ñ" text: "I'll write more tests #mañana" expected: ["mañana"] - description: "Extract a hashtag containing é" text: "Working remotely #café" expected: ["café"] - description: "Extract a hashtag containing ü" text: "Getting my Oktoberfest on #münchen" expected: ["münchen"] - description: "DO NOT Extract a hashtag containing Japanese" text: "this is not valid: # 会議中 ハッシュ" expected: [] - description: "Extract a hashtag in Korean" text: "What is #트위터 anyway?" expected: ["트위터"] - description: "Extract a half-width Hangul hashtag" text: "Just random half-width Hangul #ᆪᆭᄚ" expected: ["ᆪᆭᄚ"] - description: "Extract a hashtag in Russian" text: "What is #ашок anyway?" 
expected: ["ашок"] - description: "Extract a starting katakana hashtag" text: "#カタカナ is a hashtag" expected: ["カタカナ"] - description: "Extract a starting hiragana hashtag" text: "#ひらがな FTW!" expected: ["ひらがな"] - description: "Extract a starting kanji hashtag" text: "#漢字 is the future" expected: ["漢字"] - description: "Extract a trailing katakana hashtag" text: "Hashtag #カタカナ" expected: ["カタカナ"] - description: "Extract a trailing hiragana hashtag" text: "Japanese hashtags #ひらがな" expected: ["ひらがな"] - description: "Extract a trailing kanji hashtag" text: "Study time #漢字" expected: ["漢字"] - description: "Extract a central katakana hashtag" text: "See my #カタカナ hashtag?" expected: ["カタカナ"] - description: "Extract a central hiragana hashtag" text: "Study #ひらがな for fun and profit" expected: ["ひらがな"] - description: "Extract a central kanji hashtag" text: "Some say #漢字 is the past. what do they know?" expected: ["漢字"] - description: "Extract a Kanji/Katakana mixed hashtag" text: "日本語ハッシュタグテスト #日本語ハッシュタグ" expected: ["日本語ハッシュタグ"] - description: "Extract a hashtag after a punctuation" text: "日本語ハッシュテスト。#日本語ハッシュタグ" expected: ["日本語ハッシュタグ"] - description: "DO NOT include a punctuation in a hashtag" text: "#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Extract a full-width Alnum hashtag" text: "全角英数字ハッシュタグ #hashtag123" expected: ["hashtag123"] - description: "DO NOT extract a hashtag without a preceding space" text: "日本語ハッシュタグ#日本語ハッシュタグ" expected: [] - description: "Hashtag with chouon" text: "長音ハッシュタグ。#サッカー" expected: ["サッカー"] - description: "Hashtag with half-width chouon" text: "長音ハッシュタグ。#サッカー" expected: ["サッカー"] - description: "Hashtag with half-widh voiced sounds marks" text: "#ハッシュタグ #パピプペポ" expected: ["ハッシュタグ", "パピプペポ"] - description: "Hashtag with half-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Hashtag with full-width # after full-width !" 
text: "できましたよー!#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Hashtag with ideographic iteration mark" text: "#云々 #学問のすゝめ #いすゞ #各〻 #各〃" expected: ["云々", "学問のすゝめ", "いすゞ", "各〻", "各〃"] - description: "Extract hashtag with fullwidth tilde" text: "#メ~テレ ハッシュタグ内で~が認識されず" expected: ["メ~テレ"] - description: "Extract hashtag with wave dash" text: "#メ〜テレ ハッシュタグ内で~が認識されず" expected: ["メ〜テレ"] - description: "Hashtags with ş (U+015F)" text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş" expected: ["Ateş", "qrşt", "ştu", "ş"] - description: "Hashtags with İ (U+0130) and ı (U+0131)" text: "Here’s a test tweet for you: #İn #ın" expected: ["İn", "ın"] - description: "Hashtag before punctuations" text: "#hashtag: #hashtag; #hashtag, #hashtag. #hashtag! #hashtag?" expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"] - description: "Hashtag after punctuations" text: ":#hashtag ;#hashtag ,#hashtag .#hashtag !#hashtag ?#hashtag" expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"] - description: "Hashtag before newline" text: "#hashtag\ntest\n#hashtag2\ntest\n#hashtag3\n" expected: ["hashtag", "hashtag2", "hashtag3"] - description: "DO NOT extract hashtag when # is followed by URL" text: "#http://twitter.com #https://twitter.com" expected: [] - description: "DO NOT extract hashtag if it's a part of URL" text: "http://twitter.com/#hashtag twitter.com/#hashtag" expected: [] - description: "Extract hashtags with Latin extended characters" text: "#Azərbaycanca #mûǁae #Čeština #Ċaoiṁín" expected: ["Azərbaycanca", "mûǁae", "Čeština", "Ċaoiṁín"] - description: "Extract Arabic hashtags" text: "#سیاست #ایران #السياسة #السياح #لغات #اتمی #کنفرانس #العربية #الجزيرة #فارسی" expected: ["سیاست", "ایران", "السياسة", "السياح", "لغات", "اتمی", "کنفرانس", "العربية", "الجزيرة", "فارسی"] - description: "Extract Arabic hashtags with underscore" text: "#برنامه_نویسی #رییس_جمهور #رئيس_الوزراء, #ثبت_نام. 
#لس_آنجلس" expected: ["برنامه_نویسی", "رییس_جمهور", "رئيس_الوزراء", "ثبت_نام", "لس_آنجلس"] - description: "Extract Hebrew hashtags" text: "#עַל־יְדֵי #וכו׳ #מ״כ" expected: ["עַל־יְדֵי", "וכו׳", "מ״כ"] - description: "Extract Thai hashtags" text: "#ผู้เริ่ม #การเมือง #รายละเอียด #นักท่องเที่ยว #ของขวัญ #สนามบิน #เดินทาง #ประธาน" expected: ["ผู้เริ่ม", "การเมือง", "รายละเอียด", "นักท่องเที่ยว", "ของขวัญ", "สนามบิน", "เดินทาง", "ประธาน"] - description: "Extract Arabic hashtags with Zero-Width Non-Joiner" text: "#أي‌بي‌إم #می‌خواهم" expected: ["أي‌بي‌إم", "می‌خواهم"] - description: "Extract Amharic hashtag" text: "የአላህ መልእክተኛ ሰለላሁ ዓለይሂ ወሰለም #ኢትዮሙስሊምስ" expected: ["ኢትዮሙስሊምስ"] - description: "Extract Sinhala hashtag with Zero-Width Joiner (U+200D)" text: "#ශ්‍රීලංකා" expected: ["ශ්‍රීලංකා"] - description: "Extract Arabic and Persian hashtags with numbers" text: "#۳۴۵هشتگ #هشتگ۶۷۸ #ســـلام_عليكم_٤٠٦" expected: ["۳۴۵هشتگ","هشتگ۶۷۸","ســـلام_عليكم_٤٠٦"] - description: "Extract Hindi hashtags" text: "#महात्मा #महात्मा_१२३४ #१२३४ गांधी" expected: ["महात्मा","महात्मा_१२३४"] - description: "Extract Indic script hashtags" text: "#বাংলা #ગુજરાતી #ಕನ್ನಡ #മലയാളം #ଓଡ଼ିଆ #ਪੰਜਾਬੀ #සිංහල #தமிழ் #తెలుగు" expected: ["বাংলা","ગુજરાતી","ಕನ್ನಡ","മലയാളം","ଓଡ଼ିଆ","ਪੰਜਾਬੀ","සිංහල","தமிழ்","తెలుగు"] - description: "Extract Tibetan hashtags" text: "#བོད་སྐད་ #བོད་སྐད" expected: ["བོད་སྐད་","བོད་སྐད"] - description: "Extract Khmer, Burmese, Laotian hashtags" text: "#មហាត្មះគន្ធី #မြင့်မြတ်သော #ຊີວະສາດ" expected: ["មហាត្មះគន្ធី","မြင့်မြတ်သော","ຊີວະສາດ"] - description: "Extract Greek hashtag" text: "#Μαχάτμα_Γκάντι ήταν Ινδός πολιτικός" expected: ["Μαχάτμα_Γκάντι"] - description: "Extract Armenian and Georgian hashtags" text: "#Մահաթմա #მაჰათმა" expected: ["Մահաթմա","მაჰათმა"] - description: "Extract hashtag with middle dot" text: "#il·lusió" expected: ["il·lusió"] - description: "DO NOT extract hashtags without a letter" text: "#_ #1_2 #122 #〃" expected: [] hashtags_with_indices: - description: 
"Extract a hastag at the start" text: "#hashtag here" expected: - hashtag: "hashtag" indices: [0, 8] - description: "Extract a hastag at the end" text: "test a #hashtag" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract a hastag in the middle" text: "test a #hashtag in a string" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract only a valid hashtag" text: "#123 a #hashtag in a string" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract a hashtag in a string of multi-byte characters" text: "会議中 #hashtag 会議中" expected: - hashtag: "hashtag" indices: [4, 12] - description: "Extract multiple valid hashtags" text: "One #two three #four" expected: - hashtag: "two" indices: [4, 8] - hashtag: "four" indices: [15, 20] - description: "Extract a non-latin hashtag" text: "Hashtags in #русский!" expected: - hashtag: "русский" indices: [12, 20] - description: "Extract multiple non-latin hashtags" text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" expected: - hashtag: "中文" indices: [12, 15] - hashtag: "日本語" indices: [17, 21] - hashtag: "한국말" indices: [23, 27] - hashtag: "русский" indices: [33, 41] cashtags: - description: "Extract cashtags" text: "Example cashtags: $TEST $Stock $symbol" expected: ["TEST", "Stock", "symbol"] - description: "Extract cashtags with . or _" text: "Example cashtags: $TEST.T $test.tt $Stock_X $symbol_ab" expected: ["TEST.T", "test.tt", "Stock_X", "symbol_ab"] - description: "Do not extract cashtags if they contain numbers" text: "$123 $test123 $TE123ST" expected: [] - description: "Do not extract cashtags with non-ASCII characters" text: "$ストック $株" expected: [] - description: "Do not extract cashtags with punctuations" text: "$ $. $- $@ $! $() $+" expected: [] - description: "Do not include trailing . or _" text: "$TEST. 
$TEST_" expected: ["TEST", "TEST"] - description: "Do not extract cashtags if there is no space before $" text: "$OK$NG$BAD text$NO .$NG $$NG" expected: ["OK"] - description: "Do not extract too long cashtags" text: "$CashtagMustBeLessThanSixCharacter" expected: [] cashtags_with_indices: - description: "Extract cashtags" text: "Example: $TEST $symbol test" expected: - cashtag: "TEST" indices: [9, 14] - cashtag: "symbol" indices: [15, 22] - description: "Extract cashtags with . or _" text: "Example: $TEST.T test $symbol_ab end" expected: - cashtag: "TEST.T" indices: [9, 16] - cashtag: "symbol_ab" indices: [22, 32] twitter-text-1.13.4/test/twitter-text-conformance/Rakefile0000644000175000017500000000376612670063203024101 0ustar sudheeshsudheeshrequire 'open-uri' require 'nokogiri' require 'yaml' namespace :tlds do desc 'Grab tlds from iana and save to tld_lib.yml' task :iana_update do doc = Nokogiri::HTML(open('http://www.iana.org/domains/root/db')) tlds = [] types = { 'country' => /country-code/, 'generic' => /generic|sponsored|infrastructure|generic-restricted/, } doc.css('table#tld-table tr').each do |tr| info = tr.css('td') next if info.empty? 
tlds << { domain: info[0].text.gsub('.', ''), type: info[1].text } end def select_tld(tlds, type) tlds.select {|i| i[:type] =~ type}.map {|i| i[:domain]}.sort end yml = {} types.each do |name, regex| yml[name] = select_tld(tlds, regex) end yml["generic"] << "onion" File.open(repo_path('tld_lib.yml'), 'w') do |file| file.write(yml.to_yaml) end File.open(repo_path("TldLists.java"), 'w') do |file| file.write(<<-EOF // Auto-generated by conformance/Rakefile package com.twitter; import java.util.Arrays; import java.util.List; public class TldLists { public static final List GTLDS = Arrays.asList( #{yml["generic"].sort.map {|el| " \"#{el}\""}.join(",\n")} ); public static final List CTLDS = Arrays.asList( #{yml["country"].sort.map {|el| " \"#{el}\""}.join(",\n")} ); } EOF ) end end desc 'Update tests from tld_lib.yml' task :generate_tests do test_yml = { 'tests' => { } } path = repo_path('tld_lib.yml') yml = YAML.load_file(path) yml.each do |type, tlds| test_yml['tests'][type] = [] tlds.each do |tld| test_yml['tests'][type].push( 'description' => "#{tld} is a valid #{type} tld", 'text' => "https://twitter.#{tld}", 'expected' => ["https://twitter.#{tld}"], ) end end File.open('tlds.yml', 'w') do |file| file.write(test_yml.to_yaml) end end end def repo_path(*path) File.join(File.dirname(__FILE__), *path) end twitter-text-1.13.4/test/twitter-text-conformance/Gemfile.lock0000644000175000017500000000024512670063203024643 0ustar sudheeshsudheeshGEM remote: https://rubygems.org/ specs: mini_portile (0.6.0) nokogiri (1.6.3.1) mini_portile (= 0.6.0) PLATFORMS ruby DEPENDENCIES nokogiri twitter-text-1.13.4/test/twitter-text-conformance/TldLists.java0000644000175000017500000003335212670063203025033 0ustar sudheeshsudheesh// Auto-generated by conformance/Rakefile package com.twitter; import java.util.Arrays; import java.util.List; public class TldLists { public static final List GTLDS = Arrays.asList( "abb", "abbott", "abogado", "academy", "accenture", "accountant", "accountants", 
"aco", "active", "actor", "ads", "adult", "aeg", "aero", "afl", "agency", "aig", "airforce", "airtel", "allfinanz", "alsace", "amsterdam", "android", "apartments", "app", "aquarelle", "archi", "army", "arpa", "asia", "associates", "attorney", "auction", "audio", "auto", "autos", "axa", "azure", "band", "bank", "bar", "barcelona", "barclaycard", "barclays", "bargains", "bauhaus", "bayern", "bbc", "bbva", "bcn", "beer", "bentley", "berlin", "best", "bet", "bharti", "bible", "bid", "bike", "bing", "bingo", "bio", "biz", "black", "blackfriday", "bloomberg", "blue", "bmw", "bnl", "bnpparibas", "boats", "bond", "boo", "boots", "boutique", "bradesco", "bridgestone", "broker", "brother", "brussels", "budapest", "build", "builders", "business", "buzz", "bzh", "cab", "cafe", "cal", "camera", "camp", "cancerresearch", "canon", "capetown", "capital", "caravan", "cards", "care", "career", "careers", "cars", "cartier", "casa", "cash", "casino", "cat", "catering", "cba", "cbn", "ceb", "center", "ceo", "cern", "cfa", "cfd", "chanel", "channel", "chat", "cheap", "chloe", "christmas", "chrome", "church", "cisco", "citic", "city", "claims", "cleaning", "click", "clinic", "clothing", "cloud", "club", "coach", "codes", "coffee", "college", "cologne", "com", "commbank", "community", "company", "computer", "condos", "construction", "consulting", "contractors", "cooking", "cool", "coop", "corsica", "country", "coupons", "courses", "credit", "creditcard", "cricket", "crown", "crs", "cruises", "cuisinella", "cymru", "cyou", "dabur", "dad", "dance", "date", "dating", "datsun", "day", "dclk", "deals", "degree", "delivery", "delta", "democrat", "dental", "dentist", "desi", "design", "dev", "diamonds", "diet", "digital", "direct", "directory", "discount", "dnp", "docs", "dog", "doha", "domains", "doosan", "download", "drive", "durban", "dvag", "earth", "eat", "edu", "education", "email", "emerck", "energy", "engineer", "engineering", "enterprises", "epson", "equipment", "erni", "esq", "estate", 
"eurovision", "eus", "events", "everbank", "exchange", "expert", "exposed", "express", "fage", "fail", "faith", "family", "fan", "fans", "farm", "fashion", "feedback", "film", "finance", "financial", "firmdale", "fish", "fishing", "fit", "fitness", "flights", "florist", "flowers", "flsmidth", "fly", "foo", "football", "forex", "forsale", "forum", "foundation", "frl", "frogans", "fund", "furniture", "futbol", "fyi", "gal", "gallery", "game", "garden", "gbiz", "gdn", "gent", "genting", "ggee", "gift", "gifts", "gives", "giving", "glass", "gle", "global", "globo", "gmail", "gmo", "gmx", "gold", "goldpoint", "golf", "goo", "goog", "google", "gop", "gov", "graphics", "gratis", "green", "gripe", "group", "guge", "guide", "guitars", "guru", "hamburg", "hangout", "haus", "healthcare", "help", "here", "hermes", "hiphop", "hitachi", "hiv", "hockey", "holdings", "holiday", "homedepot", "homes", "honda", "horse", "host", "hosting", "hoteles", "hotmail", "house", "how", "hsbc", "ibm", "icbc", "ice", "icu", "ifm", "iinet", "immo", "immobilien", "industries", "infiniti", "info", "ing", "ink", "institute", "insure", "int", "international", "investments", "ipiranga", "irish", "ist", "istanbul", "itau", "iwc", "java", "jcb", "jetzt", "jewelry", "jlc", "jll", "jobs", "joburg", "jprs", "juegos", "kaufen", "kddi", "kim", "kitchen", "kiwi", "koeln", "komatsu", "krd", "kred", "kyoto", "lacaixa", "lancaster", "land", "lasalle", "lat", "latrobe", "law", "lawyer", "lds", "lease", "leclerc", "legal", "lexus", "lgbt", "liaison", "lidl", "life", "lighting", "limited", "limo", "link", "live", "lixil", "loan", "loans", "lol", "london", "lotte", "lotto", "love", "ltda", "lupin", "luxe", "luxury", "madrid", "maif", "maison", "man", "management", "mango", "market", "marketing", "markets", "marriott", "mba", "media", "meet", "melbourne", "meme", "memorial", "men", "menu", "miami", "microsoft", "mil", "mini", "mma", "mobi", "moda", "moe", "mom", "monash", "money", "montblanc", "mormon", "mortgage", 
"moscow", "motorcycles", "mov", "movie", "movistar", "mtn", "mtpc", "museum", "nadex", "nagoya", "name", "navy", "nec", "net", "netbank", "network", "neustar", "new", "news", "nexus", "ngo", "nhk", "nico", "ninja", "nissan", "nokia", "nra", "nrw", "ntt", "nyc", "office", "okinawa", "omega", "one", "ong", "onion", "onl", "online", "ooo", "oracle", "orange", "org", "organic", "osaka", "otsuka", "ovh", "page", "panerai", "paris", "partners", "parts", "party", "pet", "pharmacy", "philips", "photo", "photography", "photos", "physio", "piaget", "pics", "pictet", "pictures", "pink", "pizza", "place", "play", "plumbing", "plus", "pohl", "poker", "porn", "post", "praxi", "press", "pro", "prod", "productions", "prof", "properties", "property", "pub", "qpon", "quebec", "racing", "realtor", "realty", "recipes", "red", "redstone", "rehab", "reise", "reisen", "reit", "ren", "rent", "rentals", "repair", "report", "republican", "rest", "restaurant", "review", "reviews", "rich", "ricoh", "rio", "rip", "rocks", "rodeo", "rsvp", "ruhr", "run", "ryukyu", "saarland", "sakura", "sale", "samsung", "sandvik", "sandvikcoromant", "sanofi", "sap", "sarl", "saxo", "sca", "scb", "schmidt", "scholarships", "school", "schule", "schwarz", "science", "scor", "scot", "seat", "seek", "sener", "services", "sew", "sex", "sexy", "shiksha", "shoes", "show", "shriram", "singles", "site", "ski", "sky", "skype", "sncf", "soccer", "social", "software", "sohu", "solar", "solutions", "sony", "soy", "space", "spiegel", "spreadbetting", "srl", "starhub", "statoil", "studio", "study", "style", "sucks", "supplies", "supply", "support", "surf", "surgery", "suzuki", "swatch", "swiss", "sydney", "systems", "taipei", "tatamotors", "tatar", "tattoo", "tax", "taxi", "team", "tech", "technology", "tel", "telefonica", "temasek", "tennis", "thd", "theater", "tickets", "tienda", "tips", "tires", "tirol", "today", "tokyo", "tools", "top", "toray", "toshiba", "tours", "town", "toyota", "toys", "trade", "trading", "training", 
"travel", "trust", "tui", "ubs", "university", "uno", "uol", "vacations", "vegas", "ventures", "vermögensberater", "vermögensberatung", "versicherung", "vet", "viajes", "video", "villas", "vin", "vision", "vista", "vistaprint", "vlaanderen", "vodka", "vote", "voting", "voto", "voyage", "wales", "walter", "wang", "watch", "webcam", "website", "wed", "wedding", "weir", "whoswho", "wien", "wiki", "williamhill", "win", "windows", "wine", "wme", "work", "works", "world", "wtc", "wtf", "xbox", "xerox", "xin", "xperia", "xxx", "xyz", "yachts", "yandex", "yodobashi", "yoga", "yokohama", "youtube", "zip", "zone", "zuerich", "дети", "ком", "москва", "онлайн", "орг", "рус", "сайт", "קום", "بازار", "شبكة", "كوم", "موقع", "कॉम", "नेट", "संगठन", "คอม", "みんな", "グーグル", "コム", "世界", "中信", "中文网", "企业", "佛山", "信息", "健康", "八卦", "公司", "公益", "商城", "商店", "商标", "在线", "大拿", "娱乐", "工行", "广东", "慈善", "我爱你", "手机", "政务", "政府", "新闻", "时尚", "机构", "淡马锡", "游戏", "点看", "移动", "组织机构", "网址", "网店", "网络", "谷歌", "集团", "飞利浦", "餐厅", "닷넷", "닷컴", "삼성" ); public static final List CTLDS = Arrays.asList( "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bl", "bm", "bn", "bo", "bq", "br", "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mf", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", 
"ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "um", "us", "uy", "uz", "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw", "ελ", "бел", "мкд", "мон", "рф", "срб", "укр", "қаз", "հայ", "الاردن", "الجزائر", "السعودية", "المغرب", "امارات", "ایران", "بھارت", "تونس", "سودان", "سورية", "عراق", "عمان", "فلسطين", "قطر", "مصر", "مليسيا", "پاکستان", "भारत", "বাংলা", "ভারত", "ਭਾਰਤ", "ભારત", "இந்தியா", "இலங்கை", "சிங்கப்பூர்", "భారత్", "ලංකා", "ไทย", "გე", "中国", "中國", "台湾", "台灣", "新加坡", "澳門", "香港", "한국" ); } twitter-text-1.13.4/test/conformance_test.rb0000644000175000017500000001527612667350232021344 0ustar sudheeshsudheeshrequire 'multi_json' require 'nokogiri' require 'test/unit' require 'yaml' # Detect Ruby 1.8 and older to apply necessary encoding fixes major, minor, patch = RUBY_VERSION.split('.') OLD_RUBY = major.to_i == 1 && minor.to_i < 9 if OLD_RUBY $KCODE='u' end $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') require 'twitter-text' class ConformanceTest < Test::Unit::TestCase include Twitter::Extractor include Twitter::Autolink include Twitter::HitHighlighter include Twitter::Validation private %w(description expected json hits).each do |key| define_method key.to_sym do @test_info[key] end end if OLD_RUBY def text @test_info['text'].gsub(/\\u([0-9a-f]{8})/i) do [$1.to_i(16)].pack('U*') end end else def text @test_info['text'] end end def assert_equal_without_attribute_order(expected, actual, failure_message = nil) assert_block(build_message(failure_message, " expected but was\n", 
expected, actual)) do equal_nodes?(Nokogiri::HTML(expected).root, Nokogiri::HTML(actual).root) end end def equal_nodes?(expected, actual) return false unless expected.name == actual.name return false unless ordered_attributes(expected) == ordered_attributes(actual) return false if expected.text? && actual.text? && expected.content != actual.content expected.children.each_with_index do |child, index| return false unless equal_nodes?(child, actual.children[index]) end true end def ordered_attributes(element) element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort end CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../../../conformance", __FILE__) def self.def_conformance_test(file, test_type, &block) yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file)) raise "No such test suite: #{test_type.to_s}" unless yaml["tests"][test_type.to_s] file_name = file.split('.').first yaml["tests"][test_type.to_s].each do |test_info| name = :"test_#{file_name}_#{test_type} #{test_info['description']}" define_method name do @test_info = test_info instance_eval(&block) end end end public # Extractor Conformance def_conformance_test("extract.yml", :replies) do assert_equal expected, extract_reply_screen_name(text), description end def_conformance_test("extract.yml", :mentions) do assert_equal expected, extract_mentioned_screen_names(text), description end def_conformance_test("extract.yml", :mentions_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_mentioned_screen_names_with_indices(text), description end def_conformance_test("extract.yml", :mentions_or_lists_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_mentions_or_lists_with_indices(text), description end def_conformance_test("extract.yml", :urls) do assert_equal expected, extract_urls(text), description expected.each do |expected_url| assert_equal true, valid_url?(expected_url, 
true, false), "expected url [#{expected_url}] not valid" end end def_conformance_test("tlds.yml", :generic) do assert_equal expected, extract_urls(text), description end def_conformance_test("tlds.yml", :country) do assert_equal expected, extract_urls(text), description end def_conformance_test("extract.yml", :urls_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_urls_with_indices(text), description end def_conformance_test("extract.yml", :hashtags) do assert_equal expected, extract_hashtags(text), description end def_conformance_test("extract.yml", :hashtags_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_hashtags_with_indices(text), description end def_conformance_test("extract.yml", :cashtags) do assert_equal expected, extract_cashtags(text), description end def_conformance_test("extract.yml", :cashtags_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_cashtags_with_indices(text), description end # Autolink Conformance def_conformance_test("autolink.yml", :usernames) do assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description end def_conformance_test("autolink.yml", :lists) do assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description end def_conformance_test("autolink.yml", :urls) do assert_equal_without_attribute_order expected, auto_link_urls(text, :suppress_no_follow => true), description end def_conformance_test("autolink.yml", :hashtags) do assert_equal_without_attribute_order expected, auto_link_hashtags(text, :suppress_no_follow => true), description end def_conformance_test("autolink.yml", :cashtags) do assert_equal_without_attribute_order expected, auto_link_cashtags(text, :suppress_no_follow => true), description end 
def_conformance_test("autolink.yml", :all) do assert_equal_without_attribute_order expected, auto_link(text, :suppress_no_follow => true), description end def_conformance_test("autolink.yml", :json) do assert_equal_without_attribute_order expected, auto_link_with_json(text, MultiJson.load(json), :suppress_no_follow => true), description end # HitHighlighter Conformance def_conformance_test("hit_highlighting.yml", :plain_text) do assert_equal expected, hit_highlight(text, hits), description end def_conformance_test("hit_highlighting.yml", :with_links) do assert_equal expected, hit_highlight(text, hits), description end # Validation Conformance def_conformance_test("validate.yml", :tweets) do assert_equal expected, valid_tweet_text?(text), description end def_conformance_test("validate.yml", :usernames) do assert_equal expected, valid_username?(text), description end def_conformance_test("validate.yml", :lists) do assert_equal expected, valid_list?(text), description end def_conformance_test("validate.yml", :urls) do assert_equal expected, valid_url?(text), description end def_conformance_test("validate.yml", :urls_without_protocol) do assert_equal expected, valid_url?(text, true, false), description end def_conformance_test("validate.yml", :hashtags) do assert_equal expected, valid_hashtag?(text), description end def_conformance_test("validate.yml", :lengths) do assert_equal expected, tweet_length(text), description end end twitter-text-1.13.4/script/0000755000175000017500000000000012667350232016000 5ustar sudheeshsudheeshtwitter-text-1.13.4/script/generate0000755000175000017500000000056212667350232017523 0ustar sudheeshsudheesh#!/usr/bin/env ruby APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) begin require 'rubigen' rescue LoadError require 'rubygems' require 'rubigen' end require 'rubigen/scripts/generate' ARGV.shift if ['--help', '-h'].include?(ARGV[0]) RubiGen::Base.use_component_sources! 
[:newgem_simple, :test_unit] RubiGen::Scripts::Generate.new.run(ARGV) twitter-text-1.13.4/script/destroy0000755000175000017500000000056012667350232017420 0ustar sudheeshsudheesh#!/usr/bin/env ruby APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) begin require 'rubigen' rescue LoadError require 'rubygems' require 'rubigen' end require 'rubigen/scripts/destroy' ARGV.shift if ['--help', '-h'].include?(ARGV[0]) RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit] RubiGen::Scripts::Destroy.new.run(ARGV) twitter-text-1.13.4/Rakefile0000644000175000017500000000135312667350232016143 0ustar sudheeshsudheeshrequire 'bundler' include Rake::DSL Bundler::GemHelper.install_tasks task :default => ['spec', 'test:conformance'] task :test => :spec require 'rubygems' require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) namespace :test do namespace :conformance do desc "Run conformance test suite" task :run do ruby '-rubygems', "test/conformance_test.rb" end end desc "Run conformance test suite" task :conformance => ['conformance:run'] do end end require 'rdoc/task' namespace :doc do RDoc::Task.new do |rd| rd.main = "README.rdoc" rd.rdoc_dir = 'doc' rd.rdoc_files.include("README.rdoc", "lib/**/*.rb") end end desc "Run cruise control build" task :cruise => [:spec, 'test:conformance'] do end twitter-text-1.13.4/twitter-text.gemspec0000644000175000017500000000251012667350232020523 0ustar sudheeshsudheesh# encoding: utf-8 Gem::Specification.new do |s| s.name = "twitter-text" s.version = "1.13.4" s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian", "J.P. 
Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"] s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"] s.homepage = "http://twitter.com" s.description = s.summary = "A gem that provides text handling for Twitter" s.license = "Apache 2.0" s.platform = Gem::Platform::RUBY s.has_rdoc = true s.summary = "Twitter text handling library" s.add_development_dependency "multi_json", "~> 1.3" s.add_development_dependency "nokogiri", "~> 1.5.10" s.add_development_dependency "rake" s.add_development_dependency "rdoc" s.add_development_dependency "rspec", "~> 2.14.0" s.add_development_dependency "simplecov", "~> 0.8.0" s.add_runtime_dependency "unf", "~> 0.1.0" s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml'] s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } s.require_paths = ["lib"] end twitter-text-1.13.4/.gitmodules0000644000175000017500000000021712667350232016651 0ustar sudheeshsudheesh[submodule "test/twitter-text-conformance"] path = test/twitter-text-conformance url = git://github.com/twitter/twitter-text-conformance.git twitter-text-1.13.4/.gitignore0000644000175000017500000000056312667350232016470 0ustar sudheeshsudheesh*.gem *.rbc *.sw[a-p] *.tmproj *.tmproject *.un~ *~ .DS_Store .Spotlight-V100 .Trashes ._* .bundle .config .directory .elc .emacs.desktop .emacs.desktop.lock .redcar .yardoc Desktop.ini Gemfile.lock Icon? InstalledFiles Session.vim Thumbs.db \#*\# _yardoc auto-save-list coverage doc lib/bundler/man pkg pkg/* rdoc spec/reports test/tmp test/version_tmp tmp tmtags tramp twitter-text-1.13.4/.gemtest0000644000175000017500000000000012667350232016133 0ustar sudheeshsudheesh