twitter-text-1.14.7/000755 000041 000041 00000000000 13154170026 015577 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/Rakefile000644 000041 000041 00000001353 13154170026 017246 0ustar00www-datawww-data000000 000000 require 'bundler' include Rake::DSL Bundler::GemHelper.install_tasks task :default => ['spec', 'test:conformance'] task :test => :spec require 'rubygems' require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) namespace :test do namespace :conformance do desc "Run conformance test suite" task :run do ruby '-rubygems', "test/conformance_test.rb" end end desc "Run conformance test suite" task :conformance => ['conformance:run'] do end end require 'rdoc/task' namespace :doc do RDoc::Task.new do |rd| rd.main = "README.rdoc" rd.rdoc_dir = 'doc' rd.rdoc_files.include("README.rdoc", "lib/**/*.rb") end end desc "Run cruise control build" task :cruise => [:spec, 'test:conformance'] do end twitter-text-1.14.7/.gemtest000644 000041 000041 00000000000 13154170026 017236 0ustar00www-datawww-data000000 000000 twitter-text-1.14.7/Gemfile000644 000041 000041 00000000137 13154170026 017073 0ustar00www-datawww-data000000 000000 source "http://rubygems.org" # Specify the gem's dependencies in twitter-text.gemspec gemspec twitter-text-1.14.7/script/000755 000041 000041 00000000000 13154170026 017103 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/script/destroy000755 000041 000041 00000000560 13154170026 020523 0ustar00www-datawww-data000000 000000 #!/usr/bin/env ruby APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) begin require 'rubigen' rescue LoadError require 'rubygems' require 'rubigen' end require 'rubigen/scripts/destroy' ARGV.shift if ['--help', '-h'].include?(ARGV[0]) RubiGen::Base.use_component_sources! 
[:newgem_simple, :test_unit] RubiGen::Scripts::Destroy.new.run(ARGV) twitter-text-1.14.7/script/generate000755 000041 000041 00000000562 13154170026 020626 0ustar00www-datawww-data000000 000000 #!/usr/bin/env ruby APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) begin require 'rubigen' rescue LoadError require 'rubygems' require 'rubigen' end require 'rubigen/scripts/generate' ARGV.shift if ['--help', '-h'].include?(ARGV[0]) RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit] RubiGen::Scripts::Generate.new.run(ARGV) twitter-text-1.14.7/.rspec000644 000041 000041 00000000030 13154170026 016705 0ustar00www-datawww-data000000 000000 --color --format=nested twitter-text-1.14.7/spec/000755 000041 000041 00000000000 13154170026 016531 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/spec/validation_spec.rb000644 000041 000041 00000003022 13154170026 022217 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestValidation include Twitter::Validation end describe Twitter::Validation do it "should disallow invalid BOM character" do TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters end it "should disallow invalid U+FFFF character" do TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters end it "should disallow direction change characters" do [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char| TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters end end it "should disallow non-Unicode" do TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters end it "should allow <= 140 combined accent characters" do char = [0x65, 0x0301].pack('U') TestValidation.new.tweet_invalid?(char * 139).should == false 
TestValidation.new.tweet_invalid?(char * 140).should == false TestValidation.new.tweet_invalid?(char * 141).should == :too_long end it "should allow <= 140 multi-byte characters" do char = [ 0x1d106 ].pack('U') TestValidation.new.tweet_invalid?(char * 139).should == false TestValidation.new.tweet_invalid?(char * 140).should == false TestValidation.new.tweet_invalid?(char * 141).should == :too_long end end twitter-text-1.14.7/spec/unicode_spec.rb000644 000041 000041 00000001665 13154170026 021526 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' describe Twitter::Unicode do it "should lazy-init constants" do Twitter::Unicode.const_defined?(:UFEB6).should == false Twitter::Unicode::UFEB6.should_not be_nil Twitter::Unicode::UFEB6.should be_kind_of(String) Twitter::Unicode.const_defined?(:UFEB6).should == true end it "should return corresponding character" do Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U') end it "should allow lowercase notation" do Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6 Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6 end it "should allow underscore notation" do Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6 Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6 end it "should raise on invalid codepoints" do lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError) end end twitter-text-1.14.7/spec/test_urls.rb000644 000041 000041 00000005265 13154170026 021112 0ustar00www-datawww-data000000 000000 # encoding: utf-8 module TestUrls VALID = [ "http://google.com", "http://foobar.com/#", "http://google.com/#foo", "http://google.com/#search?q=iphone%20-filter%3Alinks", "http://twitter.com/#search?q=iphone%20-filter%3Alinks", "http://somedomain.com/index.php?path=/abc/def/", "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html", "http://somehost.com:3000", "http://xo.com/~matthew+%-x", "http://en.wikipedia.org/wiki/Primer_(film)", 
"http://www.ams.org/bookstore-getitem/item=mbk-59", "http://chilp.it/?77e8fd", "http://tell.me/why", "http://longtlds.info", "http://✪df.ws/ejp", "http://日本.com", "http://search.twitter.com/search?q=avro&lang=en", "http://mrs.domain-dash.biz", "http://x.com/has/one/char/domain", "http://t.co/nwcLTFF", "http://sub_domain-dash.twitter.com", "http://a.b.cd", "http://a_b.c-d.com", "http://a-b.b.com", "http://twitter-dash.com", "http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx", "www.foobar.com", "WWW.FOOBAR.COM", "www.foobar.co.jp", "http://t.co", "t.co/nwcLTFF", "http://foobar.みんな", "http://foobar.中国", "http://foobar.پاکستان", "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-" ] unless defined?(TestUrls::VALID) INVALID = [ "http://no-tld", "http://tld-too-short.x", "http://-doman_dash.com", "http://_leadingunderscore.twitter.com", "http://trailingunderscore_.twitter.com", "http://-leadingdash.twitter.com", "http://trailingdash-.twitter.com", "http://-leadingdash.com", "http://trailingdash-.com", "http://no_underscores.com", "http://test.c_o_m", "http://test.c-o-m", "http://twitt#{[0x202A].pack('U')}er.com", "http://twitt#{[0x202B].pack('U')}er.com", "http://twitt#{[0x202C].pack('U')}er.com", "http://twitt#{[0x202D].pack('U')}er.com", "http://twitt#{[0x202E].pack('U')}er.com" ] unless defined?(TestUrls::INVALID) TCO = [ "http://t.co/P53cv5yO!", "http://t.co/fQJmiPGg***", "http://t.co/pbY2NfTZ's", "http://t.co/2vYHpAc5;", "http://t.co/ulYGBYSo:", "http://t.co/GeT4bSiw=win", "http://t.co/8MkmHU0k+fun", "http://t.co/TKLp64dY.yes,", "http://t.co/8vuO27cI$$", "http://t.co/rPYTvdA8/", "http://t.co/WvtMw5ku%", "http://t.co/8t7G3ddS#", "http://t.co/nfHNJDV2/#!", "http://t.co/gK6NOXHs[good]", "http://t.co/dMrT0o1Y]bad", "http://t.co/FNkPfmii-", "http://t.co/sMgS3pjI_oh", "http://t.co/F8Dq3Plb~", "http://t.co/ivvH58vC&help", "http://t.co/iUBL15zD|NZ5KYLQ8" ] unless defined?(TestUrls::TCO) end 
twitter-text-1.14.7/spec/hithighlighter_spec.rb000644 000041 000041 00000006055 13154170026 023101 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestHitHighlighter include Twitter::HitHighlighter end describe Twitter::HitHighlighter do describe "highlight" do before do @highlighter = TestHitHighlighter.new end context "with options" do before do @original = "Testing this hit highliter" @hits = [[13,16]] end it "should default to tags" do @highlighter.hit_highlight(@original, @hits).should == "Testing this hit highliter" end it "should allow tag override" do @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this hit highliter" end end context "without links" do before do @original = "Hey! this is a test tweet" end it "should return original when no hits are provided" do @highlighter.hit_highlight(@original).should == @original end it "should highlight one hit" do @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! this is a test tweet" end it "should highlight two hits" do @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! this is a test tweet" end it "should correctly highlight first-word hits" do @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "Hey! this is a test tweet" end it "should correctly highlight last-word hits" do @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! 
this is a test tweet" end end context "with links" do it "should highlight with a single link" do @highlighter.hit_highlight("@bcherry this was a test tweet", [[9, 13]]).should == "@bcherry this was a test tweet" end it "should highlight with link at the end" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight with a link at the beginning" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight an entire link" do @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" end it "should highlight within a link" do @highlighter.hit_highlight("test test test", [[6, 8]]).should == "test test test" end it "should highlight around a link" do @highlighter.hit_highlight("test test test", [[3, 11]]).should == "test test test" end it "should fail gracefully with bad hits" do @highlighter.hit_highlight("test test", [[5, 20]]).should == "test test" end it "should not mess up with touching tags" do @highlighter.hit_highlight("foofoo", [[3,6]]).should == "foofoo" end end end end twitter-text-1.14.7/spec/spec_helper.rb000644 000041 000041 00000007571 13154170026 021361 0ustar00www-datawww-data000000 000000 $TESTING=true # Ruby 1.8 encoding check major, minor, patch = RUBY_VERSION.split('.') if major.to_i == 1 && minor.to_i < 9 $KCODE='u' end $:.push File.join(File.dirname(__FILE__), '..', 'lib') require 'nokogiri' require 'json' require 'simplecov' SimpleCov.start do add_group 'Libraries', 'lib' end require File.expand_path('../../lib/twitter-text', __FILE__) require File.expand_path('../test_urls', __FILE__) RSpec.configure do |config| config.include TestUrls end RSpec::Matchers.define :match_autolink_expression do match do |string| !Twitter::Extractor.extract_urls(string).empty? 
end end RSpec::Matchers.define :match_autolink_expression_in do |text| match do |url| @match_data = Twitter::Regex[:valid_url].match(text) @match_data && @match_data.to_s.strip == url end failure_message_for_should do |url| "Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'" end end RSpec::Matchers.define :have_autolinked_url do |url, inner_text| match do |text| @link = Nokogiri::HTML(text).search("a[@href='#{url}']") @link && @link.inner_text && (inner_text && @link.inner_text == inner_text) || (!inner_text && @link.inner_text == url) end failure_message_for_should do |text| "Expected url '#{url}'#{", inner_text '#{inner_text}'" if inner_text} to be autolinked in '#{text}'" end end RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text| expected = inner_text ? inner_text : screen_name match do |text| @link = Nokogiri::HTML(text).search("a.username") @link && @link.inner_text == expected && "https://twitter.com/#{screen_name}".should == @link.first['href'] end failure_message_for_should do |text| if @link.first "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not." else "Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does." end description do "contain a link with the name and href pointing to the expected screen_name" end end RSpec::Matchers.define :link_to_list_path do |list_path, inner_text| expected = inner_text ? 
inner_text : list_path match do |text| @link = Nokogiri::HTML(text).search("a.list-slug") @link && @link.inner_text == expected && "https://twitter.com/#{list_path}".downcase.should == @link.first['href'] end failure_message_for_should do |text| if @link.first "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not." else "Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does." end description do "contain a link with the list title and an href pointing to the list path" end end RSpec::Matchers.define :have_autolinked_hashtag do |hashtag| match do |text| @link = Nokogiri::HTML(text).search("a[@href='https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']") @link && @link.inner_text && @link.inner_text == hashtag end failure_message_for_should do |text| if @link.first "Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}" else "Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found." end end failure_message_for_should_not do |text| "Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does." end end twitter-text-1.14.7/spec/regex_spec.rb000644 000041 000041 00000002132 13154170026 021200 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' describe "Twitter::Regex regular expressions" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should match the URL #{url}" do url.should match_autolink_expression end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" url.should match_autolink_expression_in(text) end end end describe "invalid URLS" do it "does not link urls with invalid characters" do TestUrls::INVALID.each {|url| url.should_not match_autolink_expression} end end describe "matching List names" do it "should match if less than 25 characters" do name = "Shuffleboard Community" name.length.should < 25 name.should match(Twitter::Regex::REGEXEN[:list_name]) end it "should not match if greater than 25 characters" do name = "Most Glorious Shady Meadows Shuffleboard Community" name.length.should > 25 name.should match(Twitter::Regex[:list_name]) end end end twitter-text-1.14.7/spec/autolinking_spec.rb000644 000041 000041 00000074420 13154170026 022423 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestAutolink include Twitter::Autolink end describe Twitter::Autolink do def original_text; end def url; end describe "auto_link_custom" do before do @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text end describe "username autolinking" do context "username preceded by a space" do def original_text; "hello @jacob"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username in camelCase" do def original_text() "@jaCob iS cOoL" end it "should be linked" do @autolinked_text.should link_to_screen_name('jaCob') end end context "username at beginning of line" do def original_text; "@jacob you're cool"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username preceded by word character" do def original_text; "meet@the beach"; end it "should not be linked" do Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "username preceded by non-word character" do def original_text; "great.@jacob"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username containing non-word 
characters" do def original_text; "@zach&^$%^"; end it "should not be linked" do @autolinked_text.should link_to_screen_name('zach') end end context "username over twenty characters" do def original_text @twenty_character_username = "zach" * 5 "@" + @twenty_character_username + "1" end it "should not be linked" do @autolinked_text.should link_to_screen_name(@twenty_character_username) end end context "username followed by japanese" do def original_text; "@jacobの"; end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end context "username preceded by japanese" do def original_text; "あ@matz"; end it "should be linked" do @autolinked_text.should link_to_screen_name('matz') end end context "username surrounded by japanese" do def original_text; "あ@yoshimiの"; end it "should be linked" do @autolinked_text.should link_to_screen_name('yoshimi') end end context "username using full-width at-sign" do def original_text "#{[0xFF20].pack('U')}jacob" end it "should be linked" do @autolinked_text.should link_to_screen_name('jacob') end end end describe "list path autolinking" do context "when List is not available" do it "should not be linked" do @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true) @autolinked_text.should_not link_to_list_path('jacob/my-list') @autolinked_text.should include('my-list') end end context "slug preceded by a space" do def original_text; "hello @jacob/my-list"; end it "should be linked" do @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username followed by a slash but no list" do def original_text; "hello @jacob/ my-list"; end it "should NOT be linked" do @autolinked_text.should_not link_to_list_path('jacob/my-list') @autolinked_text.should link_to_screen_name('jacob') end end context "empty username followed by a list" do def original_text; "hello @/my-list"; end it "should NOT be linked" do 
Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "list slug at beginning of line" do def original_text; "@jacob/my-list"; end it "should be linked" do @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username preceded by alpha-numeric character" do def original_text; "meet@the/beach"; end it "should not be linked" do Nokogiri::HTML(@autolinked_text).search('a').should be_empty end end context "username preceded by non-word character" do def original_text; "great.@jacob/my-list"; end it "should be linked" do @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list") @autolinked_text.should link_to_list_path('jacob/my-list') end end context "username containing non-word characters" do def original_text; "@zach/test&^$%^"; end it "should be linked" do @autolinked_text.should link_to_list_path('zach/test') end end context "username over twenty characters" do def original_text @twentyfive_character_list = "jack/" + ("a" * 25) "@#{@twentyfive_character_list}12345" end it "should be linked" do @autolinked_text.should link_to_list_path(@twentyfive_character_list) end end end describe "hashtag autolinking" do context "with an all numeric hashtag" do def original_text; "#123"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag('#123') end end context "with a hashtag with alphanumeric characters" do def original_text; "#ab1d"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#ab1d') end end context "with a hashtag with underscores" do def original_text; "#a_b_c_d"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag(original_text) end end context "with a hashtag that is preceded by a word character" do def original_text; "ab#cd"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag(original_text) end end context "with a page anchor in a url" do def original_text; "Here's my url: 
http://foobar.com/#home"; end it "should not link the hashtag" do @autolinked_text.should_not have_autolinked_hashtag('#home') end it "should link the url" do @autolinked_text.should have_autolinked_url('http://foobar.com/#home') end end context "with a hashtag that starts with a number but has word characters" do def original_text; "#2ab"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag(original_text) end end context "with multiple valid hashtags" do def original_text; "I'm frickin' awesome #ab #cd #ef"; end it "links each hashtag" do @autolinked_text.should have_autolinked_hashtag('#ab') @autolinked_text.should have_autolinked_hashtag('#cd') @autolinked_text.should have_autolinked_hashtag('#ef') end end context "with a hashtag preceded by a ." do def original_text; "ok, great.#abc"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#abc') end end context "with a hashtag preceded by a &" do def original_text; "&#nbsp;"; end it "should not be linked" do @autolinked_text.should_not have_autolinked_hashtag('#nbsp;') end end context "with a hashtag that ends in an !" do def original_text; "#great!"; end it "should be linked, but should not include the !" 
do @autolinked_text.should have_autolinked_hashtag('#great') end end context "with a hashtag followed by Japanese" do def original_text; "#twj_devの"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_devの') end end context "with a hashtag preceded by a full-width space" do def original_text; "#{[0x3000].pack('U')}#twj_dev"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_dev') end end context "with a hashtag followed by a full-width space" do def original_text; "#twj_dev#{[0x3000].pack('U')}"; end it "should be linked" do @autolinked_text.should have_autolinked_hashtag('#twj_dev') end end context "with a hashtag using full-width hash" do def original_text; "#{[0xFF03].pack('U')}twj_dev"; end it "should be linked" do link = Nokogiri::HTML(@autolinked_text).search('a') (link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev" link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev' end end context "with a hashtag containing an accented latin character" do def original_text # the hashtag is #éhashtag "##{[0x00e9].pack('U')}hashtag" end it "should be linked" do @autolinked_text.should == "#éhashtag" end end end describe "URL autolinking" do def url; "http://www.google.com"; end context "when embedded in plain text" do def original_text; "On my search engine #{url} I found good links."; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "when surrounded by Japanese;" do def original_text; "いまなにしてる#{url}いまなにしてる"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with a path surrounded by parentheses;" do def original_text; "I found a neatness (#{url})"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end context "when the URL ends with a slash;" do def url; "http://www.google.com/"; 
end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "when the URL has a path;" do def url; "http://www.google.com/fsdfasdf"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end end context "when path contains parens" do def original_text; "I found a neatness (#{url})"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end context "wikipedia" do def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "IIS session" do def url; "http://msdn.com/S(deadbeef)/page.htm"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "unbalanced parens" do def url; "http://example.com/i_has_a_("; end it "should be linked" do @autolinked_text.should have_autolinked_url("http://example.com/i_has_a_") end end context "balanced parens with a double quote inside" do def url; "http://foo.com/foo_(\")_bar" end it "should be linked" do @autolinked_text.should have_autolinked_url("http://foo.com/foo_") end end context "balanced parens hiding XSS" do def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end it "should be linked" do @autolinked_text.should have_autolinked_url("http://x.xx.com/") end end end context "when preceded by a :" do def original_text; "Check this out @hoverbird:#{url}"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with a URL ending in allowed punctuation" do it "does not consume ending punctuation" do matcher = TestAutolink.new %w| ? ! , . : ; ] ) } = \ ' |.each do |char| matcher.auto_link("#{url}#{char}").should have_autolinked_url(url) end end end context "with a URL preceded in forbidden characters" do it "should be linked" do matcher = TestAutolink.new %w| \ ' / ! 
= |.each do |char| matcher.auto_link("#{char}#{url}").should have_autolinked_url(url) end end end context "when embedded in a link tag" do def original_text; "#{url}"; end it "should be linked" do @autolinked_text.should have_autolinked_url(url) end end context "with multiple URLs" do def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end it "should autolink each one" do @autolinked_text.should have_autolinked_url('http://www.links.org') @autolinked_text.should have_autolinked_url('http://www.foo.org') end end context "with multiple URLs in different formats" do def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end it "should autolink each one, in the proper order" do @autolinked_text.should have_autolinked_url('http://foo.com') @autolinked_text.should have_autolinked_url('https://bar.com') @autolinked_text.should have_autolinked_url('http://mail.foobar.org') end end context "with a URL having a long TLD" do def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end it "should autolink it" do @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html') end end context "with a url lacking the protocol" do def original_text; "I like www.foobar.com dudes"; end it "does not link at all" do link = Nokogiri::HTML(@autolinked_text).search('a') link.should be_empty end end context "with a @ in a URL" do context "with XSS attack" do def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end it "should not allow XSS follwing @" do @autolinked_text.should have_autolinked_url('http://x.xx.com/') end end context "with a username not followed by a /" do def original_text; 'http://example.com/@foobar'; end it "should link url" do @autolinked_text.should have_autolinked_url('http://example.com/@foobar') end end context "with a username followed by a /" do def original_text; 'http://example.com/@foobar/'; end it "should not link the 
username but link full url" do @autolinked_text.should have_autolinked_url('http://example.com/@foobar/') @autolinked_text.should_not link_to_screen_name('foobar') end end end context "regex engine quirks" do context "does not spiral out of control on repeated periods" do def original_text; "Test a ton of periods http://example.com/path.........................................."; end it "should autolink" do @autolinked_text.should have_autolinked_url('http://example.com/path') end end context "does not spiral out of control on repeated dashes" do def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end it "should autolink" do @autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188') end end end end describe "Autolink all" do before do @linker = TestAutolink.new end it "should allow url/hashtag overlap" do auto_linked = @linker.auto_link("https://twitter.com/#search") auto_linked.should have_autolinked_url('https://twitter.com/#search') end it "should not add invalid option in HTML tags" do auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname') auto_linked.should have_autolinked_url('https://twitter.com/') auto_linked.should_not include('hashtag_class') auto_linked.should_not include('hashtag_classname') end it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do auto_linked = @linker.auto_link("#{[0x10400].pack('U')} #hashtag #{[0x10400].pack('U')} @mention #{[0x10400].pack('U')} http://twitter.com/") auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://twitter.com/') end end end describe "autolinking options" do before do @linker = TestAutolink.new end it "should show display_url 
when :url_entities provided" do linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{ "url" => "http://t.co/0JG5Mcq", "display_url" => "blog.twitter.com/2011/05/twitte…", "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", "indices" => [ 84, 103 ] }]) html = Nokogiri::HTML(linked) html.search('a').should_not be_empty html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte" html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …" html.search('span[@style="position:absolute;left:-9999px;"]').size.should == 4 end it "should accept invisible_tag_attrs option" do linked = @linker.auto_link("http://t.co/0JG5Mcq", { :url_entities => [{ "url" => "http://t.co/0JG5Mcq", "display_url" => "blog.twitter.com/2011/05/twitte…", "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", "indices" => [ 0, 19 ] }], :invisible_tag_attrs => "style='dummy;'" }) html = Nokogiri::HTML(linked) html.search('span[@style="dummy;"]').size.should == 4 end it "should show display_url if available in entity" do linked = @linker.auto_link_entities("http://t.co/0JG5Mcq", [{ :url => "http://t.co/0JG5Mcq", :display_url => "blog.twitter.com/2011/05/twitte…", :expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html", :indices => [0, 19] }] ) html = Nokogiri::HTML(linked) html.search('a').should_not be_empty html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte" html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …" end it "should apply :class as a CSS class" do linked = @linker.auto_link("http://example.com/", :class => 'myclass') linked.should have_autolinked_url('http://example.com/') linked.should match(/myclass/) end it "should 
apply :url_class only on URL" do linked = @linker.auto_link("http://twitter.com") linked.should have_autolinked_url('http://twitter.com') linked.should_not match(/class/) linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass') linked.should have_autolinked_url('http://twitter.com') linked.should match(/class=\"testClass\"/) linked = @linker.auto_link("#hash @tw", :url_class => 'testClass') linked.should match(/class=\"tweet-url hashtag\"/) linked.should match(/class=\"tweet-url username\"/) linked.should_not match(/class=\"testClass\"/) end it "should add rel=nofollow by default" do linked = @linker.auto_link("http://example.com/") linked.should have_autolinked_url('http://example.com/') linked.should match(/nofollow/) end it "should include the '@' symbol in a username when passed :username_include_symbol" do linked = @linker.auto_link("@user", :username_include_symbol => true) linked.should link_to_screen_name('user', '@user') end it "should include the '@' symbol in a list when passed :username_include_symbol" do linked = @linker.auto_link("@user/list", :username_include_symbol => true) linked.should link_to_list_path('user/list', '@user/list') end it "should not add rel=nofollow when passed :suppress_no_follow" do linked = @linker.auto_link("http://example.com/", :suppress_no_follow => true) linked.should have_autolinked_url('http://example.com/') linked.should_not match(/nofollow/) end it "should not add a target attribute by default" do linked = @linker.auto_link("http://example.com/") linked.should have_autolinked_url('http://example.com/') linked.should_not match(/target=/) end it "should respect the :target option" do linked = @linker.auto_link("http://example.com/", :target => 'mywindow') linked.should have_autolinked_url('http://example.com/') linked.should match(/target="mywindow"/) end it "should customize href by username_url_block option" do linked = @linker.auto_link("@test", :username_url_block => lambda{|a| "dummy"}) 
linked.should have_autolinked_url('dummy', 'test') end it "should customize href by list_url_block option" do linked = @linker.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', 'test/list') end it "should customize href by hashtag_url_block option" do linked = @linker.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', '#hashtag') end it "should customize href by cashtag_url_block option" do linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', '$CASH') end it "should customize href by link_url_block option" do linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"}) linked.should have_autolinked_url('dummy', 'http://example.com/') end it "should modify link attributes by link_attribute_block" do linked = @linker.auto_link("#hash @mention", :link_attribute_block => lambda{|entity, attributes| attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag] } ) linked.should match(/]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/) linked.should_not match(/]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/) linked.should_not match(/link_attribute_block/i) linked = @linker.auto_link("@mention http://twitter.com/", :link_attribute_block => lambda{|entity, attributes| attributes["dummy-url-attr"] = entity[:url] if entity[:url] } ) linked.should_not match(/]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/) linked.should match(/]+dummy-url-attr=\"http:\/\/twitter.com\/\"/) end it "should modify link text by link_text_block" do linked = @linker.auto_link("#hash @mention", :link_text_block => lambda{|entity, text| entity[:hashtag] ? 
"#replaced" : "pre_#{text}_post" } ) linked.should match(/]+>#replaced<\/a>/) linked.should match(/]+>pre_mention_post<\/a>/) linked = @linker.auto_link("#hash @mention", { :link_text_block => lambda{|entity, text| "pre_#{text}_post" }, :symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true }) linked.should match(/]+>pre_#<\/s>hash<\/b>_post<\/a>/) linked.should match(/]+>pre_@<\/s>mention<\/b>_post<\/a>/) end it "should apply :url_target only to auto-linked URLs" do auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'}) auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://test.com/') auto_linked.should_not match(/]+hashtag[^>]+target[^>]+>/) auto_linked.should_not match(/]+username[^>]+target[^>]+>/) auto_linked.should match(/]+test.com[^>]+target=\"_blank\"[^>]*>/) end it "should apply target='_blank' only to auto-linked URLs when :target_blank is set to true" do auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:target_blank => true}) auto_linked.should have_autolinked_hashtag('#hashtag') auto_linked.should link_to_screen_name('mention') auto_linked.should have_autolinked_url('http://test.com/') auto_linked.should match(/]+hashtag[^>]+target=\"_blank\"[^>]*>/) auto_linked.should match(/]+username[^>]+target=\"_blank\"[^>]*>/) auto_linked.should match(/]+test.com[^>]+target=\"_blank\"[^>]*>/) end end describe "link_url_with_entity" do before do @linker = TestAutolink.new end it "should use display_url and expanded_url" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "twitter.com", :expanded_url => "http://twitter.com/"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "twitter.com"; end it "should correctly handle display_url ending with '…'" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", 
:display_url => "twitter.com…", :expanded_url => "http://twitter.com/abcdefg"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "twitter.com"; end it "should correctly handle display_url starting with '…'" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "…tter.com/abcdefg", :expanded_url => "http://twitter.com/abcdefg"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "tter.com/abcdefg"; end it "should not create spans if display_url and expanded_url are on different domains" do @linker.send(:link_url_with_entity, { :url => "http://t.co/abcde", :display_url => "pic.twitter.com/xyz", :expanded_url => "http://twitter.com/foo/statuses/123/photo/1"}, {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "pic.twitter.com/xyz" end end describe "symbol_tag" do before do @linker = TestAutolink.new end it "should put :symbol_tag around symbol" do @linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true}).should match(/@<\/s>mention/) @linker.auto_link("#hash", {:symbol_tag => 's'}).should match(/#<\/s>hash/) result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true}) result.should match(/@<\/b>mention/) result.should match(/#<\/b>hash/) result.should match(/\$<\/b>CASH/) end it "should put :text_with_symbol_tag around text" do result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'}) result.should match(/mention<\/b>/) result.should match(/hash<\/b>/) result.should match(/CASH<\/b>/) end it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true}) result.should match(/@<\/s>mention<\/b>/) result.should match(/#<\/s>hash<\/b>/) result.should match(/\$<\/s>CASH<\/b>/) end end describe "html_escape" do before do @linker = 
TestAutolink.new end it "should escape html entities properly" do @linker.html_escape("&").should == "&" @linker.html_escape(">").should == ">" @linker.html_escape("<").should == "<" @linker.html_escape("\"").should == """ @linker.html_escape("'").should == "'" @linker.html_escape("&<>\"").should == "&<>"" @linker.html_escape("
").should == "<div>" @linker.html_escape("a&b").should == "a&b" @linker.html_escape("twitter & friends").should == "<a href="https://twitter.com" target="_blank">twitter & friends</a>" @linker.html_escape("&").should == "&amp;" @linker.html_escape(nil).should == nil end end end twitter-text-1.14.7/spec/extractor_spec.rb000644 000041 000041 00000032227 13154170026 022111 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' class TestExtractor include Twitter::Extractor end describe Twitter::Extractor do before do @extractor = TestExtractor.new end describe "mentions" do context "single screen name alone " do it "should be linked" do @extractor.extract_mentioned_screen_names("@alice").should == ["alice"] end it "should be linked with _" do @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"] end it "should be linked if numeric" do @extractor.extract_mentioned_screen_names("@1234").should == ["1234"] end end context "multiple screen names" do it "should both be linked" do @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"] end end context "screen names embedded in text" do it "should be linked in Latin text" do @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"] end it "should be linked in Japanese text" do @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"] end it "should ignore mentions preceded by !, @, #, $, %, & or *" do invalid_chars = ['!', '@', '#', '$', '%', '&', '*'] invalid_chars.each do |c| @extractor.extract_mentioned_screen_names("f#{c}@kn").should == [] end end end it "should accept a block arugment and call it in order" do needed = ["alice", "bob"] @extractor.extract_mentioned_screen_names("@alice @bob") do |sn| sn.should == needed.shift end needed.should == [] end end describe "mentions with indices" do context "single screen name alone " do it "should be linked and 
the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}] end it "should be linked with _ and the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}] end it "should be linked if numeric and the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}] end end context "multiple screen names" do it "should both be linked with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should == [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}] end it "should be linked with the correct indices even when repeated" do @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should == [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "alice", :indices => [7, 13]}, {:screen_name => "bob", :indices => [14, 18]}] end end context "screen names embedded in text" do it "should be linked in Latin text with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}] end it "should be linked in Japanese text with the correct indices" do @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}] end end it "should accept a block arugment and call it in order" do needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}] @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index| data = needed.shift sn.should == data[:screen_name] start_index.should == data[:indices].first end_index.should == data[:indices].last end needed.should == 
[] end it "should extract screen name in text with supplementary character" do @extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").should == [{:screen_name => "alice", :indices => [2, 8]}] end end describe "replies" do context "should be extracted from" do it "should extract from lone name" do @extractor.extract_reply_screen_name("@alice").should == "alice" end it "should extract from the start" do @extractor.extract_reply_screen_name("@alice reply text").should == "alice" end it "should extract preceded by a space" do @extractor.extract_reply_screen_name(" @alice reply text").should == "alice" end it "should extract preceded by a full-width space" do @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice" end end context "should not be extracted from" do it "should not be extracted when preceded by text" do @extractor.extract_reply_screen_name("reply @alice text").should == nil end it "should not be extracted when preceded by puctuation" do %w(. / _ - + # ! @).each do |punct| @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil end end end context "should accept a block arugment" do it "should call the block on match" do @extractor.extract_reply_screen_name("@alice") do |sn| sn.should == "alice" end end it "should not call the block on no match" do calls = 0 @extractor.extract_reply_screen_name("not a reply") do |sn| calls += 1 end calls.should == 0 end end end describe "urls" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should extract the URL #{url} and prefix it with a protocol if missing" do @extractor.extract_urls(url).first.should include(url) end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" @extractor.extract_urls(text).first.should include(url) end end end describe "invalid URLS" do it "does not link urls with invalid domains" do @extractor.extract_urls("http://tld-too-short.x").should == [] end end describe "t.co URLS" do TestUrls::TCO.each do |url| it "should only extract the t.co URL from the URL #{url}" do extracted_urls = @extractor.extract_urls(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url.should_not == url extracted_url.should == url[0...20] end it "should match the t.co URL from the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. #awesome" extracted_urls = @extractor.extract_urls(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url.should_not == url extracted_url.should == url[0...20] end end end end describe "urls with indices" do describe "matching URLS" do TestUrls::VALID.each do |url| it "should extract the URL #{url} and prefix it with a protocol if missing" do extracted_urls = @extractor.extract_urls_with_indices(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should include(url) extracted_url[:indices].first.should == 0 extracted_url[:indices].last.should == url.chars.to_a.size end it "should match the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" extracted_urls = @extractor.extract_urls_with_indices(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should include(url) extracted_url[:indices].first.should == 11 extracted_url[:indices].last.should == 11 + url.chars.to_a.size end end it "should extract URL in text with supplementary character" do @extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").should == [{:url => "http://twitter.com", :indices => [2, 20]}] end end describe "invalid URLS" do it "does not link urls with invalid domains" do @extractor.extract_urls_with_indices("http://tld-too-short.x").should == [] end end describe "t.co URLS" do TestUrls::TCO.each do |url| it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do extracted_urls = @extractor.extract_urls_with_indices(url) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should_not include(url) extracted_url[:url].should include(url[0...20]) extracted_url[:indices].first.should == 0 extracted_url[:indices].last.should == 20 end it "should match the t.co URL from the URL #{url} when it's embedded in other text" do text = "Sweet url: #{url} I found. 
#awesome" extracted_urls = @extractor.extract_urls_with_indices(text) extracted_urls.size.should == 1 extracted_url = extracted_urls.first extracted_url[:url].should_not include(url) extracted_url[:url].should include(url[0...20]) extracted_url[:indices].first.should == 11 extracted_url[:indices].last.should == 31 end end end end describe "hashtags" do context "extracts latin/numeric hashtags" do %w(text text123 123text).each do |hashtag| it "should extract ##{hashtag}" do @extractor.extract_hashtags("##{hashtag}").should == [hashtag] end it "should extract ##{hashtag} within text" do @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] end end end context "international hashtags" do context "should allow accents" do %w(mañana café münchen).each do |hashtag| it "should extract ##{hashtag}" do @extractor.extract_hashtags("##{hashtag}").should == [hashtag] end it "should extract ##{hashtag} within text" do @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] end end it "should not allow the multiplication character" do @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == ["pre"] end it "should not allow the division character" do @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == ["pre"] end end end it "should not extract numeric hashtags" do @extractor.extract_hashtags("#1234").should == [] end it "should extract hashtag followed by punctuations" do @extractor.extract_hashtags("#test1: #test2; #test3\"").should == ["test1", "test2" ,"test3"] end end describe "hashtags with indices" do def match_hashtag_in_text(hashtag, text, offset = 0) extracted_hashtags = @extractor.extract_hashtags_with_indices(text) extracted_hashtags.size.should == 1 extracted_hashtag = extracted_hashtags.first extracted_hashtag[:hashtag].should == hashtag extracted_hashtag[:indices].first.should == offset extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1 end 
def not_match_hashtag_in_text(text) extracted_hashtags = @extractor.extract_hashtags_with_indices(text) extracted_hashtags.size.should == 0 end context "extracts latin/numeric hashtags" do %w(text text123 123text).each do |hashtag| it "should extract ##{hashtag}" do match_hashtag_in_text(hashtag, "##{hashtag}") end it "should extract ##{hashtag} within text" do match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) end end end context "international hashtags" do context "should allow accents" do %w(mañana café münchen).each do |hashtag| it "should extract ##{hashtag}" do match_hashtag_in_text(hashtag, "##{hashtag}") end it "should extract ##{hashtag} within text" do match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) end end it "should not allow the multiplication character" do match_hashtag_in_text("pre", "#pre#{[0xd7].pack('U')}post", 0) end it "should not allow the division character" do match_hashtag_in_text("pre", "#pre#{[0xf7].pack('U')}post", 0) end end end it "should not extract numeric hashtags" do not_match_hashtag_in_text("#1234") end it "should extract hashtag in text with supplementary character" do match_hashtag_in_text("hashtag", "#{[0x10400].pack('U')} #hashtag", 2) end end end twitter-text-1.14.7/spec/twitter_text_spec.rb000644 000041 000041 00000000631 13154170026 022636 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' major, minor, patch = RUBY_VERSION.split('.') if major.to_i == 1 && minor.to_i < 9 describe "base" do before do $KCODE = 'NONE' end after do $KCODE = 'u' end it "should raise with invalid KCODE on Ruby < 1.9" do lambda do require 'twitter-text' end.should raise_error end end end twitter-text-1.14.7/spec/rewriter_spec.rb000644 000041 000041 00000040002 13154170026 021727 0ustar00www-datawww-data000000 000000 # encoding: utf-8 require File.dirname(__FILE__) + '/spec_helper' describe Twitter::Rewriter do def original_text; end def url; end def 
block(*args) if Array === @block_args unless Array === @block_args.first @block_args = [@block_args] end @block_args << args else @block_args = args end "[rewritten]" end describe "rewrite usernames" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) end context "username preceded by a space" do def original_text; "hello @jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "hello [rewritten]" end end context "username at beginning of line" do def original_text; "@jacob you're cool"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten] you're cool" end end context "username preceded by word character" do def original_text; "meet@the beach"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "meet@the beach" end end context "username preceded by non-word character" do def original_text; "great.@jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "great.[rewritten]" end end context "username containing non-word characters" do def original_text; "@jacob&^$%^"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]&^$%^" end end context "username over twenty characters" do def original_text @twenty_character_username = "zach" * 5 "@" + @twenty_character_username + "1" end it "should be rewritten" do @block_args.should == ["@", @twenty_character_username, nil] @rewritten_text.should == "[rewritten]1" end end context "username followed by japanese" do def original_text; "@jacobの"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]の" end end context "username preceded by japanese" do def original_text; "あ@jacob"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] 
@rewritten_text.should == "あ[rewritten]" end end context "username surrounded by japanese" do def original_text; "あ@jacobの"; end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "あ[rewritten]の" end end context "username using full-width at-sign" do def original_text "#{[0xFF20].pack('U')}jacob" end it "should be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "[rewritten]" end end end #}}} describe "rewrite lists" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) end context "slug preceded by a space" do def original_text; "hello @jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "hello [rewritten]" end end context "username followed by a slash but no list" do def original_text; "hello @jacob/ my-list"; end it "should not be rewritten" do @block_args.should == ["@", "jacob", nil] @rewritten_text.should == "hello [rewritten]/ my-list" end end context "empty username followed by a list" do def original_text; "hello @/my-list"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "hello @/my-list" end end context "list slug at beginning of line" do def original_text; "@jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "[rewritten]" end end context "username preceded by alpha-numeric character" do def original_text; "meet@jacob/my-list"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "meet@jacob/my-list" end end context "username preceded by non-word character" do def original_text; "great.@jacob/my-list"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "great.[rewritten]" end end context "username containing non-word characters" do def original_text; 
"@jacob/my-list&^$%^"; end it "should be rewritten" do @block_args.should == ["@", "jacob", "/my-list"] @rewritten_text.should == "[rewritten]&^$%^" end end context "username over twenty characters" do def original_text @twentyfive_character_list = "a" * 25 "@jacob/#{@twentyfive_character_list}12345" end it "should be rewritten" do @block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"] @rewritten_text.should == "[rewritten]12345" end end end #}}} describe "rewrite hashtags" do #{{{ before do @rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block)) end context "with an all numeric hashtag" do def original_text; "#123"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "#123" end end context "with a hashtag with alphanumeric characters" do def original_text; "#ab1d"; end it "should be rewritten" do @block_args.should == ["#", "ab1d"] @rewritten_text.should == "[rewritten]" end end context "with a hashtag with underscores" do def original_text; "#a_b_c_d"; end it "should be rewritten" do @block_args.should == ["#", "a_b_c_d"] @rewritten_text.should == "[rewritten]" end end context "with a hashtag that is preceded by a word character" do def original_text; "ab#cd"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "ab#cd" end end context "with a hashtag that starts with a number but has word characters" do def original_text; "#2ab"; end it "should be rewritten" do @block_args.should == ["#", "2ab"] @rewritten_text.should == "[rewritten]" end end context "with multiple valid hashtags" do def original_text; "I'm frickin' awesome #ab #cd #ef"; end it "rewrites each hashtag" do @block_args.should == [["#", "ab"], ["#", "cd"], ["#", "ef"]] @rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]" end end context "with a hashtag preceded by a ." 
do def original_text; "ok, great.#abc"; end it "should be rewritten" do @block_args.should == ["#", "abc"] @rewritten_text.should == "ok, great.[rewritten]" end end context "with a hashtag preceded by a &" do def original_text; "&#nbsp;"; end it "should not be rewritten" do @block_args.should be_nil @rewritten_text.should == "&#nbsp;" end end context "with a hashtag that ends in an !" do def original_text; "#great!"; end it "should be rewritten, but should not include the !" do @block_args.should == ["#", "great"]; @rewritten_text.should == "[rewritten]!" end end context "with a hashtag followed by Japanese" do def original_text; "#twj_devの"; end it "should be rewritten" do @block_args.should == ["#", "twj_devの"]; @rewritten_text.should == "[rewritten]" end end context "with a hashtag preceded by a full-width space" do def original_text; "#{[0x3000].pack('U')}#twj_dev"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == " [rewritten]" end end context "with a hashtag followed by a full-width space" do def original_text; "#twj_dev#{[0x3000].pack('U')}"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == "[rewritten] " end end context "with a hashtag using full-width hash" do def original_text; "#{[0xFF03].pack('U')}twj_dev"; end it "should be rewritten" do @block_args.should == ["#", "twj_dev"]; @rewritten_text.should == "[rewritten]" end end context "with a hashtag containing an accented latin character" do def original_text # the hashtag is #éhashtag "##{[0x00e9].pack('U')}hashtag" end it "should be rewritten" do @block_args.should == ["#", "éhashtag"]; @rewritten_text.should == "[rewritten]" end end end #}}} describe "rewrite urls" do #{{{ def url; "http://www.google.com"; end before do @rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block)) end context "when embedded in plain text" do def original_text; "On my search engine #{url} I found good 
links."; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "On my search engine [rewritten] I found good links." end end context "when surrounded by Japanese;" do def original_text; "いまなにしてる#{url}いまなにしてる"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる" end end context "with a path surrounded by parentheses;" do def original_text; "I found a neatness (#{url})"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end context "when the URL ends with a slash;" do def url; "http://www.google.com/"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "when the URL has a path;" do def url; "http://www.google.com/fsdfasdf"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end end context "when path contains parens" do def original_text; "I found a neatness (#{url})"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end context "wikipedia" do def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "IIS session" do def url; "http://msdn.com/S(deadbeef)/page.htm"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "I found a neatness ([rewritten])" end end context "unbalanced parens" do def url; "http://example.com/i_has_a_("; end it "should be rewritten" do @block_args.should == ["http://example.com/i_has_a_"]; @rewritten_text.should == "I found a neatness ([rewritten]()" end end context "balanced parens with a double quote inside" do def url; "http://foo.bar.com/foo_(\")_bar" end it "should be 
rewritten" do @block_args.should == ["http://foo.bar.com/foo_"]; @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)" end end context "balanced parens hiding XSS" do def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end it "should be rewritten" do @block_args.should == ["http://x.xx.com/"]; @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))' end end end context "when preceded by a :" do def original_text; "Check this out @hoverbird:#{url}"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "Check this out @hoverbird:[rewritten]" end end context "with a URL ending in allowed punctuation" do it "does not consume ending punctuation" do %w| ? ! , . : ; ] ) } = \ ' |.each do |char| Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |url| url.should == url; "[rewritten]" end.should == "[rewritten]#{char}" end end end context "with a URL preceded in forbidden characters" do it "should be rewritten" do %w| \ ' / ! = |.each do |char| Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |url| "[rewritten]" # should not be called here. 
end.should == "#{char}[rewritten]" end end end context "when embedded in a link tag" do def original_text; "#{url}"; end it "should be rewritten" do @block_args.should == [url]; @rewritten_text.should == "[rewritten]" end end context "with multiple URLs" do def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end it "should autolink each one" do @block_args.should == [["http://www.links.org"], ["http://www.foo.org"]]; @rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]" end end context "with multiple URLs in different formats" do def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end it "should autolink each one, in the proper order" do @block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]; @rewritten_text.should == "[rewritten] [rewritten] [rewritten]" end end context "with a URL having a long TLD" do def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end it "should autolink it" do @block_args.should == ["http://golem.mobi/0912/71607.html"] @rewritten_text.should == "Yahoo integriert Facebook [rewritten]" end end context "with a url lacking the protocol" do def original_text; "I like www.foobar.com dudes"; end it "does not link at all" do @block_args.should be_nil @rewritten_text.should == "I like www.foobar.com dudes" end end context "with a @ in a URL" do context "with XSS attack" do def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end it "should not allow XSS follwing @" do @block_args.should == ["http://x.xx.com/"] @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//' end end context "with a username not followed by a /" do def original_text; "http://example.com/@foobar"; end it "should link url" do @block_args.should == ["http://example.com/@foobar"] @rewritten_text.should == "[rewritten]" end end context "with a username followed 
by a /" do def original_text; "http://example.com/@foobar/"; end it "should not link the username but link full url" do @block_args.should == ["http://example.com/@foobar/"] @rewritten_text.should == "[rewritten]" end end end end #}}} end # vim: foldmethod=marker twitter-text-1.14.7/lib/000755 000041 000041 00000000000 13154170026 016345 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/lib/assets/000755 000041 000041 00000000000 13154170026 017647 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/lib/assets/tld_lib.yml000644 000041 000041 00000032212 13154170026 022003 0ustar00www-datawww-data000000 000000 --- country: - "한국" - "香港" - "澳門" - "新加坡" - "台灣" - "台湾" - "中國" - "中国" - "გე" - "ไทย" - "ලංකා" - "ഭാരതം" - "ಭಾರತ" - "భారత్" - "சிங்கப்பூர்" - "இலங்கை" - "இந்தியா" - "ଭାରତ" - "ભારત" - "ਭਾਰਤ" - "ভাৰত" - "ভারত" - "বাংলা" - "भारोत" - "भारतम्" - "भारत" - "ڀارت" - "پاکستان" - "مليسيا" - "مصر" - "قطر" - "فلسطين" - "عمان" - "عراق" - "سورية" - "سودان" - "تونس" - "بھارت" - "بارت" - "ایران" - "امارات" - "المغرب" - "السعودية" - "الجزائر" - "الاردن" - "հայ" - "қаз" - "укр" - "срб" - "рф" - "мон" - "мкд" - "ею" - "бел" - "бг" - "ελ" - zw - zm - za - yt - ye - ws - wf - vu - vn - vi - vg - ve - vc - va - uz - uy - us - um - uk - ug - ua - tz - tw - tv - tt - tr - tp - to - tn - tm - tl - tk - tj - th - tg - tf - td - tc - sz - sy - sx - sv - su - st - ss - sr - so - sn - sm - sl - sk - sj - si - sh - sg - se - sd - sc - sb - sa - rw - ru - rs - ro - re - qa - py - pw - pt - ps - pr - pn - pm - pl - pk - ph - pg - pf - pe - pa - om - nz - nu - nr - np - 'no' - nl - ni - ng - nf - ne - nc - na - mz - my - mx - mw - mv - mu - mt - ms - mr - mq - mp - mo - mn - mm - ml - mk - mh - mg - mf - me - md - mc - ma - ly - lv - lu - lt - ls - lr - lk - li - lc - lb - la - kz - ky - kw - kr - kp - kn - km - ki - kh - kg - ke - jp - jo - jm - je - it - is - ir - iq - io - in - im - il - ie - id - hu - ht - hr - hn - hm - hk - gy - gw - gu - gt - gs - gr - gq - gp - gn - gm - 
gl - gi - gh - gg - gf - ge - gd - gb - ga - fr - fo - fm - fk - fj - fi - eu - et - es - er - eh - eg - ee - ec - dz - do - dm - dk - dj - de - cz - cy - cx - cw - cv - cu - cr - co - cn - cm - cl - ck - ci - ch - cg - cf - cd - cc - ca - bz - by - bw - bv - bt - bs - br - bq - bo - bn - bm - bl - bj - bi - bh - bg - bf - be - bd - bb - ba - az - ax - aw - au - at - as - ar - aq - ao - an - am - al - ai - ag - af - ae - ad - ac generic: - "삼성" - "닷컴" - "닷넷" - "香格里拉" - "餐厅" - "食品" - "飞利浦" - "電訊盈科" - "集团" - "通販" - "购物" - "谷歌" - "诺基亚" - "联通" - "网络" - "网站" - "网店" - "网址" - "组织机构" - "移动" - "珠宝" - "点看" - "游戏" - "淡马锡" - "机构" - "書籍" - "时尚" - "新闻" - "政府" - "政务" - "手表" - "手机" - "我爱你" - "慈善" - "微博" - "广东" - "工行" - "家電" - "娱乐" - "天主教" - "大拿" - "大众汽车" - "在线" - "嘉里大酒店" - "嘉里" - "商标" - "商店" - "商城" - "公益" - "公司" - "八卦" - "健康" - "信息" - "佛山" - "企业" - "中文网" - "中信" - "世界" - "ポイント" - "ファッション" - "セール" - "ストア" - "コム" - "グーグル" - "クラウド" - "みんな" - "คอม" - "संगठन" - "नेट" - "कॉम" - "همراه" - "موقع" - "موبايلي" - "كوم" - "كاثوليك" - "عرب" - "شبكة" - "بيتك" - "بازار" - "العليان" - "ارامكو" - "اتصالات" - "ابوظبي" - "קום" - "сайт" - "рус" - "орг" - "онлайн" - "москва" - "ком" - "католик" - "дети" - zuerich - zone - zippo - zip - zero - zara - zappos - yun - youtube - you - yokohama - yoga - yodobashi - yandex - yamaxun - yahoo - yachts - xyz - xxx - xperia - xin - xihuan - xfinity - xerox - xbox - wtf - wtc - wow - world - works - work - woodside - wolterskluwer - wme - winners - wine - windows - win - williamhill - wiki - wien - whoswho - weir - weibo - wedding - wed - website - weber - webcam - weatherchannel - weather - watches - watch - warman - wanggou - wang - walter - walmart - wales - vuelos - voyage - voto - voting - vote - volvo - volkswagen - vodka - vlaanderen - vivo - viva - vistaprint - vista - vision - visa - virgin - vip - vin - villas - viking - vig - video - viajes - vet - versicherung - vermögensberatung - vermögensberater - verisign - ventures - vegas - vanguard - vana - 
vacations - ups - uol - uno - university - unicom - uconnect - ubs - ubank - tvs - tushu - tunes - tui - tube - trv - trust - travelersinsurance - travelers - travelchannel - travel - training - trading - trade - toys - toyota - town - tours - total - toshiba - toray - top - tools - tokyo - today - tmall - tkmaxx - tjx - tjmaxx - tirol - tires - tips - tiffany - tienda - tickets - tiaa - theatre - theater - thd - teva - tennis - temasek - telefonica - telecity - tel - technology - tech - team - tdk - tci - taxi - tax - tattoo - tatar - tatamotors - target - taobao - talk - taipei - tab - systems - symantec - sydney - swiss - swiftcover - swatch - suzuki - surgery - surf - support - supply - supplies - sucks - style - study - studio - stream - store - storage - stockholm - stcgroup - stc - statoil - statefarm - statebank - starhub - star - staples - stada - srt - srl - spreadbetting - spot - spiegel - space - soy - sony - song - solutions - solar - sohu - software - softbank - social - soccer - sncf - smile - smart - sling - skype - sky - skin - ski - site - singles - sina - silk - shriram - showtime - show - shouji - shopping - shop - shoes - shiksha - shia - shell - shaw - sharp - shangrila - sfr - sexy - sex - sew - seven - ses - services - sener - select - seek - security - secure - seat - search - scot - scor - scjohnson - science - schwarz - schule - school - scholarships - schmidt - schaeffler - scb - sca - sbs - sbi - saxo - save - sas - sarl - sapo - sap - sanofi - sandvikcoromant - sandvik - samsung - samsclub - salon - sale - sakura - safety - safe - saarland - ryukyu - rwe - run - ruhr - rugby - rsvp - room - rogers - rodeo - rocks - rocher - rmit - rip - rio - ril - rightathome - ricoh - richardli - rich - rexroth - reviews - review - restaurant - rest - republican - report - repair - rentals - rent - ren - reliance - reit - reisen - reise - rehab - redumbrella - redstone - red - recipes - realty - realtor - realestate - read - raid - radio - racing - 
qvc - quest - quebec - qpon - pwc - pub - prudential - pru - protection - property - properties - promo - progressive - prof - productions - prod - pro - prime - press - praxi - pramerica - post - porn - politie - poker - pohl - pnc - plus - plumbing - playstation - play - place - pizza - pioneer - pink - ping - pin - pid - pictures - pictet - pics - piaget - physio - photos - photography - photo - phone - philips - phd - pharmacy - pfizer - pet - pccw - pay - passagens - party - parts - partners - pars - paris - panerai - panasonic - pamperedchef - page - ovh - ott - otsuka - osaka - origins - orientexpress - organic - org - orange - oracle - open - ooo - onyourside - online - onl - ong - one - omega - ollo - oldnavy - olayangroup - olayan - okinawa - office - 'off' - observer - obi - nyc - ntt - nrw - nra - nowtv - nowruz - now - norton - northwesternmutual - nokia - nissay - nissan - ninja - nikon - nike - nico - nhk - ngo - nfl - nexus - nextdirect - next - news - newholland - new - neustar - network - netflix - netbank - net - nec - nba - navy - natura - nationwide - name - nagoya - nadex - nab - mutuelle - mutual - museum - mtr - mtpc - mtn - msd - movistar - movie - mov - motorcycles - moto - moscow - mortgage - mormon - mopar - montblanc - monster - money - monash - mom - moi - moe - moda - mobily - mobile - mobi - mma - mls - mlb - mitsubishi - mit - mint - mini - mil - microsoft - miami - metlife - merckmsd - meo - menu - men - memorial - meme - melbourne - meet - media - med - mckinsey - mcdonalds - mcd - mba - mattel - maserati - marshalls - marriott - markets - marketing - market - map - mango - management - man - makeup - maison - maif - madrid - macys - luxury - luxe - lupin - lundbeck - ltda - ltd - lplfinancial - lpl - love - lotto - lotte - london - lol - loft - locus - locker - loans - loan - lixil - living - live - lipsy - link - linde - lincoln - limo - limited - lilly - like - lighting - lifestyle - lifeinsurance - life - lidl - liaison - lgbt 
- lexus - lego - legal - lefrak - leclerc - lease - lds - lawyer - law - latrobe - latino - lat - lasalle - lanxess - landrover - land - lancome - lancia - lancaster - lamer - lamborghini - ladbrokes - lacaixa - kyoto - kuokgroup - kred - krd - kpn - kpmg - kosher - komatsu - koeln - kiwi - kitchen - kindle - kinder - kim - kia - kfh - kerryproperties - kerrylogistics - kerryhotels - kddi - kaufen - juniper - juegos - jprs - jpmorgan - joy - jot - joburg - jobs - jnj - jmp - jll - jlc - jio - jewelry - jetzt - jeep - jcp - jcb - java - jaguar - iwc - iveco - itv - itau - istanbul - ist - ismaili - iselect - irish - ipiranga - investments - intuit - international - intel - int - insure - insurance - institute - ink - ing - info - infiniti - industries - immobilien - immo - imdb - imamat - ikano - iinet - ifm - ieee - icu - ice - icbc - ibm - hyundai - hyatt - hughes - htc - hsbc - how - house - hotmail - hotels - hoteles - hot - hosting - host - hospital - horse - honeywell - honda - homesense - homes - homegoods - homedepot - holiday - holdings - hockey - hkt - hiv - hitachi - hisamitsu - hiphop - hgtv - hermes - here - helsinki - help - healthcare - health - hdfcbank - hdfc - hbo - haus - hangout - hamburg - hair - guru - guitars - guide - guge - gucci - guardian - group - grocery - gripe - green - gratis - graphics - grainger - gov - got - gop - google - goog - goodyear - goodhands - goo - golf - goldpoint - gold - godaddy - gmx - gmo - gmbh - gmail - globo - global - gle - glass - glade - giving - gives - gifts - gift - ggee - george - genting - gent - gea - gdn - gbiz - garden - gap - games - game - gallup - gallo - gallery - gal - fyi - futbol - furniture - fund - fun - fujixerox - fujitsu - ftr - frontier - frontdoor - frogans - frl - fresenius - free - fox - foundation - forum - forsale - forex - ford - football - foodnetwork - food - foo - fly - flsmidth - flowers - florist - flir - flights - flickr - fitness - fit - fishing - fish - firmdale - firestone - 
fire - financial - finance - final - film - fido - fidelity - fiat - ferrero - ferrari - feedback - fedex - fast - fashion - farmers - farm - fans - fan - family - faith - fairwinds - fail - fage - extraspace - express - exposed - expert - exchange - everbank - events - eus - eurovision - etisalat - esurance - estate - esq - erni - ericsson - equipment - epson - epost - enterprises - engineering - engineer - energy - emerck - email - education - edu - edeka - eco - eat - earth - dvr - dvag - durban - dupont - duns - dunlop - duck - dubai - dtv - drive - download - dot - doosan - domains - doha - dog - dodge - doctor - docs - dnp - diy - dish - discover - discount - directory - direct - digital - diet - diamonds - dhl - dev - design - desi - dentist - dental - democrat - delta - deloitte - dell - delivery - degree - deals - dealer - deal - dds - dclk - day - datsun - dating - date - data - dance - dad - dabur - cyou - cymru - cuisinella - csc - cruises - cruise - crs - crown - cricket - creditunion - creditcard - credit - courses - coupons - coupon - country - corsica - coop - cool - cookingchannel - cooking - contractors - contact - consulting - construction - condos - comsec - computer - compare - company - community - commbank - comcast - com - cologne - college - coffee - codes - coach - clubmed - club - cloud - clothing - clinique - clinic - click - cleaning - claims - cityeats - city - citic - citi - citadel - cisco - circle - cipriani - church - chrysler - chrome - christmas - chloe - chintai - cheap - chat - chase - channel - chanel - cfd - cfa - cern - ceo - center - ceb - cbs - cbre - cbn - cba - catholic - catering - cat - casino - cash - caseih - case - casa - cartier - cars - careers - career - care - cards - caravan - car - capitalone - capital - capetown - canon - cancerresearch - camp - camera - cam - calvinklein - call - cal - cafe - cab - bzh - buzz - buy - business - builders - build - bugatti - budapest - brussels - brother - broker - broadway - 
bridgestone - bradesco - box - boutique - bot - boston - bostik - bosch - boots - booking - book - boo - bond - bom - bofa - boehringer - boats - bnpparibas - bnl - bmw - bms - blue - bloomberg - blog - blockbuster - blanco - blackfriday - black - biz - bio - bingo - bing - bike - bid - bible - bharti - bet - bestbuy - best - berlin - bentley - beer - beauty - beats - bcn - bcg - bbva - bbt - bbc - bayern - bauhaus - basketball - baseball - bargains - barefoot - barclays - barclaycard - barcelona - bar - bank - band - bananarepublic - banamex - baidu - baby - azure - axa - aws - avianca - autos - auto - author - auspost - audio - audible - audi - auction - attorney - athleta - associates - asia - asda - arte - art - arpa - army - archi - aramco - arab - aquarelle - apple - app - apartments - aol - anz - anquan - android - analytics - amsterdam - amica - amfam - amex - americanfamily - americanexpress - alstom - alsace - ally - allstate - allfinanz - alipay - alibaba - alfaromeo - akdn - airtel - airforce - airbus - aigo - aig - agency - agakhan - africa - afl - afamilycompany - aetna - aero - aeg - adult - ads - adac - actor - active - aco - accountants - accountant - accenture - academy - abudhabi - abogado - able - abc - abbvie - abbott - abb - abarth - aarp - aaa - onion twitter-text-1.14.7/lib/twitter-text.rb000644 000041 000041 00000000666 13154170026 021366 0ustar00www-datawww-data000000 000000 major, minor, _patch = RUBY_VERSION.split('.') $RUBY_1_9 = if major.to_i == 1 && minor.to_i < 9 # Ruby 1.8 KCODE check. Not needed on 1.9 and later. 
# Build the character-class fragment for one code point, or for an inclusive
# range of code points when +to+ is given.
#
# When $RUBY_1_9 is truthy each code point is rendered as a "\u{xxxx}"
# escape (hex, zero-padded to at least four digits); otherwise it is rendered
# as the packed UTF-8 character itself. Range endpoints are joined with "-".
def self.regex_range(from, to = nil) # :nodoc:
  endpoints = to ? [from, to] : [from]

  rendered = endpoints.map do |codepoint|
    if $RUBY_1_9
      "\\u{#{codepoint.to_s(16).rjust(4, '0')}}"
    else
      [codepoint].pack('U')
    end
  end

  rendered.join('-')
end
0x0020, # White_Space # Zs SPACE 0x0085, # White_Space # Cc 0x00A0, # White_Space # Zs NO-BREAK SPACE 0x1680, # White_Space # Zs OGHAM SPACE MARK 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE 0x2028, # White_Space # Zl LINE SEPARATOR 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE ].flatten.map{|c| [c].pack('U*')}.freeze REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o # Character not allowed in Tweets INVALID_CHARACTERS = [ 0xFFFE, 0xFEFF, # BOM 0xFFFF, # Special 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change ].map{|cp| [cp].pack('U') }.freeze REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o major, minor, _patch = RUBY_VERSION.split('.') if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE)) REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/ else # This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius. REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/") end # Latin accented characters # Excludes 0xd7 from the range (the multiplication sign, confusable with "x"). 
# Also excludes 0xf7, the division sign LATIN_ACCENTS = [ regex_range(0xc0, 0xd6), regex_range(0xd8, 0xf6), regex_range(0xf8, 0xff), regex_range(0x0100, 0x024f), regex_range(0x0253, 0x0254), regex_range(0x0256, 0x0257), regex_range(0x0259), regex_range(0x025b), regex_range(0x0263), regex_range(0x0268), regex_range(0x026f), regex_range(0x0272), regex_range(0x0289), regex_range(0x028b), regex_range(0x02bb), regex_range(0x0300, 0x036f), regex_range(0x1e00, 0x1eff) ].join('').freeze REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o RTL_CHARACTERS = [ regex_range(0x0600,0x06FF), regex_range(0x0750,0x077F), regex_range(0x0590,0x05FF), regex_range(0xFE70,0xFEFF) ].join('').freeze PUNCTUATION_CHARS = '!"#$%&\'()*+,-./:;<=>?@\[\]^_\`{|}~' SPACE_CHARS = " \t\n\x0B\f\r" CTRL_CHARS = "\x00-\x1F\x7F" # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{L}\p{M} HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" + "\u037f\u0528-\u052f\u08a0-\u08b2\u08e4-\u08ff\u0978\u0980\u0c00\u0c34\u0c81\u0d01\u0ede\u0edf" + "\u10c7\u10cd\u10fd-\u10ff\u16f1-\u16f8\u17b4\u17b5\u191d\u191e\u1ab0-\u1abe\u1bab-\u1bad\u1bba-" + "\u1bbf\u1cf3-\u1cf6\u1cf8\u1cf9\u1de7-\u1df5\u2cf2\u2cf3\u2d27\u2d2d\u2d66\u2d67\u9fcc\ua674-" + "\ua67b\ua698-\ua69d\ua69f\ua792-\ua79f\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua7f9\ua9e0-\ua9ef\ua9fa-" + "\ua9fe\uaa7c-\uaa7f\uaae0-\uaaef\uaaf2-\uaaf6\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uf870-\uf87f" + "\uf882\uf884-\uf89f\uf8b8\uf8c1-\uf8d6\ufa2e\ufa2f\ufe27-\ufe2d\u{102e0}\u{1031f}\u{10350}-\u{1037a}" + "\u{10500}-\u{10527}\u{10530}-\u{10563}\u{10600}-\u{10736}\u{10740}-\u{10755}\u{10760}-\u{10767}" + "\u{10860}-\u{10876}\u{10880}-\u{1089e}\u{10980}-\u{109b7}\u{109be}\u{109bf}\u{10a80}-\u{10a9c}" + "\u{10ac0}-\u{10ac7}\u{10ac9}-\u{10ae6}\u{10b80}-\u{10b91}\u{1107f}\u{110d0}-\u{110e8}\u{11100}-" + "\u{11134}\u{11150}-\u{11173}\u{11176}\u{11180}-\u{111c4}\u{111da}\u{11200}-\u{11211}\u{11213}-" + 
"\u{11237}\u{112b0}-\u{112ea}\u{11301}-\u{11303}\u{11305}-\u{1130c}\u{1130f}\u{11310}\u{11313}-" + "\u{11328}\u{1132a}-\u{11330}\u{11332}\u{11333}\u{11335}-\u{11339}\u{1133c}-\u{11344}\u{11347}" + "\u{11348}\u{1134b}-\u{1134d}\u{11357}\u{1135d}-\u{11363}\u{11366}-\u{1136c}\u{11370}-\u{11374}" + "\u{11480}-\u{114c5}\u{114c7}\u{11580}-\u{115b5}\u{115b8}-\u{115c0}\u{11600}-\u{11640}\u{11644}" + "\u{11680}-\u{116b7}\u{118a0}-\u{118df}\u{118ff}\u{11ac0}-\u{11af8}\u{1236f}-\u{12398}\u{16a40}-" + "\u{16a5e}\u{16ad0}-\u{16aed}\u{16af0}-\u{16af4}\u{16b00}-\u{16b36}\u{16b40}-\u{16b43}\u{16b63}-" + "\u{16b77}\u{16b7d}-\u{16b8f}\u{16f00}-\u{16f44}\u{16f50}-\u{16f7e}\u{16f8f}-\u{16f9f}\u{1bc00}-" + "\u{1bc6a}\u{1bc70}-\u{1bc7c}\u{1bc80}-\u{1bc88}\u{1bc90}-\u{1bc99}\u{1bc9d}\u{1bc9e}\u{1e800}-" + "\u{1e8c4}\u{1e8d0}-\u{1e8d6}\u{1ee00}-\u{1ee03}\u{1ee05}-\u{1ee1f}\u{1ee21}\u{1ee22}\u{1ee24}" + "\u{1ee27}\u{1ee29}-\u{1ee32}\u{1ee34}-\u{1ee37}\u{1ee39}\u{1ee3b}\u{1ee42}\u{1ee47}\u{1ee49}" + "\u{1ee4b}\u{1ee4d}-\u{1ee4f}\u{1ee51}\u{1ee52}\u{1ee54}\u{1ee57}\u{1ee59}\u{1ee5b}\u{1ee5d}\u{1ee5f}" + "\u{1ee61}\u{1ee62}\u{1ee64}\u{1ee67}-\u{1ee6a}\u{1ee6c}-\u{1ee72}\u{1ee74}-\u{1ee77}\u{1ee79}-" + "\u{1ee7c}\u{1ee7e}\u{1ee80}-\u{1ee89}\u{1ee8b}-\u{1ee9b}\u{1eea1}-\u{1eea3}\u{1eea5}-\u{1eea9}" + "\u{1eeab}-\u{1eebb}" # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{Nd} HASHTAG_NUMERALS = "\\p{Nd}" + "\u0de6-\u0def\ua9f0-\ua9f9\u{110f0}-\u{110f9}\u{11136}-\u{1113f}\u{111d0}-\u{111d9}\u{112f0}-" + "\u{112f9}\u{114d0}-\u{114d9}\u{11650}-\u{11659}\u{116c0}-\u{116c9}\u{118e0}-\u{118e9}\u{16a60}-" + "\u{16a69}\u{16b50}-\u{16b59}" HASHTAG_SPECIAL_CHARS = "_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7" HASHTAG_LETTERS_NUMERALS = "#{HASHTAG_LETTERS_AND_MARKS}#{HASHTAG_NUMERALS}#{HASHTAG_SPECIAL_CHARS}" HASHTAG_LETTERS_NUMERALS_SET = "[#{HASHTAG_LETTERS_NUMERALS}]" HASHTAG_LETTERS_SET = 
"[#{HASHTAG_LETTERS_AND_MARKS}]" HASHTAG = /(\A|\ufe0e|\ufe0f|[^&#{HASHTAG_LETTERS_NUMERALS}])(#|#)(?!\ufe0f|\u20e3)(#{HASHTAG_LETTERS_NUMERALS_SET}*#{HASHTAG_LETTERS_SET}#{HASHTAG_LETTERS_NUMERALS_SET}*)/io REGEXEN[:valid_hashtag] = /#{HASHTAG}/io # Used in Extractor for final filtering REGEXEN[:end_hashtag_match] = /\A(?:[##]|:\/\/)/o REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_!#\$%&*@@]|^|(?:^|[^a-zA-Z0-9_+~.-])[rR][tT]:?)/o REGEXEN[:at_signs] = /[@@]/ REGEXEN[:valid_mention_or_list] = / (#{REGEXEN[:valid_mention_preceding_chars]}) # $1: Preceeding character (#{REGEXEN[:at_signs]}) # $2: At mark ([a-zA-Z0-9_]{1,20}) # $3: Screen name (\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})? # $4: List (optional) /ox REGEXEN[:valid_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o # Used in Extractor for final filtering REGEXEN[:end_mention_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o # URL related hash regex collection REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@@$###{INVALID_CHARACTERS.join('')}]|^)/io REGEXEN[:invalid_url_without_protocol_preceding_chars] = /[-_.\/]$/ DOMAIN_VALID_CHARS = "[^#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]" REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io REGEXEN[:valid_gTLD] = %r{ (?: (?:#{TLDS['generic'].join('|')}) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_ccTLD] = %r{ (?: (?:#{TLDS['country'].join('|')}) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i REGEXEN[:valid_special_cctld] = %r{ (?: (?:co|tv) (?=[^0-9a-z@]|$) ) }ix REGEXEN[:valid_domain] = /(?: #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]} (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]}) )/iox # This is used 
in Extractor REGEXEN[:valid_ascii_domain] = / (?:(?:[A-Za-z0-9\-_]|#{REGEXEN[:latin_accents]})+\.)+ (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]}) /iox # This is used in Extractor for stricter t.co URL extraction REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/[a-z0-9]+/i # This is used in Extractor to filter out unwanted URLs. REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io REGEXEN[:valid_port_number] = /[0-9]+/ REGEXEN[:valid_general_url_path_chars] = /[a-z\p{Cyrillic}0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&\|@#{LATIN_ACCENTS}]/io # Allow URL paths to contain up to two nested levels of balanced parens # 1. Used in Wikipedia URLs like /Primer_(film) # 2. Used in IIS sessions like /S(dfd346)/ # 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/ REGEXEN[:valid_url_balanced_parens] = / \( (?: #{REGEXEN[:valid_general_url_path_chars]}+ | # allow one nested level of balanced parentheses (?: #{REGEXEN[:valid_general_url_path_chars]}* \( #{REGEXEN[:valid_general_url_path_chars]}+ \) #{REGEXEN[:valid_general_url_path_chars]}* ) ) \) /iox # Valid end-of-path chracters (so /foo. does not gobble the period). # 1. 
Allow =&# for empty URL parameters and other URL-join artifacts REGEXEN[:valid_url_path_ending_chars] = /[a-z\p{Cyrillic}0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io REGEXEN[:valid_url_path] = /(?: (?: #{REGEXEN[:valid_general_url_path_chars]}* (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)* #{REGEXEN[:valid_url_path_ending_chars]} )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/) )/iox REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/\-]/i REGEXEN[:valid_url] = %r{ ( # $1 total match (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter ( # $3 URL (https?:\/\/)? # $4 Protocol (optional) (#{REGEXEN[:valid_domain]}) # $5 Domain(s) (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? 
# $8 Query String ) ) }iox REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i # These URL validation pattern strings are based on the ABNF from RFC 3986 REGEXEN[:validate_url_unreserved] = /[a-z\p{Cyrillic}0-9\-._~]/i REGEXEN[:validate_url_pct_encoded] = /(?:%[0-9a-f]{2})/i REGEXEN[:validate_url_sub_delims] = /[!$&'()*+,;=]/i REGEXEN[:validate_url_pchar] = /(?: #{REGEXEN[:validate_url_unreserved]}| #{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_sub_delims]}| [:\|@] )/iox REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i REGEXEN[:validate_url_userinfo] = /(?: #{REGEXEN[:validate_url_unreserved]}| #{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_sub_delims]}| : )*/iox REGEXEN[:validate_url_dec_octet] = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i REGEXEN[:validate_url_ipv4] = /(?:#{REGEXEN[:validate_url_dec_octet]}(?:\.#{REGEXEN[:validate_url_dec_octet]}){3})/iox # Punting on real IPv6 validation for now REGEXEN[:validate_url_ipv6] = /(?:\[[a-f0-9:\.]+\])/i # Also punting on IPvFuture for now REGEXEN[:validate_url_ip] = /(?: #{REGEXEN[:validate_url_ipv4]}| #{REGEXEN[:validate_url_ipv6]} )/iox # This is more strict than the rfc specifies REGEXEN[:validate_url_subdomain_segment] = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain_segment] = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain_tld] = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i REGEXEN[:validate_url_domain] = /(?:(?:#{REGEXEN[:validate_url_subdomain_segment]}\.)* (?:#{REGEXEN[:validate_url_domain_segment]}\.) 
#{REGEXEN[:validate_url_domain_tld]})/iox REGEXEN[:validate_url_host] = /(?: #{REGEXEN[:validate_url_ip]}| #{REGEXEN[:validate_url_domain]} )/iox # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences REGEXEN[:validate_url_unicode_subdomain_segment] = /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain_segment] = /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain_tld] = /(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix REGEXEN[:validate_url_unicode_domain] = /(?:(?:#{REGEXEN[:validate_url_unicode_subdomain_segment]}\.)* (?:#{REGEXEN[:validate_url_unicode_domain_segment]}\.) #{REGEXEN[:validate_url_unicode_domain_tld]})/iox REGEXEN[:validate_url_unicode_host] = /(?: #{REGEXEN[:validate_url_ip]}| #{REGEXEN[:validate_url_unicode_domain]} )/iox REGEXEN[:validate_url_port] = /[0-9]{1,5}/ REGEXEN[:validate_url_unicode_authority] = %r{ (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo (#{REGEXEN[:validate_url_unicode_host]}) # $2 host (?::(#{REGEXEN[:validate_url_port]}))? # $3 port }iox REGEXEN[:validate_url_authority] = %r{ (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo (#{REGEXEN[:validate_url_host]}) # $2 host (?::(#{REGEXEN[:validate_url_port]}))? # $3 port }iox REGEXEN[:validate_url_path] = %r{(/#{REGEXEN[:validate_url_pchar]}*)*}i REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i # Modified version of RFC 3986 Appendix B REGEXEN[:validate_url_unencoded] = %r{ \A # Full URL (?: ([^:/?#]+):// # $1 Scheme )? ([^/?#]*) # $2 Authority ([^?#]*) # $3 Path (?: \?([^#]*) # $4 Query )? 
module Twitter
  # Class-level helper for marking instance methods as deprecated.
  # Extend a class with this module, then call +deprecate+ after the method
  # has been defined.
  module Deprecation
    # Replace +method+ with a wrapper that prints a deprecation warning and
    # then delegates to the original implementation.
    #
    # method::     name of the already-defined method to deprecate.
    # new_method:: optional name of the replacement; when given, the warning
    #              suggests it.
    #
    # The original implementation stays reachable as +deprecated_<method>+.
    # Positional arguments, keyword arguments and the block are all forwarded.
    def deprecate(method, new_method = nil)
      deprecated_method = :"deprecated_#{method}"
      message = "Deprecation: `#{method}` is deprecated."
      message += " Please use `#{new_method}` instead." if new_method

      alias_method(deprecated_method, method)

      define_method method do |*args, &block|
        warn message
        send(deprecated_method, *args, &block)
      end

      # On Ruby 3 a bare *args splat no longer carries keyword arguments
      # through to the callee. Flagging the wrapper keeps keywords intact
      # while staying loadable on Rubies that predate ruby2_keywords.
      ruby2_keywords(method) if respond_to?(:ruby2_keywords, true)
    end
  end
end
# Report whether +username_list+ is exactly one "@user/list" reference.
#
# Returns false for nil or empty input, in line with the other validators in
# this module, instead of raising on nil. Otherwise the whole string must
# match VALID_LIST_RE with nothing captured before the at-sign (group 1 empty)
# and a non-empty list slug (group 4).
def valid_list?(username_list)
  return false if !username_list || username_list.empty?

  match = username_list.match(VALID_LIST_RE)
  # Must have matched and had nothing before or after
  !!(match && match[1] == "" && match[4] && !match[4].empty?)
end
# Report whether +regex+ matches +string+ from its first to its last character.
#
# string::   the candidate text; may be nil.
# regex::    the pattern the whole string must satisfy.
# optional:: when true, a missing (nil/false) string counts as valid.
#
# Returns true or false.
#
# The pattern is wrapped in \A(?:...)\z so the engine can backtrack into
# later alternatives. Comparing the leftmost unanchored match against the
# string rejects valid input whenever an earlier alternative matches a
# shorter prefix, e.g. a decimal octet pattern that tries a single digit
# first matches only "1" of "10".
def valid_match?(string, regex, optional=false)
  return (optional ? true : false) unless string

  anchored = /\A(?:#{regex})\z/
  anchored.match(string) ? true : false
end
# Add tags around the hits provided in the text.
#
# text::    the text to highlight; it may already contain markup.
# hits::    array of [start, end] index pairs. Indexes count only the
#           characters outside <...> markup, because the text is split on
#           "<" and ">" and tag contents are skipped when counting.
# options:: :tag overrides the tag name (default DEFAULT_HIGHLIGHT_TAG).
#
# Returns +text+ unchanged when +hits+ is empty, otherwise a new string with
# an opening tag at each hit start and a closing tag at each hit end.
#
#   hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
#   # => "test <strong>hit</strong> here"
def hit_highlight(text, hits = [], options = {})
  return text if hits.empty?

  tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
  # Even positions in the flattened hit list open a highlight, odd ones close it.
  tags = ["<#{tag_name}>", "</#{tag_name}>"]

  # After splitting on < and >, even chunk indexes hold text and odd ones
  # hold tag contents.
  chunks = text.split(/[<>]/)

  result = []
  chunk_index, chunk = 0, chunks[0]
  chunk_chars = chunk.to_s.to_char_a
  prev_chunks_len = 0
  chunk_cursor = 0
  start_in_chunk = false

  hits.flatten.each_with_index do |hit, index|
    tag = tags[index % 2]
    placed = false

    # Flush whole chunks until the one containing this hit is reached.
    until chunk.nil? || hit < prev_chunks_len + chunk.length
      result << chunk_chars[chunk_cursor..-1]

      # correctly handle highlights that end on the final character.
      if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
        result << tag
        placed = true
      end

      # Re-emit the markup that followed this text chunk.
      if (tag_text = chunks[chunk_index + 1])
        result << "<#{tag_text}>"
      end

      prev_chunks_len += chunk_chars.length
      chunk_cursor = 0
      chunk_index += 2
      chunk = chunks[chunk_index]
      chunk_chars = chunk.to_s.to_char_a
      start_in_chunk = false
    end

    if !placed && !chunk.nil?
      hit_spot = hit - prev_chunks_len
      result << chunk_chars[chunk_cursor...hit_spot] << tag
      chunk_cursor = hit_spot
      start_in_chunk = index.even?
      placed = true
    end

    # ultimate fallback, hits that run off the end get a closing tag
    result << tag unless placed
  end

  if chunk
    result << chunk_chars[chunk_cursor..-1] if chunk_cursor < chunk_chars.length

    (chunk_index + 1).upto(chunks.length - 1) do |i|
      result << (i.even? ? chunks[i] : "<#{chunks[i]}>")
    end
  end

  result.flatten.join
end
# Examples: # copyright = Unicode::U00A9 # euro = Unicode::U20AC # infinity = Unicode::U221E # module Unicode CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/ def self.const_missing(name) # Check that the constant name is of the right form: U0000 to U10FFFF if name.to_s =~ CODEPOINT_REGEX # Convert the codepoint to an immutable UTF-8 string, # define a real constant for that value and return the value #p name, name.class const_set(name, [$1.to_i(16)].pack("U").freeze) else # Raise an error for constants that are not Unicode. raise NameError, "Uninitialized constant: Unicode::#{name}" end end end end twitter-text-1.14.7/lib/twitter-text/autolink.rb000644 000041 000041 00000054370 13154170026 023215 0ustar00www-datawww-data000000 000000 # encoding: UTF-8 require 'set' require 'twitter-text/hash_helper' module Twitter # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link # usernames, lists, hashtags and URLs. module Autolink extend self # Default CSS class for auto-linked lists DEFAULT_LIST_CLASS = "tweet-url list-slug".freeze # Default CSS class for auto-linked usernames DEFAULT_USERNAME_CLASS = "tweet-url username".freeze # Default CSS class for auto-linked hashtags DEFAULT_HASHTAG_CLASS = "tweet-url hashtag".freeze # Default CSS class for auto-linked cashtags DEFAULT_CASHTAG_CLASS = "tweet-url cashtag".freeze # Default URL base for auto-linked usernames DEFAULT_USERNAME_URL_BASE = "https://twitter.com/".freeze # Default URL base for auto-linked lists DEFAULT_LIST_URL_BASE = "https://twitter.com/".freeze # Default URL base for auto-linked hashtags DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%23".freeze # Default URL base for auto-linked cashtags DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%24".freeze # Default attributes for invisible span tag DEFAULT_INVISIBLE_TAG_ATTRS = "style='position:absolute;left:-9999px;'".freeze DEFAULT_OPTIONS = { :list_class => 
DEFAULT_LIST_CLASS, :username_class => DEFAULT_USERNAME_CLASS, :hashtag_class => DEFAULT_HASHTAG_CLASS, :cashtag_class => DEFAULT_CASHTAG_CLASS, :username_url_base => DEFAULT_USERNAME_URL_BASE, :list_url_base => DEFAULT_LIST_URL_BASE, :hashtag_url_base => DEFAULT_HASHTAG_URL_BASE, :cashtag_url_base => DEFAULT_CASHTAG_URL_BASE, :invisible_tag_attrs => DEFAULT_INVISIBLE_TAG_ATTRS }.freeze def auto_link_with_json(text, json, options = {}) # concatenate entities entities = json.values().flatten() # map JSON entity to twitter-text entity # be careful not to alter arguments received entities.map! do |entity| entity = HashHelper.symbolize_keys(entity) # hashtag entity[:hashtag] = entity[:text] if entity[:text] entity end auto_link_entities(text, entities, options) end def auto_link_entities(text, entities, options = {}, &block) return text if entities.empty? # NOTE deprecate these attributes not options keys in options hash, then use html_attrs options = DEFAULT_OPTIONS.merge(options) options[:html_attrs] = extract_html_attrs_from_options!(options) options[:html_attrs][:rel] ||= "nofollow" unless options[:suppress_no_follow] options[:html_attrs][:target] = "_blank" if options[:target_blank] == true Twitter::Rewriter.rewrite_entities(text.dup, entities) do |entity, chars| if entity[:url] link_to_url(entity, chars, options, &block) elsif entity[:hashtag] link_to_hashtag(entity, chars, options, &block) elsif entity[:screen_name] link_to_screen_name(entity, chars, options, &block) elsif entity[:cashtag] link_to_cashtag(entity, chars, options, &block) end end end # Add tags around the usernames, lists, hashtags and URLs in the provided text. # The tags can be controlled with the following entries in the options hash: # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. 
# # :url_class:: class to add to url tags # :list_class:: class to add to list tags # :username_class:: class to add to username tags # :hashtag_class:: class to add to hashtag tags # :cashtag_class:: class to add to cashtag tags # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. # :hashtag_url_base:: the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this. # :cashtag_url_base:: the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this. # :invisible_tag_attrs:: HTML attribute to add to invisible span tags # :username_include_symbol:: place the @ symbol within username and list links # :suppress_lists:: disable auto-linking to lists # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :url_target:: the value for target attribute on URL links. # :target_blank:: adds target="_blank" to all auto_linked items username / hashtag / cashtag links / urls # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link(text, options = {}, &block) auto_link_entities(text, Extractor.extract_entities_with_indices(text, :extract_url_without_protocol => false), options, &block) end # Add tags around the usernames and lists in the provided text. 
The # tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :list_class:: class to add to list tags # :username_class:: class to add to username tags # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. # :username_include_symbol:: place the @ symbol within username and list links # :suppress_lists:: disable auto-linking to lists # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_usernames_or_lists(text, options = {}, &block) # :yields: list_or_username auto_link_entities(text, Extractor.extract_mentions_or_lists_with_indices(text), options, &block) end # Add tags around the hashtags in the provided text. # The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :hashtag_class:: class to add to hashtag tags # :hashtag_url_base:: the value for href attribute. The hashtag text (minus the #) will be appended at the end of this. 
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_hashtags(text, options = {}, &block) # :yields: hashtag_text auto_link_entities(text, Extractor.extract_hashtags_with_indices(text), options, &block) end # Add tags around the cashtags in the provided text. # The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :cashtag_class:: class to add to cashtag tags # :cashtag_url_base:: the value for href attribute. The cashtag text (minus the $) will be appended at the end of this. # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_cashtags(text, options = {}, &block) # :yields: cashtag_text auto_link_entities(text, Extractor.extract_cashtags_with_indices(text), options, &block) end # Add tags around the URLs in the provided text. 
# The tags can be controlled with the following entries in the options hash. # Also any elements in the options hash will be converted to HTML attributes # and place in the tag. # # :url_class:: class to add to url tags # :invisible_tag_attrs:: HTML attribute to add to invisible span tags # :suppress_no_follow:: do not add rel="nofollow" to auto-linked items # :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links # :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links # :url_target:: the value for target attribute on URL links. # :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash. # :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text. def auto_link_urls(text, options = {}, &block) auto_link_entities(text, Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false), options, &block) end # These methods are deprecated, will be removed in future. extend Deprecation # Deprecated: Please use auto_link_urls instead. # Add tags around the URLs in the provided text. # Any elements in the href_options hash will be converted to HTML attributes # and place in the tag. # Unless href_options contains :suppress_no_follow # the rel="nofollow" attribute will be added. alias :auto_link_urls_custom :auto_link_urls deprecate :auto_link_urls_custom, :auto_link_urls private HTML_ENTITIES = { '&' => '&', '>' => '>', '<' => '<', '"' => '"', "'" => ''' } def html_escape(text) text && text.to_s.gsub(/[&"'><]/) do |character| HTML_ENTITIES[character] end end # NOTE We will make this private in future. 
public :html_escape # Options which should not be passed as HTML attributes OPTIONS_NOT_ATTRIBUTES = Set.new([ :url_class, :list_class, :username_class, :hashtag_class, :cashtag_class, :username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base, :username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block, :username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities, :invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target, :target_blank, :link_attribute_block, :link_text_block ]).freeze def extract_html_attrs_from_options!(options) html_attrs = {} options.reject! do |key, value| unless OPTIONS_NOT_ATTRIBUTES.include?(key) html_attrs[key] = value true end end html_attrs end def url_entities_hash(url_entities) (url_entities || {}).inject({}) do |entities, entity| # be careful not to alter arguments received _entity = HashHelper.symbolize_keys(entity) entities[_entity[:url]] = _entity entities end end def link_to_url(entity, chars, options = {}) url = entity[:url] href = if options[:link_url_block] options[:link_url_block].call(url) else url end # NOTE auto link to urls do not use any default values and options # like url_class but use suppress_no_follow. 
html_attrs = options[:html_attrs].dup html_attrs[:class] = options[:url_class] if options.key?(:url_class) # add target attribute only if :url_target is specified html_attrs[:target] = options[:url_target] if options.key?(:url_target) url_entities = url_entities_hash(options[:url_entities]) # use entity from urlEntities if available url_entity = url_entities[url] || entity link_text = if url_entity[:display_url] html_attrs[:title] ||= url_entity[:expanded_url] link_url_with_entity(url_entity, options) else html_escape(url) end link_to_text(entity, link_text, href, html_attrs, options) end def link_url_with_entity(entity, options) display_url = entity[:display_url] expanded_url = entity[:expanded_url] invisible_tag_attrs = options[:invisible_tag_attrs] || DEFAULT_INVISIBLE_TAG_ATTRS # Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste # should contain the full original URL (expanded_url), not the display URL. # # Method: Whenever possible, we actually emit HTML that contains expanded_url, and use # font-size:0 to hide those parts that should not be displayed (because they are not part of display_url). # Elements with font-size:0 get copied even though they are not visible. # Note that display:none doesn't work here. Elements with display:none don't get copied. # # Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we # wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on # everything with the tco-ellipsis class. # # Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1 # For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts. # For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine. 
display_url_sans_ellipses = display_url.gsub("…", "") if expanded_url.include?(display_url_sans_ellipses) before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2) preceding_ellipsis = /\A…/.match(display_url).to_s following_ellipsis = /…\z/.match(display_url).to_s # As an example: The user tweets "hi http://longdomainname.com/foo" # This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo" # This will get rendered as: # # … # # http://longdomai # # # nname.com/foo # # #   # … # %(#{preceding_ellipsis} ) << %(#{html_escape(before_display_url)}) << %(#{html_escape(display_url_sans_ellipses)}) << %(#{html_escape(after_display_url)}) << %( #{following_ellipsis}) else html_escape(display_url) end end def link_to_hashtag(entity, chars, options = {}) hash = chars[entity[:indices].first] hashtag = entity[:hashtag] hashtag = yield(hashtag) if block_given? hashtag_class = options[:hashtag_class].to_s if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars] hashtag_class += ' rtl' end href = if options[:hashtag_url_block] options[:hashtag_url_block].call(hashtag) else "#{options[:hashtag_url_base]}#{hashtag}" end html_attrs = { :class => hashtag_class, # FIXME As our conformance test, hash in title should be half-width, # this should be bug of conformance data. :title => "##{hashtag}" }.merge(options[:html_attrs]) link_to_text_with_symbol(entity, hash, hashtag, href, html_attrs, options) end def link_to_cashtag(entity, chars, options = {}) dollar = chars[entity[:indices].first] cashtag = entity[:cashtag] cashtag = yield(cashtag) if block_given? 
href = if options[:cashtag_url_block] options[:cashtag_url_block].call(cashtag) else "#{options[:cashtag_url_base]}#{cashtag}" end html_attrs = { :class => "#{options[:cashtag_class]}", :title => "$#{cashtag}" }.merge(options[:html_attrs]) link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options) end def link_to_screen_name(entity, chars, options = {}) name = "#{entity[:screen_name]}#{entity[:list_slug]}" chunk = name.dup chunk = yield(chunk) if block_given? at = chars[entity[:indices].first] html_attrs = options[:html_attrs].dup if entity[:list_slug] && !entity[:list_slug].empty? && !options[:suppress_lists] href = if options[:list_url_block] options[:list_url_block].call(name) else "#{options[:list_url_base]}#{name}" end html_attrs[:class] ||= "#{options[:list_class]}" else href = if options[:username_url_block] options[:username_url_block].call(chunk) else "#{options[:username_url_base]}#{name}" end html_attrs[:class] ||= "#{options[:username_class]}" end link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options) end def link_to_text_with_symbol(entity, symbol, text, href, attributes = {}, options = {}) tagged_symbol = options[:symbol_tag] ? "<#{options[:symbol_tag]}>#{symbol}" : symbol text = html_escape(text) tagged_text = options[:text_with_symbol_tag] ? 
"<#{options[:text_with_symbol_tag]}>#{text}" : text if options[:username_include_symbol] || symbol !~ Twitter::Regex::REGEXEN[:at_signs] "#{link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)}" else "#{tagged_symbol}#{link_to_text(entity, tagged_text, href, attributes, options)}" end end def link_to_text(entity, text, href, attributes = {}, options = {}) attributes[:href] = href options[:link_attribute_block].call(entity, attributes) if options[:link_attribute_block] text = options[:link_text_block].call(entity, text) if options[:link_text_block] %(#{text}) end BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze def tag_attrs(attributes) attributes.keys.sort_by{|k| k.to_s}.inject("") do |attrs, key| value = attributes[key] if BOOLEAN_ATTRIBUTES.include?(key) value = value ? key : nil end unless value.nil? value = case value when Array value.compact.join(" ") else value end attrs << %( #{html_escape(key)}="#{html_escape(value)}") end attrs end end end end twitter-text-1.14.7/lib/twitter-text/extractor.rb000644 000041 000041 00000030114 13154170026 023370 0ustar00www-datawww-data000000 000000 # encoding: UTF-8 class String # Helper function to count the character length by first converting to an # array. This is needed because with unicode strings, the return value # of length may be incorrect def char_length if respond_to? :codepoints length else chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size end end # Helper function to convert this string into an array of unicode characters. def to_char_a @to_char_a ||= if chars.kind_of?(Enumerable) chars.to_a else char_array = [] 0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') } char_array end end end # Helper functions to return character offsets instead of byte offsets. class MatchData def char_begin(n) if string.respond_to? :codepoints self.begin(n) else string[0, self.begin(n)].char_length end end def char_end(n) if string.respond_to? 
:codepoints self.end(n) else string[0, self.end(n)].char_length end end end module Twitter # A module for including Tweet parsing in a class. This module provides function for the extraction and processing # of usernames, lists, URLs and hashtags. module Extractor extend self # Remove overlapping entities. # This returns a new array with no overlapping entities. def remove_overlapping_entities(entities) # sort by start index entities = entities.sort_by{|entity| entity[:indices].first} # remove duplicates prev = nil entities.reject!{|entity| (prev && prev[:indices].last > entity[:indices].first) || (prev = entity) && false} entities end # Extracts all usernames, lists, hashtags and URLs in the Tweet text # along with the indices for where the entity ocurred # If the text is nil or contains no entity an empty array # will be returned. # # If a block is given then it will be called for each entity. def extract_entities_with_indices(text, options = {}, &block) # extract all entities entities = extract_urls_with_indices(text, options) + extract_hashtags_with_indices(text, :check_url_overlap => false) + extract_mentions_or_lists_with_indices(text) + extract_cashtags_with_indices(text) return [] if entities.empty? entities = remove_overlapping_entities(entities) entities.each(&block) if block_given? entities end # Extracts a list of all usernames mentioned in the Tweet text. If the # text is nil or contains no username mentions an empty array # will be returned. # # If a block is given then it will be called for each username. def extract_mentioned_screen_names(text, &block) # :yields: username screen_names = extract_mentioned_screen_names_with_indices(text).map{|m| m[:screen_name]} screen_names.each(&block) if block_given? screen_names end # Extracts a list of all usernames mentioned in the Tweet text # along with the indices for where the mention ocurred. If the # text is nil or contains no username mentions, an empty array # will be returned. 
# # If a block is given, then it will be called with each username, the start # index, and the end index in the text. def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end return [] unless text possible_screen_names = [] extract_mentions_or_lists_with_indices(text) do |screen_name, list_slug, start_position, end_position| next unless list_slug.empty? possible_screen_names << { :screen_name => screen_name, :indices => [start_position, end_position] } end if block_given? possible_screen_names.each do |mention| yield mention[:screen_name], mention[:indices].first, mention[:indices].last end end possible_screen_names end # Extracts a list of all usernames or lists mentioned in the Tweet text # along with the indices for where the mention ocurred. If the # text is nil or contains no username or list mentions, an empty array # will be returned. # # If a block is given, then it will be called with each username, list slug, the start # index, and the end index in the text. The list_slug will be an empty stirng # if this is a username mention. def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end return [] unless text =~ /[@@]/ possible_entries = [] text.to_s.scan(Twitter::Regex[:valid_mention_or_list]) do |before, at, screen_name, list_slug| match_data = $~ after = $' unless after =~ Twitter::Regex[:end_mention_match] start_position = match_data.char_begin(3) - 1 end_position = match_data.char_end(list_slug.nil? ? 3 : 4) possible_entries << { :screen_name => screen_name, :list_slug => list_slug || "", :indices => [start_position, end_position] } end end if block_given? possible_entries.each do |mention| yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last end end possible_entries end # Extracts the username username replied to in the Tweet text. If the # text is nil or is not a reply nil will be returned. 
# # If a block is given then it will be called with the username replied to (if any) def extract_reply_screen_name(text) # :yields: username return nil unless text possible_screen_name = text.match(Twitter::Regex[:valid_reply]) return unless possible_screen_name.respond_to?(:captures) return if $' =~ Twitter::Regex[:end_mention_match] screen_name = possible_screen_name.captures.first yield screen_name if block_given? screen_name end # Extracts a list of all URLs included in the Tweet text. If the # text is nil or contains no URLs an empty array # will be returned. # # If a block is given then it will be called for each URL. def extract_urls(text, &block) # :yields: url urls = extract_urls_with_indices(text).map{|u| u[:url]} urls.each(&block) if block_given? urls end # Extracts a list of all URLs included in the Tweet text along # with the indices. If the text is nil or contains no # URLs an empty array will be returned. # # If a block is given then it will be called for each URL. def extract_urls_with_indices(text, options = {:extract_url_without_protocol => true}) # :yields: url, start, end return [] unless text && (options[:extract_url_without_protocol] ? text.index(".") : text.index(":")) urls = [] text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query| valid_url_match_data = $~ start_position = valid_url_match_data.char_begin(3) end_position = valid_url_match_data.char_end(3) # If protocol is missing and domain contains non-ASCII characters, # extract ASCII-only domains. 
if !protocol next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars] last_url = nil domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain| last_url = { :url => ascii_domain, :indices => [start_position + $~.char_begin(0), start_position + $~.char_end(0)] } if path || ascii_domain =~ Twitter::Regex[:valid_special_short_domain] || ascii_domain !~ Twitter::Regex[:invalid_short_domain] urls << last_url end end # no ASCII-only domain found. Skip the entire URL next unless last_url # last_url only contains domain. Need to add path and query if they exist. if path # last_url was not added. Add it to urls here. last_url[:url] = url.sub(domain, last_url[:url]) last_url[:indices][1] = end_position end else # In the case of t.co URLs, don't allow additional path characters if url =~ Twitter::Regex[:valid_tco_url] url = $& end_position = start_position + url.char_length end urls << { :url => url, :indices => [start_position, end_position] } end end urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given? urls end # Extracts a list of all hashtags included in the Tweet text. If the # text is nil or contains no hashtags an empty array # will be returned. The array returned will not include the leading # # character. # # If a block is given then it will be called for each hashtag. def extract_hashtags(text, &block) # :yields: hashtag_text hashtags = extract_hashtags_with_indices(text).map{|h| h[:hashtag]} hashtags.each(&block) if block_given? hashtags end # Extracts a list of all hashtags included in the Tweet text. If the # text is nil or contains no hashtags an empty array # will be returned. The array returned will not include the leading # # character. # # If a block is given then it will be called for each hashtag. 
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end return [] unless text =~ /[##]/ tags = [] text.scan(Twitter::Regex[:valid_hashtag]) do |before, hash, hash_text| match_data = $~ start_position = match_data.char_begin(2) end_position = match_data.char_end(3) after = $' unless after =~ Twitter::Regex[:end_hashtag_match] tags << { :hashtag => hash_text, :indices => [start_position, end_position] } end end if options[:check_url_overlap] # extract URLs urls = extract_urls_with_indices(text) unless urls.empty? tags.concat(urls) # remove duplicates tags = remove_overlapping_entities(tags) # remove URL entities tags.reject!{|entity| !entity[:hashtag] } end end tags.each{|tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last} if block_given? tags end # Extracts a list of all cashtags included in the Tweet text. If the # text is nil or contains no cashtags an empty array # will be returned. The array returned will not include the leading $ # character. # # If a block is given then it will be called for each cashtag. def extract_cashtags(text, &block) # :yields: cashtag_text cashtags = extract_cashtags_with_indices(text).map{|h| h[:cashtag]} cashtags.each(&block) if block_given? cashtags end # Extracts a list of all cashtags included in the Tweet text. If the # text is nil or contains no cashtags an empty array # will be returned. The array returned will not include the leading $ # character. # # If a block is given then it will be called for each cashtag. 
def extract_cashtags_with_indices(text) # :yields: cashtag_text, start, end return [] unless text =~ /\$/ tags = [] text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text| match_data = $~ start_position = match_data.char_begin(2) end_position = match_data.char_end(3) tags << { :cashtag => cash_text, :indices => [start_position, end_position] } end tags.each{|tag| yield tag[:cashtag], tag[:indices].first, tag[:indices].last} if block_given? tags end end end twitter-text-1.14.7/lib/twitter-text/rewriter.rb000644 000041 000041 00000004052 13154170026 023222 0ustar00www-datawww-data000000 000000 module Twitter # A module provides base methods to rewrite usernames, lists, hashtags and URLs. module Rewriter extend self def rewrite_entities(text, entities) chars = text.to_s.to_char_a # sort by start index entities = entities.sort_by do |entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] indices.first end result = [] last_index = entities.inject(0) do |index, entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] result << chars[index...indices.first] result << yield(entity, chars) indices.last end result << chars[last_index..-1] result.flatten.join end # These methods are deprecated, will be removed in future. extend Deprecation def rewrite(text, options = {}) [:hashtags, :urls, :usernames_or_lists].inject(text) do |key| options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text end end deprecate :rewrite, :rewrite_entities def rewrite_usernames_or_lists(text) entities = Extractor.extract_mentions_or_lists_with_indices(text) rewrite_entities(text, entities) do |entity, chars| at = chars[entity[:indices].first] list_slug = entity[:list_slug] list_slug = nil if list_slug.empty? 
yield(at, entity[:screen_name], list_slug)
      end
    end
    deprecate :rewrite_usernames_or_lists, :rewrite_entities

    # Replaces every hashtag in +text+ with the block's return value. The
    # block receives the character found at the entity's first index and the
    # hashtag text.
    def rewrite_hashtags(text)
      entities = Extractor.extract_hashtags_with_indices(text)
      rewrite_entities(text, entities) do |entity, chars|
        hash = chars[entity[:indices].first]
        yield(hash, entity[:hashtag])
      end
    end
    deprecate :rewrite_hashtags, :rewrite_entities

    # Replaces every URL in +text+ with the block's return value. The block
    # receives the URL string. URLs are extracted with
    # <tt>:extract_url_without_protocol => false</tt>.
    def rewrite_urls(text)
      entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
      rewrite_entities(text, entities) do |entity, chars|
        yield(entity[:url])
      end
    end
    deprecate :rewrite_urls, :rewrite_entities
  end
end
twitter-text-1.14.7/lib/twitter-text/hash_helper.rb000644 000041 000041 00000001171 13154170026 023640 0ustar00www-datawww-data000000 000000
module Twitter
  module HashHelper
    # Return a new hash with all keys converted to symbols, as long as
    # they respond to +to_sym+.
    #
    # { 'name' => 'Rob', 'years' => '28' }.symbolize_keys
    # #=> { :name => "Rob", :years => "28" }
    def self.symbolize_keys(hash)
      # Work on a shallow copy so the caller's hash is left untouched.
      symbolize_keys!(hash.dup)
    end

    # Destructively convert all keys to symbols, as long as they respond
    # to +to_sym+. Same as +symbolize_keys+, but modifies +self+.
    def self.symbolize_keys!(hash)
      # Iterate over a snapshot of the keys because entries are deleted and
      # re-inserted while looping.
      hash.keys.each do |key|
        # Fall back to the original key when to_sym raises or returns nil.
        hash[(key.to_sym rescue key) || key] = hash.delete(key)
      end
      hash
    end
  end
end
twitter-text-1.14.7/twitter-text.gemspec000644 000041 000041 00000002647 13154170026 021641 0ustar00www-datawww-data000000 000000
# encoding: utf-8
# Gem specification for twitter-text.
Gem::Specification.new do |s|
  s.name = "twitter-text"
  s.version = "1.14.7"
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian", "J.P.
Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
  s.homepage = "http://twitter.com"
  # NOTE: the summary assigned on this line is overwritten by the explicit
  # s.summary assignment a few lines below; only the description keeps this text.
  s.description = s.summary = "A gem that provides text handling for Twitter"
  s.license = "Apache 2.0"
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.summary = "Twitter text handling library"

  # Development-only dependencies (tests, docs, coverage).
  s.add_development_dependency "test-unit"
  s.add_development_dependency "multi_json", "~> 1.3"
  s.add_development_dependency "nokogiri", "~> 1.5.10"
  s.add_development_dependency "rake", "~> 11.1" # 12 removes method named `last_comment`
  s.add_development_dependency "rdoc"
  s.add_development_dependency "rspec", "~> 2.14.0"
  s.add_development_dependency "simplecov", "~> 0.8.0"

  # Runtime dependency.
  s.add_runtime_dependency "unf", "~> 0.1.0"

  # File lists are taken from git; lib/assets/tld_lib.yml is appended
  # explicitly to the packaged files.
  s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml']
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
twitter-text-1.14.7/test/000755 000041 000041 00000000000 13154170026 016556 5ustar00www-datawww-data000000 000000 twitter-text-1.14.7/test/conformance_test.rb000644 000041 000041 00000015504 13154170026 022441 0ustar00www-datawww-data000000 000000
require 'multi_json'
require 'nokogiri'
require 'test/unit'
require 'yaml'

# Detect Ruby 1.8 and older to apply necessary encoding fixes
major, minor, patch = RUBY_VERSION.split('.')
OLD_RUBY = major.to_i == 1 && minor.to_i < 9
if OLD_RUBY
  $KCODE='u'
end

# Put the gem's own lib directory first on the load path so the local
# sources are the ones required below.
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'twitter-text'

# Test::Unit suite whose test methods are generated from the conformance
# YAML files.
class ConformanceTest < Test::Unit::TestCase
  include Twitter::Extractor
  include Twitter::Autolink
  include Twitter::HitHighlighter
  include Twitter::Validation

  private

  # Define one reader per fixture field; each returns the matching entry of
  # the current test's @test_info hash.
  %w(description expected json hits).each do |key|
    define_method key.to_sym do
      @test_info[key]
end end if OLD_RUBY def text @test_info['text'].gsub(/\\u([0-9a-f]{8})/i) do [$1.to_i(16)].pack('U*') end end else def text @test_info['text'] end end def assert_equal_without_attribute_order(expected, actual, failure_message = nil) assert_block(build_message(failure_message, " expected but was\n", expected, actual)) do equal_nodes?(Nokogiri::HTML(expected).root, Nokogiri::HTML(actual).root) end end def equal_nodes?(expected, actual) return false unless expected.name == actual.name return false unless ordered_attributes(expected) == ordered_attributes(actual) return false if expected.text? && actual.text? && expected.content != actual.content expected.children.each_with_index do |child, index| return false unless equal_nodes?(child, actual.children[index]) end true end def ordered_attributes(element) element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort end CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../../../conformance", __FILE__) def self.def_conformance_test(file, test_type, &block) yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file)) raise "No such test suite: #{test_type.to_s}" unless yaml["tests"][test_type.to_s] file_name = file.split('.').first yaml["tests"][test_type.to_s].each do |test_info| name = :"test_#{file_name}_#{test_type} #{test_info['description']}" define_method name do @test_info = test_info instance_eval(&block) end end end public # Extractor Conformance def_conformance_test("extract.yml", :replies) do assert_equal expected, extract_reply_screen_name(text), description end def_conformance_test("extract.yml", :mentions) do assert_equal expected, extract_mentioned_screen_names(text), description end def_conformance_test("extract.yml", :mentions_with_indices) do e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } assert_equal e, extract_mentioned_screen_names_with_indices(text), description end def_conformance_test("extract.yml", :mentions_or_lists_with_indices) do e = expected.map{|elem| 
elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
    assert_equal e, extract_mentions_or_lists_with_indices(text), description
  end

  # Every expected URL must also pass valid_url? when called with (url, true, false).
  def_conformance_test("extract.yml", :urls) do
    assert_equal expected, extract_urls(text), description
    expected.each do |expected_url|
      assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
    end
  end

  def_conformance_test("tlds.yml", :generic) do
    assert_equal expected, extract_urls(text), description
  end

  def_conformance_test("tlds.yml", :country) do
    assert_equal expected, extract_urls(text), description
  end

  def_conformance_test("extract.yml", :urls_with_indices) do
    # Convert the fixture's string keys to symbols before comparing.
    e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
    assert_equal e, extract_urls_with_indices(text), description
  end

  def_conformance_test("extract.yml", :hashtags) do
    assert_equal expected, extract_hashtags(text), description
  end

  def_conformance_test("extract.yml", :hashtags_from_astral) do
    assert_equal expected, extract_hashtags(text), description
  end

  def_conformance_test("extract.yml", :hashtags_with_indices) do
    # Convert the fixture's string keys to symbols before comparing.
    e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
    assert_equal e, extract_hashtags_with_indices(text), description
  end

  def_conformance_test("extract.yml", :cashtags) do
    assert_equal expected, extract_cashtags(text), description
  end

  def_conformance_test("extract.yml", :cashtags_with_indices) do
    # Convert the fixture's string keys to symbols before comparing.
    e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
    assert_equal e, extract_cashtags_with_indices(text), description
  end

  # Autolink Conformance
  # All autolink checks pass :suppress_no_follow => true and compare the
  # resulting HTML without regard to attribute order.
  def_conformance_test("autolink.yml", :usernames) do
    assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :lists) do
    assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :urls) do
    assert_equal_without_attribute_order
expected, auto_link_urls(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :hashtags) do
    assert_equal_without_attribute_order expected, auto_link_hashtags(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :cashtags) do
    assert_equal_without_attribute_order expected, auto_link_cashtags(text, :suppress_no_follow => true), description
  end

  def_conformance_test("autolink.yml", :all) do
    assert_equal_without_attribute_order expected, auto_link(text, :suppress_no_follow => true), description
  end

  # The fixture's "json" field is parsed with MultiJson and handed to
  # auto_link_with_json together with the text.
  def_conformance_test("autolink.yml", :json) do
    assert_equal_without_attribute_order expected, auto_link_with_json(text, MultiJson.load(json), :suppress_no_follow => true), description
  end

  # HitHighlighter Conformance
  def_conformance_test("hit_highlighting.yml", :plain_text) do
    assert_equal expected, hit_highlight(text, hits), description
  end

  def_conformance_test("hit_highlighting.yml", :with_links) do
    assert_equal expected, hit_highlight(text, hits), description
  end

  # Validation Conformance
  def_conformance_test("validate.yml", :tweets) do
    assert_equal expected, valid_tweet_text?(text), description
  end

  def_conformance_test("validate.yml", :usernames) do
    assert_equal expected, valid_username?(text), description
  end

  def_conformance_test("validate.yml", :lists) do
    assert_equal expected, valid_list?(text), description
  end

  def_conformance_test("validate.yml", :urls) do
    assert_equal expected, valid_url?(text), description
  end

  # Same validator as the :urls suite, called with the extra arguments
  # (true, false).
  def_conformance_test("validate.yml", :urls_without_protocol) do
    assert_equal expected, valid_url?(text, true, false), description
  end

  def_conformance_test("validate.yml", :hashtags) do
    assert_equal expected, valid_hashtag?(text), description
  end

  def_conformance_test("validate.yml", :lengths) do
    assert_equal expected, tweet_length(text), description
  end
end
twitter-text-1.14.7/.gitignore000644 000041 000041 00000000563 13154170026 017573 0ustar00www-datawww-data000000 000000 *.gem *.rbc
*.sw[a-p] *.tmproj *.tmproject *.un~ *~ .DS_Store .Spotlight-V100 .Trashes ._* .bundle .config .directory .elc .emacs.desktop .emacs.desktop.lock .redcar .yardoc Desktop.ini Gemfile.lock Icon? InstalledFiles Session.vim Thumbs.db \#*\# _yardoc auto-save-list coverage doc lib/bundler/man pkg pkg/* rdoc spec/reports test/tmp test/version_tmp tmp tmtags tramp twitter-text-1.14.7/LICENSE000644 000041 000041 00000023610 13154170026 016606 0ustar00www-datawww-data000000 000000 Copyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the License below, or at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. twitter-text-1.14.7/README.md000644 000041 000041 00000006267 13154170026 017071 0ustar00www-datawww-data000000 000000 # twitter-text ![hello](https://img.shields.io/gem/v/twitter-text.svg) A gem that provides text processing routines for Twitter Tweets. The major reason for this is to unify the various auto-linking and extraction of usernames, lists, hashtags and URLs. 
## Extraction Examples # Extraction ``` class MyClass include Twitter::Extractor usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack") # usernames = ["twitter", "jack"] end ``` # Extraction with a block argument ```ruby class MyClass include Twitter::Extractor extract_reply_screen_name("@twitter are you hiring?") do |username| # username = "twitter" end end ``` ## Auto-linking Examples # Auto-link ``` class MyClass include Twitter::Autolink html = auto_link("link @user, please #request") end ``` # For Ruby on Rails you want to add this to app/helpers/application_helper.rb ``` module ApplicationHelper include Twitter::Autolink end ``` # Now the auto_link function is available in every view. So in index.html.erb: ```ruby <%= auto_link("link @user, please #request") %> ``` ### Usernames Username extraction and linking matches all valid Twitter usernames but does not verify that the username is a valid Twitter account. ### Lists Auto-link and extract list names when they are written in @user/list-name format. ### Hashtags Auto-link and extract hashtags, where a hashtag can contain most letters or numbers but cannot be solely numbers and cannot contain punctuation. ### URLs Asian languages like Chinese, Japanese or Korean may not use a delimiter such as a space to separate normal text from URLs making it difficult to identify where the URL ends and the text starts. For this reason twitter-text currently does not support extracting or auto-linking of URLs immediately followed by non-Latin characters. Example: "http://twitter.com/は素晴らしい". The normal text is "は素晴らしい" and is not part of the URL even though it isn't space separated. ### International Special care has been taken to be sure that auto-linking and extraction work in Tweets of all languages. This means that languages without spaces between words should work equally well.
### Hit Highlighting Use to provide emphasis around the "hits" returned from the Search API, built to work against text that has been auto-linked already. ### Thanks Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of: * At Twitter … * Matt Sanford - http://github.com/mzsanford * Raffi Krikorian - http://github.com/r * Ben Cherry - http://github.com/bcherry * Patrick Ewing - http://github.com/hoverbird * Jeff Smick - http://github.com/sprsquish * Kenneth Kufluk - https://github.com/kennethkufluk * Keita Fujii - https://github.com/keitaf * Yoshimasa Niwa - https://github.com/niw * Patches from the community … * Jean-Philippe Bougie - http://github.com/jpbougie * Erik Michaels-Ober - https://github.com/sferik * Anyone who has filed an issue. It helps. Really. ### Copyright and License **Copyright 2011 Twitter, Inc.** Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 twitter-text-1.14.7/.gitmodules000644 000041 000041 00000000217 13154170026 017754 0ustar00www-datawww-data000000 000000 [submodule "test/twitter-text-conformance"] path = test/twitter-text-conformance url = git://github.com/twitter/twitter-text-conformance.git twitter-text-1.14.7/test/twitter-text-conformance/validate.yml000664 001751 001751 00000022635 13126461251 024550 0ustar00srudsrud000000 000000 tests: tweets: - description: "Valid Tweet: < 20 characters" text: "I am a Tweet" expected: true - description: "Valid Tweet: 140 characters" text: "A lie gets halfway around the world before the truth has a chance to get its pants on. Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: true - description: "Valid Tweet: 140 characters (with accents)" text: "A lié géts halfway arøünd thé wørld béføré thé truth has a chance tø get its pants øn. 
Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: true - description: "Valid Tweet: 140 characters (double byte characters)" text: "のののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののの" expected: true - description: "Valid Tweet: 140 characters (double word characters)" text: "\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431\U0001f431" expected: true - description: "Invalid Tweet: no characters (empty)" text: "" expected: false - description: "Invalid Tweet: 141 characters" text: "A lie gets halfway around the world before the truth has a chance to 
get its pants on. -- Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: false - description: "Invalid Tweet: 141 characters (due to newline)" text: "A lie gets halfway around the world before the truth has a chance to get its pants on. \n- Winston Churchill (1874-1965) http://bit.ly/dJpywL" expected: false usernames: - description: "Valid username: a-z < 20 characters" text: "@username" expected: true - description: "All numeric username are allowed" text: "@12345" expected: true - description: "Usernames should allow the _ character" text: "@example_name" expected: true - description: "Usernames SHOULD NOT allow the - character" text: "@example-name" expected: false lists: - description: "Valid list: a-z < 20 characters" text: "@username/list" expected: true - description: "A username alone SHOULD NOT be considered a valid list" text: "@username" expected: false - description: "A username followed by a slash SHOULD NOT be considered a valid list" text: "@username/" expected: false - description: "Validation SHOULD NOT allow leading spaces" text: " @username/list" expected: false - description: "Validation SHOULD NOT allow trailing spaces" text: "@username/list " expected: false hashtags: - description: "Valid hashtag: a-z < 20 characters" text: "#hashtag" expected: true - description: "Valid hashtag: number followed by letters" text: "#1st" expected: true - description: "Valid hashtag: letters and numbers mixed" text: "#that1time" expected: true - description: "Valid hashtag: letter followed by numbers" text: "#easyas123" expected: true - description: "Invalid hashtag: all numbers" text: "#12345" expected: false - description: "Valid hashtag: Russian text" text: "#ашок" expected: true - description: "Valid hashtag: Korean text" text: "#트위터" expected: true urls: - description: "Valid url: protocol + domain" text: "http://example.com" expected: true - description: "Valid url: ssl + domain + path + query" text: 
"https://example.com/path/to/resource?search=foo&lang=en" expected: true - description: "Valid url: protocol + domain + path + fragment" text: "http://twitter.com/#!/twitter" expected: true - description: "Valid url: cased protocol and domain" text: "HTTPS://www.ExaMPLE.COM/index.html" expected: true - description: "Valid url: port and userinfo" text: "http://user:PASSW0RD@example.com:8080/login.php" expected: true - description: "Valid url: matrix path parameters" text: "http://sports.yahoo.com/nfl/news;_ylt=Aom0;ylu=XyZ?slug=ap-superbowlnotebook" expected: true - description: "Valid url: ipv4" text: "http://192.168.0.1/index.html?src=asdf" expected: true - description: "Valid url: ipv6" text: "http://[3ffe:1900:4545:3:200:f8ff:fe21:67cf]:80/index.html" expected: true - description: "Valid url: underscore in subdomain" text: "http://test_underscore.twitter.com" expected: true - description: "Valid url: sub delims and question marks" text: "http://example.com?foo=$bar.;baz?BAZ&c=d-#top/?stories+" expected: true - description: "Valid unicode url: unicode domain" text: "http://☃.net/" expected: true - description: "Valid url: Cyrillic characters in path" text: "http://example.com/Русские_слова" expected: true - description: "Valid url: trailing hyphen" text: "https://www.youtube.com/playlist?list=PL0ZPu8XSRTB7wZzn0mLHMvyzVFeRxbWn-" expected: true - description: "Invalid url: invalid scheme" text: "ftp://www.example.com/" expected: false - description: "Invalid url: invalid userinfo characters" text: "https://user:pass[word]@www.example.com/" expected: false - description: "Invalid url: underscore in domain" text: "http://domain-dash_2314352345_dfasd.foo-cow_4352.com" expected: false - description: "Invalid url: domain beginning dash" text: "http://www.-domain4352.com/" expected: false - description: "Invalid url: domain trailing dash" text: "http://www.domain4352-.com/" expected: false - description: "Invalid url: unicode domain trailing dash" text: "http://☃-.net/" 
expected: false - description: "Invalid url: improperly encoded unicode domain" text: "http://%e2%98%83.net/" expected: false - description: "Invalid url: invalid IP" text: "http://256.1.2.3/" expected: false - description: "Invalid url: invalid char in path" text: "http://en.wikipedia.org/wiki/\"#Punctuation" expected: false - description: "Invalid url: trailing space" text: "http://example.com/#anchor " expected: false urls_without_protocol: - description: "Valid url without protocol: domain + gTLD" text: "example.com" expected: true - description: "Valid url without protocol: subdomain + domain + gTLD" text: "www.example.com" expected: true - description: "Valid url without protocol: domain + ccTLD" text: "t.co" expected: true - description: "Valid url without protocol: subdomain + domain + ccTLD" text: "foo.co.jp" expected: true - description: "Valid url without protocol: domain + path + query" text: "example.com/path/to/resource?search=foo&lang=en" expected: true lengths: - description: "Count the number of characters" text: "This is a test." 
expected: 15 - description: "Count a URL starting with http:// as 23 characters" text: "http://test.com" expected: 23 - description: "Count a URL starting with https:// as 23 characters" text: "https://test.com" expected: 23 - description: "Count a URL without protocol as 23 characters" text: "test.com" expected: 23 - description: "Count multiple URLs correctly" text: "Test https://test.com test https://test.com test.com test" expected: 86 - description: "Count unicode chars outside the basic multilingual plane (double word)" text: "\U00010000\U0010ffff" expected: 2 - description: "Count unicode chars inside the basic multilingual plane" text: "저찀쯿쿿" expected: 4 - description: "Count a mix of single byte single word, and double word unicode characters" text: "H\U0001f431☺" expected: 3 twitter-text-1.14.7/test/twitter-text-conformance/tld_lib.yml000664 001751 001751 00000032212 13126461251 024360 0ustar00srudsrud000000 000000 --- country: - "한국" - "香港" - "澳門" - "新加坡" - "台灣" - "台湾" - "中國" - "中国" - "გე" - "ไทย" - "ලංකා" - "ഭാരതം" - "ಭಾರತ" - "భారత్" - "சிங்கப்பூர்" - "இலங்கை" - "இந்தியா" - "ଭାରତ" - "ભારત" - "ਭਾਰਤ" - "ভাৰত" - "ভারত" - "বাংলা" - "भारोत" - "भारतम्" - "भारत" - "ڀارت" - "پاکستان" - "مليسيا" - "مصر" - "قطر" - "فلسطين" - "عمان" - "عراق" - "سورية" - "سودان" - "تونس" - "بھارت" - "بارت" - "ایران" - "امارات" - "المغرب" - "السعودية" - "الجزائر" - "الاردن" - "հայ" - "қаз" - "укр" - "срб" - "рф" - "мон" - "мкд" - "ею" - "бел" - "бг" - "ελ" - zw - zm - za - yt - ye - ws - wf - vu - vn - vi - vg - ve - vc - va - uz - uy - us - um - uk - ug - ua - tz - tw - tv - tt - tr - tp - to - tn - tm - tl - tk - tj - th - tg - tf - td - tc - sz - sy - sx - sv - su - st - ss - sr - so - sn - sm - sl - sk - sj - si - sh - sg - se - sd - sc - sb - sa - rw - ru - rs - ro - re - qa - py - pw - pt - ps - pr - pn - pm - pl - pk - ph - pg - pf - pe - pa - om - nz - nu - nr - np - 'no' - nl - ni - ng - nf - ne - nc - na - mz - my - mx - mw - mv - mu - mt - ms - mr - mq - mp - mo - mn - mm 
- ml - mk - mh - mg - mf - me - md - mc - ma - ly - lv - lu - lt - ls - lr - lk - li - lc - lb - la - kz - ky - kw - kr - kp - kn - km - ki - kh - kg - ke - jp - jo - jm - je - it - is - ir - iq - io - in - im - il - ie - id - hu - ht - hr - hn - hm - hk - gy - gw - gu - gt - gs - gr - gq - gp - gn - gm - gl - gi - gh - gg - gf - ge - gd - gb - ga - fr - fo - fm - fk - fj - fi - eu - et - es - er - eh - eg - ee - ec - dz - do - dm - dk - dj - de - cz - cy - cx - cw - cv - cu - cr - co - cn - cm - cl - ck - ci - ch - cg - cf - cd - cc - ca - bz - by - bw - bv - bt - bs - br - bq - bo - bn - bm - bl - bj - bi - bh - bg - bf - be - bd - bb - ba - az - ax - aw - au - at - as - ar - aq - ao - an - am - al - ai - ag - af - ae - ad - ac generic: - "삼성" - "닷컴" - "닷넷" - "香格里拉" - "餐厅" - "食品" - "飞利浦" - "電訊盈科" - "集团" - "通販" - "购物" - "谷歌" - "诺基亚" - "联通" - "网络" - "网站" - "网店" - "网址" - "组织机构" - "移动" - "珠宝" - "点看" - "游戏" - "淡马锡" - "机构" - "書籍" - "时尚" - "新闻" - "政府" - "政务" - "手表" - "手机" - "我爱你" - "慈善" - "微博" - "广东" - "工行" - "家電" - "娱乐" - "天主教" - "大拿" - "大众汽车" - "在线" - "嘉里大酒店" - "嘉里" - "商标" - "商店" - "商城" - "公益" - "公司" - "八卦" - "健康" - "信息" - "佛山" - "企业" - "中文网" - "中信" - "世界" - "ポイント" - "ファッション" - "セール" - "ストア" - "コム" - "グーグル" - "クラウド" - "みんな" - "คอม" - "संगठन" - "नेट" - "कॉम" - "همراه" - "موقع" - "موبايلي" - "كوم" - "كاثوليك" - "عرب" - "شبكة" - "بيتك" - "بازار" - "العليان" - "ارامكو" - "اتصالات" - "ابوظبي" - "קום" - "сайт" - "рус" - "орг" - "онлайн" - "москва" - "ком" - "католик" - "дети" - zuerich - zone - zippo - zip - zero - zara - zappos - yun - youtube - you - yokohama - yoga - yodobashi - yandex - yamaxun - yahoo - yachts - xyz - xxx - xperia - xin - xihuan - xfinity - xerox - xbox - wtf - wtc - wow - world - works - work - woodside - wolterskluwer - wme - winners - wine - windows - win - williamhill - wiki - wien - whoswho - weir - weibo - wedding - wed - website - weber - webcam - weatherchannel - weather - watches - watch - warman - wanggou - wang - walter - walmart - wales - 
vuelos - voyage - voto - voting - vote - volvo - volkswagen - vodka - vlaanderen - vivo - viva - vistaprint - vista - vision - visa - virgin - vip - vin - villas - viking - vig - video - viajes - vet - versicherung - vermögensberatung - vermögensberater - verisign - ventures - vegas - vanguard - vana - vacations - ups - uol - uno - university - unicom - uconnect - ubs - ubank - tvs - tushu - tunes - tui - tube - trv - trust - travelersinsurance - travelers - travelchannel - travel - training - trading - trade - toys - toyota - town - tours - total - toshiba - toray - top - tools - tokyo - today - tmall - tkmaxx - tjx - tjmaxx - tirol - tires - tips - tiffany - tienda - tickets - tiaa - theatre - theater - thd - teva - tennis - temasek - telefonica - telecity - tel - technology - tech - team - tdk - tci - taxi - tax - tattoo - tatar - tatamotors - target - taobao - talk - taipei - tab - systems - symantec - sydney - swiss - swiftcover - swatch - suzuki - surgery - surf - support - supply - supplies - sucks - style - study - studio - stream - store - storage - stockholm - stcgroup - stc - statoil - statefarm - statebank - starhub - star - staples - stada - srt - srl - spreadbetting - spot - spiegel - space - soy - sony - song - solutions - solar - sohu - software - softbank - social - soccer - sncf - smile - smart - sling - skype - sky - skin - ski - site - singles - sina - silk - shriram - showtime - show - shouji - shopping - shop - shoes - shiksha - shia - shell - shaw - sharp - shangrila - sfr - sexy - sex - sew - seven - ses - services - sener - select - seek - security - secure - seat - search - scot - scor - scjohnson - science - schwarz - schule - school - scholarships - schmidt - schaeffler - scb - sca - sbs - sbi - saxo - save - sas - sarl - sapo - sap - sanofi - sandvikcoromant - sandvik - samsung - samsclub - salon - sale - sakura - safety - safe - saarland - ryukyu - rwe - run - ruhr - rugby - rsvp - room - rogers - rodeo - rocks - rocher - rmit - rip - 
rio - ril - rightathome - ricoh - richardli - rich - rexroth - reviews - review - restaurant - rest - republican - report - repair - rentals - rent - ren - reliance - reit - reisen - reise - rehab - redumbrella - redstone - red - recipes - realty - realtor - realestate - read - raid - radio - racing - qvc - quest - quebec - qpon - pwc - pub - prudential - pru - protection - property - properties - promo - progressive - prof - productions - prod - pro - prime - press - praxi - pramerica - post - porn - politie - poker - pohl - pnc - plus - plumbing - playstation - play - place - pizza - pioneer - pink - ping - pin - pid - pictures - pictet - pics - piaget - physio - photos - photography - photo - phone - philips - phd - pharmacy - pfizer - pet - pccw - pay - passagens - party - parts - partners - pars - paris - panerai - panasonic - pamperedchef - page - ovh - ott - otsuka - osaka - origins - orientexpress - organic - org - orange - oracle - open - ooo - onyourside - online - onl - ong - one - omega - ollo - oldnavy - olayangroup - olayan - okinawa - office - 'off' - observer - obi - nyc - ntt - nrw - nra - nowtv - nowruz - now - norton - northwesternmutual - nokia - nissay - nissan - ninja - nikon - nike - nico - nhk - ngo - nfl - nexus - nextdirect - next - news - newholland - new - neustar - network - netflix - netbank - net - nec - nba - navy - natura - nationwide - name - nagoya - nadex - nab - mutuelle - mutual - museum - mtr - mtpc - mtn - msd - movistar - movie - mov - motorcycles - moto - moscow - mortgage - mormon - mopar - montblanc - monster - money - monash - mom - moi - moe - moda - mobily - mobile - mobi - mma - mls - mlb - mitsubishi - mit - mint - mini - mil - microsoft - miami - metlife - merckmsd - meo - menu - men - memorial - meme - melbourne - meet - media - med - mckinsey - mcdonalds - mcd - mba - mattel - maserati - marshalls - marriott - markets - marketing - market - map - mango - management - man - makeup - maison - maif - madrid - macys - 
luxury - luxe - lupin - lundbeck - ltda - ltd - lplfinancial - lpl - love - lotto - lotte - london - lol - loft - locus - locker - loans - loan - lixil - living - live - lipsy - link - linde - lincoln - limo - limited - lilly - like - lighting - lifestyle - lifeinsurance - life - lidl - liaison - lgbt - lexus - lego - legal - lefrak - leclerc - lease - lds - lawyer - law - latrobe - latino - lat - lasalle - lanxess - landrover - land - lancome - lancia - lancaster - lamer - lamborghini - ladbrokes - lacaixa - kyoto - kuokgroup - kred - krd - kpn - kpmg - kosher - komatsu - koeln - kiwi - kitchen - kindle - kinder - kim - kia - kfh - kerryproperties - kerrylogistics - kerryhotels - kddi - kaufen - juniper - juegos - jprs - jpmorgan - joy - jot - joburg - jobs - jnj - jmp - jll - jlc - jio - jewelry - jetzt - jeep - jcp - jcb - java - jaguar - iwc - iveco - itv - itau - istanbul - ist - ismaili - iselect - irish - ipiranga - investments - intuit - international - intel - int - insure - insurance - institute - ink - ing - info - infiniti - industries - immobilien - immo - imdb - imamat - ikano - iinet - ifm - ieee - icu - ice - icbc - ibm - hyundai - hyatt - hughes - htc - hsbc - how - house - hotmail - hotels - hoteles - hot - hosting - host - hospital - horse - honeywell - honda - homesense - homes - homegoods - homedepot - holiday - holdings - hockey - hkt - hiv - hitachi - hisamitsu - hiphop - hgtv - hermes - here - helsinki - help - healthcare - health - hdfcbank - hdfc - hbo - haus - hangout - hamburg - hair - guru - guitars - guide - guge - gucci - guardian - group - grocery - gripe - green - gratis - graphics - grainger - gov - got - gop - google - goog - goodyear - goodhands - goo - golf - goldpoint - gold - godaddy - gmx - gmo - gmbh - gmail - globo - global - gle - glass - glade - giving - gives - gifts - gift - ggee - george - genting - gent - gea - gdn - gbiz - garden - gap - games - game - gallup - gallo - gallery - gal - fyi - futbol - furniture - fund 
- fun - fujixerox - fujitsu - ftr - frontier - frontdoor - frogans - frl - fresenius - free - fox - foundation - forum - forsale - forex - ford - football - foodnetwork - food - foo - fly - flsmidth - flowers - florist - flir - flights - flickr - fitness - fit - fishing - fish - firmdale - firestone - fire - financial - finance - final - film - fido - fidelity - fiat - ferrero - ferrari - feedback - fedex - fast - fashion - farmers - farm - fans - fan - family - faith - fairwinds - fail - fage - extraspace - express - exposed - expert - exchange - everbank - events - eus - eurovision - etisalat - esurance - estate - esq - erni - ericsson - equipment - epson - epost - enterprises - engineering - engineer - energy - emerck - email - education - edu - edeka - eco - eat - earth - dvr - dvag - durban - dupont - duns - dunlop - duck - dubai - dtv - drive - download - dot - doosan - domains - doha - dog - dodge - doctor - docs - dnp - diy - dish - discover - discount - directory - direct - digital - diet - diamonds - dhl - dev - design - desi - dentist - dental - democrat - delta - deloitte - dell - delivery - degree - deals - dealer - deal - dds - dclk - day - datsun - dating - date - data - dance - dad - dabur - cyou - cymru - cuisinella - csc - cruises - cruise - crs - crown - cricket - creditunion - creditcard - credit - courses - coupons - coupon - country - corsica - coop - cool - cookingchannel - cooking - contractors - contact - consulting - construction - condos - comsec - computer - compare - company - community - commbank - comcast - com - cologne - college - coffee - codes - coach - clubmed - club - cloud - clothing - clinique - clinic - click - cleaning - claims - cityeats - city - citic - citi - citadel - cisco - circle - cipriani - church - chrysler - chrome - christmas - chloe - chintai - cheap - chat - chase - channel - chanel - cfd - cfa - cern - ceo - center - ceb - cbs - cbre - cbn - cba - catholic - catering - cat - casino - cash - caseih - case - 
casa - cartier - cars - careers - career - care - cards - caravan - car - capitalone - capital - capetown - canon - cancerresearch - camp - camera - cam - calvinklein - call - cal - cafe - cab - bzh - buzz - buy - business - builders - build - bugatti - budapest - brussels - brother - broker - broadway - bridgestone - bradesco - box - boutique - bot - boston - bostik - bosch - boots - booking - book - boo - bond - bom - bofa - boehringer - boats - bnpparibas - bnl - bmw - bms - blue - bloomberg - blog - blockbuster - blanco - blackfriday - black - biz - bio - bingo - bing - bike - bid - bible - bharti - bet - bestbuy - best - berlin - bentley - beer - beauty - beats - bcn - bcg - bbva - bbt - bbc - bayern - bauhaus - basketball - baseball - bargains - barefoot - barclays - barclaycard - barcelona - bar - bank - band - bananarepublic - banamex - baidu - baby - azure - axa - aws - avianca - autos - auto - author - auspost - audio - audible - audi - auction - attorney - athleta - associates - asia - asda - arte - art - arpa - army - archi - aramco - arab - aquarelle - apple - app - apartments - aol - anz - anquan - android - analytics - amsterdam - amica - amfam - amex - americanfamily - americanexpress - alstom - alsace - ally - allstate - allfinanz - alipay - alibaba - alfaromeo - akdn - airtel - airforce - airbus - aigo - aig - agency - agakhan - africa - afl - afamilycompany - aetna - aero - aeg - adult - ads - adac - actor - active - aco - accountants - accountant - accenture - academy - abudhabi - abogado - able - abc - abbvie - abbott - abb - abarth - aarp - aaa - onion twitter-text-1.14.7/test/twitter-text-conformance/Gemfile000664 001751 001751 00000000056 13126461251 023520 0ustar00srudsrud000000 000000 source "https://rubygems.org" gem 'nokogiri' twitter-text-1.14.7/test/twitter-text-conformance/LICENSE000664 001751 001751 00000023610 13126461251 023233 0ustar00srudsrud000000 000000 Copyright 2011 Twitter, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the License below, or at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
twitter-text-1.14.7/test/twitter-text-conformance/hit_highlighting.yml000664 001751 001751 00000005315 13126461251 026264 0ustar00srudsrud000000 000000 tests: plain_text: - description: "Highlight the beginning of a string" text: "this is a test" hits: [ [0, 4] ] expected: "this is a test" - description: "Highlight the middle of a string" text: "this is a test" hits: [ [5, 7] ] expected: "this is a test" - description: "Highlight the end of a string" text: "this is a test" hits: [ [10, 14] ] expected: "this is a test" - description: "Highlight multiple terms" text: "this is a test" hits: [ [0, 4], [10, 14] ] expected: "this is a test" - description: "DO NOT highlight with empty hits" text: "this is a test" hits: [] expected: "this is a test" - description: "Highlight within Japanese text" text: "東京の天気" hits: [ [0, 2] ] expected: "東京の天気" with_links: - description: "Highlight after a link (offset does not include markup)" text: "@username this is an example" hits: [ [10, 14] ] expected: "@username this is an example" - description: "Highlight anchor text of a link (offset does not include markup)" text: "@username this is an example" hits: [ [1, 9] ] expected: "@username this is an example" - description: "Highlight around a link (offset does not include markup)" text: "@username this is an example" hits: [ [0, 14] ] expected: "@username this is an example" - description: "Highlight touching tags" text: "foofoo" hits: [ [3, 6] ] expected: "foofoo" - description: "Highlight two links" text: "foo bar baz" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz" - description: "Highlight non-link then link not at end" text: "foo bar baz something else" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz something else" - description: "Highlight non-link then link at end" text: "foo bar baz" hits: [ [4, 7], [8, 11] ] expected: "foo bar baz" - description: "Highlight mention at end" text: "something via @twitter" hits: [ [14, 22] ] expected: "something via @twitter" 
twitter-text-1.14.7/test/twitter-text-conformance/extract.yml000664 001751 001751 00000115635 13126461251 024434 0ustar00srudsrud000000 000000 tests: mentions: - description: "Extract mention at the begining of a tweet" text: "@username reply" expected: ["username"] - description: "Extract mention at the end of a tweet" text: "mention @username" expected: ["username"] - description: "Extract mention in the middle of a tweet" text: "mention @username in the middle" expected: ["username"] - description: "Extract mention of username with underscore" text: "mention @user_name" expected: ["user_name"] - description: "Extract mention of all numeric username" text: "mention @12345" expected: ["12345"] - description: "Extract mention or multiple usernames" text: "mention @username1 @username2" expected: ["username1", "username2"] - description: "Extract mention in the middle of a Japanese tweet" text: "の@usernameに到着を待っている" expected: ["username"] - description: "DO NOT extract username ending in @" text: "Current Status: @_@ (cc: @username)" expected: ["username"] - description: "DO NOT extract username followed by accented latin characters" text: "@aliceìnheiro something something" expected: [] - description: "Extract lone metion but not @user@user (too close to an email)" text: "@username email me @test@example.com" expected: ["username"] - description: "DO NOT extract 'http' in '@http://' as username" text: "@http://twitter.com" expected: [] - description: "Extract mentions before newline" text: "@username\n@mention" expected: ["username", "mention"] - description: "Extract mentions after 'RT'" text: "RT@username RT:@mention RT @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'rt'" text: "rt@username rt:@mention rt @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'Rt'" text: "Rt@username Rt:@mention Rt @test" expected: ["username", "mention", "test"] - description: "Extract mentions after 'rT'" 
text: "rT@username rT:@mention rT @test" expected: ["username", "mention", "test"] - description: "DO NOT extract username preceded by !" text: "f!@kn" expected: [] - description: "DO NOT extract username preceded by @" text: "f@@kn" expected: [] - description: "DO NOT extract username preceded by #" text: "f#@kn" expected: [] - description: "DO NOT extract username preceded by $" text: "f$@kn" expected: [] - description: "DO NOT extract username preceded by %" text: "f%@kn" expected: [] - description: "DO NOT extract username preceded by &" text: "f&@kn" expected: [] - description: "DO NOT extract username preceded by *" text: "f*@kn" expected: [] mentions_with_indices: - description: "Extract a mention at the start" text: "@username yo!" expected: - screen_name: "username" indices: [0, 9] - description: "Extract a mention that has the same thing mentioned at the start" text: "username @username" expected: - screen_name: "username" indices: [9, 18] - description: "Extract a mention in the middle of a Japanese tweet" text: "の@usernameに到着を待っている" expected: - screen_name: "username" indices: [1, 10] mentions_or_lists_with_indices: - description: "Extract a mention" text: "@username yo!" expected: - screen_name: "username" list_slug: "" indices: [0, 9] - description: "Extract a list" text: "@username/list-name is a great list!" expected: - screen_name: "username" list_slug: "/list-name" indices: [0, 19] - description: "Extract a mention and list" text: "Hey @username, check out out @otheruser/list_name-01!" expected: - screen_name: "username" list_slug: "" indices: [4, 13] - screen_name: "otheruser" list_slug: "/list_name-01" indices: [29, 52] - description: "Extract a list in the middle of a Japanese tweet" text: "の@username/list_name-01に到着を待っている" expected: - screen_name: "username" list_slug: "/list_name-01" indices: [1, 23] - description: "DO NOT extract a list with slug that starts with a number" text: "@username/7list-name is a great list!" 
expected: - screen_name: "username" list_slug: "" indices: [0, 9] replies: - description: "Extract reply at the begining of a tweet" text: "@username reply" expected: "username" - description: "Extract reply preceded by only a space" text: " @username reply" expected: "username" - description: "Extract reply preceded by only a full-width space (U+3000)" text: " @username reply" expected: "username" - description: "DO NOT Extract reply when preceded by text" text: "a @username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by ." text: ".@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by /" text: "/@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by _" text: "_@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by -" text: "-@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by +" text: "+@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by #" text: "#@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by !" 
text: "!@username mention, not a reply" expected: - description: "DO NOT Extract reply when preceded by @" text: "@@username mention, not a reply" expected: - description: "DO NOT Extract reply when followed by URL" text: "@http://twitter.com" expected: urls: - description: "Extract a lone URL" text: "http://example.com" expected: ["http://example.com"] - description: "Extract valid URL: http://google.com" text: "text http://google.com" expected: ["http://google.com"] - description: "Extract valid URL: http://foobar.com/#" text: "text http://foobar.com/#" expected: ["http://foobar.com/#"] - description: "Extract valid URL: http://google.com/#foo" text: "text http://google.com/#foo" expected: ["http://google.com/#foo"] - description: "Extract valid URL: http://google.com/#search?q=iphone%20-filter%3Alinks" text: "text http://google.com/#search?q=iphone%20-filter%3Alinks" expected: ["http://google.com/#search?q=iphone%20-filter%3Alinks"] - description: "Extract valid URL: http://twitter.com/#search?q=iphone%20-filter%3Alinks" text: "text http://twitter.com/#search?q=iphone%20-filter%3Alinks" expected: ["http://twitter.com/#search?q=iphone%20-filter%3Alinks"] - description: "Extract valid URL: http://somedomain.com/index.php?path=/abc/def/" text: "text http://somedomain.com/index.php?path=/abc/def/" expected: ["http://somedomain.com/index.php?path=/abc/def/"] - description: "Extract valid URL: http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html" text: "text http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html" expected: ["http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"] - description: "Extract valid URL: http://somehost.com:3000" text: "text http://somehost.com:3000" expected: ["http://somehost.com:3000"] - description: "Extract valid URL: http://xo.com/~matthew+%ff-x" text: "text http://xo.com/~matthew+%ff-x" expected: ["http://xo.com/~matthew+%ff-x"] - description: "Extract valid URL: http://xo.com/~matthew+%ff-,.;x" text: "text 
http://xo.com/~matthew+%ff-,.;x" expected: ["http://xo.com/~matthew+%ff-,.;x"] - description: "Extract valid URL: http://xo.com/,.;x" text: "text http://xo.com/,.;x" expected: ["http://xo.com/,.;x"] - description: "Extract valid URL: http://en.wikipedia.org/wiki/Primer_(film)" text: "text http://en.wikipedia.org/wiki/Primer_(film)" expected: ["http://en.wikipedia.org/wiki/Primer_(film)"] - description: "Extract valid URL: http://www.ams.org/bookstore-getitem/item=mbk-59" text: "text http://www.ams.org/bookstore-getitem/item=mbk-59" expected: ["http://www.ams.org/bookstore-getitem/item=mbk-59"] - description: "Extract valid URL: http://✪df.ws/ejp" text: "text http://✪df.ws/ejp" expected: ["http://✪df.ws/ejp"] - description: "Extract valid URL: http://chilp.it/?77e8fd" text: "text http://chilp.it/?77e8fd" expected: ["http://chilp.it/?77e8fd"] - description: "Extract valid URL: http://x.com/oneletterdomain" text: "text http://x.com/oneletterdomain" expected: ["http://x.com/oneletterdomain"] - description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" expected: ["http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"] - description: "DO NOT extract invalid URL: http://domain-begin_dash_2314352345_dfasd.foo-cow_4352.com" text: "text http://domain-dash_2314352345_dfasd.foo-cow_4352.com" expected: [] - description: "DO NOT extract invalid URL: http://-begin_dash_2314352345_dfasd.foo-cow_4352.com" text: "text http://-dash_2314352345_dfasd.foo-cow_4352.com" expected: [] - description: "DO NOT extract invalid URL: http://no-tld" text: "text http://no-tld" expected: [] - description: "DO NOT extract invalid URL: http://tld-too-short.x" text: "text http://tld-too-short.x" expected: [] - description: "DO NOT extract invalid URL with invalid preceding character: (http://twitter.com" text: 
"(http://twitter.com" expected: ["http://twitter.com"] - description: "Extract a very long hyphenated sub-domain URL (single letter hyphens)" text: "text http://word-and-a-number-8-ftw.domain.com/" expected: ["http://word-and-a-number-8-ftw.domain.com/"] - description: "Extract a hyphenated TLD (usually a typo)" text: "text http://domain.com-that-you-should-have-put-a-space-after" expected: ["http://domain.com"] - description: "Extract URL ending with # value" text: "text http://foo.com?#foo text" expected: ["http://foo.com?#foo"] - description: "Extract URLs without protocol on (com|org|edu|gov|net) domains" text: "foo.com foo.net foo.org foo.edu foo.gov" expected: ["foo.com", "foo.net", "foo.org", "foo.edu", "foo.gov"] - description: "Extract URLs without protocol not on (com|org|edu|gov|net) domains" text: "foo.baz foo.co.jp www.xxxxxxx.baz www.foo.co.uk wwwww.xxxxxxx foo.comm foo.somecom foo.govedu foo.jp" expected: ["foo.co.jp", "www.foo.co.uk"] - description: "Extract URLs without protocol on ccTLD with slash" text: "t.co/abcde bit.ly/abcde" expected: ["t.co/abcde", "bit.ly/abcde"] - description: "Extract URLs with protocol on ccTLD domains" text: "http://foo.jp http://fooooo.jp" expected: ["http://foo.jp", "http://fooooo.jp"] - description: "Extract URLs with a - or + at the end of the path" text: "Go to http://example.com/a+ or http://example.com/a-" expected: ["http://example.com/a+", "http://example.com/a-"] - description: "Extract URLs with longer paths ending in -" text: "Go to http://example.com/view/slug-url-?foo=bar" expected: ["http://example.com/view/slug-url-?foo=bar"] - description: "Extract URLs beginning with a space" text: "@user Try http:// example.com/path" expected: ["example.com/path"] - description: "Extract long URL without protocol surrounded by CJK characters" text: "これは日本語です。example.com/path/index.html中国語example.com/path한국" expected: ["example.com/path/index.html", "example.com/path"] - description: "Extract short URL without protocol 
surrounded by CJK characters" text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde" expected: ["twitter.com", "example.com", "t.co/abcde", "twitter.com", "example2.com", "twitter.com/abcde"] - description: "Extract URLs with and without protocol surrounded by CJK characters" text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde" expected: ["http://twitter.com/", "example.com", "http://t.co/abcde", "twitter.com", "example2.com", "http://twitter.com/abcde"] - description: "Extract URLs with protocol and path containing Cyrillic characters" text: "Go to http://twitter.com/Русские_слова" expected: ["http://twitter.com/Русские_слова"] - description: "DO NOT extract short URLs without protocol on ccTLD domains without path" text: "twitter.jp日本語it.so中国語foo.jp it.so foo.jp" expected: [] - description: "Extract some (tv|co) short URLs without protocol on ccTLD domains without path" text: "MLB.tv vine.co twitch.tv t.co" expected: ["MLB.tv", "vine.co", "twitch.tv", "t.co"] - description: "Extract URLs beginning with a non-breaking space (U+00A0)" text: "@user Try http:// example.com/path" expected: ["example.com/path"] - description: "Extract URLs with underscores and dashes in the subdomain" text: "test http://sub_domain-dash.twitter.com" expected: ["http://sub_domain-dash.twitter.com"] - description: "Extract URL with minimum number of valid characters" text: "test http://a.b.cd" expected: ["http://a.b.cd"] - description: "Extract URLs containing underscores and dashes" text: "test http://a_b.c-d.com" expected: ["http://a_b.c-d.com"] - description: "Extract URLs containing dashes in the subdomain" text: "test http://a-b.c.com" expected: ["http://a-b.c.com"] - description: "Extract URLs with dashes in the domain name" text: "test http://twitter-dash.com" expected: ["http://twitter-dash.com"] - description: "Extract URLs with lots of symbols then a period" text: 
"http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188" expected: ["http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"] - description: "DO NOT extract URLs containing leading dashes in the subdomain" text: "test http://-leadingdash.twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing dashes in the subdomain" text: "test http://trailingdash-.twitter.com" expected: [] - description: "DO NOT extract URLs containing leading underscores in the subdomain" text: "test http://_leadingunderscore.twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing underscores in the subdomain" text: "test http://trailingunderscore_.twitter.com" expected: [] - description: "DO NOT extract URLs containing leading dashes in the domain name" text: "test http://-twitter.com" expected: [] - description: "DO NOT extract URLs containing trailing dashes in the domain name" text: "test http://twitter-.com" expected: [] - description: "DO NOT extract URLs containing underscores in the domain name" text: "test http://twitter_underscore.com" expected: [] - description: "DO NOT extract URLs containing underscores in the tld" text: "test http://twitter.c_o_m" expected: [] - description: "Extract valid URL http://www.foo.com/foo/path-with-period./" text: "test http://www.foo.com/foo/path-with-period./" expected: ["http://www.foo.com/foo/path-with-period./"] - description: "Extract valid URL http://www.foo.org.za/foo/bar/688.1" text: "test http://www.foo.org.za/foo/bar/688.1" expected: ["http://www.foo.org.za/foo/bar/688.1"] - description: "Extract valid URL http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0" text: "test http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0" expected: ["http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"] - description: "Extract valid URL 
http://foo.com/bar/123/foo_&_bar/" text: "test http://foo.com/bar/123/foo_&_bar/" expected: ["http://foo.com/bar/123/foo_&_bar/"] - description: "Extract valid URL http://www.cp.sc.edu/events/65" text: "test http://www.cp.sc.edu/events/65 test" expected: ["http://www.cp.sc.edu/events/65"] - description: "Extract valid URL http://www.andersondaradio.no.comunidades.net/" text: "http://www.andersondaradio.no.comunidades.net/ test test" expected: ["http://www.andersondaradio.no.comunidades.net/"] - description: "Extract valid URL ELPAÍS.com" text: "test ELPAÍS.com" expected: ["ELPAÍS.com"] - description: "DO NOT include period at the end of URL" text: "test http://twitter.com/." expected: ["http://twitter.com/"] - description: "Extract a URL with '?' in fragment" text: "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata" expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"] - description: "Extract a URL with '?' in fragment in a text" text: "text http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata text" expected: ["http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"] # A common cause of runaway regex engines. - description: "Extract a URL with a ton of trailing periods" text: "Test a ton of periods http://example.com/path.........................................." 
expected: ["http://example.com/path"] - description: "Extract a URL with a ton of trailing commas" text: "Test a ton of periods http://example.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,," expected: ["http://example.com/"] - description: "Extract a URL with a ton of trailing '!'" text: "Test a ton of periods http://example.com/path/!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" expected: ["http://example.com/path/"] - description: "DO NOT extract URLs in hashtag or @mention" text: "#test.com @test.com #http://test.com @http://test.com #t.co/abcde @t.co/abcde" expected: [] - description: "Extract a t.co URL with a trailing apostrophe" text: "I really like http://t.co/pbY2NfTZ's website" expected: ["http://t.co/pbY2NfTZ"] - description: "Extract a t.co URL with a trailing hyphen" text: "Check this site out http://t.co/FNkPfmii- it's great" expected: ["http://t.co/FNkPfmii"] - description: "Extract a t.co URL with a trailing colon" text: "According to http://t.co/ulYGBYSo: the internet is cool" expected: ["http://t.co/ulYGBYSo"] - description: "Extract URL before newline" text: "http://twitter.com\nhttp://example.com\nhttp://example.com/path\nexample.com/path\nit.so\nit.so/abcde" expected: ["http://twitter.com", "http://example.com", "http://example.com/path", "example.com/path", "it.so/abcde"] - description: "DO NOT extract URL if preceded by $" text: "$http://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" expected: [] - description: "DO NOT extract .bz2 file name as URL" text: "long.test.tar.bz2 test.tar.bz2 tar.bz2" expected: [] - description: "DO NOT extract URL with gTLD followed by @ sign" text: "john.doe.gov@mail.com" expected: [] - description: "DO NOT extract URL with ccTLD followed by @ sign" text: "john.doe.jp@mail.com" expected: [] urls_with_indices: - description: "Extract a URL" text: "text http://google.com" expected: - url: "http://google.com" indices: [5, 22] - description: "Extract a URL from a Japanese tweet" text: 
"皆さん見てください! http://google.com" expected: - url: "http://google.com" indices: [11, 28] - description: "Extract URLs without protocol on ccTLD with slash" text: "t.co/abcde bit.ly/abcde" expected: - url: "t.co/abcde" indices: [0, 10] - url: "bit.ly/abcde" indices: [11, 23] - description: "Extract URLs without protocol surrounded by CJK characters" text: "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde" expected: - url: "twitter.com" indices: [0, 11] - url: "example.com" indices: [20, 31] - url: "t.co/abcde" indices: [34, 44] - url: "twitter.com" indices: [46, 57] - url: "example2.com" indices: [58, 70] - url: "twitter.com/abcde" indices: [73, 90] - description: "Extract URLs with and without protocol surrounded by CJK characters" text: "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde" expected: - url: "http://twitter.com/" indices: [0, 19] - url: "example.com" indices: [28, 39] - url: "http://t.co/abcde" indices: [42, 59] - url: "twitter.com" indices: [61, 72] - url: "example2.com" indices: [75, 87] - url: "http://twitter.com/abcde" indices: [90, 114] - description: "Extract t.co URLs skipping trailing characters and adjusting indices correctly" text: "http://t.co/pbY2NfTZ's http://t.co/2vYHpAc5; http://t.co/ulYGBYSo: http://t.co/8MkmHU0k+c http://t.co/TKLp64dY.x http://t.co/8t7G3ddS#a http://t.co/FNkPfmii-" expected: - url: "http://t.co/pbY2NfTZ" indices: [0, 20] - url: "http://t.co/2vYHpAc5" indices: [23, 43] - url: "http://t.co/ulYGBYSo" indices: [45, 65] - url: "http://t.co/8MkmHU0k" indices: [67, 87] - url: "http://t.co/TKLp64dY" indices: [90, 110] - url: "http://t.co/8t7G3ddS" indices: [113, 133] - url: "http://t.co/FNkPfmii" indices: [136, 156] - description: "Properly extract URL that contains t.co in referer" text: "http://www.foo.com?referer=https://t.co/abcde http://t.co/xyzzy" expected: - url: "http://www.foo.com?referer=https://t.co/abcde" 
indices: [0, 45] - url: "http://t.co/xyzzy" indices: [46, 63] - description: "Extract correct indices for duplicate instances of the same URL" text: "http://t.co http://t.co" expected: - url: "http://t.co" indices: [0, 11] - url: "http://t.co" indices: [12, 23] - description: "Extract I18N URL" text: "test http://xn--ls8h.XN--ls8h.la/" expected: - url: "http://xn--ls8h.XN--ls8h.la/" indices: [5, 33] - description: "Extract URLs with IDN(not encoded)" text: "test http://foobar.みんな/ http://foobar.中国/ http://foobar.پاکستان/ " expected: - url: "http://foobar.みんな/" indices: [5, 23] - url: "http://foobar.中国/" indices: [24, 41] - url: "http://foobar.پاکستان/" indices: [42, 64] hashtags: - description: "Extract hashtag after emoji without variant selector (uFE0E or uFE0F)" text: "a ✌#hashtag here" expected: ["hashtag"] - description: "Extract hashtag after emoji with variant selector FE0E" text: "a ✌︎#hashtag here" expected: ["hashtag"] - description: "Extract hashtag after emoji with variant selector FE0F" text: "a ✌️#hashtag here" expected: ["hashtag"] - description: "Extract hashtag after emoji with skin tone without variant selector (FE0E or FE0F)" text: "a ✌🏿#hashtag here" expected: ["hashtag"] - description: "Extract hashtag after emoji with skin tone with variant selector FE0F" text: "a ✌🏿️#hashtag here" expected: ["hashtag"] - description: "Extract hashtag after emoji with zero-width-joiner" text: "a 👨‍👩‍👧#hashtag here" expected: ["hashtag"] - description: "Extract an all-alpha hashtag" text: "a #hashtag here" expected: ["hashtag"] - description: "Extract a letter-then-number hashtag" text: "this is #hashtag1" expected: ["hashtag1"] - description: "Extract a number-then-letter hashtag" text: "#1hashtag is this" expected: ["1hashtag"] - description: "DO NOT Extract an all-numeric hashtag" text: "On the #16 bus" expected: [] - description: "DO NOT Extract a single numeric hashtag" text: "#0" expected: [] - description: "Extract hashtag after bracket" text: 
"(#hashtag1 )#hashtag2 [#hashtag3 ]#hashtag4 ’#hashtag5’#hashtag6" expected: ["hashtag1", "hashtag2", "hashtag3", "hashtag4", "hashtag5", "hashtag6"] - description: "Extract a hashtag containing ñ" text: "I'll write more tests #mañana" expected: ["mañana"] - description: "Extract a hashtag containing é" text: "Working remotely #café" expected: ["café"] - description: "Extract a hashtag containing ü" text: "Getting my Oktoberfest on #münchen" expected: ["münchen"] - description: "DO NOT Extract a hashtag containing Japanese" text: "this is not valid: # 会議中 ハッシュ" expected: [] - description: "Extract a hashtag in Korean" text: "What is #트위터 anyway?" expected: ["트위터"] - description: "Extract a half-width Hangul hashtag" text: "Just random half-width Hangul #ᆪᆭᄚ" expected: ["ᆪᆭᄚ"] - description: "Extract a hashtag in Russian" text: "What is #ашок anyway?" expected: ["ашок"] - description: "Extract a starting katakana hashtag" text: "#カタカナ is a hashtag" expected: ["カタカナ"] - description: "Extract a starting hiragana hashtag" text: "#ひらがな FTW!" expected: ["ひらがな"] - description: "Extract a starting kanji hashtag" text: "#漢字 is the future" expected: ["漢字"] - description: "Extract a trailing katakana hashtag" text: "Hashtag #カタカナ" expected: ["カタカナ"] - description: "Extract a trailing hiragana hashtag" text: "Japanese hashtags #ひらがな" expected: ["ひらがな"] - description: "Extract a trailing kanji hashtag" text: "Study time #漢字" expected: ["漢字"] - description: "Extract a central katakana hashtag" text: "See my #カタカナ hashtag?" expected: ["カタカナ"] - description: "Extract a central hiragana hashtag" text: "Study #ひらがな for fun and profit" expected: ["ひらがな"] - description: "Extract a central kanji hashtag" text: "Some say #漢字 is the past. what do they know?" 
expected: ["漢字"] - description: "Extract a Kanji/Katakana mixed hashtag" text: "日本語ハッシュタグテスト #日本語ハッシュタグ" expected: ["日本語ハッシュタグ"] - description: "Extract a hashtag after a punctuation" text: "日本語ハッシュテスト。#日本語ハッシュタグ" expected: ["日本語ハッシュタグ"] - description: "DO NOT include a punctuation in a hashtag" text: "#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Extract a full-width Alnum hashtag" text: "全角英数字ハッシュタグ #hashtag123" expected: ["hashtag123"] - description: "DO NOT extract a hashtag without a preceding space" text: "日本語ハッシュタグ#日本語ハッシュタグ" expected: [] - description: "Hashtag with chouon" text: "長音ハッシュタグ。#サッカー" expected: ["サッカー"] - description: "Hashtag with half-width chouon" text: "長音ハッシュタグ。#サッカー" expected: ["サッカー"] - description: "Hashtag with half-widh voiced sounds marks" text: "#ハッシュタグ #パピプペポ" expected: ["ハッシュタグ", "パピプペポ"] - description: "Hashtag with half-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Hashtag with full-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: ["日本語ハッシュタグ"] - description: "Hashtag with ideographic iteration mark" text: "#云々 #学問のすゝめ #いすゞ #各〻 #各〃" expected: ["云々", "学問のすゝめ", "いすゞ", "各〻", "各〃"] - description: "Extract hashtag with fullwidth tilde" text: "#メ~テレ ハッシュタグ内で~が認識されず" expected: ["メ~テレ"] - description: "Extract hashtag with wave dash" text: "#メ〜テレ ハッシュタグ内で~が認識されず" expected: ["メ〜テレ"] - description: "Hashtags with ş (U+015F)" text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş" expected: ["Ateş", "qrşt", "ştu", "ş"] - description: "Hashtags with İ (U+0130) and ı (U+0131)" text: "Here’s a test tweet for you: #İn #ın" expected: ["İn", "ın"] - description: "Hashtag before punctuations" text: "#hashtag: #hashtag; #hashtag, #hashtag. #hashtag! #hashtag?" 
expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"] - description: "Hashtag after punctuations" text: ":#hashtag ;#hashtag ,#hashtag .#hashtag !#hashtag ?#hashtag" expected: ["hashtag", "hashtag", "hashtag", "hashtag", "hashtag", "hashtag"] - description: "Hashtag before newline" text: "#hashtag\ntest\n#hashtag2\ntest\n#hashtag3\n" expected: ["hashtag", "hashtag2", "hashtag3"] - description: "DO NOT extract hashtag when # is followed by URL" text: "#http://twitter.com #https://twitter.com" expected: [] - description: "DO NOT extract hashtag if it's a part of URL" text: "http://twitter.com/#hashtag twitter.com/#hashtag" expected: [] - description: "Extract hashtags with Latin extended characters" text: "#Azərbaycanca #mûǁae #Čeština #Ċaoiṁín" expected: ["Azərbaycanca", "mûǁae", "Čeština", "Ċaoiṁín"] - description: "Extract Arabic hashtags" text: "#سیاست #ایران #السياسة #السياح #لغات #اتمی #کنفرانس #العربية #الجزيرة #فارسی" expected: ["سیاست", "ایران", "السياسة", "السياح", "لغات", "اتمی", "کنفرانس", "العربية", "الجزيرة", "فارسی"] - description: "Extract Arabic hashtags with underscore" text: "#برنامه_نویسی #رییس_جمهور #رئيس_الوزراء, #ثبت_نام. 
#لس_آنجلس" expected: ["برنامه_نویسی", "رییس_جمهور", "رئيس_الوزراء", "ثبت_نام", "لس_آنجلس"] - description: "Extract Hebrew hashtags" text: "#עַל־יְדֵי #וכו׳ #מ״כ" expected: ["עַל־יְדֵי", "וכו׳", "מ״כ"] - description: "Extract Thai hashtags" text: "#ผู้เริ่ม #การเมือง #รายละเอียด #นักท่องเที่ยว #ของขวัญ #สนามบิน #เดินทาง #ประธาน" expected: ["ผู้เริ่ม", "การเมือง", "รายละเอียด", "นักท่องเที่ยว", "ของขวัญ", "สนามบิน", "เดินทาง", "ประธาน"] - description: "Extract Arabic hashtags with Zero-Width Non-Joiner" text: "#أي‌بي‌إم #می‌خواهم" expected: ["أي‌بي‌إم", "می‌خواهم"] - description: "Extract Amharic hashtag" text: "የአላህ መልእክተኛ ሰለላሁ ዓለይሂ ወሰለም #ኢትዮሙስሊምስ" expected: ["ኢትዮሙስሊምስ"] - description: "Extract Sinhala hashtag with Zero-Width Joiner (U+200D)" text: "#ශ්‍රීලංකා" expected: ["ශ්‍රීලංකා"] - description: "Extract Arabic and Persian hashtags with numbers" text: "#۳۴۵هشتگ #هشتگ۶۷۸ #ســـلام_عليكم_٤٠٦" expected: ["۳۴۵هشتگ","هشتگ۶۷۸","ســـلام_عليكم_٤٠٦"] - description: "Extract Hindi hashtags" text: "#महात्मा #महात्मा_१२३४ #१२३४ गांधी" expected: ["महात्मा","महात्मा_१२३४"] - description: "Extract Indic script hashtags" text: "#বাংলা #ગુજરાતી #ಕನ್ನಡ #മലയാളം #ଓଡ଼ିଆ #ਪੰਜਾਬੀ #සිංහල #தமிழ் #తెలుగు" expected: ["বাংলা","ગુજરાતી","ಕನ್ನಡ","മലയാളം","ଓଡ଼ିଆ","ਪੰਜਾਬੀ","සිංහල","தமிழ்","తెలుగు"] - description: "Extract Tibetan hashtags" text: "#བོད་སྐད་ #བོད་སྐད" expected: ["བོད་སྐད་","བོད་སྐད"] - description: "Extract Khmer, Burmese, Laotian hashtags" text: "#មហាត្មះគន្ធី #မြင့်မြတ်သော #ຊີວະສາດ" expected: ["មហាត្មះគន្ធី","မြင့်မြတ်သော","ຊີວະສາດ"] - description: "Extract Greek hashtag" text: "#Μαχάτμα_Γκάντι ήταν Ινδός πολιτικός" expected: ["Μαχάτμα_Γκάντι"] - description: "Extract Armenian and Georgian hashtags" text: "#Մահաթմա #მაჰათმა" expected: ["Մահաթմա","მაჰათმა"] - description: "Extract hashtag with middle dot" text: "#il·lusió" expected: ["il·lusió"] - description: "DO NOT extract hashtags without a letter" text: "#_ #1_2 #122 #〃" expected: [] hashtags_from_astral: - description: 
"Extract hashtag with letter from astral plane (U+20021)" text: "#\U00020021" expected: ["\U00020021"] - description: "Extract hashtag with letter plus marker from astral plane (U+16f04 U+16f51)" text: "#\U00016f04\U00016f51" expected: ["\U00016f04\U00016f51"] - description: "Extract hashtag with letter plus number from astral plane (U+104a0)" text: "#\U00000041\U000104a0" expected: ["A\U000104a0"] hashtags_with_indices: - description: "Extract a hastag at the start" text: "#hashtag here" expected: - hashtag: "hashtag" indices: [0, 8] - description: "Extract a hastag at the end" text: "test a #hashtag" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract a hastag in the middle" text: "test a #hashtag in a string" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract only a valid hashtag" text: "#123 a #hashtag in a string" expected: - hashtag: "hashtag" indices: [7, 15] - description: "Extract a hashtag in a string of multi-byte characters" text: "会議中 #hashtag 会議中" expected: - hashtag: "hashtag" indices: [4, 12] - description: "Extract multiple valid hashtags" text: "One #two three #four" expected: - hashtag: "two" indices: [4, 8] - hashtag: "four" indices: [15, 20] - description: "Extract a non-latin hashtag" text: "Hashtags in #русский!" expected: - hashtag: "русский" indices: [12, 20] - description: "Extract multiple non-latin hashtags" text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" expected: - hashtag: "中文" indices: [12, 15] - hashtag: "日本語" indices: [17, 21] - hashtag: "한국말" indices: [23, 27] - hashtag: "русский" indices: [33, 41] cashtags: - description: "Extract cashtags" text: "Example cashtags: $TEST $Stock $symbol" expected: ["TEST", "Stock", "symbol"] - description: "Extract cashtags with . 
or _" text: "Example cashtags: $TEST.T $test.tt $Stock_X $symbol_ab" expected: ["TEST.T", "test.tt", "Stock_X", "symbol_ab"] - description: "Do not extract cashtags if they contain numbers" text: "$123 $test123 $TE123ST" expected: [] - description: "Do not extract cashtags with non-ASCII characters" text: "$ストック $株" expected: [] - description: "Do not extract cashtags with punctuations" text: "$ $. $- $@ $! $() $+" expected: [] - description: "Do not include trailing . or _" text: "$TEST. $TEST_" expected: ["TEST", "TEST"] - description: "Do not extract cashtags if there is no space before $" text: "$OK$NG$BAD text$NO .$NG $$NG" expected: ["OK"] - description: "Do not extract too long cashtags" text: "$CashtagMustBeLessThanSixCharacter" expected: [] cashtags_with_indices: - description: "Extract cashtags" text: "Example: $TEST $symbol test" expected: - cashtag: "TEST" indices: [9, 14] - cashtag: "symbol" indices: [15, 22] - description: "Extract cashtags with . or _" text: "Example: $TEST.T test $symbol_ab end" expected: - cashtag: "TEST.T" indices: [9, 16] - cashtag: "symbol_ab" indices: [22, 32] twitter-text-1.14.7/test/twitter-text-conformance/README.md000664 001751 001751 00000011722 13126461251 023506 0ustar00srudsrud000000 000000 ## Purpose This conformance package provides a cross-platform definition of the test cases for auto linking, extracting and hit highlighting of Tweets. The primary use for this is the twitter-text-* libraries; both those managed by Twitter and those created by the community. The reason for this conformance suite is to provide a way to keep the various implementations of Twitter text handling working in a consistent and interoperable way. While anyone can feel free to implement this logic however they choose the recommendation to developers is to use libraries which pass this conformance suite. ## Format The test cases are stored in YAML files. 
There is one YAML file for each major operation type, and within those files there is one section for each publicly accessible API. Each test case is defined by: * description: This provides a meaningful name for the test case, for use as an error message if a test fails. * text: The input text of the Tweet. * expected: What results are expected for this input text ## Guidelines for use If you are creating a new twitter-text library in a different programming language, please follow these few guidelines: 1. Create a test which reads these files and executes the test cases. 1.a. Do not convert these files to test cases statically. These test cases will change over time. 2. Be sure to implement all of the publicly accessible APIs (the keys to the YAML file) 3. Only expose the public API method and not the underlying regular expressions 3.a. If your language or environment does not allow for this, please add a comment to that effect 3.b. This prevents breakage when regular expressions need to change in fundamental ways ## Submitting new conformance tests * You can [fork the GitHub repository](https://github.com/twitter/twitter-text) to add tests and send a pull request * You can [open an issue on GitHub](https://github.com/twitter/twitter-text/issues) * Please be sure to provide example input and output as well as a brief description of the problem. ## Changelog * v1.4.9 - 2011-12-01 [ Git tag v1.4.9 ] * [FIX] Apply stricter parsing of t.co URLs * [FIX] Extract @mention and hashtag before newline * [FIX] Extract URLs without protocol on ccTLD domain with slash * v1.4.8 - 2011-11-02 [ Git tag v1.4.8 ] * [FIX] Extract URLs without protocol in CJK text * [FIX] Do not extract URL in hashtag * [FIX] Extract hashtag after bracket * [FIX] Extract URL with '?' in fragment * v1.4.7 - 2011-10-04 [ Git tag v1.4.7 ] * [FIX] Extract URLs followed by punctuations * [FIX] Extract URLs without protocol in CJK text * [FIX] Extract URLs with '.' 
* v1.4.5 - 2011-09-20 [ Git tag v1.4.5 ] * [FIX] Extract URLs without protocol * [FIX] Extract URLs with '.', '|' and '&' * v1.4.4 - 2011-08-05 [ Git tag v1.4.4 ] * [FIX] Support ş (U+015F) in hashtags * [FIX] Support latin accents in URL paths * [FIX] Add a test for a common cause for runaway regex engines * v1.4.3 - 2011-07-13 [ Git tag v1.4.3 ] * [FIX] Japanese autolink including long vowel mark (chouon) * [FIX] Japanese autolink after a full-width exclamation point * [FIX] Japanese autolink including ideographic iteration mark * [FIX] Add hashtag extraction with indices test for new language hashtags * [FIX] Add hashtag extraction with indices test for multiple latin hashtags * v1.4.2 - 2011-07-08 [ Git tag v1.4.2 ] * [FIX] Additional Japanese hashtag autolinking tests * v1.4.1 - 2011-05-18 [ Git tag v1.4.1 ] * [FIX] Add support for Russian hashtags * [FIX] Add support for Korean hashtags * [FIX] Add support for Japanese hashtags (Katakana, Hiragana and Kanji) * [FIX] Add support for autolinking punycode domain names and TLDs (via punycode) * [DOC] Update README and License * v1.3.1 - 2010-12-03 - [ Git tag v1.3.1 ] * [DOC] Updated README with Changelog section * [FIX] Autolink URLs with paths ending in + and - * [FIX] Extract URLs with paths ending in + and - * v1.3.0 - 2010-12-03 - [ Git tag v1.3.0 ] * [NOTE] First tagged version (sorry) * [DOC] Updated README file with guidelines for use and format information * [FIX] Do not autolink URLs without protocols * [FIX] Do not extract URLs without protocols * v1.0.0 - 2010-01-21 - [ Git tag v1.0.0 (retroactively) ] * Initial version ## Copyright and License Copyright 2011 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. 
require 'open-uri'
require 'nokogiri'
require 'yaml'

# Rake tasks that maintain the TLD data used by the conformance suite:
#
#   tlds:iana_update     - scrape the IANA root zone page and regenerate
#                          tld_lib.yml and TldLists.java next to this file.
#   tlds:generate_tests  - turn tld_lib.yml into the tlds.yml test fixture.
namespace :tlds do
  desc 'Grab tlds from iana and save to tld_lib.yml'
  task :iana_update do
    # URI#open is provided by open-uri. Plain Kernel#open no longer accepts
    # URLs on Ruby 3.0+, and the block form closes the response once the
    # document has been parsed.
    doc = URI.parse('http://www.iana.org/domains/root/db').open do |io|
      Nokogiri::HTML(io)
    end

    # Output key => pattern matched against the "type" column of the table.
    types = {
      'country' => /country-code/,
      'generic' => /generic|sponsored|infrastructure|generic-restricted/,
    }

    # Rows without any <td> cells (presumably the header row) are skipped.
    tlds = doc.css('table#tld-table tr')
              .map { |tr| tr.css('td') }
              .reject(&:empty?)
              .map { |cells| parse_node(cells) }

    yml = types.each_with_object({}) do |(name, regex), lists|
      lists[name] = select_tld(tlds, regex)
    end

    # "onion" is always added to the generic list by hand.
    yml['generic'] << 'onion'

    File.open(repo_path('tld_lib.yml'), 'w') do |file|
      file.write(yml.to_yaml)
    end

    File.open(repo_path('TldLists.java'), 'w') do |file|
      file.write(tld_lists_java(yml))
    end
  end

  desc 'Update tests from tld_lib.yml'
  task :generate_tests do
    yml = YAML.load_file(repo_path('tld_lib.yml'))

    # One test per TLD: "https://twitter.<tld>" must be extracted unchanged.
    tests = yml.each_with_object({}) do |(type, tlds), by_type|
      by_type[type] = tlds.map do |tld|
        {
          'description' => "#{tld} is a valid #{type} tld",
          'text' => "https://twitter.#{tld}",
          'expected' => ["https://twitter.#{tld}"],
        }
      end
    end

    # Resolved through repo_path, like every other file this script touches,
    # so the fixture lands next to this Rakefile regardless of the current
    # working directory.
    File.open(repo_path('tlds.yml'), 'w') do |file|
      file.write({ 'tests' => tests }.to_yaml)
    end
  end
end

# Renders the Java source for TldLists from the hash built by
# tlds:iana_update ('generic' and 'country' => arrays of TLD strings).
#
# NOTE(review): the lists are declared with the raw `List` type; confirm
# whether `List<String>` was intended.
def tld_lists_java(yml)
  <<-EOF
// Auto-generated by conformance/Rakefile

package com.twitter;

import java.util.Arrays;
import java.util.List;

public class TldLists {
  public static final List GTLDS = Arrays.asList(
#{yml["generic"].map {|el| " \"#{el}\""}.join(",\n")}
  );

  public static final List CTLDS = Arrays.asList(
#{yml["country"].map {|el| " \"#{el}\""}.join(",\n")}
  );
}
  EOF
end

# Converts the <td> cells of one table row into { domain:, type: }.
#
# The domain is the text of the first cell with dots, whitespace and the
# bidirectional marks U+200F / U+200E removed; the type is the raw text of
# the second cell.
def parse_node(node)
  {
    domain: node[0].text.gsub(/[\.\s]+/, '').gsub("\u200f", '').gsub("\u200e", ''),
    type: node[1].text
  }
end

# Returns the domains of all entries whose :type matches the given regex.
def select_tld(tlds, type)
  # Reverse tlds to make sure tld regex can match longer one when subset exists
  tlds.select { |entry| entry[:type] =~ type }
      .map { |entry| entry[:domain] }
      .sort
      .reverse
end

# Builds a path relative to the directory containing this Rakefile.
def repo_path(*path)
  File.join(File.dirname(__FILE__), *path)
end
https://twitter.భారత్ expected: - https://twitter.భారత్ - description: "சிங்கப்பூர் is a valid country tld" text: https://twitter.சிங்கப்பூர் expected: - https://twitter.சிங்கப்பூர் - description: "இலங்கை is a valid country tld" text: https://twitter.இலங்கை expected: - https://twitter.இலங்கை - description: "இந்தியா is a valid country tld" text: https://twitter.இந்தியா expected: - https://twitter.இந்தியா - description: "ଭାରତ is a valid country tld" text: https://twitter.ଭାରତ expected: - https://twitter.ଭାରତ - description: "ભારત is a valid country tld" text: https://twitter.ભારત expected: - https://twitter.ભારત - description: "ਭਾਰਤ is a valid country tld" text: https://twitter.ਭਾਰਤ expected: - https://twitter.ਭਾਰਤ - description: "ভাৰত is a valid country tld" text: https://twitter.ভাৰত expected: - https://twitter.ভাৰত - description: "ভারত is a valid country tld" text: https://twitter.ভারত expected: - https://twitter.ভারত - description: "বাংলা is a valid country tld" text: https://twitter.বাংলা expected: - https://twitter.বাংলা - description: "भारोत is a valid country tld" text: https://twitter.भारोत expected: - https://twitter.भारोत - description: "भारतम् is a valid country tld" text: https://twitter.भारतम् expected: - https://twitter.भारतम् - description: "भारत is a valid country tld" text: https://twitter.भारत expected: - https://twitter.भारत - description: "ڀارت is a valid country tld" text: https://twitter.ڀارت expected: - https://twitter.ڀارت - description: "پاکستان is a valid country tld" text: https://twitter.پاکستان expected: - https://twitter.پاکستان - description: "مليسيا is a valid country tld" text: https://twitter.مليسيا expected: - https://twitter.مليسيا - description: "مصر is a valid country tld" text: https://twitter.مصر expected: - https://twitter.مصر - description: "قطر is a valid country tld" text: https://twitter.قطر expected: - https://twitter.قطر - description: "فلسطين is a valid country tld" text: https://twitter.فلسطين expected: - 
https://twitter.فلسطين - description: "عمان is a valid country tld" text: https://twitter.عمان expected: - https://twitter.عمان - description: "عراق is a valid country tld" text: https://twitter.عراق expected: - https://twitter.عراق - description: "سورية is a valid country tld" text: https://twitter.سورية expected: - https://twitter.سورية - description: "سودان is a valid country tld" text: https://twitter.سودان expected: - https://twitter.سودان - description: "تونس is a valid country tld" text: https://twitter.تونس expected: - https://twitter.تونس - description: "بھارت is a valid country tld" text: https://twitter.بھارت expected: - https://twitter.بھارت - description: "بارت is a valid country tld" text: https://twitter.بارت expected: - https://twitter.بارت - description: "ایران is a valid country tld" text: https://twitter.ایران expected: - https://twitter.ایران - description: "امارات is a valid country tld" text: https://twitter.امارات expected: - https://twitter.امارات - description: "المغرب is a valid country tld" text: https://twitter.المغرب expected: - https://twitter.المغرب - description: "السعودية is a valid country tld" text: https://twitter.السعودية expected: - https://twitter.السعودية - description: "الجزائر is a valid country tld" text: https://twitter.الجزائر expected: - https://twitter.الجزائر - description: "الاردن is a valid country tld" text: https://twitter.الاردن expected: - https://twitter.الاردن - description: "հայ is a valid country tld" text: https://twitter.հայ expected: - https://twitter.հայ - description: "қаз is a valid country tld" text: https://twitter.қаз expected: - https://twitter.қаз - description: "укр is a valid country tld" text: https://twitter.укр expected: - https://twitter.укр - description: "срб is a valid country tld" text: https://twitter.срб expected: - https://twitter.срб - description: "рф is a valid country tld" text: https://twitter.рф expected: - https://twitter.рф - description: "мон is a valid country tld" text: 
https://twitter.мон expected: - https://twitter.мон - description: "мкд is a valid country tld" text: https://twitter.мкд expected: - https://twitter.мкд - description: "ею is a valid country tld" text: https://twitter.ею expected: - https://twitter.ею - description: "бел is a valid country tld" text: https://twitter.бел expected: - https://twitter.бел - description: "бг is a valid country tld" text: https://twitter.бг expected: - https://twitter.бг - description: "ελ is a valid country tld" text: https://twitter.ελ expected: - https://twitter.ελ - description: zw is a valid country tld text: https://twitter.zw expected: - https://twitter.zw - description: zm is a valid country tld text: https://twitter.zm expected: - https://twitter.zm - description: za is a valid country tld text: https://twitter.za expected: - https://twitter.za - description: yt is a valid country tld text: https://twitter.yt expected: - https://twitter.yt - description: ye is a valid country tld text: https://twitter.ye expected: - https://twitter.ye - description: ws is a valid country tld text: https://twitter.ws expected: - https://twitter.ws - description: wf is a valid country tld text: https://twitter.wf expected: - https://twitter.wf - description: vu is a valid country tld text: https://twitter.vu expected: - https://twitter.vu - description: vn is a valid country tld text: https://twitter.vn expected: - https://twitter.vn - description: vi is a valid country tld text: https://twitter.vi expected: - https://twitter.vi - description: vg is a valid country tld text: https://twitter.vg expected: - https://twitter.vg - description: ve is a valid country tld text: https://twitter.ve expected: - https://twitter.ve - description: vc is a valid country tld text: https://twitter.vc expected: - https://twitter.vc - description: va is a valid country tld text: https://twitter.va expected: - https://twitter.va - description: uz is a valid country tld text: https://twitter.uz expected: - 
https://twitter.uz - description: uy is a valid country tld text: https://twitter.uy expected: - https://twitter.uy - description: us is a valid country tld text: https://twitter.us expected: - https://twitter.us - description: um is a valid country tld text: https://twitter.um expected: - https://twitter.um - description: uk is a valid country tld text: https://twitter.uk expected: - https://twitter.uk - description: ug is a valid country tld text: https://twitter.ug expected: - https://twitter.ug - description: ua is a valid country tld text: https://twitter.ua expected: - https://twitter.ua - description: tz is a valid country tld text: https://twitter.tz expected: - https://twitter.tz - description: tw is a valid country tld text: https://twitter.tw expected: - https://twitter.tw - description: tv is a valid country tld text: https://twitter.tv expected: - https://twitter.tv - description: tt is a valid country tld text: https://twitter.tt expected: - https://twitter.tt - description: tr is a valid country tld text: https://twitter.tr expected: - https://twitter.tr - description: tp is a valid country tld text: https://twitter.tp expected: - https://twitter.tp - description: to is a valid country tld text: https://twitter.to expected: - https://twitter.to - description: tn is a valid country tld text: https://twitter.tn expected: - https://twitter.tn - description: tm is a valid country tld text: https://twitter.tm expected: - https://twitter.tm - description: tl is a valid country tld text: https://twitter.tl expected: - https://twitter.tl - description: tk is a valid country tld text: https://twitter.tk expected: - https://twitter.tk - description: tj is a valid country tld text: https://twitter.tj expected: - https://twitter.tj - description: th is a valid country tld text: https://twitter.th expected: - https://twitter.th - description: tg is a valid country tld text: https://twitter.tg expected: - https://twitter.tg - description: tf is a valid country tld 
text: https://twitter.tf expected: - https://twitter.tf - description: td is a valid country tld text: https://twitter.td expected: - https://twitter.td - description: tc is a valid country tld text: https://twitter.tc expected: - https://twitter.tc - description: sz is a valid country tld text: https://twitter.sz expected: - https://twitter.sz - description: sy is a valid country tld text: https://twitter.sy expected: - https://twitter.sy - description: sx is a valid country tld text: https://twitter.sx expected: - https://twitter.sx - description: sv is a valid country tld text: https://twitter.sv expected: - https://twitter.sv - description: su is a valid country tld text: https://twitter.su expected: - https://twitter.su - description: st is a valid country tld text: https://twitter.st expected: - https://twitter.st - description: ss is a valid country tld text: https://twitter.ss expected: - https://twitter.ss - description: sr is a valid country tld text: https://twitter.sr expected: - https://twitter.sr - description: so is a valid country tld text: https://twitter.so expected: - https://twitter.so - description: sn is a valid country tld text: https://twitter.sn expected: - https://twitter.sn - description: sm is a valid country tld text: https://twitter.sm expected: - https://twitter.sm - description: sl is a valid country tld text: https://twitter.sl expected: - https://twitter.sl - description: sk is a valid country tld text: https://twitter.sk expected: - https://twitter.sk - description: sj is a valid country tld text: https://twitter.sj expected: - https://twitter.sj - description: si is a valid country tld text: https://twitter.si expected: - https://twitter.si - description: sh is a valid country tld text: https://twitter.sh expected: - https://twitter.sh - description: sg is a valid country tld text: https://twitter.sg expected: - https://twitter.sg - description: se is a valid country tld text: https://twitter.se expected: - https://twitter.se - 
description: sd is a valid country tld text: https://twitter.sd expected: - https://twitter.sd - description: sc is a valid country tld text: https://twitter.sc expected: - https://twitter.sc - description: sb is a valid country tld text: https://twitter.sb expected: - https://twitter.sb - description: sa is a valid country tld text: https://twitter.sa expected: - https://twitter.sa - description: rw is a valid country tld text: https://twitter.rw expected: - https://twitter.rw - description: ru is a valid country tld text: https://twitter.ru expected: - https://twitter.ru - description: rs is a valid country tld text: https://twitter.rs expected: - https://twitter.rs - description: ro is a valid country tld text: https://twitter.ro expected: - https://twitter.ro - description: re is a valid country tld text: https://twitter.re expected: - https://twitter.re - description: qa is a valid country tld text: https://twitter.qa expected: - https://twitter.qa - description: py is a valid country tld text: https://twitter.py expected: - https://twitter.py - description: pw is a valid country tld text: https://twitter.pw expected: - https://twitter.pw - description: pt is a valid country tld text: https://twitter.pt expected: - https://twitter.pt - description: ps is a valid country tld text: https://twitter.ps expected: - https://twitter.ps - description: pr is a valid country tld text: https://twitter.pr expected: - https://twitter.pr - description: pn is a valid country tld text: https://twitter.pn expected: - https://twitter.pn - description: pm is a valid country tld text: https://twitter.pm expected: - https://twitter.pm - description: pl is a valid country tld text: https://twitter.pl expected: - https://twitter.pl - description: pk is a valid country tld text: https://twitter.pk expected: - https://twitter.pk - description: ph is a valid country tld text: https://twitter.ph expected: - https://twitter.ph - description: pg is a valid country tld text: 
https://twitter.pg expected: - https://twitter.pg - description: pf is a valid country tld text: https://twitter.pf expected: - https://twitter.pf - description: pe is a valid country tld text: https://twitter.pe expected: - https://twitter.pe - description: pa is a valid country tld text: https://twitter.pa expected: - https://twitter.pa - description: om is a valid country tld text: https://twitter.om expected: - https://twitter.om - description: nz is a valid country tld text: https://twitter.nz expected: - https://twitter.nz - description: nu is a valid country tld text: https://twitter.nu expected: - https://twitter.nu - description: nr is a valid country tld text: https://twitter.nr expected: - https://twitter.nr - description: np is a valid country tld text: https://twitter.np expected: - https://twitter.np - description: no is a valid country tld text: https://twitter.no expected: - https://twitter.no - description: nl is a valid country tld text: https://twitter.nl expected: - https://twitter.nl - description: ni is a valid country tld text: https://twitter.ni expected: - https://twitter.ni - description: ng is a valid country tld text: https://twitter.ng expected: - https://twitter.ng - description: nf is a valid country tld text: https://twitter.nf expected: - https://twitter.nf - description: ne is a valid country tld text: https://twitter.ne expected: - https://twitter.ne - description: nc is a valid country tld text: https://twitter.nc expected: - https://twitter.nc - description: na is a valid country tld text: https://twitter.na expected: - https://twitter.na - description: mz is a valid country tld text: https://twitter.mz expected: - https://twitter.mz - description: my is a valid country tld text: https://twitter.my expected: - https://twitter.my - description: mx is a valid country tld text: https://twitter.mx expected: - https://twitter.mx - description: mw is a valid country tld text: https://twitter.mw expected: - https://twitter.mw - 
description: mv is a valid country tld text: https://twitter.mv expected: - https://twitter.mv - description: mu is a valid country tld text: https://twitter.mu expected: - https://twitter.mu - description: mt is a valid country tld text: https://twitter.mt expected: - https://twitter.mt - description: ms is a valid country tld text: https://twitter.ms expected: - https://twitter.ms - description: mr is a valid country tld text: https://twitter.mr expected: - https://twitter.mr - description: mq is a valid country tld text: https://twitter.mq expected: - https://twitter.mq - description: mp is a valid country tld text: https://twitter.mp expected: - https://twitter.mp - description: mo is a valid country tld text: https://twitter.mo expected: - https://twitter.mo - description: mn is a valid country tld text: https://twitter.mn expected: - https://twitter.mn - description: mm is a valid country tld text: https://twitter.mm expected: - https://twitter.mm - description: ml is a valid country tld text: https://twitter.ml expected: - https://twitter.ml - description: mk is a valid country tld text: https://twitter.mk expected: - https://twitter.mk - description: mh is a valid country tld text: https://twitter.mh expected: - https://twitter.mh - description: mg is a valid country tld text: https://twitter.mg expected: - https://twitter.mg - description: mf is a valid country tld text: https://twitter.mf expected: - https://twitter.mf - description: me is a valid country tld text: https://twitter.me expected: - https://twitter.me - description: md is a valid country tld text: https://twitter.md expected: - https://twitter.md - description: mc is a valid country tld text: https://twitter.mc expected: - https://twitter.mc - description: ma is a valid country tld text: https://twitter.ma expected: - https://twitter.ma - description: ly is a valid country tld text: https://twitter.ly expected: - https://twitter.ly - description: lv is a valid country tld text: 
https://twitter.lv expected: - https://twitter.lv - description: lu is a valid country tld text: https://twitter.lu expected: - https://twitter.lu - description: lt is a valid country tld text: https://twitter.lt expected: - https://twitter.lt - description: ls is a valid country tld text: https://twitter.ls expected: - https://twitter.ls - description: lr is a valid country tld text: https://twitter.lr expected: - https://twitter.lr - description: lk is a valid country tld text: https://twitter.lk expected: - https://twitter.lk - description: li is a valid country tld text: https://twitter.li expected: - https://twitter.li - description: lc is a valid country tld text: https://twitter.lc expected: - https://twitter.lc - description: lb is a valid country tld text: https://twitter.lb expected: - https://twitter.lb - description: la is a valid country tld text: https://twitter.la expected: - https://twitter.la - description: kz is a valid country tld text: https://twitter.kz expected: - https://twitter.kz - description: ky is a valid country tld text: https://twitter.ky expected: - https://twitter.ky - description: kw is a valid country tld text: https://twitter.kw expected: - https://twitter.kw - description: kr is a valid country tld text: https://twitter.kr expected: - https://twitter.kr - description: kp is a valid country tld text: https://twitter.kp expected: - https://twitter.kp - description: kn is a valid country tld text: https://twitter.kn expected: - https://twitter.kn - description: km is a valid country tld text: https://twitter.km expected: - https://twitter.km - description: ki is a valid country tld text: https://twitter.ki expected: - https://twitter.ki - description: kh is a valid country tld text: https://twitter.kh expected: - https://twitter.kh - description: kg is a valid country tld text: https://twitter.kg expected: - https://twitter.kg - description: ke is a valid country tld text: https://twitter.ke expected: - https://twitter.ke - 
description: jp is a valid country tld text: https://twitter.jp expected: - https://twitter.jp - description: jo is a valid country tld text: https://twitter.jo expected: - https://twitter.jo - description: jm is a valid country tld text: https://twitter.jm expected: - https://twitter.jm - description: je is a valid country tld text: https://twitter.je expected: - https://twitter.je - description: it is a valid country tld text: https://twitter.it expected: - https://twitter.it - description: is is a valid country tld text: https://twitter.is expected: - https://twitter.is - description: ir is a valid country tld text: https://twitter.ir expected: - https://twitter.ir - description: iq is a valid country tld text: https://twitter.iq expected: - https://twitter.iq - description: io is a valid country tld text: https://twitter.io expected: - https://twitter.io - description: in is a valid country tld text: https://twitter.in expected: - https://twitter.in - description: im is a valid country tld text: https://twitter.im expected: - https://twitter.im - description: il is a valid country tld text: https://twitter.il expected: - https://twitter.il - description: ie is a valid country tld text: https://twitter.ie expected: - https://twitter.ie - description: id is a valid country tld text: https://twitter.id expected: - https://twitter.id - description: hu is a valid country tld text: https://twitter.hu expected: - https://twitter.hu - description: ht is a valid country tld text: https://twitter.ht expected: - https://twitter.ht - description: hr is a valid country tld text: https://twitter.hr expected: - https://twitter.hr - description: hn is a valid country tld text: https://twitter.hn expected: - https://twitter.hn - description: hm is a valid country tld text: https://twitter.hm expected: - https://twitter.hm - description: hk is a valid country tld text: https://twitter.hk expected: - https://twitter.hk - description: gy is a valid country tld text: 
https://twitter.gy expected: - https://twitter.gy - description: gw is a valid country tld text: https://twitter.gw expected: - https://twitter.gw - description: gu is a valid country tld text: https://twitter.gu expected: - https://twitter.gu - description: gt is a valid country tld text: https://twitter.gt expected: - https://twitter.gt - description: gs is a valid country tld text: https://twitter.gs expected: - https://twitter.gs - description: gr is a valid country tld text: https://twitter.gr expected: - https://twitter.gr - description: gq is a valid country tld text: https://twitter.gq expected: - https://twitter.gq - description: gp is a valid country tld text: https://twitter.gp expected: - https://twitter.gp - description: gn is a valid country tld text: https://twitter.gn expected: - https://twitter.gn - description: gm is a valid country tld text: https://twitter.gm expected: - https://twitter.gm - description: gl is a valid country tld text: https://twitter.gl expected: - https://twitter.gl - description: gi is a valid country tld text: https://twitter.gi expected: - https://twitter.gi - description: gh is a valid country tld text: https://twitter.gh expected: - https://twitter.gh - description: gg is a valid country tld text: https://twitter.gg expected: - https://twitter.gg - description: gf is a valid country tld text: https://twitter.gf expected: - https://twitter.gf - description: ge is a valid country tld text: https://twitter.ge expected: - https://twitter.ge - description: gd is a valid country tld text: https://twitter.gd expected: - https://twitter.gd - description: gb is a valid country tld text: https://twitter.gb expected: - https://twitter.gb - description: ga is a valid country tld text: https://twitter.ga expected: - https://twitter.ga - description: fr is a valid country tld text: https://twitter.fr expected: - https://twitter.fr - description: fo is a valid country tld text: https://twitter.fo expected: - https://twitter.fo - 
description: fm is a valid country tld text: https://twitter.fm expected: - https://twitter.fm - description: fk is a valid country tld text: https://twitter.fk expected: - https://twitter.fk - description: fj is a valid country tld text: https://twitter.fj expected: - https://twitter.fj - description: fi is a valid country tld text: https://twitter.fi expected: - https://twitter.fi - description: eu is a valid country tld text: https://twitter.eu expected: - https://twitter.eu - description: et is a valid country tld text: https://twitter.et expected: - https://twitter.et - description: es is a valid country tld text: https://twitter.es expected: - https://twitter.es - description: er is a valid country tld text: https://twitter.er expected: - https://twitter.er - description: eh is a valid country tld text: https://twitter.eh expected: - https://twitter.eh - description: eg is a valid country tld text: https://twitter.eg expected: - https://twitter.eg - description: ee is a valid country tld text: https://twitter.ee expected: - https://twitter.ee - description: ec is a valid country tld text: https://twitter.ec expected: - https://twitter.ec - description: dz is a valid country tld text: https://twitter.dz expected: - https://twitter.dz - description: do is a valid country tld text: https://twitter.do expected: - https://twitter.do - description: dm is a valid country tld text: https://twitter.dm expected: - https://twitter.dm - description: dk is a valid country tld text: https://twitter.dk expected: - https://twitter.dk - description: dj is a valid country tld text: https://twitter.dj expected: - https://twitter.dj - description: de is a valid country tld text: https://twitter.de expected: - https://twitter.de - description: cz is a valid country tld text: https://twitter.cz expected: - https://twitter.cz - description: cy is a valid country tld text: https://twitter.cy expected: - https://twitter.cy - description: cx is a valid country tld text: 
https://twitter.cx expected: - https://twitter.cx - description: cw is a valid country tld text: https://twitter.cw expected: - https://twitter.cw - description: cv is a valid country tld text: https://twitter.cv expected: - https://twitter.cv - description: cu is a valid country tld text: https://twitter.cu expected: - https://twitter.cu - description: cr is a valid country tld text: https://twitter.cr expected: - https://twitter.cr - description: co is a valid country tld text: https://twitter.co expected: - https://twitter.co - description: cn is a valid country tld text: https://twitter.cn expected: - https://twitter.cn - description: cm is a valid country tld text: https://twitter.cm expected: - https://twitter.cm - description: cl is a valid country tld text: https://twitter.cl expected: - https://twitter.cl - description: ck is a valid country tld text: https://twitter.ck expected: - https://twitter.ck - description: ci is a valid country tld text: https://twitter.ci expected: - https://twitter.ci - description: ch is a valid country tld text: https://twitter.ch expected: - https://twitter.ch - description: cg is a valid country tld text: https://twitter.cg expected: - https://twitter.cg - description: cf is a valid country tld text: https://twitter.cf expected: - https://twitter.cf - description: cd is a valid country tld text: https://twitter.cd expected: - https://twitter.cd - description: cc is a valid country tld text: https://twitter.cc expected: - https://twitter.cc - description: ca is a valid country tld text: https://twitter.ca expected: - https://twitter.ca - description: bz is a valid country tld text: https://twitter.bz expected: - https://twitter.bz - description: by is a valid country tld text: https://twitter.by expected: - https://twitter.by - description: bw is a valid country tld text: https://twitter.bw expected: - https://twitter.bw - description: bv is a valid country tld text: https://twitter.bv expected: - https://twitter.bv - 
description: bt is a valid country tld text: https://twitter.bt expected: - https://twitter.bt - description: bs is a valid country tld text: https://twitter.bs expected: - https://twitter.bs - description: br is a valid country tld text: https://twitter.br expected: - https://twitter.br - description: bq is a valid country tld text: https://twitter.bq expected: - https://twitter.bq - description: bo is a valid country tld text: https://twitter.bo expected: - https://twitter.bo - description: bn is a valid country tld text: https://twitter.bn expected: - https://twitter.bn - description: bm is a valid country tld text: https://twitter.bm expected: - https://twitter.bm - description: bl is a valid country tld text: https://twitter.bl expected: - https://twitter.bl - description: bj is a valid country tld text: https://twitter.bj expected: - https://twitter.bj - description: bi is a valid country tld text: https://twitter.bi expected: - https://twitter.bi - description: bh is a valid country tld text: https://twitter.bh expected: - https://twitter.bh - description: bg is a valid country tld text: https://twitter.bg expected: - https://twitter.bg - description: bf is a valid country tld text: https://twitter.bf expected: - https://twitter.bf - description: be is a valid country tld text: https://twitter.be expected: - https://twitter.be - description: bd is a valid country tld text: https://twitter.bd expected: - https://twitter.bd - description: bb is a valid country tld text: https://twitter.bb expected: - https://twitter.bb - description: ba is a valid country tld text: https://twitter.ba expected: - https://twitter.ba - description: az is a valid country tld text: https://twitter.az expected: - https://twitter.az - description: ax is a valid country tld text: https://twitter.ax expected: - https://twitter.ax - description: aw is a valid country tld text: https://twitter.aw expected: - https://twitter.aw - description: au is a valid country tld text: 
https://twitter.au expected: - https://twitter.au - description: at is a valid country tld text: https://twitter.at expected: - https://twitter.at - description: as is a valid country tld text: https://twitter.as expected: - https://twitter.as - description: ar is a valid country tld text: https://twitter.ar expected: - https://twitter.ar - description: aq is a valid country tld text: https://twitter.aq expected: - https://twitter.aq - description: ao is a valid country tld text: https://twitter.ao expected: - https://twitter.ao - description: an is a valid country tld text: https://twitter.an expected: - https://twitter.an - description: am is a valid country tld text: https://twitter.am expected: - https://twitter.am - description: al is a valid country tld text: https://twitter.al expected: - https://twitter.al - description: ai is a valid country tld text: https://twitter.ai expected: - https://twitter.ai - description: ag is a valid country tld text: https://twitter.ag expected: - https://twitter.ag - description: af is a valid country tld text: https://twitter.af expected: - https://twitter.af - description: ae is a valid country tld text: https://twitter.ae expected: - https://twitter.ae - description: ad is a valid country tld text: https://twitter.ad expected: - https://twitter.ad - description: ac is a valid country tld text: https://twitter.ac expected: - https://twitter.ac generic: - description: "삼성 is a valid generic tld" text: https://twitter.삼성 expected: - https://twitter.삼성 - description: "닷컴 is a valid generic tld" text: https://twitter.닷컴 expected: - https://twitter.닷컴 - description: "닷넷 is a valid generic tld" text: https://twitter.닷넷 expected: - https://twitter.닷넷 - description: "香格里拉 is a valid generic tld" text: https://twitter.香格里拉 expected: - https://twitter.香格里拉 - description: "餐厅 is a valid generic tld" text: https://twitter.餐厅 expected: - https://twitter.餐厅 - description: "食品 is a valid generic tld" text: https://twitter.食品 expected: - 
https://twitter.食品 - description: "飞利浦 is a valid generic tld" text: https://twitter.飞利浦 expected: - https://twitter.飞利浦 - description: "電訊盈科 is a valid generic tld" text: https://twitter.電訊盈科 expected: - https://twitter.電訊盈科 - description: "集团 is a valid generic tld" text: https://twitter.集团 expected: - https://twitter.集团 - description: "通販 is a valid generic tld" text: https://twitter.通販 expected: - https://twitter.通販 - description: "购物 is a valid generic tld" text: https://twitter.购物 expected: - https://twitter.购物 - description: "谷歌 is a valid generic tld" text: https://twitter.谷歌 expected: - https://twitter.谷歌 - description: "诺基亚 is a valid generic tld" text: https://twitter.诺基亚 expected: - https://twitter.诺基亚 - description: "联通 is a valid generic tld" text: https://twitter.联通 expected: - https://twitter.联通 - description: "网络 is a valid generic tld" text: https://twitter.网络 expected: - https://twitter.网络 - description: "网站 is a valid generic tld" text: https://twitter.网站 expected: - https://twitter.网站 - description: "网店 is a valid generic tld" text: https://twitter.网店 expected: - https://twitter.网店 - description: "网址 is a valid generic tld" text: https://twitter.网址 expected: - https://twitter.网址 - description: "组织机构 is a valid generic tld" text: https://twitter.组织机构 expected: - https://twitter.组织机构 - description: "移动 is a valid generic tld" text: https://twitter.移动 expected: - https://twitter.移动 - description: "珠宝 is a valid generic tld" text: https://twitter.珠宝 expected: - https://twitter.珠宝 - description: "点看 is a valid generic tld" text: https://twitter.点看 expected: - https://twitter.点看 - description: "游戏 is a valid generic tld" text: https://twitter.游戏 expected: - https://twitter.游戏 - description: "淡马锡 is a valid generic tld" text: https://twitter.淡马锡 expected: - https://twitter.淡马锡 - description: "机构 is a valid generic tld" text: https://twitter.机构 expected: - https://twitter.机构 - description: "書籍 is a valid generic tld" text: https://twitter.書籍 expected: 
- https://twitter.書籍 - description: "时尚 is a valid generic tld" text: https://twitter.时尚 expected: - https://twitter.时尚 - description: "新闻 is a valid generic tld" text: https://twitter.新闻 expected: - https://twitter.新闻 - description: "政府 is a valid generic tld" text: https://twitter.政府 expected: - https://twitter.政府 - description: "政务 is a valid generic tld" text: https://twitter.政务 expected: - https://twitter.政务 - description: "手表 is a valid generic tld" text: https://twitter.手表 expected: - https://twitter.手表 - description: "手机 is a valid generic tld" text: https://twitter.手机 expected: - https://twitter.手机 - description: "我爱你 is a valid generic tld" text: https://twitter.我爱你 expected: - https://twitter.我爱你 - description: "慈善 is a valid generic tld" text: https://twitter.慈善 expected: - https://twitter.慈善 - description: "微博 is a valid generic tld" text: https://twitter.微博 expected: - https://twitter.微博 - description: "广东 is a valid generic tld" text: https://twitter.广东 expected: - https://twitter.广东 - description: "工行 is a valid generic tld" text: https://twitter.工行 expected: - https://twitter.工行 - description: "家電 is a valid generic tld" text: https://twitter.家電 expected: - https://twitter.家電 - description: "娱乐 is a valid generic tld" text: https://twitter.娱乐 expected: - https://twitter.娱乐 - description: "天主教 is a valid generic tld" text: https://twitter.天主教 expected: - https://twitter.天主教 - description: "大拿 is a valid generic tld" text: https://twitter.大拿 expected: - https://twitter.大拿 - description: "大众汽车 is a valid generic tld" text: https://twitter.大众汽车 expected: - https://twitter.大众汽车 - description: "在线 is a valid generic tld" text: https://twitter.在线 expected: - https://twitter.在线 - description: "嘉里大酒店 is a valid generic tld" text: https://twitter.嘉里大酒店 expected: - https://twitter.嘉里大酒店 - description: "嘉里 is a valid generic tld" text: https://twitter.嘉里 expected: - https://twitter.嘉里 - description: "商标 is a valid generic tld" text: https://twitter.商标 
expected: - https://twitter.商标 - description: "商店 is a valid generic tld" text: https://twitter.商店 expected: - https://twitter.商店 - description: "商城 is a valid generic tld" text: https://twitter.商城 expected: - https://twitter.商城 - description: "公益 is a valid generic tld" text: https://twitter.公益 expected: - https://twitter.公益 - description: "公司 is a valid generic tld" text: https://twitter.公司 expected: - https://twitter.公司 - description: "八卦 is a valid generic tld" text: https://twitter.八卦 expected: - https://twitter.八卦 - description: "健康 is a valid generic tld" text: https://twitter.健康 expected: - https://twitter.健康 - description: "信息 is a valid generic tld" text: https://twitter.信息 expected: - https://twitter.信息 - description: "佛山 is a valid generic tld" text: https://twitter.佛山 expected: - https://twitter.佛山 - description: "企业 is a valid generic tld" text: https://twitter.企业 expected: - https://twitter.企业 - description: "中文网 is a valid generic tld" text: https://twitter.中文网 expected: - https://twitter.中文网 - description: "中信 is a valid generic tld" text: https://twitter.中信 expected: - https://twitter.中信 - description: "世界 is a valid generic tld" text: https://twitter.世界 expected: - https://twitter.世界 - description: "ポイント is a valid generic tld" text: https://twitter.ポイント expected: - https://twitter.ポイント - description: "ファッション is a valid generic tld" text: https://twitter.ファッション expected: - https://twitter.ファッション - description: "セール is a valid generic tld" text: https://twitter.セール expected: - https://twitter.セール - description: "ストア is a valid generic tld" text: https://twitter.ストア expected: - https://twitter.ストア - description: "コム is a valid generic tld" text: https://twitter.コム expected: - https://twitter.コム - description: "グーグル is a valid generic tld" text: https://twitter.グーグル expected: - https://twitter.グーグル - description: "クラウド is a valid generic tld" text: https://twitter.クラウド expected: - https://twitter.クラウド - description: "みんな is a valid generic tld" 
text: https://twitter.みんな expected: - https://twitter.みんな - description: "คอม is a valid generic tld" text: https://twitter.คอม expected: - https://twitter.คอม - description: "संगठन is a valid generic tld" text: https://twitter.संगठन expected: - https://twitter.संगठन - description: "नेट is a valid generic tld" text: https://twitter.नेट expected: - https://twitter.नेट - description: "कॉम is a valid generic tld" text: https://twitter.कॉम expected: - https://twitter.कॉम - description: "همراه is a valid generic tld" text: https://twitter.همراه expected: - https://twitter.همراه - description: "موقع is a valid generic tld" text: https://twitter.موقع expected: - https://twitter.موقع - description: "موبايلي is a valid generic tld" text: https://twitter.موبايلي expected: - https://twitter.موبايلي - description: "كوم is a valid generic tld" text: https://twitter.كوم expected: - https://twitter.كوم - description: "كاثوليك is a valid generic tld" text: https://twitter.كاثوليك expected: - https://twitter.كاثوليك - description: "عرب is a valid generic tld" text: https://twitter.عرب expected: - https://twitter.عرب - description: "شبكة is a valid generic tld" text: https://twitter.شبكة expected: - https://twitter.شبكة - description: "بيتك is a valid generic tld" text: https://twitter.بيتك expected: - https://twitter.بيتك - description: "بازار is a valid generic tld" text: https://twitter.بازار expected: - https://twitter.بازار - description: "العليان is a valid generic tld" text: https://twitter.العليان expected: - https://twitter.العليان - description: "ارامكو is a valid generic tld" text: https://twitter.ارامكو expected: - https://twitter.ارامكو - description: "اتصالات is a valid generic tld" text: https://twitter.اتصالات expected: - https://twitter.اتصالات - description: "ابوظبي is a valid generic tld" text: https://twitter.ابوظبي expected: - https://twitter.ابوظبي - description: "קום is a valid generic tld" text: https://twitter.קום expected: - https://twitter.קום - 
description: "сайт is a valid generic tld" text: https://twitter.сайт expected: - https://twitter.сайт - description: "рус is a valid generic tld" text: https://twitter.рус expected: - https://twitter.рус - description: "орг is a valid generic tld" text: https://twitter.орг expected: - https://twitter.орг - description: "онлайн is a valid generic tld" text: https://twitter.онлайн expected: - https://twitter.онлайн - description: "москва is a valid generic tld" text: https://twitter.москва expected: - https://twitter.москва - description: "ком is a valid generic tld" text: https://twitter.ком expected: - https://twitter.ком - description: "католик is a valid generic tld" text: https://twitter.католик expected: - https://twitter.католик - description: "дети is a valid generic tld" text: https://twitter.дети expected: - https://twitter.дети - description: zuerich is a valid generic tld text: https://twitter.zuerich expected: - https://twitter.zuerich - description: zone is a valid generic tld text: https://twitter.zone expected: - https://twitter.zone - description: zippo is a valid generic tld text: https://twitter.zippo expected: - https://twitter.zippo - description: zip is a valid generic tld text: https://twitter.zip expected: - https://twitter.zip - description: zero is a valid generic tld text: https://twitter.zero expected: - https://twitter.zero - description: zara is a valid generic tld text: https://twitter.zara expected: - https://twitter.zara - description: zappos is a valid generic tld text: https://twitter.zappos expected: - https://twitter.zappos - description: yun is a valid generic tld text: https://twitter.yun expected: - https://twitter.yun - description: youtube is a valid generic tld text: https://twitter.youtube expected: - https://twitter.youtube - description: you is a valid generic tld text: https://twitter.you expected: - https://twitter.you - description: yokohama is a valid generic tld text: https://twitter.yokohama expected: - 
https://twitter.yokohama - description: yoga is a valid generic tld text: https://twitter.yoga expected: - https://twitter.yoga - description: yodobashi is a valid generic tld text: https://twitter.yodobashi expected: - https://twitter.yodobashi - description: yandex is a valid generic tld text: https://twitter.yandex expected: - https://twitter.yandex - description: yamaxun is a valid generic tld text: https://twitter.yamaxun expected: - https://twitter.yamaxun - description: yahoo is a valid generic tld text: https://twitter.yahoo expected: - https://twitter.yahoo - description: yachts is a valid generic tld text: https://twitter.yachts expected: - https://twitter.yachts - description: xyz is a valid generic tld text: https://twitter.xyz expected: - https://twitter.xyz - description: xxx is a valid generic tld text: https://twitter.xxx expected: - https://twitter.xxx - description: xperia is a valid generic tld text: https://twitter.xperia expected: - https://twitter.xperia - description: xin is a valid generic tld text: https://twitter.xin expected: - https://twitter.xin - description: xihuan is a valid generic tld text: https://twitter.xihuan expected: - https://twitter.xihuan - description: xfinity is a valid generic tld text: https://twitter.xfinity expected: - https://twitter.xfinity - description: xerox is a valid generic tld text: https://twitter.xerox expected: - https://twitter.xerox - description: xbox is a valid generic tld text: https://twitter.xbox expected: - https://twitter.xbox - description: wtf is a valid generic tld text: https://twitter.wtf expected: - https://twitter.wtf - description: wtc is a valid generic tld text: https://twitter.wtc expected: - https://twitter.wtc - description: wow is a valid generic tld text: https://twitter.wow expected: - https://twitter.wow - description: world is a valid generic tld text: https://twitter.world expected: - https://twitter.world - description: works is a valid generic tld text: https://twitter.works 
expected: - https://twitter.works - description: work is a valid generic tld text: https://twitter.work expected: - https://twitter.work - description: woodside is a valid generic tld text: https://twitter.woodside expected: - https://twitter.woodside - description: wolterskluwer is a valid generic tld text: https://twitter.wolterskluwer expected: - https://twitter.wolterskluwer - description: wme is a valid generic tld text: https://twitter.wme expected: - https://twitter.wme - description: winners is a valid generic tld text: https://twitter.winners expected: - https://twitter.winners - description: wine is a valid generic tld text: https://twitter.wine expected: - https://twitter.wine - description: windows is a valid generic tld text: https://twitter.windows expected: - https://twitter.windows - description: win is a valid generic tld text: https://twitter.win expected: - https://twitter.win - description: williamhill is a valid generic tld text: https://twitter.williamhill expected: - https://twitter.williamhill - description: wiki is a valid generic tld text: https://twitter.wiki expected: - https://twitter.wiki - description: wien is a valid generic tld text: https://twitter.wien expected: - https://twitter.wien - description: whoswho is a valid generic tld text: https://twitter.whoswho expected: - https://twitter.whoswho - description: weir is a valid generic tld text: https://twitter.weir expected: - https://twitter.weir - description: weibo is a valid generic tld text: https://twitter.weibo expected: - https://twitter.weibo - description: wedding is a valid generic tld text: https://twitter.wedding expected: - https://twitter.wedding - description: wed is a valid generic tld text: https://twitter.wed expected: - https://twitter.wed - description: website is a valid generic tld text: https://twitter.website expected: - https://twitter.website - description: weber is a valid generic tld text: https://twitter.weber expected: - https://twitter.weber - 
description: webcam is a valid generic tld text: https://twitter.webcam expected: - https://twitter.webcam - description: weatherchannel is a valid generic tld text: https://twitter.weatherchannel expected: - https://twitter.weatherchannel - description: weather is a valid generic tld text: https://twitter.weather expected: - https://twitter.weather - description: watches is a valid generic tld text: https://twitter.watches expected: - https://twitter.watches - description: watch is a valid generic tld text: https://twitter.watch expected: - https://twitter.watch - description: warman is a valid generic tld text: https://twitter.warman expected: - https://twitter.warman - description: wanggou is a valid generic tld text: https://twitter.wanggou expected: - https://twitter.wanggou - description: wang is a valid generic tld text: https://twitter.wang expected: - https://twitter.wang - description: walter is a valid generic tld text: https://twitter.walter expected: - https://twitter.walter - description: walmart is a valid generic tld text: https://twitter.walmart expected: - https://twitter.walmart - description: wales is a valid generic tld text: https://twitter.wales expected: - https://twitter.wales - description: vuelos is a valid generic tld text: https://twitter.vuelos expected: - https://twitter.vuelos - description: voyage is a valid generic tld text: https://twitter.voyage expected: - https://twitter.voyage - description: voto is a valid generic tld text: https://twitter.voto expected: - https://twitter.voto - description: voting is a valid generic tld text: https://twitter.voting expected: - https://twitter.voting - description: vote is a valid generic tld text: https://twitter.vote expected: - https://twitter.vote - description: volvo is a valid generic tld text: https://twitter.volvo expected: - https://twitter.volvo - description: volkswagen is a valid generic tld text: https://twitter.volkswagen expected: - https://twitter.volkswagen - description: 
vodka is a valid generic tld text: https://twitter.vodka expected: - https://twitter.vodka - description: vlaanderen is a valid generic tld text: https://twitter.vlaanderen expected: - https://twitter.vlaanderen - description: vivo is a valid generic tld text: https://twitter.vivo expected: - https://twitter.vivo - description: viva is a valid generic tld text: https://twitter.viva expected: - https://twitter.viva - description: vistaprint is a valid generic tld text: https://twitter.vistaprint expected: - https://twitter.vistaprint - description: vista is a valid generic tld text: https://twitter.vista expected: - https://twitter.vista - description: vision is a valid generic tld text: https://twitter.vision expected: - https://twitter.vision - description: visa is a valid generic tld text: https://twitter.visa expected: - https://twitter.visa - description: virgin is a valid generic tld text: https://twitter.virgin expected: - https://twitter.virgin - description: vip is a valid generic tld text: https://twitter.vip expected: - https://twitter.vip - description: vin is a valid generic tld text: https://twitter.vin expected: - https://twitter.vin - description: villas is a valid generic tld text: https://twitter.villas expected: - https://twitter.villas - description: viking is a valid generic tld text: https://twitter.viking expected: - https://twitter.viking - description: vig is a valid generic tld text: https://twitter.vig expected: - https://twitter.vig - description: video is a valid generic tld text: https://twitter.video expected: - https://twitter.video - description: viajes is a valid generic tld text: https://twitter.viajes expected: - https://twitter.viajes - description: vet is a valid generic tld text: https://twitter.vet expected: - https://twitter.vet - description: versicherung is a valid generic tld text: https://twitter.versicherung expected: - https://twitter.versicherung - description: vermögensberatung is a valid generic tld text: 
https://twitter.vermögensberatung expected: - https://twitter.vermögensberatung - description: vermögensberater is a valid generic tld text: https://twitter.vermögensberater expected: - https://twitter.vermögensberater - description: verisign is a valid generic tld text: https://twitter.verisign expected: - https://twitter.verisign - description: ventures is a valid generic tld text: https://twitter.ventures expected: - https://twitter.ventures - description: vegas is a valid generic tld text: https://twitter.vegas expected: - https://twitter.vegas - description: vanguard is a valid generic tld text: https://twitter.vanguard expected: - https://twitter.vanguard - description: vana is a valid generic tld text: https://twitter.vana expected: - https://twitter.vana - description: vacations is a valid generic tld text: https://twitter.vacations expected: - https://twitter.vacations - description: ups is a valid generic tld text: https://twitter.ups expected: - https://twitter.ups - description: uol is a valid generic tld text: https://twitter.uol expected: - https://twitter.uol - description: uno is a valid generic tld text: https://twitter.uno expected: - https://twitter.uno - description: university is a valid generic tld text: https://twitter.university expected: - https://twitter.university - description: unicom is a valid generic tld text: https://twitter.unicom expected: - https://twitter.unicom - description: uconnect is a valid generic tld text: https://twitter.uconnect expected: - https://twitter.uconnect - description: ubs is a valid generic tld text: https://twitter.ubs expected: - https://twitter.ubs - description: ubank is a valid generic tld text: https://twitter.ubank expected: - https://twitter.ubank - description: tvs is a valid generic tld text: https://twitter.tvs expected: - https://twitter.tvs - description: tushu is a valid generic tld text: https://twitter.tushu expected: - https://twitter.tushu - description: tunes is a valid generic tld text: 
https://twitter.tunes expected: - https://twitter.tunes - description: tui is a valid generic tld text: https://twitter.tui expected: - https://twitter.tui - description: tube is a valid generic tld text: https://twitter.tube expected: - https://twitter.tube - description: trv is a valid generic tld text: https://twitter.trv expected: - https://twitter.trv - description: trust is a valid generic tld text: https://twitter.trust expected: - https://twitter.trust - description: travelersinsurance is a valid generic tld text: https://twitter.travelersinsurance expected: - https://twitter.travelersinsurance - description: travelers is a valid generic tld text: https://twitter.travelers expected: - https://twitter.travelers - description: travelchannel is a valid generic tld text: https://twitter.travelchannel expected: - https://twitter.travelchannel - description: travel is a valid generic tld text: https://twitter.travel expected: - https://twitter.travel - description: training is a valid generic tld text: https://twitter.training expected: - https://twitter.training - description: trading is a valid generic tld text: https://twitter.trading expected: - https://twitter.trading - description: trade is a valid generic tld text: https://twitter.trade expected: - https://twitter.trade - description: toys is a valid generic tld text: https://twitter.toys expected: - https://twitter.toys - description: toyota is a valid generic tld text: https://twitter.toyota expected: - https://twitter.toyota - description: town is a valid generic tld text: https://twitter.town expected: - https://twitter.town - description: tours is a valid generic tld text: https://twitter.tours expected: - https://twitter.tours - description: total is a valid generic tld text: https://twitter.total expected: - https://twitter.total - description: toshiba is a valid generic tld text: https://twitter.toshiba expected: - https://twitter.toshiba - description: toray is a valid generic tld text: 
https://twitter.toray expected: - https://twitter.toray - description: top is a valid generic tld text: https://twitter.top expected: - https://twitter.top - description: tools is a valid generic tld text: https://twitter.tools expected: - https://twitter.tools - description: tokyo is a valid generic tld text: https://twitter.tokyo expected: - https://twitter.tokyo - description: today is a valid generic tld text: https://twitter.today expected: - https://twitter.today - description: tmall is a valid generic tld text: https://twitter.tmall expected: - https://twitter.tmall - description: tkmaxx is a valid generic tld text: https://twitter.tkmaxx expected: - https://twitter.tkmaxx - description: tjx is a valid generic tld text: https://twitter.tjx expected: - https://twitter.tjx - description: tjmaxx is a valid generic tld text: https://twitter.tjmaxx expected: - https://twitter.tjmaxx - description: tirol is a valid generic tld text: https://twitter.tirol expected: - https://twitter.tirol - description: tires is a valid generic tld text: https://twitter.tires expected: - https://twitter.tires - description: tips is a valid generic tld text: https://twitter.tips expected: - https://twitter.tips - description: tiffany is a valid generic tld text: https://twitter.tiffany expected: - https://twitter.tiffany - description: tienda is a valid generic tld text: https://twitter.tienda expected: - https://twitter.tienda - description: tickets is a valid generic tld text: https://twitter.tickets expected: - https://twitter.tickets - description: tiaa is a valid generic tld text: https://twitter.tiaa expected: - https://twitter.tiaa - description: theatre is a valid generic tld text: https://twitter.theatre expected: - https://twitter.theatre - description: theater is a valid generic tld text: https://twitter.theater expected: - https://twitter.theater - description: thd is a valid generic tld text: https://twitter.thd expected: - https://twitter.thd - description: teva is a 
valid generic tld text: https://twitter.teva expected: - https://twitter.teva - description: tennis is a valid generic tld text: https://twitter.tennis expected: - https://twitter.tennis - description: temasek is a valid generic tld text: https://twitter.temasek expected: - https://twitter.temasek - description: telefonica is a valid generic tld text: https://twitter.telefonica expected: - https://twitter.telefonica - description: telecity is a valid generic tld text: https://twitter.telecity expected: - https://twitter.telecity - description: tel is a valid generic tld text: https://twitter.tel expected: - https://twitter.tel - description: technology is a valid generic tld text: https://twitter.technology expected: - https://twitter.technology - description: tech is a valid generic tld text: https://twitter.tech expected: - https://twitter.tech - description: team is a valid generic tld text: https://twitter.team expected: - https://twitter.team - description: tdk is a valid generic tld text: https://twitter.tdk expected: - https://twitter.tdk - description: tci is a valid generic tld text: https://twitter.tci expected: - https://twitter.tci - description: taxi is a valid generic tld text: https://twitter.taxi expected: - https://twitter.taxi - description: tax is a valid generic tld text: https://twitter.tax expected: - https://twitter.tax - description: tattoo is a valid generic tld text: https://twitter.tattoo expected: - https://twitter.tattoo - description: tatar is a valid generic tld text: https://twitter.tatar expected: - https://twitter.tatar - description: tatamotors is a valid generic tld text: https://twitter.tatamotors expected: - https://twitter.tatamotors - description: target is a valid generic tld text: https://twitter.target expected: - https://twitter.target - description: taobao is a valid generic tld text: https://twitter.taobao expected: - https://twitter.taobao - description: talk is a valid generic tld text: https://twitter.talk expected: 
- https://twitter.talk - description: taipei is a valid generic tld text: https://twitter.taipei expected: - https://twitter.taipei - description: tab is a valid generic tld text: https://twitter.tab expected: - https://twitter.tab - description: systems is a valid generic tld text: https://twitter.systems expected: - https://twitter.systems - description: symantec is a valid generic tld text: https://twitter.symantec expected: - https://twitter.symantec - description: sydney is a valid generic tld text: https://twitter.sydney expected: - https://twitter.sydney - description: swiss is a valid generic tld text: https://twitter.swiss expected: - https://twitter.swiss - description: swiftcover is a valid generic tld text: https://twitter.swiftcover expected: - https://twitter.swiftcover - description: swatch is a valid generic tld text: https://twitter.swatch expected: - https://twitter.swatch - description: suzuki is a valid generic tld text: https://twitter.suzuki expected: - https://twitter.suzuki - description: surgery is a valid generic tld text: https://twitter.surgery expected: - https://twitter.surgery - description: surf is a valid generic tld text: https://twitter.surf expected: - https://twitter.surf - description: support is a valid generic tld text: https://twitter.support expected: - https://twitter.support - description: supply is a valid generic tld text: https://twitter.supply expected: - https://twitter.supply - description: supplies is a valid generic tld text: https://twitter.supplies expected: - https://twitter.supplies - description: sucks is a valid generic tld text: https://twitter.sucks expected: - https://twitter.sucks - description: style is a valid generic tld text: https://twitter.style expected: - https://twitter.style - description: study is a valid generic tld text: https://twitter.study expected: - https://twitter.study - description: studio is a valid generic tld text: https://twitter.studio expected: - https://twitter.studio - 
description: stream is a valid generic tld text: https://twitter.stream expected: - https://twitter.stream - description: store is a valid generic tld text: https://twitter.store expected: - https://twitter.store - description: storage is a valid generic tld text: https://twitter.storage expected: - https://twitter.storage - description: stockholm is a valid generic tld text: https://twitter.stockholm expected: - https://twitter.stockholm - description: stcgroup is a valid generic tld text: https://twitter.stcgroup expected: - https://twitter.stcgroup - description: stc is a valid generic tld text: https://twitter.stc expected: - https://twitter.stc - description: statoil is a valid generic tld text: https://twitter.statoil expected: - https://twitter.statoil - description: statefarm is a valid generic tld text: https://twitter.statefarm expected: - https://twitter.statefarm - description: statebank is a valid generic tld text: https://twitter.statebank expected: - https://twitter.statebank - description: starhub is a valid generic tld text: https://twitter.starhub expected: - https://twitter.starhub - description: star is a valid generic tld text: https://twitter.star expected: - https://twitter.star - description: staples is a valid generic tld text: https://twitter.staples expected: - https://twitter.staples - description: stada is a valid generic tld text: https://twitter.stada expected: - https://twitter.stada - description: srt is a valid generic tld text: https://twitter.srt expected: - https://twitter.srt - description: srl is a valid generic tld text: https://twitter.srl expected: - https://twitter.srl - description: spreadbetting is a valid generic tld text: https://twitter.spreadbetting expected: - https://twitter.spreadbetting - description: spot is a valid generic tld text: https://twitter.spot expected: - https://twitter.spot - description: spiegel is a valid generic tld text: https://twitter.spiegel expected: - https://twitter.spiegel - description: 
space is a valid generic tld text: https://twitter.space expected: - https://twitter.space - description: soy is a valid generic tld text: https://twitter.soy expected: - https://twitter.soy - description: sony is a valid generic tld text: https://twitter.sony expected: - https://twitter.sony - description: song is a valid generic tld text: https://twitter.song expected: - https://twitter.song - description: solutions is a valid generic tld text: https://twitter.solutions expected: - https://twitter.solutions - description: solar is a valid generic tld text: https://twitter.solar expected: - https://twitter.solar - description: sohu is a valid generic tld text: https://twitter.sohu expected: - https://twitter.sohu - description: software is a valid generic tld text: https://twitter.software expected: - https://twitter.software - description: softbank is a valid generic tld text: https://twitter.softbank expected: - https://twitter.softbank - description: social is a valid generic tld text: https://twitter.social expected: - https://twitter.social - description: soccer is a valid generic tld text: https://twitter.soccer expected: - https://twitter.soccer - description: sncf is a valid generic tld text: https://twitter.sncf expected: - https://twitter.sncf - description: smile is a valid generic tld text: https://twitter.smile expected: - https://twitter.smile - description: smart is a valid generic tld text: https://twitter.smart expected: - https://twitter.smart - description: sling is a valid generic tld text: https://twitter.sling expected: - https://twitter.sling - description: skype is a valid generic tld text: https://twitter.skype expected: - https://twitter.skype - description: sky is a valid generic tld text: https://twitter.sky expected: - https://twitter.sky - description: skin is a valid generic tld text: https://twitter.skin expected: - https://twitter.skin - description: ski is a valid generic tld text: https://twitter.ski expected: - 
https://twitter.ski - description: site is a valid generic tld text: https://twitter.site expected: - https://twitter.site - description: singles is a valid generic tld text: https://twitter.singles expected: - https://twitter.singles - description: sina is a valid generic tld text: https://twitter.sina expected: - https://twitter.sina - description: silk is a valid generic tld text: https://twitter.silk expected: - https://twitter.silk - description: shriram is a valid generic tld text: https://twitter.shriram expected: - https://twitter.shriram - description: showtime is a valid generic tld text: https://twitter.showtime expected: - https://twitter.showtime - description: show is a valid generic tld text: https://twitter.show expected: - https://twitter.show - description: shouji is a valid generic tld text: https://twitter.shouji expected: - https://twitter.shouji - description: shopping is a valid generic tld text: https://twitter.shopping expected: - https://twitter.shopping - description: shop is a valid generic tld text: https://twitter.shop expected: - https://twitter.shop - description: shoes is a valid generic tld text: https://twitter.shoes expected: - https://twitter.shoes - description: shiksha is a valid generic tld text: https://twitter.shiksha expected: - https://twitter.shiksha - description: shia is a valid generic tld text: https://twitter.shia expected: - https://twitter.shia - description: shell is a valid generic tld text: https://twitter.shell expected: - https://twitter.shell - description: shaw is a valid generic tld text: https://twitter.shaw expected: - https://twitter.shaw - description: sharp is a valid generic tld text: https://twitter.sharp expected: - https://twitter.sharp - description: shangrila is a valid generic tld text: https://twitter.shangrila expected: - https://twitter.shangrila - description: sfr is a valid generic tld text: https://twitter.sfr expected: - https://twitter.sfr - description: sexy is a valid generic tld 
text: https://twitter.sexy expected: - https://twitter.sexy - description: sex is a valid generic tld text: https://twitter.sex expected: - https://twitter.sex - description: sew is a valid generic tld text: https://twitter.sew expected: - https://twitter.sew - description: seven is a valid generic tld text: https://twitter.seven expected: - https://twitter.seven - description: ses is a valid generic tld text: https://twitter.ses expected: - https://twitter.ses - description: services is a valid generic tld text: https://twitter.services expected: - https://twitter.services - description: sener is a valid generic tld text: https://twitter.sener expected: - https://twitter.sener - description: select is a valid generic tld text: https://twitter.select expected: - https://twitter.select - description: seek is a valid generic tld text: https://twitter.seek expected: - https://twitter.seek - description: security is a valid generic tld text: https://twitter.security expected: - https://twitter.security - description: secure is a valid generic tld text: https://twitter.secure expected: - https://twitter.secure - description: seat is a valid generic tld text: https://twitter.seat expected: - https://twitter.seat - description: search is a valid generic tld text: https://twitter.search expected: - https://twitter.search - description: scot is a valid generic tld text: https://twitter.scot expected: - https://twitter.scot - description: scor is a valid generic tld text: https://twitter.scor expected: - https://twitter.scor - description: scjohnson is a valid generic tld text: https://twitter.scjohnson expected: - https://twitter.scjohnson - description: science is a valid generic tld text: https://twitter.science expected: - https://twitter.science - description: schwarz is a valid generic tld text: https://twitter.schwarz expected: - https://twitter.schwarz - description: schule is a valid generic tld text: https://twitter.schule expected: - https://twitter.schule - 
description: school is a valid generic tld text: https://twitter.school expected: - https://twitter.school - description: scholarships is a valid generic tld text: https://twitter.scholarships expected: - https://twitter.scholarships - description: schmidt is a valid generic tld text: https://twitter.schmidt expected: - https://twitter.schmidt - description: schaeffler is a valid generic tld text: https://twitter.schaeffler expected: - https://twitter.schaeffler - description: scb is a valid generic tld text: https://twitter.scb expected: - https://twitter.scb - description: sca is a valid generic tld text: https://twitter.sca expected: - https://twitter.sca - description: sbs is a valid generic tld text: https://twitter.sbs expected: - https://twitter.sbs - description: sbi is a valid generic tld text: https://twitter.sbi expected: - https://twitter.sbi - description: saxo is a valid generic tld text: https://twitter.saxo expected: - https://twitter.saxo - description: save is a valid generic tld text: https://twitter.save expected: - https://twitter.save - description: sas is a valid generic tld text: https://twitter.sas expected: - https://twitter.sas - description: sarl is a valid generic tld text: https://twitter.sarl expected: - https://twitter.sarl - description: sapo is a valid generic tld text: https://twitter.sapo expected: - https://twitter.sapo - description: sap is a valid generic tld text: https://twitter.sap expected: - https://twitter.sap - description: sanofi is a valid generic tld text: https://twitter.sanofi expected: - https://twitter.sanofi - description: sandvikcoromant is a valid generic tld text: https://twitter.sandvikcoromant expected: - https://twitter.sandvikcoromant - description: sandvik is a valid generic tld text: https://twitter.sandvik expected: - https://twitter.sandvik - description: samsung is a valid generic tld text: https://twitter.samsung expected: - https://twitter.samsung - description: samsclub is a valid generic tld 
text: https://twitter.samsclub expected: - https://twitter.samsclub - description: salon is a valid generic tld text: https://twitter.salon expected: - https://twitter.salon - description: sale is a valid generic tld text: https://twitter.sale expected: - https://twitter.sale - description: sakura is a valid generic tld text: https://twitter.sakura expected: - https://twitter.sakura - description: safety is a valid generic tld text: https://twitter.safety expected: - https://twitter.safety - description: safe is a valid generic tld text: https://twitter.safe expected: - https://twitter.safe - description: saarland is a valid generic tld text: https://twitter.saarland expected: - https://twitter.saarland - description: ryukyu is a valid generic tld text: https://twitter.ryukyu expected: - https://twitter.ryukyu - description: rwe is a valid generic tld text: https://twitter.rwe expected: - https://twitter.rwe - description: run is a valid generic tld text: https://twitter.run expected: - https://twitter.run - description: ruhr is a valid generic tld text: https://twitter.ruhr expected: - https://twitter.ruhr - description: rugby is a valid generic tld text: https://twitter.rugby expected: - https://twitter.rugby - description: rsvp is a valid generic tld text: https://twitter.rsvp expected: - https://twitter.rsvp - description: room is a valid generic tld text: https://twitter.room expected: - https://twitter.room - description: rogers is a valid generic tld text: https://twitter.rogers expected: - https://twitter.rogers - description: rodeo is a valid generic tld text: https://twitter.rodeo expected: - https://twitter.rodeo - description: rocks is a valid generic tld text: https://twitter.rocks expected: - https://twitter.rocks - description: rocher is a valid generic tld text: https://twitter.rocher expected: - https://twitter.rocher - description: rmit is a valid generic tld text: https://twitter.rmit expected: - https://twitter.rmit - description: rip is a valid 
generic tld text: https://twitter.rip expected: - https://twitter.rip - description: rio is a valid generic tld text: https://twitter.rio expected: - https://twitter.rio - description: ril is a valid generic tld text: https://twitter.ril expected: - https://twitter.ril - description: rightathome is a valid generic tld text: https://twitter.rightathome expected: - https://twitter.rightathome - description: ricoh is a valid generic tld text: https://twitter.ricoh expected: - https://twitter.ricoh - description: richardli is a valid generic tld text: https://twitter.richardli expected: - https://twitter.richardli - description: rich is a valid generic tld text: https://twitter.rich expected: - https://twitter.rich - description: rexroth is a valid generic tld text: https://twitter.rexroth expected: - https://twitter.rexroth - description: reviews is a valid generic tld text: https://twitter.reviews expected: - https://twitter.reviews - description: review is a valid generic tld text: https://twitter.review expected: - https://twitter.review - description: restaurant is a valid generic tld text: https://twitter.restaurant expected: - https://twitter.restaurant - description: rest is a valid generic tld text: https://twitter.rest expected: - https://twitter.rest - description: republican is a valid generic tld text: https://twitter.republican expected: - https://twitter.republican - description: report is a valid generic tld text: https://twitter.report expected: - https://twitter.report - description: repair is a valid generic tld text: https://twitter.repair expected: - https://twitter.repair - description: rentals is a valid generic tld text: https://twitter.rentals expected: - https://twitter.rentals - description: rent is a valid generic tld text: https://twitter.rent expected: - https://twitter.rent - description: ren is a valid generic tld text: https://twitter.ren expected: - https://twitter.ren - description: reliance is a valid generic tld text: 
https://twitter.reliance expected: - https://twitter.reliance - description: reit is a valid generic tld text: https://twitter.reit expected: - https://twitter.reit - description: reisen is a valid generic tld text: https://twitter.reisen expected: - https://twitter.reisen - description: reise is a valid generic tld text: https://twitter.reise expected: - https://twitter.reise - description: rehab is a valid generic tld text: https://twitter.rehab expected: - https://twitter.rehab - description: redumbrella is a valid generic tld text: https://twitter.redumbrella expected: - https://twitter.redumbrella - description: redstone is a valid generic tld text: https://twitter.redstone expected: - https://twitter.redstone - description: red is a valid generic tld text: https://twitter.red expected: - https://twitter.red - description: recipes is a valid generic tld text: https://twitter.recipes expected: - https://twitter.recipes - description: realty is a valid generic tld text: https://twitter.realty expected: - https://twitter.realty - description: realtor is a valid generic tld text: https://twitter.realtor expected: - https://twitter.realtor - description: realestate is a valid generic tld text: https://twitter.realestate expected: - https://twitter.realestate - description: read is a valid generic tld text: https://twitter.read expected: - https://twitter.read - description: raid is a valid generic tld text: https://twitter.raid expected: - https://twitter.raid - description: radio is a valid generic tld text: https://twitter.radio expected: - https://twitter.radio - description: racing is a valid generic tld text: https://twitter.racing expected: - https://twitter.racing - description: qvc is a valid generic tld text: https://twitter.qvc expected: - https://twitter.qvc - description: quest is a valid generic tld text: https://twitter.quest expected: - https://twitter.quest - description: quebec is a valid generic tld text: https://twitter.quebec expected: - 
https://twitter.quebec - description: qpon is a valid generic tld text: https://twitter.qpon expected: - https://twitter.qpon - description: pwc is a valid generic tld text: https://twitter.pwc expected: - https://twitter.pwc - description: pub is a valid generic tld text: https://twitter.pub expected: - https://twitter.pub - description: prudential is a valid generic tld text: https://twitter.prudential expected: - https://twitter.prudential - description: pru is a valid generic tld text: https://twitter.pru expected: - https://twitter.pru - description: protection is a valid generic tld text: https://twitter.protection expected: - https://twitter.protection - description: property is a valid generic tld text: https://twitter.property expected: - https://twitter.property - description: properties is a valid generic tld text: https://twitter.properties expected: - https://twitter.properties - description: promo is a valid generic tld text: https://twitter.promo expected: - https://twitter.promo - description: progressive is a valid generic tld text: https://twitter.progressive expected: - https://twitter.progressive - description: prof is a valid generic tld text: https://twitter.prof expected: - https://twitter.prof - description: productions is a valid generic tld text: https://twitter.productions expected: - https://twitter.productions - description: prod is a valid generic tld text: https://twitter.prod expected: - https://twitter.prod - description: pro is a valid generic tld text: https://twitter.pro expected: - https://twitter.pro - description: prime is a valid generic tld text: https://twitter.prime expected: - https://twitter.prime - description: press is a valid generic tld text: https://twitter.press expected: - https://twitter.press - description: praxi is a valid generic tld text: https://twitter.praxi expected: - https://twitter.praxi - description: pramerica is a valid generic tld text: https://twitter.pramerica expected: - https://twitter.pramerica 
- description: post is a valid generic tld text: https://twitter.post expected: - https://twitter.post - description: porn is a valid generic tld text: https://twitter.porn expected: - https://twitter.porn - description: politie is a valid generic tld text: https://twitter.politie expected: - https://twitter.politie - description: poker is a valid generic tld text: https://twitter.poker expected: - https://twitter.poker - description: pohl is a valid generic tld text: https://twitter.pohl expected: - https://twitter.pohl - description: pnc is a valid generic tld text: https://twitter.pnc expected: - https://twitter.pnc - description: plus is a valid generic tld text: https://twitter.plus expected: - https://twitter.plus - description: plumbing is a valid generic tld text: https://twitter.plumbing expected: - https://twitter.plumbing - description: playstation is a valid generic tld text: https://twitter.playstation expected: - https://twitter.playstation - description: play is a valid generic tld text: https://twitter.play expected: - https://twitter.play - description: place is a valid generic tld text: https://twitter.place expected: - https://twitter.place - description: pizza is a valid generic tld text: https://twitter.pizza expected: - https://twitter.pizza - description: pioneer is a valid generic tld text: https://twitter.pioneer expected: - https://twitter.pioneer - description: pink is a valid generic tld text: https://twitter.pink expected: - https://twitter.pink - description: ping is a valid generic tld text: https://twitter.ping expected: - https://twitter.ping - description: pin is a valid generic tld text: https://twitter.pin expected: - https://twitter.pin - description: pid is a valid generic tld text: https://twitter.pid expected: - https://twitter.pid - description: pictures is a valid generic tld text: https://twitter.pictures expected: - https://twitter.pictures - description: pictet is a valid generic tld text: https://twitter.pictet 
expected: - https://twitter.pictet - description: pics is a valid generic tld text: https://twitter.pics expected: - https://twitter.pics - description: piaget is a valid generic tld text: https://twitter.piaget expected: - https://twitter.piaget - description: physio is a valid generic tld text: https://twitter.physio expected: - https://twitter.physio - description: photos is a valid generic tld text: https://twitter.photos expected: - https://twitter.photos - description: photography is a valid generic tld text: https://twitter.photography expected: - https://twitter.photography - description: photo is a valid generic tld text: https://twitter.photo expected: - https://twitter.photo - description: phone is a valid generic tld text: https://twitter.phone expected: - https://twitter.phone - description: philips is a valid generic tld text: https://twitter.philips expected: - https://twitter.philips - description: phd is a valid generic tld text: https://twitter.phd expected: - https://twitter.phd - description: pharmacy is a valid generic tld text: https://twitter.pharmacy expected: - https://twitter.pharmacy - description: pfizer is a valid generic tld text: https://twitter.pfizer expected: - https://twitter.pfizer - description: pet is a valid generic tld text: https://twitter.pet expected: - https://twitter.pet - description: pccw is a valid generic tld text: https://twitter.pccw expected: - https://twitter.pccw - description: pay is a valid generic tld text: https://twitter.pay expected: - https://twitter.pay - description: passagens is a valid generic tld text: https://twitter.passagens expected: - https://twitter.passagens - description: party is a valid generic tld text: https://twitter.party expected: - https://twitter.party - description: parts is a valid generic tld text: https://twitter.parts expected: - https://twitter.parts - description: partners is a valid generic tld text: https://twitter.partners expected: - https://twitter.partners - description: 
pars is a valid generic tld text: https://twitter.pars expected: - https://twitter.pars - description: paris is a valid generic tld text: https://twitter.paris expected: - https://twitter.paris - description: panerai is a valid generic tld text: https://twitter.panerai expected: - https://twitter.panerai - description: panasonic is a valid generic tld text: https://twitter.panasonic expected: - https://twitter.panasonic - description: pamperedchef is a valid generic tld text: https://twitter.pamperedchef expected: - https://twitter.pamperedchef - description: page is a valid generic tld text: https://twitter.page expected: - https://twitter.page - description: ovh is a valid generic tld text: https://twitter.ovh expected: - https://twitter.ovh - description: ott is a valid generic tld text: https://twitter.ott expected: - https://twitter.ott - description: otsuka is a valid generic tld text: https://twitter.otsuka expected: - https://twitter.otsuka - description: osaka is a valid generic tld text: https://twitter.osaka expected: - https://twitter.osaka - description: origins is a valid generic tld text: https://twitter.origins expected: - https://twitter.origins - description: orientexpress is a valid generic tld text: https://twitter.orientexpress expected: - https://twitter.orientexpress - description: organic is a valid generic tld text: https://twitter.organic expected: - https://twitter.organic - description: org is a valid generic tld text: https://twitter.org expected: - https://twitter.org - description: orange is a valid generic tld text: https://twitter.orange expected: - https://twitter.orange - description: oracle is a valid generic tld text: https://twitter.oracle expected: - https://twitter.oracle - description: open is a valid generic tld text: https://twitter.open expected: - https://twitter.open - description: ooo is a valid generic tld text: https://twitter.ooo expected: - https://twitter.ooo - description: onyourside is a valid generic tld text: 
https://twitter.onyourside expected: - https://twitter.onyourside - description: online is a valid generic tld text: https://twitter.online expected: - https://twitter.online - description: onl is a valid generic tld text: https://twitter.onl expected: - https://twitter.onl - description: ong is a valid generic tld text: https://twitter.ong expected: - https://twitter.ong - description: one is a valid generic tld text: https://twitter.one expected: - https://twitter.one - description: omega is a valid generic tld text: https://twitter.omega expected: - https://twitter.omega - description: ollo is a valid generic tld text: https://twitter.ollo expected: - https://twitter.ollo - description: oldnavy is a valid generic tld text: https://twitter.oldnavy expected: - https://twitter.oldnavy - description: olayangroup is a valid generic tld text: https://twitter.olayangroup expected: - https://twitter.olayangroup - description: olayan is a valid generic tld text: https://twitter.olayan expected: - https://twitter.olayan - description: okinawa is a valid generic tld text: https://twitter.okinawa expected: - https://twitter.okinawa - description: office is a valid generic tld text: https://twitter.office expected: - https://twitter.office - description: off is a valid generic tld text: https://twitter.off expected: - https://twitter.off - description: observer is a valid generic tld text: https://twitter.observer expected: - https://twitter.observer - description: obi is a valid generic tld text: https://twitter.obi expected: - https://twitter.obi - description: nyc is a valid generic tld text: https://twitter.nyc expected: - https://twitter.nyc - description: ntt is a valid generic tld text: https://twitter.ntt expected: - https://twitter.ntt - description: nrw is a valid generic tld text: https://twitter.nrw expected: - https://twitter.nrw - description: nra is a valid generic tld text: https://twitter.nra expected: - https://twitter.nra - description: nowtv is a valid 
generic tld text: https://twitter.nowtv expected: - https://twitter.nowtv - description: nowruz is a valid generic tld text: https://twitter.nowruz expected: - https://twitter.nowruz - description: now is a valid generic tld text: https://twitter.now expected: - https://twitter.now - description: norton is a valid generic tld text: https://twitter.norton expected: - https://twitter.norton - description: northwesternmutual is a valid generic tld text: https://twitter.northwesternmutual expected: - https://twitter.northwesternmutual - description: nokia is a valid generic tld text: https://twitter.nokia expected: - https://twitter.nokia - description: nissay is a valid generic tld text: https://twitter.nissay expected: - https://twitter.nissay - description: nissan is a valid generic tld text: https://twitter.nissan expected: - https://twitter.nissan - description: ninja is a valid generic tld text: https://twitter.ninja expected: - https://twitter.ninja - description: nikon is a valid generic tld text: https://twitter.nikon expected: - https://twitter.nikon - description: nike is a valid generic tld text: https://twitter.nike expected: - https://twitter.nike - description: nico is a valid generic tld text: https://twitter.nico expected: - https://twitter.nico - description: nhk is a valid generic tld text: https://twitter.nhk expected: - https://twitter.nhk - description: ngo is a valid generic tld text: https://twitter.ngo expected: - https://twitter.ngo - description: nfl is a valid generic tld text: https://twitter.nfl expected: - https://twitter.nfl - description: nexus is a valid generic tld text: https://twitter.nexus expected: - https://twitter.nexus - description: nextdirect is a valid generic tld text: https://twitter.nextdirect expected: - https://twitter.nextdirect - description: next is a valid generic tld text: https://twitter.next expected: - https://twitter.next - description: news is a valid generic tld text: https://twitter.news expected: - 
https://twitter.news - description: newholland is a valid generic tld text: https://twitter.newholland expected: - https://twitter.newholland - description: new is a valid generic tld text: https://twitter.new expected: - https://twitter.new - description: neustar is a valid generic tld text: https://twitter.neustar expected: - https://twitter.neustar - description: network is a valid generic tld text: https://twitter.network expected: - https://twitter.network - description: netflix is a valid generic tld text: https://twitter.netflix expected: - https://twitter.netflix - description: netbank is a valid generic tld text: https://twitter.netbank expected: - https://twitter.netbank - description: net is a valid generic tld text: https://twitter.net expected: - https://twitter.net - description: nec is a valid generic tld text: https://twitter.nec expected: - https://twitter.nec - description: nba is a valid generic tld text: https://twitter.nba expected: - https://twitter.nba - description: navy is a valid generic tld text: https://twitter.navy expected: - https://twitter.navy - description: natura is a valid generic tld text: https://twitter.natura expected: - https://twitter.natura - description: nationwide is a valid generic tld text: https://twitter.nationwide expected: - https://twitter.nationwide - description: name is a valid generic tld text: https://twitter.name expected: - https://twitter.name - description: nagoya is a valid generic tld text: https://twitter.nagoya expected: - https://twitter.nagoya - description: nadex is a valid generic tld text: https://twitter.nadex expected: - https://twitter.nadex - description: nab is a valid generic tld text: https://twitter.nab expected: - https://twitter.nab - description: mutuelle is a valid generic tld text: https://twitter.mutuelle expected: - https://twitter.mutuelle - description: mutual is a valid generic tld text: https://twitter.mutual expected: - https://twitter.mutual - description: museum is a valid 
generic tld text: https://twitter.museum expected: - https://twitter.museum - description: mtr is a valid generic tld text: https://twitter.mtr expected: - https://twitter.mtr - description: mtpc is a valid generic tld text: https://twitter.mtpc expected: - https://twitter.mtpc - description: mtn is a valid generic tld text: https://twitter.mtn expected: - https://twitter.mtn - description: msd is a valid generic tld text: https://twitter.msd expected: - https://twitter.msd - description: movistar is a valid generic tld text: https://twitter.movistar expected: - https://twitter.movistar - description: movie is a valid generic tld text: https://twitter.movie expected: - https://twitter.movie - description: mov is a valid generic tld text: https://twitter.mov expected: - https://twitter.mov - description: motorcycles is a valid generic tld text: https://twitter.motorcycles expected: - https://twitter.motorcycles - description: moto is a valid generic tld text: https://twitter.moto expected: - https://twitter.moto - description: moscow is a valid generic tld text: https://twitter.moscow expected: - https://twitter.moscow - description: mortgage is a valid generic tld text: https://twitter.mortgage expected: - https://twitter.mortgage - description: mormon is a valid generic tld text: https://twitter.mormon expected: - https://twitter.mormon - description: mopar is a valid generic tld text: https://twitter.mopar expected: - https://twitter.mopar - description: montblanc is a valid generic tld text: https://twitter.montblanc expected: - https://twitter.montblanc - description: monster is a valid generic tld text: https://twitter.monster expected: - https://twitter.monster - description: money is a valid generic tld text: https://twitter.money expected: - https://twitter.money - description: monash is a valid generic tld text: https://twitter.monash expected: - https://twitter.monash - description: mom is a valid generic tld text: https://twitter.mom expected: - 
https://twitter.mom - description: moi is a valid generic tld text: https://twitter.moi expected: - https://twitter.moi - description: moe is a valid generic tld text: https://twitter.moe expected: - https://twitter.moe - description: moda is a valid generic tld text: https://twitter.moda expected: - https://twitter.moda - description: mobily is a valid generic tld text: https://twitter.mobily expected: - https://twitter.mobily - description: mobile is a valid generic tld text: https://twitter.mobile expected: - https://twitter.mobile - description: mobi is a valid generic tld text: https://twitter.mobi expected: - https://twitter.mobi - description: mma is a valid generic tld text: https://twitter.mma expected: - https://twitter.mma - description: mls is a valid generic tld text: https://twitter.mls expected: - https://twitter.mls - description: mlb is a valid generic tld text: https://twitter.mlb expected: - https://twitter.mlb - description: mitsubishi is a valid generic tld text: https://twitter.mitsubishi expected: - https://twitter.mitsubishi - description: mit is a valid generic tld text: https://twitter.mit expected: - https://twitter.mit - description: mint is a valid generic tld text: https://twitter.mint expected: - https://twitter.mint - description: mini is a valid generic tld text: https://twitter.mini expected: - https://twitter.mini - description: mil is a valid generic tld text: https://twitter.mil expected: - https://twitter.mil - description: microsoft is a valid generic tld text: https://twitter.microsoft expected: - https://twitter.microsoft - description: miami is a valid generic tld text: https://twitter.miami expected: - https://twitter.miami - description: metlife is a valid generic tld text: https://twitter.metlife expected: - https://twitter.metlife - description: merckmsd is a valid generic tld text: https://twitter.merckmsd expected: - https://twitter.merckmsd - description: meo is a valid generic tld text: https://twitter.meo expected: 
- https://twitter.meo - description: menu is a valid generic tld text: https://twitter.menu expected: - https://twitter.menu - description: men is a valid generic tld text: https://twitter.men expected: - https://twitter.men - description: memorial is a valid generic tld text: https://twitter.memorial expected: - https://twitter.memorial - description: meme is a valid generic tld text: https://twitter.meme expected: - https://twitter.meme - description: melbourne is a valid generic tld text: https://twitter.melbourne expected: - https://twitter.melbourne - description: meet is a valid generic tld text: https://twitter.meet expected: - https://twitter.meet - description: media is a valid generic tld text: https://twitter.media expected: - https://twitter.media - description: med is a valid generic tld text: https://twitter.med expected: - https://twitter.med - description: mckinsey is a valid generic tld text: https://twitter.mckinsey expected: - https://twitter.mckinsey - description: mcdonalds is a valid generic tld text: https://twitter.mcdonalds expected: - https://twitter.mcdonalds - description: mcd is a valid generic tld text: https://twitter.mcd expected: - https://twitter.mcd - description: mba is a valid generic tld text: https://twitter.mba expected: - https://twitter.mba - description: mattel is a valid generic tld text: https://twitter.mattel expected: - https://twitter.mattel - description: maserati is a valid generic tld text: https://twitter.maserati expected: - https://twitter.maserati - description: marshalls is a valid generic tld text: https://twitter.marshalls expected: - https://twitter.marshalls - description: marriott is a valid generic tld text: https://twitter.marriott expected: - https://twitter.marriott - description: markets is a valid generic tld text: https://twitter.markets expected: - https://twitter.markets - description: marketing is a valid generic tld text: https://twitter.marketing expected: - https://twitter.marketing - 
description: market is a valid generic tld text: https://twitter.market expected: - https://twitter.market - description: map is a valid generic tld text: https://twitter.map expected: - https://twitter.map - description: mango is a valid generic tld text: https://twitter.mango expected: - https://twitter.mango - description: management is a valid generic tld text: https://twitter.management expected: - https://twitter.management - description: man is a valid generic tld text: https://twitter.man expected: - https://twitter.man - description: makeup is a valid generic tld text: https://twitter.makeup expected: - https://twitter.makeup - description: maison is a valid generic tld text: https://twitter.maison expected: - https://twitter.maison - description: maif is a valid generic tld text: https://twitter.maif expected: - https://twitter.maif - description: madrid is a valid generic tld text: https://twitter.madrid expected: - https://twitter.madrid - description: macys is a valid generic tld text: https://twitter.macys expected: - https://twitter.macys - description: luxury is a valid generic tld text: https://twitter.luxury expected: - https://twitter.luxury - description: luxe is a valid generic tld text: https://twitter.luxe expected: - https://twitter.luxe - description: lupin is a valid generic tld text: https://twitter.lupin expected: - https://twitter.lupin - description: lundbeck is a valid generic tld text: https://twitter.lundbeck expected: - https://twitter.lundbeck - description: ltda is a valid generic tld text: https://twitter.ltda expected: - https://twitter.ltda - description: ltd is a valid generic tld text: https://twitter.ltd expected: - https://twitter.ltd - description: lplfinancial is a valid generic tld text: https://twitter.lplfinancial expected: - https://twitter.lplfinancial - description: lpl is a valid generic tld text: https://twitter.lpl expected: - https://twitter.lpl - description: love is a valid generic tld text: 
https://twitter.love expected: - https://twitter.love - description: lotto is a valid generic tld text: https://twitter.lotto expected: - https://twitter.lotto - description: lotte is a valid generic tld text: https://twitter.lotte expected: - https://twitter.lotte - description: london is a valid generic tld text: https://twitter.london expected: - https://twitter.london - description: lol is a valid generic tld text: https://twitter.lol expected: - https://twitter.lol - description: loft is a valid generic tld text: https://twitter.loft expected: - https://twitter.loft - description: locus is a valid generic tld text: https://twitter.locus expected: - https://twitter.locus - description: locker is a valid generic tld text: https://twitter.locker expected: - https://twitter.locker - description: loans is a valid generic tld text: https://twitter.loans expected: - https://twitter.loans - description: loan is a valid generic tld text: https://twitter.loan expected: - https://twitter.loan - description: lixil is a valid generic tld text: https://twitter.lixil expected: - https://twitter.lixil - description: living is a valid generic tld text: https://twitter.living expected: - https://twitter.living - description: live is a valid generic tld text: https://twitter.live expected: - https://twitter.live - description: lipsy is a valid generic tld text: https://twitter.lipsy expected: - https://twitter.lipsy - description: link is a valid generic tld text: https://twitter.link expected: - https://twitter.link - description: linde is a valid generic tld text: https://twitter.linde expected: - https://twitter.linde - description: lincoln is a valid generic tld text: https://twitter.lincoln expected: - https://twitter.lincoln - description: limo is a valid generic tld text: https://twitter.limo expected: - https://twitter.limo - description: limited is a valid generic tld text: https://twitter.limited expected: - https://twitter.limited - description: lilly is a valid 
generic tld text: https://twitter.lilly expected: - https://twitter.lilly - description: like is a valid generic tld text: https://twitter.like expected: - https://twitter.like - description: lighting is a valid generic tld text: https://twitter.lighting expected: - https://twitter.lighting - description: lifestyle is a valid generic tld text: https://twitter.lifestyle expected: - https://twitter.lifestyle - description: lifeinsurance is a valid generic tld text: https://twitter.lifeinsurance expected: - https://twitter.lifeinsurance - description: life is a valid generic tld text: https://twitter.life expected: - https://twitter.life - description: lidl is a valid generic tld text: https://twitter.lidl expected: - https://twitter.lidl - description: liaison is a valid generic tld text: https://twitter.liaison expected: - https://twitter.liaison - description: lgbt is a valid generic tld text: https://twitter.lgbt expected: - https://twitter.lgbt - description: lexus is a valid generic tld text: https://twitter.lexus expected: - https://twitter.lexus - description: lego is a valid generic tld text: https://twitter.lego expected: - https://twitter.lego - description: legal is a valid generic tld text: https://twitter.legal expected: - https://twitter.legal - description: lefrak is a valid generic tld text: https://twitter.lefrak expected: - https://twitter.lefrak - description: leclerc is a valid generic tld text: https://twitter.leclerc expected: - https://twitter.leclerc - description: lease is a valid generic tld text: https://twitter.lease expected: - https://twitter.lease - description: lds is a valid generic tld text: https://twitter.lds expected: - https://twitter.lds - description: lawyer is a valid generic tld text: https://twitter.lawyer expected: - https://twitter.lawyer - description: law is a valid generic tld text: https://twitter.law expected: - https://twitter.law - description: latrobe is a valid generic tld text: https://twitter.latrobe expected: - 
https://twitter.latrobe - description: latino is a valid generic tld text: https://twitter.latino expected: - https://twitter.latino - description: lat is a valid generic tld text: https://twitter.lat expected: - https://twitter.lat - description: lasalle is a valid generic tld text: https://twitter.lasalle expected: - https://twitter.lasalle - description: lanxess is a valid generic tld text: https://twitter.lanxess expected: - https://twitter.lanxess - description: landrover is a valid generic tld text: https://twitter.landrover expected: - https://twitter.landrover - description: land is a valid generic tld text: https://twitter.land expected: - https://twitter.land - description: lancome is a valid generic tld text: https://twitter.lancome expected: - https://twitter.lancome - description: lancia is a valid generic tld text: https://twitter.lancia expected: - https://twitter.lancia - description: lancaster is a valid generic tld text: https://twitter.lancaster expected: - https://twitter.lancaster - description: lamer is a valid generic tld text: https://twitter.lamer expected: - https://twitter.lamer - description: lamborghini is a valid generic tld text: https://twitter.lamborghini expected: - https://twitter.lamborghini - description: ladbrokes is a valid generic tld text: https://twitter.ladbrokes expected: - https://twitter.ladbrokes - description: lacaixa is a valid generic tld text: https://twitter.lacaixa expected: - https://twitter.lacaixa - description: kyoto is a valid generic tld text: https://twitter.kyoto expected: - https://twitter.kyoto - description: kuokgroup is a valid generic tld text: https://twitter.kuokgroup expected: - https://twitter.kuokgroup - description: kred is a valid generic tld text: https://twitter.kred expected: - https://twitter.kred - description: krd is a valid generic tld text: https://twitter.krd expected: - https://twitter.krd - description: kpn is a valid generic tld text: https://twitter.kpn expected: - 
https://twitter.kpn - description: kpmg is a valid generic tld text: https://twitter.kpmg expected: - https://twitter.kpmg - description: kosher is a valid generic tld text: https://twitter.kosher expected: - https://twitter.kosher - description: komatsu is a valid generic tld text: https://twitter.komatsu expected: - https://twitter.komatsu - description: koeln is a valid generic tld text: https://twitter.koeln expected: - https://twitter.koeln - description: kiwi is a valid generic tld text: https://twitter.kiwi expected: - https://twitter.kiwi - description: kitchen is a valid generic tld text: https://twitter.kitchen expected: - https://twitter.kitchen - description: kindle is a valid generic tld text: https://twitter.kindle expected: - https://twitter.kindle - description: kinder is a valid generic tld text: https://twitter.kinder expected: - https://twitter.kinder - description: kim is a valid generic tld text: https://twitter.kim expected: - https://twitter.kim - description: kia is a valid generic tld text: https://twitter.kia expected: - https://twitter.kia - description: kfh is a valid generic tld text: https://twitter.kfh expected: - https://twitter.kfh - description: kerryproperties is a valid generic tld text: https://twitter.kerryproperties expected: - https://twitter.kerryproperties - description: kerrylogistics is a valid generic tld text: https://twitter.kerrylogistics expected: - https://twitter.kerrylogistics - description: kerryhotels is a valid generic tld text: https://twitter.kerryhotels expected: - https://twitter.kerryhotels - description: kddi is a valid generic tld text: https://twitter.kddi expected: - https://twitter.kddi - description: kaufen is a valid generic tld text: https://twitter.kaufen expected: - https://twitter.kaufen - description: juniper is a valid generic tld text: https://twitter.juniper expected: - https://twitter.juniper - description: juegos is a valid generic tld text: https://twitter.juegos expected: - 
https://twitter.juegos - description: jprs is a valid generic tld text: https://twitter.jprs expected: - https://twitter.jprs - description: jpmorgan is a valid generic tld text: https://twitter.jpmorgan expected: - https://twitter.jpmorgan - description: joy is a valid generic tld text: https://twitter.joy expected: - https://twitter.joy - description: jot is a valid generic tld text: https://twitter.jot expected: - https://twitter.jot - description: joburg is a valid generic tld text: https://twitter.joburg expected: - https://twitter.joburg - description: jobs is a valid generic tld text: https://twitter.jobs expected: - https://twitter.jobs - description: jnj is a valid generic tld text: https://twitter.jnj expected: - https://twitter.jnj - description: jmp is a valid generic tld text: https://twitter.jmp expected: - https://twitter.jmp - description: jll is a valid generic tld text: https://twitter.jll expected: - https://twitter.jll - description: jlc is a valid generic tld text: https://twitter.jlc expected: - https://twitter.jlc - description: jio is a valid generic tld text: https://twitter.jio expected: - https://twitter.jio - description: jewelry is a valid generic tld text: https://twitter.jewelry expected: - https://twitter.jewelry - description: jetzt is a valid generic tld text: https://twitter.jetzt expected: - https://twitter.jetzt - description: jeep is a valid generic tld text: https://twitter.jeep expected: - https://twitter.jeep - description: jcp is a valid generic tld text: https://twitter.jcp expected: - https://twitter.jcp - description: jcb is a valid generic tld text: https://twitter.jcb expected: - https://twitter.jcb - description: java is a valid generic tld text: https://twitter.java expected: - https://twitter.java - description: jaguar is a valid generic tld text: https://twitter.jaguar expected: - https://twitter.jaguar - description: iwc is a valid generic tld text: https://twitter.iwc expected: - https://twitter.iwc - 
description: iveco is a valid generic tld text: https://twitter.iveco expected: - https://twitter.iveco - description: itv is a valid generic tld text: https://twitter.itv expected: - https://twitter.itv - description: itau is a valid generic tld text: https://twitter.itau expected: - https://twitter.itau - description: istanbul is a valid generic tld text: https://twitter.istanbul expected: - https://twitter.istanbul - description: ist is a valid generic tld text: https://twitter.ist expected: - https://twitter.ist - description: ismaili is a valid generic tld text: https://twitter.ismaili expected: - https://twitter.ismaili - description: iselect is a valid generic tld text: https://twitter.iselect expected: - https://twitter.iselect - description: irish is a valid generic tld text: https://twitter.irish expected: - https://twitter.irish - description: ipiranga is a valid generic tld text: https://twitter.ipiranga expected: - https://twitter.ipiranga - description: investments is a valid generic tld text: https://twitter.investments expected: - https://twitter.investments - description: intuit is a valid generic tld text: https://twitter.intuit expected: - https://twitter.intuit - description: international is a valid generic tld text: https://twitter.international expected: - https://twitter.international - description: intel is a valid generic tld text: https://twitter.intel expected: - https://twitter.intel - description: int is a valid generic tld text: https://twitter.int expected: - https://twitter.int - description: insure is a valid generic tld text: https://twitter.insure expected: - https://twitter.insure - description: insurance is a valid generic tld text: https://twitter.insurance expected: - https://twitter.insurance - description: institute is a valid generic tld text: https://twitter.institute expected: - https://twitter.institute - description: ink is a valid generic tld text: https://twitter.ink expected: - https://twitter.ink - description: ing 
is a valid generic tld text: https://twitter.ing expected: - https://twitter.ing - description: info is a valid generic tld text: https://twitter.info expected: - https://twitter.info - description: infiniti is a valid generic tld text: https://twitter.infiniti expected: - https://twitter.infiniti - description: industries is a valid generic tld text: https://twitter.industries expected: - https://twitter.industries - description: immobilien is a valid generic tld text: https://twitter.immobilien expected: - https://twitter.immobilien - description: immo is a valid generic tld text: https://twitter.immo expected: - https://twitter.immo - description: imdb is a valid generic tld text: https://twitter.imdb expected: - https://twitter.imdb - description: imamat is a valid generic tld text: https://twitter.imamat expected: - https://twitter.imamat - description: ikano is a valid generic tld text: https://twitter.ikano expected: - https://twitter.ikano - description: iinet is a valid generic tld text: https://twitter.iinet expected: - https://twitter.iinet - description: ifm is a valid generic tld text: https://twitter.ifm expected: - https://twitter.ifm - description: ieee is a valid generic tld text: https://twitter.ieee expected: - https://twitter.ieee - description: icu is a valid generic tld text: https://twitter.icu expected: - https://twitter.icu - description: ice is a valid generic tld text: https://twitter.ice expected: - https://twitter.ice - description: icbc is a valid generic tld text: https://twitter.icbc expected: - https://twitter.icbc - description: ibm is a valid generic tld text: https://twitter.ibm expected: - https://twitter.ibm - description: hyundai is a valid generic tld text: https://twitter.hyundai expected: - https://twitter.hyundai - description: hyatt is a valid generic tld text: https://twitter.hyatt expected: - https://twitter.hyatt - description: hughes is a valid generic tld text: https://twitter.hughes expected: - 
https://twitter.hughes - description: htc is a valid generic tld text: https://twitter.htc expected: - https://twitter.htc - description: hsbc is a valid generic tld text: https://twitter.hsbc expected: - https://twitter.hsbc - description: how is a valid generic tld text: https://twitter.how expected: - https://twitter.how - description: house is a valid generic tld text: https://twitter.house expected: - https://twitter.house - description: hotmail is a valid generic tld text: https://twitter.hotmail expected: - https://twitter.hotmail - description: hotels is a valid generic tld text: https://twitter.hotels expected: - https://twitter.hotels - description: hoteles is a valid generic tld text: https://twitter.hoteles expected: - https://twitter.hoteles - description: hot is a valid generic tld text: https://twitter.hot expected: - https://twitter.hot - description: hosting is a valid generic tld text: https://twitter.hosting expected: - https://twitter.hosting - description: host is a valid generic tld text: https://twitter.host expected: - https://twitter.host - description: hospital is a valid generic tld text: https://twitter.hospital expected: - https://twitter.hospital - description: horse is a valid generic tld text: https://twitter.horse expected: - https://twitter.horse - description: honeywell is a valid generic tld text: https://twitter.honeywell expected: - https://twitter.honeywell - description: honda is a valid generic tld text: https://twitter.honda expected: - https://twitter.honda - description: homesense is a valid generic tld text: https://twitter.homesense expected: - https://twitter.homesense - description: homes is a valid generic tld text: https://twitter.homes expected: - https://twitter.homes - description: homegoods is a valid generic tld text: https://twitter.homegoods expected: - https://twitter.homegoods - description: homedepot is a valid generic tld text: https://twitter.homedepot expected: - https://twitter.homedepot - description: 
holiday is a valid generic tld text: https://twitter.holiday expected: - https://twitter.holiday - description: holdings is a valid generic tld text: https://twitter.holdings expected: - https://twitter.holdings - description: hockey is a valid generic tld text: https://twitter.hockey expected: - https://twitter.hockey - description: hkt is a valid generic tld text: https://twitter.hkt expected: - https://twitter.hkt - description: hiv is a valid generic tld text: https://twitter.hiv expected: - https://twitter.hiv - description: hitachi is a valid generic tld text: https://twitter.hitachi expected: - https://twitter.hitachi - description: hisamitsu is a valid generic tld text: https://twitter.hisamitsu expected: - https://twitter.hisamitsu - description: hiphop is a valid generic tld text: https://twitter.hiphop expected: - https://twitter.hiphop - description: hgtv is a valid generic tld text: https://twitter.hgtv expected: - https://twitter.hgtv - description: hermes is a valid generic tld text: https://twitter.hermes expected: - https://twitter.hermes - description: here is a valid generic tld text: https://twitter.here expected: - https://twitter.here - description: helsinki is a valid generic tld text: https://twitter.helsinki expected: - https://twitter.helsinki - description: help is a valid generic tld text: https://twitter.help expected: - https://twitter.help - description: healthcare is a valid generic tld text: https://twitter.healthcare expected: - https://twitter.healthcare - description: health is a valid generic tld text: https://twitter.health expected: - https://twitter.health - description: hdfcbank is a valid generic tld text: https://twitter.hdfcbank expected: - https://twitter.hdfcbank - description: hdfc is a valid generic tld text: https://twitter.hdfc expected: - https://twitter.hdfc - description: hbo is a valid generic tld text: https://twitter.hbo expected: - https://twitter.hbo - description: haus is a valid generic tld text: 
https://twitter.haus expected: - https://twitter.haus - description: hangout is a valid generic tld text: https://twitter.hangout expected: - https://twitter.hangout - description: hamburg is a valid generic tld text: https://twitter.hamburg expected: - https://twitter.hamburg - description: hair is a valid generic tld text: https://twitter.hair expected: - https://twitter.hair - description: guru is a valid generic tld text: https://twitter.guru expected: - https://twitter.guru - description: guitars is a valid generic tld text: https://twitter.guitars expected: - https://twitter.guitars - description: guide is a valid generic tld text: https://twitter.guide expected: - https://twitter.guide - description: guge is a valid generic tld text: https://twitter.guge expected: - https://twitter.guge - description: gucci is a valid generic tld text: https://twitter.gucci expected: - https://twitter.gucci - description: guardian is a valid generic tld text: https://twitter.guardian expected: - https://twitter.guardian - description: group is a valid generic tld text: https://twitter.group expected: - https://twitter.group - description: grocery is a valid generic tld text: https://twitter.grocery expected: - https://twitter.grocery - description: gripe is a valid generic tld text: https://twitter.gripe expected: - https://twitter.gripe - description: green is a valid generic tld text: https://twitter.green expected: - https://twitter.green - description: gratis is a valid generic tld text: https://twitter.gratis expected: - https://twitter.gratis - description: graphics is a valid generic tld text: https://twitter.graphics expected: - https://twitter.graphics - description: grainger is a valid generic tld text: https://twitter.grainger expected: - https://twitter.grainger - description: gov is a valid generic tld text: https://twitter.gov expected: - https://twitter.gov - description: got is a valid generic tld text: https://twitter.got expected: - https://twitter.got - 
description: gop is a valid generic tld text: https://twitter.gop expected: - https://twitter.gop - description: google is a valid generic tld text: https://twitter.google expected: - https://twitter.google - description: goog is a valid generic tld text: https://twitter.goog expected: - https://twitter.goog - description: goodyear is a valid generic tld text: https://twitter.goodyear expected: - https://twitter.goodyear - description: goodhands is a valid generic tld text: https://twitter.goodhands expected: - https://twitter.goodhands - description: goo is a valid generic tld text: https://twitter.goo expected: - https://twitter.goo - description: golf is a valid generic tld text: https://twitter.golf expected: - https://twitter.golf - description: goldpoint is a valid generic tld text: https://twitter.goldpoint expected: - https://twitter.goldpoint - description: gold is a valid generic tld text: https://twitter.gold expected: - https://twitter.gold - description: godaddy is a valid generic tld text: https://twitter.godaddy expected: - https://twitter.godaddy - description: gmx is a valid generic tld text: https://twitter.gmx expected: - https://twitter.gmx - description: gmo is a valid generic tld text: https://twitter.gmo expected: - https://twitter.gmo - description: gmbh is a valid generic tld text: https://twitter.gmbh expected: - https://twitter.gmbh - description: gmail is a valid generic tld text: https://twitter.gmail expected: - https://twitter.gmail - description: globo is a valid generic tld text: https://twitter.globo expected: - https://twitter.globo - description: global is a valid generic tld text: https://twitter.global expected: - https://twitter.global - description: gle is a valid generic tld text: https://twitter.gle expected: - https://twitter.gle - description: glass is a valid generic tld text: https://twitter.glass expected: - https://twitter.glass - description: glade is a valid generic tld text: https://twitter.glade expected: - 
https://twitter.glade - description: giving is a valid generic tld text: https://twitter.giving expected: - https://twitter.giving - description: gives is a valid generic tld text: https://twitter.gives expected: - https://twitter.gives - description: gifts is a valid generic tld text: https://twitter.gifts expected: - https://twitter.gifts - description: gift is a valid generic tld text: https://twitter.gift expected: - https://twitter.gift - description: ggee is a valid generic tld text: https://twitter.ggee expected: - https://twitter.ggee - description: george is a valid generic tld text: https://twitter.george expected: - https://twitter.george - description: genting is a valid generic tld text: https://twitter.genting expected: - https://twitter.genting - description: gent is a valid generic tld text: https://twitter.gent expected: - https://twitter.gent - description: gea is a valid generic tld text: https://twitter.gea expected: - https://twitter.gea - description: gdn is a valid generic tld text: https://twitter.gdn expected: - https://twitter.gdn - description: gbiz is a valid generic tld text: https://twitter.gbiz expected: - https://twitter.gbiz - description: garden is a valid generic tld text: https://twitter.garden expected: - https://twitter.garden - description: gap is a valid generic tld text: https://twitter.gap expected: - https://twitter.gap - description: games is a valid generic tld text: https://twitter.games expected: - https://twitter.games - description: game is a valid generic tld text: https://twitter.game expected: - https://twitter.game - description: gallup is a valid generic tld text: https://twitter.gallup expected: - https://twitter.gallup - description: gallo is a valid generic tld text: https://twitter.gallo expected: - https://twitter.gallo - description: gallery is a valid generic tld text: https://twitter.gallery expected: - https://twitter.gallery - description: gal is a valid generic tld text: https://twitter.gal expected: 
- https://twitter.gal - description: fyi is a valid generic tld text: https://twitter.fyi expected: - https://twitter.fyi - description: futbol is a valid generic tld text: https://twitter.futbol expected: - https://twitter.futbol - description: furniture is a valid generic tld text: https://twitter.furniture expected: - https://twitter.furniture - description: fund is a valid generic tld text: https://twitter.fund expected: - https://twitter.fund - description: fun is a valid generic tld text: https://twitter.fun expected: - https://twitter.fun - description: fujixerox is a valid generic tld text: https://twitter.fujixerox expected: - https://twitter.fujixerox - description: fujitsu is a valid generic tld text: https://twitter.fujitsu expected: - https://twitter.fujitsu - description: ftr is a valid generic tld text: https://twitter.ftr expected: - https://twitter.ftr - description: frontier is a valid generic tld text: https://twitter.frontier expected: - https://twitter.frontier - description: frontdoor is a valid generic tld text: https://twitter.frontdoor expected: - https://twitter.frontdoor - description: frogans is a valid generic tld text: https://twitter.frogans expected: - https://twitter.frogans - description: frl is a valid generic tld text: https://twitter.frl expected: - https://twitter.frl - description: fresenius is a valid generic tld text: https://twitter.fresenius expected: - https://twitter.fresenius - description: free is a valid generic tld text: https://twitter.free expected: - https://twitter.free - description: fox is a valid generic tld text: https://twitter.fox expected: - https://twitter.fox - description: foundation is a valid generic tld text: https://twitter.foundation expected: - https://twitter.foundation - description: forum is a valid generic tld text: https://twitter.forum expected: - https://twitter.forum - description: forsale is a valid generic tld text: https://twitter.forsale expected: - https://twitter.forsale - 
description: forex is a valid generic tld text: https://twitter.forex expected: - https://twitter.forex - description: ford is a valid generic tld text: https://twitter.ford expected: - https://twitter.ford - description: football is a valid generic tld text: https://twitter.football expected: - https://twitter.football - description: foodnetwork is a valid generic tld text: https://twitter.foodnetwork expected: - https://twitter.foodnetwork - description: food is a valid generic tld text: https://twitter.food expected: - https://twitter.food - description: foo is a valid generic tld text: https://twitter.foo expected: - https://twitter.foo - description: fly is a valid generic tld text: https://twitter.fly expected: - https://twitter.fly - description: flsmidth is a valid generic tld text: https://twitter.flsmidth expected: - https://twitter.flsmidth - description: flowers is a valid generic tld text: https://twitter.flowers expected: - https://twitter.flowers - description: florist is a valid generic tld text: https://twitter.florist expected: - https://twitter.florist - description: flir is a valid generic tld text: https://twitter.flir expected: - https://twitter.flir - description: flights is a valid generic tld text: https://twitter.flights expected: - https://twitter.flights - description: flickr is a valid generic tld text: https://twitter.flickr expected: - https://twitter.flickr - description: fitness is a valid generic tld text: https://twitter.fitness expected: - https://twitter.fitness - description: fit is a valid generic tld text: https://twitter.fit expected: - https://twitter.fit - description: fishing is a valid generic tld text: https://twitter.fishing expected: - https://twitter.fishing - description: fish is a valid generic tld text: https://twitter.fish expected: - https://twitter.fish - description: firmdale is a valid generic tld text: https://twitter.firmdale expected: - https://twitter.firmdale - description: firestone is a valid generic 
tld text: https://twitter.firestone expected: - https://twitter.firestone - description: fire is a valid generic tld text: https://twitter.fire expected: - https://twitter.fire - description: financial is a valid generic tld text: https://twitter.financial expected: - https://twitter.financial - description: finance is a valid generic tld text: https://twitter.finance expected: - https://twitter.finance - description: final is a valid generic tld text: https://twitter.final expected: - https://twitter.final - description: film is a valid generic tld text: https://twitter.film expected: - https://twitter.film - description: fido is a valid generic tld text: https://twitter.fido expected: - https://twitter.fido - description: fidelity is a valid generic tld text: https://twitter.fidelity expected: - https://twitter.fidelity - description: fiat is a valid generic tld text: https://twitter.fiat expected: - https://twitter.fiat - description: ferrero is a valid generic tld text: https://twitter.ferrero expected: - https://twitter.ferrero - description: ferrari is a valid generic tld text: https://twitter.ferrari expected: - https://twitter.ferrari - description: feedback is a valid generic tld text: https://twitter.feedback expected: - https://twitter.feedback - description: fedex is a valid generic tld text: https://twitter.fedex expected: - https://twitter.fedex - description: fast is a valid generic tld text: https://twitter.fast expected: - https://twitter.fast - description: fashion is a valid generic tld text: https://twitter.fashion expected: - https://twitter.fashion - description: farmers is a valid generic tld text: https://twitter.farmers expected: - https://twitter.farmers - description: farm is a valid generic tld text: https://twitter.farm expected: - https://twitter.farm - description: fans is a valid generic tld text: https://twitter.fans expected: - https://twitter.fans - description: fan is a valid generic tld text: https://twitter.fan expected: - 
https://twitter.fan - description: family is a valid generic tld text: https://twitter.family expected: - https://twitter.family - description: faith is a valid generic tld text: https://twitter.faith expected: - https://twitter.faith - description: fairwinds is a valid generic tld text: https://twitter.fairwinds expected: - https://twitter.fairwinds - description: fail is a valid generic tld text: https://twitter.fail expected: - https://twitter.fail - description: fage is a valid generic tld text: https://twitter.fage expected: - https://twitter.fage - description: extraspace is a valid generic tld text: https://twitter.extraspace expected: - https://twitter.extraspace - description: express is a valid generic tld text: https://twitter.express expected: - https://twitter.express - description: exposed is a valid generic tld text: https://twitter.exposed expected: - https://twitter.exposed - description: expert is a valid generic tld text: https://twitter.expert expected: - https://twitter.expert - description: exchange is a valid generic tld text: https://twitter.exchange expected: - https://twitter.exchange - description: everbank is a valid generic tld text: https://twitter.everbank expected: - https://twitter.everbank - description: events is a valid generic tld text: https://twitter.events expected: - https://twitter.events - description: eus is a valid generic tld text: https://twitter.eus expected: - https://twitter.eus - description: eurovision is a valid generic tld text: https://twitter.eurovision expected: - https://twitter.eurovision - description: etisalat is a valid generic tld text: https://twitter.etisalat expected: - https://twitter.etisalat - description: esurance is a valid generic tld text: https://twitter.esurance expected: - https://twitter.esurance - description: estate is a valid generic tld text: https://twitter.estate expected: - https://twitter.estate - description: esq is a valid generic tld text: https://twitter.esq expected: - 
https://twitter.esq - description: erni is a valid generic tld text: https://twitter.erni expected: - https://twitter.erni - description: ericsson is a valid generic tld text: https://twitter.ericsson expected: - https://twitter.ericsson - description: equipment is a valid generic tld text: https://twitter.equipment expected: - https://twitter.equipment - description: epson is a valid generic tld text: https://twitter.epson expected: - https://twitter.epson - description: epost is a valid generic tld text: https://twitter.epost expected: - https://twitter.epost - description: enterprises is a valid generic tld text: https://twitter.enterprises expected: - https://twitter.enterprises - description: engineering is a valid generic tld text: https://twitter.engineering expected: - https://twitter.engineering - description: engineer is a valid generic tld text: https://twitter.engineer expected: - https://twitter.engineer - description: energy is a valid generic tld text: https://twitter.energy expected: - https://twitter.energy - description: emerck is a valid generic tld text: https://twitter.emerck expected: - https://twitter.emerck - description: email is a valid generic tld text: https://twitter.email expected: - https://twitter.email - description: education is a valid generic tld text: https://twitter.education expected: - https://twitter.education - description: edu is a valid generic tld text: https://twitter.edu expected: - https://twitter.edu - description: edeka is a valid generic tld text: https://twitter.edeka expected: - https://twitter.edeka - description: eco is a valid generic tld text: https://twitter.eco expected: - https://twitter.eco - description: eat is a valid generic tld text: https://twitter.eat expected: - https://twitter.eat - description: earth is a valid generic tld text: https://twitter.earth expected: - https://twitter.earth - description: dvr is a valid generic tld text: https://twitter.dvr expected: - https://twitter.dvr - description: 
dvag is a valid generic tld text: https://twitter.dvag expected: - https://twitter.dvag - description: durban is a valid generic tld text: https://twitter.durban expected: - https://twitter.durban - description: dupont is a valid generic tld text: https://twitter.dupont expected: - https://twitter.dupont - description: duns is a valid generic tld text: https://twitter.duns expected: - https://twitter.duns - description: dunlop is a valid generic tld text: https://twitter.dunlop expected: - https://twitter.dunlop - description: duck is a valid generic tld text: https://twitter.duck expected: - https://twitter.duck - description: dubai is a valid generic tld text: https://twitter.dubai expected: - https://twitter.dubai - description: dtv is a valid generic tld text: https://twitter.dtv expected: - https://twitter.dtv - description: drive is a valid generic tld text: https://twitter.drive expected: - https://twitter.drive - description: download is a valid generic tld text: https://twitter.download expected: - https://twitter.download - description: dot is a valid generic tld text: https://twitter.dot expected: - https://twitter.dot - description: doosan is a valid generic tld text: https://twitter.doosan expected: - https://twitter.doosan - description: domains is a valid generic tld text: https://twitter.domains expected: - https://twitter.domains - description: doha is a valid generic tld text: https://twitter.doha expected: - https://twitter.doha - description: dog is a valid generic tld text: https://twitter.dog expected: - https://twitter.dog - description: dodge is a valid generic tld text: https://twitter.dodge expected: - https://twitter.dodge - description: doctor is a valid generic tld text: https://twitter.doctor expected: - https://twitter.doctor - description: docs is a valid generic tld text: https://twitter.docs expected: - https://twitter.docs - description: dnp is a valid generic tld text: https://twitter.dnp expected: - https://twitter.dnp - 
description: diy is a valid generic tld text: https://twitter.diy expected: - https://twitter.diy - description: dish is a valid generic tld text: https://twitter.dish expected: - https://twitter.dish - description: discover is a valid generic tld text: https://twitter.discover expected: - https://twitter.discover - description: discount is a valid generic tld text: https://twitter.discount expected: - https://twitter.discount - description: directory is a valid generic tld text: https://twitter.directory expected: - https://twitter.directory - description: direct is a valid generic tld text: https://twitter.direct expected: - https://twitter.direct - description: digital is a valid generic tld text: https://twitter.digital expected: - https://twitter.digital - description: diet is a valid generic tld text: https://twitter.diet expected: - https://twitter.diet - description: diamonds is a valid generic tld text: https://twitter.diamonds expected: - https://twitter.diamonds - description: dhl is a valid generic tld text: https://twitter.dhl expected: - https://twitter.dhl - description: dev is a valid generic tld text: https://twitter.dev expected: - https://twitter.dev - description: design is a valid generic tld text: https://twitter.design expected: - https://twitter.design - description: desi is a valid generic tld text: https://twitter.desi expected: - https://twitter.desi - description: dentist is a valid generic tld text: https://twitter.dentist expected: - https://twitter.dentist - description: dental is a valid generic tld text: https://twitter.dental expected: - https://twitter.dental - description: democrat is a valid generic tld text: https://twitter.democrat expected: - https://twitter.democrat - description: delta is a valid generic tld text: https://twitter.delta expected: - https://twitter.delta - description: deloitte is a valid generic tld text: https://twitter.deloitte expected: - https://twitter.deloitte - description: dell is a valid generic tld 
text: https://twitter.dell expected: - https://twitter.dell - description: delivery is a valid generic tld text: https://twitter.delivery expected: - https://twitter.delivery - description: degree is a valid generic tld text: https://twitter.degree expected: - https://twitter.degree - description: deals is a valid generic tld text: https://twitter.deals expected: - https://twitter.deals - description: dealer is a valid generic tld text: https://twitter.dealer expected: - https://twitter.dealer - description: deal is a valid generic tld text: https://twitter.deal expected: - https://twitter.deal - description: dds is a valid generic tld text: https://twitter.dds expected: - https://twitter.dds - description: dclk is a valid generic tld text: https://twitter.dclk expected: - https://twitter.dclk - description: day is a valid generic tld text: https://twitter.day expected: - https://twitter.day - description: datsun is a valid generic tld text: https://twitter.datsun expected: - https://twitter.datsun - description: dating is a valid generic tld text: https://twitter.dating expected: - https://twitter.dating - description: date is a valid generic tld text: https://twitter.date expected: - https://twitter.date - description: data is a valid generic tld text: https://twitter.data expected: - https://twitter.data - description: dance is a valid generic tld text: https://twitter.dance expected: - https://twitter.dance - description: dad is a valid generic tld text: https://twitter.dad expected: - https://twitter.dad - description: dabur is a valid generic tld text: https://twitter.dabur expected: - https://twitter.dabur - description: cyou is a valid generic tld text: https://twitter.cyou expected: - https://twitter.cyou - description: cymru is a valid generic tld text: https://twitter.cymru expected: - https://twitter.cymru - description: cuisinella is a valid generic tld text: https://twitter.cuisinella expected: - https://twitter.cuisinella - description: csc is a 
valid generic tld text: https://twitter.csc expected: - https://twitter.csc - description: cruises is a valid generic tld text: https://twitter.cruises expected: - https://twitter.cruises - description: cruise is a valid generic tld text: https://twitter.cruise expected: - https://twitter.cruise - description: crs is a valid generic tld text: https://twitter.crs expected: - https://twitter.crs - description: crown is a valid generic tld text: https://twitter.crown expected: - https://twitter.crown - description: cricket is a valid generic tld text: https://twitter.cricket expected: - https://twitter.cricket - description: creditunion is a valid generic tld text: https://twitter.creditunion expected: - https://twitter.creditunion - description: creditcard is a valid generic tld text: https://twitter.creditcard expected: - https://twitter.creditcard - description: credit is a valid generic tld text: https://twitter.credit expected: - https://twitter.credit - description: courses is a valid generic tld text: https://twitter.courses expected: - https://twitter.courses - description: coupons is a valid generic tld text: https://twitter.coupons expected: - https://twitter.coupons - description: coupon is a valid generic tld text: https://twitter.coupon expected: - https://twitter.coupon - description: country is a valid generic tld text: https://twitter.country expected: - https://twitter.country - description: corsica is a valid generic tld text: https://twitter.corsica expected: - https://twitter.corsica - description: coop is a valid generic tld text: https://twitter.coop expected: - https://twitter.coop - description: cool is a valid generic tld text: https://twitter.cool expected: - https://twitter.cool - description: cookingchannel is a valid generic tld text: https://twitter.cookingchannel expected: - https://twitter.cookingchannel - description: cooking is a valid generic tld text: https://twitter.cooking expected: - https://twitter.cooking - description: 
contractors is a valid generic tld text: https://twitter.contractors expected: - https://twitter.contractors - description: contact is a valid generic tld text: https://twitter.contact expected: - https://twitter.contact - description: consulting is a valid generic tld text: https://twitter.consulting expected: - https://twitter.consulting - description: construction is a valid generic tld text: https://twitter.construction expected: - https://twitter.construction - description: condos is a valid generic tld text: https://twitter.condos expected: - https://twitter.condos - description: comsec is a valid generic tld text: https://twitter.comsec expected: - https://twitter.comsec - description: computer is a valid generic tld text: https://twitter.computer expected: - https://twitter.computer - description: compare is a valid generic tld text: https://twitter.compare expected: - https://twitter.compare - description: company is a valid generic tld text: https://twitter.company expected: - https://twitter.company - description: community is a valid generic tld text: https://twitter.community expected: - https://twitter.community - description: commbank is a valid generic tld text: https://twitter.commbank expected: - https://twitter.commbank - description: comcast is a valid generic tld text: https://twitter.comcast expected: - https://twitter.comcast - description: com is a valid generic tld text: https://twitter.com expected: - https://twitter.com - description: cologne is a valid generic tld text: https://twitter.cologne expected: - https://twitter.cologne - description: college is a valid generic tld text: https://twitter.college expected: - https://twitter.college - description: coffee is a valid generic tld text: https://twitter.coffee expected: - https://twitter.coffee - description: codes is a valid generic tld text: https://twitter.codes expected: - https://twitter.codes - description: coach is a valid generic tld text: https://twitter.coach expected: - 
https://twitter.coach - description: clubmed is a valid generic tld text: https://twitter.clubmed expected: - https://twitter.clubmed - description: club is a valid generic tld text: https://twitter.club expected: - https://twitter.club - description: cloud is a valid generic tld text: https://twitter.cloud expected: - https://twitter.cloud - description: clothing is a valid generic tld text: https://twitter.clothing expected: - https://twitter.clothing - description: clinique is a valid generic tld text: https://twitter.clinique expected: - https://twitter.clinique - description: clinic is a valid generic tld text: https://twitter.clinic expected: - https://twitter.clinic - description: click is a valid generic tld text: https://twitter.click expected: - https://twitter.click - description: cleaning is a valid generic tld text: https://twitter.cleaning expected: - https://twitter.cleaning - description: claims is a valid generic tld text: https://twitter.claims expected: - https://twitter.claims - description: cityeats is a valid generic tld text: https://twitter.cityeats expected: - https://twitter.cityeats - description: city is a valid generic tld text: https://twitter.city expected: - https://twitter.city - description: citic is a valid generic tld text: https://twitter.citic expected: - https://twitter.citic - description: citi is a valid generic tld text: https://twitter.citi expected: - https://twitter.citi - description: citadel is a valid generic tld text: https://twitter.citadel expected: - https://twitter.citadel - description: cisco is a valid generic tld text: https://twitter.cisco expected: - https://twitter.cisco - description: circle is a valid generic tld text: https://twitter.circle expected: - https://twitter.circle - description: cipriani is a valid generic tld text: https://twitter.cipriani expected: - https://twitter.cipriani - description: church is a valid generic tld text: https://twitter.church expected: - https://twitter.church - 
description: chrysler is a valid generic tld text: https://twitter.chrysler expected: - https://twitter.chrysler - description: chrome is a valid generic tld text: https://twitter.chrome expected: - https://twitter.chrome - description: christmas is a valid generic tld text: https://twitter.christmas expected: - https://twitter.christmas - description: chloe is a valid generic tld text: https://twitter.chloe expected: - https://twitter.chloe - description: chintai is a valid generic tld text: https://twitter.chintai expected: - https://twitter.chintai - description: cheap is a valid generic tld text: https://twitter.cheap expected: - https://twitter.cheap - description: chat is a valid generic tld text: https://twitter.chat expected: - https://twitter.chat - description: chase is a valid generic tld text: https://twitter.chase expected: - https://twitter.chase - description: channel is a valid generic tld text: https://twitter.channel expected: - https://twitter.channel - description: chanel is a valid generic tld text: https://twitter.chanel expected: - https://twitter.chanel - description: cfd is a valid generic tld text: https://twitter.cfd expected: - https://twitter.cfd - description: cfa is a valid generic tld text: https://twitter.cfa expected: - https://twitter.cfa - description: cern is a valid generic tld text: https://twitter.cern expected: - https://twitter.cern - description: ceo is a valid generic tld text: https://twitter.ceo expected: - https://twitter.ceo - description: center is a valid generic tld text: https://twitter.center expected: - https://twitter.center - description: ceb is a valid generic tld text: https://twitter.ceb expected: - https://twitter.ceb - description: cbs is a valid generic tld text: https://twitter.cbs expected: - https://twitter.cbs - description: cbre is a valid generic tld text: https://twitter.cbre expected: - https://twitter.cbre - description: cbn is a valid generic tld text: https://twitter.cbn expected: - 
https://twitter.cbn - description: cba is a valid generic tld text: https://twitter.cba expected: - https://twitter.cba - description: catholic is a valid generic tld text: https://twitter.catholic expected: - https://twitter.catholic - description: catering is a valid generic tld text: https://twitter.catering expected: - https://twitter.catering - description: cat is a valid generic tld text: https://twitter.cat expected: - https://twitter.cat - description: casino is a valid generic tld text: https://twitter.casino expected: - https://twitter.casino - description: cash is a valid generic tld text: https://twitter.cash expected: - https://twitter.cash - description: caseih is a valid generic tld text: https://twitter.caseih expected: - https://twitter.caseih - description: case is a valid generic tld text: https://twitter.case expected: - https://twitter.case - description: casa is a valid generic tld text: https://twitter.casa expected: - https://twitter.casa - description: cartier is a valid generic tld text: https://twitter.cartier expected: - https://twitter.cartier - description: cars is a valid generic tld text: https://twitter.cars expected: - https://twitter.cars - description: careers is a valid generic tld text: https://twitter.careers expected: - https://twitter.careers - description: career is a valid generic tld text: https://twitter.career expected: - https://twitter.career - description: care is a valid generic tld text: https://twitter.care expected: - https://twitter.care - description: cards is a valid generic tld text: https://twitter.cards expected: - https://twitter.cards - description: caravan is a valid generic tld text: https://twitter.caravan expected: - https://twitter.caravan - description: car is a valid generic tld text: https://twitter.car expected: - https://twitter.car - description: capitalone is a valid generic tld text: https://twitter.capitalone expected: - https://twitter.capitalone - description: capital is a valid generic 
tld text: https://twitter.capital expected: - https://twitter.capital - description: capetown is a valid generic tld text: https://twitter.capetown expected: - https://twitter.capetown - description: canon is a valid generic tld text: https://twitter.canon expected: - https://twitter.canon - description: cancerresearch is a valid generic tld text: https://twitter.cancerresearch expected: - https://twitter.cancerresearch - description: camp is a valid generic tld text: https://twitter.camp expected: - https://twitter.camp - description: camera is a valid generic tld text: https://twitter.camera expected: - https://twitter.camera - description: cam is a valid generic tld text: https://twitter.cam expected: - https://twitter.cam - description: calvinklein is a valid generic tld text: https://twitter.calvinklein expected: - https://twitter.calvinklein - description: call is a valid generic tld text: https://twitter.call expected: - https://twitter.call - description: cal is a valid generic tld text: https://twitter.cal expected: - https://twitter.cal - description: cafe is a valid generic tld text: https://twitter.cafe expected: - https://twitter.cafe - description: cab is a valid generic tld text: https://twitter.cab expected: - https://twitter.cab - description: bzh is a valid generic tld text: https://twitter.bzh expected: - https://twitter.bzh - description: buzz is a valid generic tld text: https://twitter.buzz expected: - https://twitter.buzz - description: buy is a valid generic tld text: https://twitter.buy expected: - https://twitter.buy - description: business is a valid generic tld text: https://twitter.business expected: - https://twitter.business - description: builders is a valid generic tld text: https://twitter.builders expected: - https://twitter.builders - description: build is a valid generic tld text: https://twitter.build expected: - https://twitter.build - description: bugatti is a valid generic tld text: https://twitter.bugatti expected: - 
https://twitter.bugatti - description: budapest is a valid generic tld text: https://twitter.budapest expected: - https://twitter.budapest - description: brussels is a valid generic tld text: https://twitter.brussels expected: - https://twitter.brussels - description: brother is a valid generic tld text: https://twitter.brother expected: - https://twitter.brother - description: broker is a valid generic tld text: https://twitter.broker expected: - https://twitter.broker - description: broadway is a valid generic tld text: https://twitter.broadway expected: - https://twitter.broadway - description: bridgestone is a valid generic tld text: https://twitter.bridgestone expected: - https://twitter.bridgestone - description: bradesco is a valid generic tld text: https://twitter.bradesco expected: - https://twitter.bradesco - description: box is a valid generic tld text: https://twitter.box expected: - https://twitter.box - description: boutique is a valid generic tld text: https://twitter.boutique expected: - https://twitter.boutique - description: bot is a valid generic tld text: https://twitter.bot expected: - https://twitter.bot - description: boston is a valid generic tld text: https://twitter.boston expected: - https://twitter.boston - description: bostik is a valid generic tld text: https://twitter.bostik expected: - https://twitter.bostik - description: bosch is a valid generic tld text: https://twitter.bosch expected: - https://twitter.bosch - description: boots is a valid generic tld text: https://twitter.boots expected: - https://twitter.boots - description: booking is a valid generic tld text: https://twitter.booking expected: - https://twitter.booking - description: book is a valid generic tld text: https://twitter.book expected: - https://twitter.book - description: boo is a valid generic tld text: https://twitter.boo expected: - https://twitter.boo - description: bond is a valid generic tld text: https://twitter.bond expected: - https://twitter.bond - 
description: bom is a valid generic tld text: https://twitter.bom expected: - https://twitter.bom - description: bofa is a valid generic tld text: https://twitter.bofa expected: - https://twitter.bofa - description: boehringer is a valid generic tld text: https://twitter.boehringer expected: - https://twitter.boehringer - description: boats is a valid generic tld text: https://twitter.boats expected: - https://twitter.boats - description: bnpparibas is a valid generic tld text: https://twitter.bnpparibas expected: - https://twitter.bnpparibas - description: bnl is a valid generic tld text: https://twitter.bnl expected: - https://twitter.bnl - description: bmw is a valid generic tld text: https://twitter.bmw expected: - https://twitter.bmw - description: bms is a valid generic tld text: https://twitter.bms expected: - https://twitter.bms - description: blue is a valid generic tld text: https://twitter.blue expected: - https://twitter.blue - description: bloomberg is a valid generic tld text: https://twitter.bloomberg expected: - https://twitter.bloomberg - description: blog is a valid generic tld text: https://twitter.blog expected: - https://twitter.blog - description: blockbuster is a valid generic tld text: https://twitter.blockbuster expected: - https://twitter.blockbuster - description: blanco is a valid generic tld text: https://twitter.blanco expected: - https://twitter.blanco - description: blackfriday is a valid generic tld text: https://twitter.blackfriday expected: - https://twitter.blackfriday - description: black is a valid generic tld text: https://twitter.black expected: - https://twitter.black - description: biz is a valid generic tld text: https://twitter.biz expected: - https://twitter.biz - description: bio is a valid generic tld text: https://twitter.bio expected: - https://twitter.bio - description: bingo is a valid generic tld text: https://twitter.bingo expected: - https://twitter.bingo - description: bing is a valid generic tld text: 
https://twitter.bing expected: - https://twitter.bing - description: bike is a valid generic tld text: https://twitter.bike expected: - https://twitter.bike - description: bid is a valid generic tld text: https://twitter.bid expected: - https://twitter.bid - description: bible is a valid generic tld text: https://twitter.bible expected: - https://twitter.bible - description: bharti is a valid generic tld text: https://twitter.bharti expected: - https://twitter.bharti - description: bet is a valid generic tld text: https://twitter.bet expected: - https://twitter.bet - description: bestbuy is a valid generic tld text: https://twitter.bestbuy expected: - https://twitter.bestbuy - description: best is a valid generic tld text: https://twitter.best expected: - https://twitter.best - description: berlin is a valid generic tld text: https://twitter.berlin expected: - https://twitter.berlin - description: bentley is a valid generic tld text: https://twitter.bentley expected: - https://twitter.bentley - description: beer is a valid generic tld text: https://twitter.beer expected: - https://twitter.beer - description: beauty is a valid generic tld text: https://twitter.beauty expected: - https://twitter.beauty - description: beats is a valid generic tld text: https://twitter.beats expected: - https://twitter.beats - description: bcn is a valid generic tld text: https://twitter.bcn expected: - https://twitter.bcn - description: bcg is a valid generic tld text: https://twitter.bcg expected: - https://twitter.bcg - description: bbva is a valid generic tld text: https://twitter.bbva expected: - https://twitter.bbva - description: bbt is a valid generic tld text: https://twitter.bbt expected: - https://twitter.bbt - description: bbc is a valid generic tld text: https://twitter.bbc expected: - https://twitter.bbc - description: bayern is a valid generic tld text: https://twitter.bayern expected: - https://twitter.bayern - description: bauhaus is a valid generic tld text: 
https://twitter.bauhaus expected: - https://twitter.bauhaus - description: basketball is a valid generic tld text: https://twitter.basketball expected: - https://twitter.basketball - description: baseball is a valid generic tld text: https://twitter.baseball expected: - https://twitter.baseball - description: bargains is a valid generic tld text: https://twitter.bargains expected: - https://twitter.bargains - description: barefoot is a valid generic tld text: https://twitter.barefoot expected: - https://twitter.barefoot - description: barclays is a valid generic tld text: https://twitter.barclays expected: - https://twitter.barclays - description: barclaycard is a valid generic tld text: https://twitter.barclaycard expected: - https://twitter.barclaycard - description: barcelona is a valid generic tld text: https://twitter.barcelona expected: - https://twitter.barcelona - description: bar is a valid generic tld text: https://twitter.bar expected: - https://twitter.bar - description: bank is a valid generic tld text: https://twitter.bank expected: - https://twitter.bank - description: band is a valid generic tld text: https://twitter.band expected: - https://twitter.band - description: bananarepublic is a valid generic tld text: https://twitter.bananarepublic expected: - https://twitter.bananarepublic - description: banamex is a valid generic tld text: https://twitter.banamex expected: - https://twitter.banamex - description: baidu is a valid generic tld text: https://twitter.baidu expected: - https://twitter.baidu - description: baby is a valid generic tld text: https://twitter.baby expected: - https://twitter.baby - description: azure is a valid generic tld text: https://twitter.azure expected: - https://twitter.azure - description: axa is a valid generic tld text: https://twitter.axa expected: - https://twitter.axa - description: aws is a valid generic tld text: https://twitter.aws expected: - https://twitter.aws - description: avianca is a valid generic tld 
text: https://twitter.avianca expected: - https://twitter.avianca - description: autos is a valid generic tld text: https://twitter.autos expected: - https://twitter.autos - description: auto is a valid generic tld text: https://twitter.auto expected: - https://twitter.auto - description: author is a valid generic tld text: https://twitter.author expected: - https://twitter.author - description: auspost is a valid generic tld text: https://twitter.auspost expected: - https://twitter.auspost - description: audio is a valid generic tld text: https://twitter.audio expected: - https://twitter.audio - description: audible is a valid generic tld text: https://twitter.audible expected: - https://twitter.audible - description: audi is a valid generic tld text: https://twitter.audi expected: - https://twitter.audi - description: auction is a valid generic tld text: https://twitter.auction expected: - https://twitter.auction - description: attorney is a valid generic tld text: https://twitter.attorney expected: - https://twitter.attorney - description: athleta is a valid generic tld text: https://twitter.athleta expected: - https://twitter.athleta - description: associates is a valid generic tld text: https://twitter.associates expected: - https://twitter.associates - description: asia is a valid generic tld text: https://twitter.asia expected: - https://twitter.asia - description: asda is a valid generic tld text: https://twitter.asda expected: - https://twitter.asda - description: arte is a valid generic tld text: https://twitter.arte expected: - https://twitter.arte - description: art is a valid generic tld text: https://twitter.art expected: - https://twitter.art - description: arpa is a valid generic tld text: https://twitter.arpa expected: - https://twitter.arpa - description: army is a valid generic tld text: https://twitter.army expected: - https://twitter.army - description: archi is a valid generic tld text: https://twitter.archi expected: - https://twitter.archi - 
description: aramco is a valid generic tld text: https://twitter.aramco expected: - https://twitter.aramco - description: arab is a valid generic tld text: https://twitter.arab expected: - https://twitter.arab - description: aquarelle is a valid generic tld text: https://twitter.aquarelle expected: - https://twitter.aquarelle - description: apple is a valid generic tld text: https://twitter.apple expected: - https://twitter.apple - description: app is a valid generic tld text: https://twitter.app expected: - https://twitter.app - description: apartments is a valid generic tld text: https://twitter.apartments expected: - https://twitter.apartments - description: aol is a valid generic tld text: https://twitter.aol expected: - https://twitter.aol - description: anz is a valid generic tld text: https://twitter.anz expected: - https://twitter.anz - description: anquan is a valid generic tld text: https://twitter.anquan expected: - https://twitter.anquan - description: android is a valid generic tld text: https://twitter.android expected: - https://twitter.android - description: analytics is a valid generic tld text: https://twitter.analytics expected: - https://twitter.analytics - description: amsterdam is a valid generic tld text: https://twitter.amsterdam expected: - https://twitter.amsterdam - description: amica is a valid generic tld text: https://twitter.amica expected: - https://twitter.amica - description: amfam is a valid generic tld text: https://twitter.amfam expected: - https://twitter.amfam - description: amex is a valid generic tld text: https://twitter.amex expected: - https://twitter.amex - description: americanfamily is a valid generic tld text: https://twitter.americanfamily expected: - https://twitter.americanfamily - description: americanexpress is a valid generic tld text: https://twitter.americanexpress expected: - https://twitter.americanexpress - description: alstom is a valid generic tld text: https://twitter.alstom expected: - 
https://twitter.alstom - description: alsace is a valid generic tld text: https://twitter.alsace expected: - https://twitter.alsace - description: ally is a valid generic tld text: https://twitter.ally expected: - https://twitter.ally - description: allstate is a valid generic tld text: https://twitter.allstate expected: - https://twitter.allstate - description: allfinanz is a valid generic tld text: https://twitter.allfinanz expected: - https://twitter.allfinanz - description: alipay is a valid generic tld text: https://twitter.alipay expected: - https://twitter.alipay - description: alibaba is a valid generic tld text: https://twitter.alibaba expected: - https://twitter.alibaba - description: alfaromeo is a valid generic tld text: https://twitter.alfaromeo expected: - https://twitter.alfaromeo - description: akdn is a valid generic tld text: https://twitter.akdn expected: - https://twitter.akdn - description: airtel is a valid generic tld text: https://twitter.airtel expected: - https://twitter.airtel - description: airforce is a valid generic tld text: https://twitter.airforce expected: - https://twitter.airforce - description: airbus is a valid generic tld text: https://twitter.airbus expected: - https://twitter.airbus - description: aigo is a valid generic tld text: https://twitter.aigo expected: - https://twitter.aigo - description: aig is a valid generic tld text: https://twitter.aig expected: - https://twitter.aig - description: agency is a valid generic tld text: https://twitter.agency expected: - https://twitter.agency - description: agakhan is a valid generic tld text: https://twitter.agakhan expected: - https://twitter.agakhan - description: africa is a valid generic tld text: https://twitter.africa expected: - https://twitter.africa - description: afl is a valid generic tld text: https://twitter.afl expected: - https://twitter.afl - description: afamilycompany is a valid generic tld text: https://twitter.afamilycompany expected: - 
https://twitter.afamilycompany - description: aetna is a valid generic tld text: https://twitter.aetna expected: - https://twitter.aetna - description: aero is a valid generic tld text: https://twitter.aero expected: - https://twitter.aero - description: aeg is a valid generic tld text: https://twitter.aeg expected: - https://twitter.aeg - description: adult is a valid generic tld text: https://twitter.adult expected: - https://twitter.adult - description: ads is a valid generic tld text: https://twitter.ads expected: - https://twitter.ads - description: adac is a valid generic tld text: https://twitter.adac expected: - https://twitter.adac - description: actor is a valid generic tld text: https://twitter.actor expected: - https://twitter.actor - description: active is a valid generic tld text: https://twitter.active expected: - https://twitter.active - description: aco is a valid generic tld text: https://twitter.aco expected: - https://twitter.aco - description: accountants is a valid generic tld text: https://twitter.accountants expected: - https://twitter.accountants - description: accountant is a valid generic tld text: https://twitter.accountant expected: - https://twitter.accountant - description: accenture is a valid generic tld text: https://twitter.accenture expected: - https://twitter.accenture - description: academy is a valid generic tld text: https://twitter.academy expected: - https://twitter.academy - description: abudhabi is a valid generic tld text: https://twitter.abudhabi expected: - https://twitter.abudhabi - description: abogado is a valid generic tld text: https://twitter.abogado expected: - https://twitter.abogado - description: able is a valid generic tld text: https://twitter.able expected: - https://twitter.able - description: abc is a valid generic tld text: https://twitter.abc expected: - https://twitter.abc - description: abbvie is a valid generic tld text: https://twitter.abbvie expected: - https://twitter.abbvie - description: abbott 
is a valid generic tld text: https://twitter.abbott expected: - https://twitter.abbott - description: abb is a valid generic tld text: https://twitter.abb expected: - https://twitter.abb - description: abarth is a valid generic tld text: https://twitter.abarth expected: - https://twitter.abarth - description: aarp is a valid generic tld text: https://twitter.aarp expected: - https://twitter.aarp - description: aaa is a valid generic tld text: https://twitter.aaa expected: - https://twitter.aaa - description: onion is a valid generic tld text: https://twitter.onion expected: - https://twitter.onion twitter-text-1.14.7/test/twitter-text-conformance/000775 001751 001751 00000000000 13126461251 022224 5ustar00srudsrud000000 000000 twitter-text-1.14.7/test/twitter-text-conformance/TldLists.java000664 001751 001751 00000051323 13126461251 024635 0ustar00srudsrud000000 000000 // Auto-generated by conformance/Rakefile package com.twitter; import java.util.Arrays; import java.util.List; public class TldLists { public static final List GTLDS = Arrays.asList( "삼성", "닷컴", "닷넷", "香格里拉", "餐厅", "食品", "飞利浦", "電訊盈科", "集团", "通販", "购物", "谷歌", "诺基亚", "联通", "网络", "网站", "网店", "网址", "组织机构", "移动", "珠宝", "点看", "游戏", "淡马锡", "机构", "書籍", "时尚", "新闻", "政府", "政务", "手表", "手机", "我爱你", "慈善", "微博", "广东", "工行", "家電", "娱乐", "天主教", "大拿", "大众汽车", "在线", "嘉里大酒店", "嘉里", "商标", "商店", "商城", "公益", "公司", "八卦", "健康", "信息", "佛山", "企业", "中文网", "中信", "世界", "ポイント", "ファッション", "セール", "ストア", "コム", "グーグル", "クラウド", "みんな", "คอม", "संगठन", "नेट", "कॉम", "همراه", "موقع", "موبايلي", "كوم", "كاثوليك", "عرب", "شبكة", "بيتك", "بازار", "العليان", "ارامكو", "اتصالات", "ابوظبي", "קום", "сайт", "рус", "орг", "онлайн", "москва", "ком", "католик", "дети", "zuerich", "zone", "zippo", "zip", "zero", "zara", "zappos", "yun", "youtube", "you", "yokohama", "yoga", "yodobashi", "yandex", "yamaxun", "yahoo", "yachts", "xyz", "xxx", "xperia", "xin", "xihuan", "xfinity", "xerox", "xbox", "wtf", "wtc", "wow", "world", "works", "work", 
"woodside", "wolterskluwer", "wme", "winners", "wine", "windows", "win", "williamhill", "wiki", "wien", "whoswho", "weir", "weibo", "wedding", "wed", "website", "weber", "webcam", "weatherchannel", "weather", "watches", "watch", "warman", "wanggou", "wang", "walter", "walmart", "wales", "vuelos", "voyage", "voto", "voting", "vote", "volvo", "volkswagen", "vodka", "vlaanderen", "vivo", "viva", "vistaprint", "vista", "vision", "visa", "virgin", "vip", "vin", "villas", "viking", "vig", "video", "viajes", "vet", "versicherung", "vermögensberatung", "vermögensberater", "verisign", "ventures", "vegas", "vanguard", "vana", "vacations", "ups", "uol", "uno", "university", "unicom", "uconnect", "ubs", "ubank", "tvs", "tushu", "tunes", "tui", "tube", "trv", "trust", "travelersinsurance", "travelers", "travelchannel", "travel", "training", "trading", "trade", "toys", "toyota", "town", "tours", "total", "toshiba", "toray", "top", "tools", "tokyo", "today", "tmall", "tkmaxx", "tjx", "tjmaxx", "tirol", "tires", "tips", "tiffany", "tienda", "tickets", "tiaa", "theatre", "theater", "thd", "teva", "tennis", "temasek", "telefonica", "telecity", "tel", "technology", "tech", "team", "tdk", "tci", "taxi", "tax", "tattoo", "tatar", "tatamotors", "target", "taobao", "talk", "taipei", "tab", "systems", "symantec", "sydney", "swiss", "swiftcover", "swatch", "suzuki", "surgery", "surf", "support", "supply", "supplies", "sucks", "style", "study", "studio", "stream", "store", "storage", "stockholm", "stcgroup", "stc", "statoil", "statefarm", "statebank", "starhub", "star", "staples", "stada", "srt", "srl", "spreadbetting", "spot", "spiegel", "space", "soy", "sony", "song", "solutions", "solar", "sohu", "software", "softbank", "social", "soccer", "sncf", "smile", "smart", "sling", "skype", "sky", "skin", "ski", "site", "singles", "sina", "silk", "shriram", "showtime", "show", "shouji", "shopping", "shop", "shoes", "shiksha", "shia", "shell", "shaw", "sharp", "shangrila", "sfr", "sexy", "sex", 
"sew", "seven", "ses", "services", "sener", "select", "seek", "security", "secure", "seat", "search", "scot", "scor", "scjohnson", "science", "schwarz", "schule", "school", "scholarships", "schmidt", "schaeffler", "scb", "sca", "sbs", "sbi", "saxo", "save", "sas", "sarl", "sapo", "sap", "sanofi", "sandvikcoromant", "sandvik", "samsung", "samsclub", "salon", "sale", "sakura", "safety", "safe", "saarland", "ryukyu", "rwe", "run", "ruhr", "rugby", "rsvp", "room", "rogers", "rodeo", "rocks", "rocher", "rmit", "rip", "rio", "ril", "rightathome", "ricoh", "richardli", "rich", "rexroth", "reviews", "review", "restaurant", "rest", "republican", "report", "repair", "rentals", "rent", "ren", "reliance", "reit", "reisen", "reise", "rehab", "redumbrella", "redstone", "red", "recipes", "realty", "realtor", "realestate", "read", "raid", "radio", "racing", "qvc", "quest", "quebec", "qpon", "pwc", "pub", "prudential", "pru", "protection", "property", "properties", "promo", "progressive", "prof", "productions", "prod", "pro", "prime", "press", "praxi", "pramerica", "post", "porn", "politie", "poker", "pohl", "pnc", "plus", "plumbing", "playstation", "play", "place", "pizza", "pioneer", "pink", "ping", "pin", "pid", "pictures", "pictet", "pics", "piaget", "physio", "photos", "photography", "photo", "phone", "philips", "phd", "pharmacy", "pfizer", "pet", "pccw", "pay", "passagens", "party", "parts", "partners", "pars", "paris", "panerai", "panasonic", "pamperedchef", "page", "ovh", "ott", "otsuka", "osaka", "origins", "orientexpress", "organic", "org", "orange", "oracle", "open", "ooo", "onyourside", "online", "onl", "ong", "one", "omega", "ollo", "oldnavy", "olayangroup", "olayan", "okinawa", "office", "off", "observer", "obi", "nyc", "ntt", "nrw", "nra", "nowtv", "nowruz", "now", "norton", "northwesternmutual", "nokia", "nissay", "nissan", "ninja", "nikon", "nike", "nico", "nhk", "ngo", "nfl", "nexus", "nextdirect", "next", "news", "newholland", "new", "neustar", "network", 
"netflix", "netbank", "net", "nec", "nba", "navy", "natura", "nationwide", "name", "nagoya", "nadex", "nab", "mutuelle", "mutual", "museum", "mtr", "mtpc", "mtn", "msd", "movistar", "movie", "mov", "motorcycles", "moto", "moscow", "mortgage", "mormon", "mopar", "montblanc", "monster", "money", "monash", "mom", "moi", "moe", "moda", "mobily", "mobile", "mobi", "mma", "mls", "mlb", "mitsubishi", "mit", "mint", "mini", "mil", "microsoft", "miami", "metlife", "merckmsd", "meo", "menu", "men", "memorial", "meme", "melbourne", "meet", "media", "med", "mckinsey", "mcdonalds", "mcd", "mba", "mattel", "maserati", "marshalls", "marriott", "markets", "marketing", "market", "map", "mango", "management", "man", "makeup", "maison", "maif", "madrid", "macys", "luxury", "luxe", "lupin", "lundbeck", "ltda", "ltd", "lplfinancial", "lpl", "love", "lotto", "lotte", "london", "lol", "loft", "locus", "locker", "loans", "loan", "lixil", "living", "live", "lipsy", "link", "linde", "lincoln", "limo", "limited", "lilly", "like", "lighting", "lifestyle", "lifeinsurance", "life", "lidl", "liaison", "lgbt", "lexus", "lego", "legal", "lefrak", "leclerc", "lease", "lds", "lawyer", "law", "latrobe", "latino", "lat", "lasalle", "lanxess", "landrover", "land", "lancome", "lancia", "lancaster", "lamer", "lamborghini", "ladbrokes", "lacaixa", "kyoto", "kuokgroup", "kred", "krd", "kpn", "kpmg", "kosher", "komatsu", "koeln", "kiwi", "kitchen", "kindle", "kinder", "kim", "kia", "kfh", "kerryproperties", "kerrylogistics", "kerryhotels", "kddi", "kaufen", "juniper", "juegos", "jprs", "jpmorgan", "joy", "jot", "joburg", "jobs", "jnj", "jmp", "jll", "jlc", "jio", "jewelry", "jetzt", "jeep", "jcp", "jcb", "java", "jaguar", "iwc", "iveco", "itv", "itau", "istanbul", "ist", "ismaili", "iselect", "irish", "ipiranga", "investments", "intuit", "international", "intel", "int", "insure", "insurance", "institute", "ink", "ing", "info", "infiniti", "industries", "immobilien", "immo", "imdb", "imamat", "ikano", 
"iinet", "ifm", "ieee", "icu", "ice", "icbc", "ibm", "hyundai", "hyatt", "hughes", "htc", "hsbc", "how", "house", "hotmail", "hotels", "hoteles", "hot", "hosting", "host", "hospital", "horse", "honeywell", "honda", "homesense", "homes", "homegoods", "homedepot", "holiday", "holdings", "hockey", "hkt", "hiv", "hitachi", "hisamitsu", "hiphop", "hgtv", "hermes", "here", "helsinki", "help", "healthcare", "health", "hdfcbank", "hdfc", "hbo", "haus", "hangout", "hamburg", "hair", "guru", "guitars", "guide", "guge", "gucci", "guardian", "group", "grocery", "gripe", "green", "gratis", "graphics", "grainger", "gov", "got", "gop", "google", "goog", "goodyear", "goodhands", "goo", "golf", "goldpoint", "gold", "godaddy", "gmx", "gmo", "gmbh", "gmail", "globo", "global", "gle", "glass", "glade", "giving", "gives", "gifts", "gift", "ggee", "george", "genting", "gent", "gea", "gdn", "gbiz", "garden", "gap", "games", "game", "gallup", "gallo", "gallery", "gal", "fyi", "futbol", "furniture", "fund", "fun", "fujixerox", "fujitsu", "ftr", "frontier", "frontdoor", "frogans", "frl", "fresenius", "free", "fox", "foundation", "forum", "forsale", "forex", "ford", "football", "foodnetwork", "food", "foo", "fly", "flsmidth", "flowers", "florist", "flir", "flights", "flickr", "fitness", "fit", "fishing", "fish", "firmdale", "firestone", "fire", "financial", "finance", "final", "film", "fido", "fidelity", "fiat", "ferrero", "ferrari", "feedback", "fedex", "fast", "fashion", "farmers", "farm", "fans", "fan", "family", "faith", "fairwinds", "fail", "fage", "extraspace", "express", "exposed", "expert", "exchange", "everbank", "events", "eus", "eurovision", "etisalat", "esurance", "estate", "esq", "erni", "ericsson", "equipment", "epson", "epost", "enterprises", "engineering", "engineer", "energy", "emerck", "email", "education", "edu", "edeka", "eco", "eat", "earth", "dvr", "dvag", "durban", "dupont", "duns", "dunlop", "duck", "dubai", "dtv", "drive", "download", "dot", "doosan", "domains", 
"doha", "dog", "dodge", "doctor", "docs", "dnp", "diy", "dish", "discover", "discount", "directory", "direct", "digital", "diet", "diamonds", "dhl", "dev", "design", "desi", "dentist", "dental", "democrat", "delta", "deloitte", "dell", "delivery", "degree", "deals", "dealer", "deal", "dds", "dclk", "day", "datsun", "dating", "date", "data", "dance", "dad", "dabur", "cyou", "cymru", "cuisinella", "csc", "cruises", "cruise", "crs", "crown", "cricket", "creditunion", "creditcard", "credit", "courses", "coupons", "coupon", "country", "corsica", "coop", "cool", "cookingchannel", "cooking", "contractors", "contact", "consulting", "construction", "condos", "comsec", "computer", "compare", "company", "community", "commbank", "comcast", "com", "cologne", "college", "coffee", "codes", "coach", "clubmed", "club", "cloud", "clothing", "clinique", "clinic", "click", "cleaning", "claims", "cityeats", "city", "citic", "citi", "citadel", "cisco", "circle", "cipriani", "church", "chrysler", "chrome", "christmas", "chloe", "chintai", "cheap", "chat", "chase", "channel", "chanel", "cfd", "cfa", "cern", "ceo", "center", "ceb", "cbs", "cbre", "cbn", "cba", "catholic", "catering", "cat", "casino", "cash", "caseih", "case", "casa", "cartier", "cars", "careers", "career", "care", "cards", "caravan", "car", "capitalone", "capital", "capetown", "canon", "cancerresearch", "camp", "camera", "cam", "calvinklein", "call", "cal", "cafe", "cab", "bzh", "buzz", "buy", "business", "builders", "build", "bugatti", "budapest", "brussels", "brother", "broker", "broadway", "bridgestone", "bradesco", "box", "boutique", "bot", "boston", "bostik", "bosch", "boots", "booking", "book", "boo", "bond", "bom", "bofa", "boehringer", "boats", "bnpparibas", "bnl", "bmw", "bms", "blue", "bloomberg", "blog", "blockbuster", "blanco", "blackfriday", "black", "biz", "bio", "bingo", "bing", "bike", "bid", "bible", "bharti", "bet", "bestbuy", "best", "berlin", "bentley", "beer", "beauty", "beats", "bcn", "bcg", "bbva", 
"bbt", "bbc", "bayern", "bauhaus", "basketball", "baseball", "bargains", "barefoot", "barclays", "barclaycard", "barcelona", "bar", "bank", "band", "bananarepublic", "banamex", "baidu", "baby", "azure", "axa", "aws", "avianca", "autos", "auto", "author", "auspost", "audio", "audible", "audi", "auction", "attorney", "athleta", "associates", "asia", "asda", "arte", "art", "arpa", "army", "archi", "aramco", "arab", "aquarelle", "apple", "app", "apartments", "aol", "anz", "anquan", "android", "analytics", "amsterdam", "amica", "amfam", "amex", "americanfamily", "americanexpress", "alstom", "alsace", "ally", "allstate", "allfinanz", "alipay", "alibaba", "alfaromeo", "akdn", "airtel", "airforce", "airbus", "aigo", "aig", "agency", "agakhan", "africa", "afl", "afamilycompany", "aetna", "aero", "aeg", "adult", "ads", "adac", "actor", "active", "aco", "accountants", "accountant", "accenture", "academy", "abudhabi", "abogado", "able", "abc", "abbvie", "abbott", "abb", "abarth", "aarp", "aaa", "onion" ); public static final List CTLDS = Arrays.asList( "한국", "香港", "澳門", "新加坡", "台灣", "台湾", "中國", "中国", "გე", "ไทย", "ලංකා", "ഭാരതം", "ಭಾರತ", "భారత్", "சிங்கப்பூர்", "இலங்கை", "இந்தியா", "ଭାରତ", "ભારત", "ਭਾਰਤ", "ভাৰত", "ভারত", "বাংলা", "भारोत", "भारतम्", "भारत", "ڀارت", "پاکستان", "مليسيا", "مصر", "قطر", "فلسطين", "عمان", "عراق", "سورية", "سودان", "تونس", "بھارت", "بارت", "ایران", "امارات", "المغرب", "السعودية", "الجزائر", "الاردن", "հայ", "қаз", "укр", "срб", "рф", "мон", "мкд", "ею", "бел", "бг", "ελ", "zw", "zm", "za", "yt", "ye", "ws", "wf", "vu", "vn", "vi", "vg", "ve", "vc", "va", "uz", "uy", "us", "um", "uk", "ug", "ua", "tz", "tw", "tv", "tt", "tr", "tp", "to", "tn", "tm", "tl", "tk", "tj", "th", "tg", "tf", "td", "tc", "sz", "sy", "sx", "sv", "su", "st", "ss", "sr", "so", "sn", "sm", "sl", "sk", "sj", "si", "sh", "sg", "se", "sd", "sc", "sb", "sa", "rw", "ru", "rs", "ro", "re", "qa", "py", "pw", "pt", "ps", "pr", "pn", "pm", "pl", "pk", "ph", "pg", "pf", "pe", "pa", "om", 
"nz", "nu", "nr", "np", "no", "nl", "ni", "ng", "nf", "ne", "nc", "na", "mz", "my", "mx", "mw", "mv", "mu", "mt", "ms", "mr", "mq", "mp", "mo", "mn", "mm", "ml", "mk", "mh", "mg", "mf", "me", "md", "mc", "ma", "ly", "lv", "lu", "lt", "ls", "lr", "lk", "li", "lc", "lb", "la", "kz", "ky", "kw", "kr", "kp", "kn", "km", "ki", "kh", "kg", "ke", "jp", "jo", "jm", "je", "it", "is", "ir", "iq", "io", "in", "im", "il", "ie", "id", "hu", "ht", "hr", "hn", "hm", "hk", "gy", "gw", "gu", "gt", "gs", "gr", "gq", "gp", "gn", "gm", "gl", "gi", "gh", "gg", "gf", "ge", "gd", "gb", "ga", "fr", "fo", "fm", "fk", "fj", "fi", "eu", "et", "es", "er", "eh", "eg", "ee", "ec", "dz", "do", "dm", "dk", "dj", "de", "cz", "cy", "cx", "cw", "cv", "cu", "cr", "co", "cn", "cm", "cl", "ck", "ci", "ch", "cg", "cf", "cd", "cc", "ca", "bz", "by", "bw", "bv", "bt", "bs", "br", "bq", "bo", "bn", "bm", "bl", "bj", "bi", "bh", "bg", "bf", "be", "bd", "bb", "ba", "az", "ax", "aw", "au", "at", "as", "ar", "aq", "ao", "an", "am", "al", "ai", "ag", "af", "ae", "ad", "ac" ); } twitter-text-1.14.7/test/twitter-text-conformance/autolink.yml000664 001751 001751 00000152207 13126461251 024604 0ustar00srudsrud000000 000000 tests: usernames: - description: "Autolink trailing username" text: "text @username" expected: "text @username" - description: "Autolink username at the beginning" text: "@username text" expected: "@username text" - description: "DO NOT Autolink username preceded by a letter" text: "meet@the beach" expected: "meet@the beach" - description: "Autolink username preceded by puctuation" text: "great.@username" expected: "great.@username" - description: "Autolink username followed by puctuation" text: "@username&^$%^" expected: "@username&^$%^" - description: "Autolink username followed by Japanese" text: "@usernameの" expected: "@usernameの" - description: "Autolink username preceded by Japanese" text: "あ@username" expected: "あ@username" - description: "Autolink username surrounded by Japanese" text: 
"あ@usernameの" expected: "あ@usernameの" - description: "Autolink username in compressed RT" text: "RT@username: long Tweet is loooong" expected: "RT@username: long Tweet is loooong" - description: "Autolink alternate RT format in middle of text" text: "Check out RT:@username yas" expected: "Check out RT:@username yas" - description: "DO NOT Autolink domain of email address ending in RT like support@example.com" text: "Email support@example.com" expected: "Email support@example.com" - description: "DO NOT Autolink username followed by accented latin characters" text: "@aliceìnheiro something something" expected: "@aliceìnheiro something something" - description: "DO NOT Autolink username @_ in @_@" text: "oh, snap! @_@" expected: "oh, snap! @_@" - description: "Autolink username with full-width at sign (U+FF20)" text: "@username" expected: "@username" - description: "DO NOT Autolink username over 20 characters" text: "@username9012345678901" expected: "@username9012345678901" - description: "Autolink two usernames" text: "@foo @bar" expected: "@foo @bar" - description: "Autolink usernames followed by :" text: "@foo: @bar" expected: "@foo: @bar" - description: "Autolink usernames that are followed by international characters" text: "@foo îs in the house" expected: "@foo îs in the house" - description: "Preserve case when linking a username" text: "@MixedCase" expected: "@MixedCase" lists: - description: "Autolink list preceded by a space" text: "text @username/list" expected: "text @username/list" - description: "DO NOT Autolink list when space follows slash" text: "text @username/ list" expected: "text @username/ list" - description: "DO NOT Autolink list with empty username" text: "text @/list" expected: "text @/list" - description: "Autolink list at the beginning" text: "@username/list" expected: "@username/list" - description: "DO NOT Autolink list preceded by letter" text: "meet@the/beach" expected: "meet@the/beach" - description: "Autolink list preceded by 
puctuation" text: "great.@username/list" expected: "great.@username/list" - description: "Autolink list followed by puctuation" text: "@username/list&^$%^" expected: "@username/list&^$%^" - description: "Autolink list name over 25 characters (truncated to 25)" text: "@username/list567890123456789012345A" expected: "@username/list567890123456789012345A" - description: "Autolink list that contains an _" text: "text @username/list_name" expected: "text @username/list_name" - description: "Autolink list that contains a -" text: "text @username/list-name" expected: "text @username/list-name" - description: "Autolink list that contains a number" text: "text @username/list123" expected: "text @username/list123" - description: "DO NOT Autolink list starting with a number" text: "@username/1list" expected: "@username/1list" hashtags: - description: "Autolink trailing hashtag" text: "text #hashtag" expected: "text #hashtag" - description: "Autolink alphanumeric hashtag (letter-number-letter)" text: "text #hash0tag" expected: "text #hash0tag" - description: "Autolink alphanumeric hashtag (number-letter)" text: "text #1tag" expected: "text #1tag" - description: "Autolink hashtag with underscore" text: "text #hash_tag" expected: "text #hash_tag" - description: "DO NOT Autolink all-numeric hashtags" text: "text #1234" expected: "text #1234" - description: "DO NOT Autolink hashtag preceded by a letter" text: "text#hashtag" expected: "text#hashtag" - description: "DO NOT Autolink hashtag that begins with \ufe0f (Emoji style hash sign)" text: "#️hashtag" expected: "#️hashtag" - description: "DO NOT Autolink hashtag that begins with \ufe0f (Keycap style hash sign)" text: "#⃣hashtag" expected: "#⃣hashtag" - description: "Autolink multiple hashtags" text: "text #hashtag1 #hashtag2" expected: "text #hashtag1 #hashtag2" - description: "Autolink hashtag preceded by a period" text: "text.#hashtag" expected: "text.#hashtag" - description: "DO NOT Autolink hashtag preceded by &" text: 
"&#nbsp;" expected: "&#nbsp;" - description: "Autolink hashtag followed by ! (! not included)" text: "text #hashtag!" expected: "text #hashtag!" - description: "Autolink two hashtags separated by a slash" text: "text #dodge/#answer" expected: "text #dodge/#answer" - description: "Autolink hashtag before a slash" text: "text #dodge/answer" expected: "text #dodge/answer" - description: "Autolink hashtag after a slash" text: "text dodge/#answer" expected: "text dodge/#answer" - description: "Autolink hashtag followed by Japanese" text: "text #hashtagの" expected: "text #hashtagの" - description: "Autolink hashtag preceded by full-width space (U+3000)" text: "text #hashtag" expected: "text #hashtag" - description: "Autolink hashtag followed by full-width space (U+3000)" text: "#hashtag text" expected: "#hashtag text" - description: "Autolink hashtag with full-width hash (U+FF03)" text: "#hashtag" expected: "#hashtag" - description: "Autolink hashtag with accented character at the start" text: "#éhashtag" expected: "#éhashtag" - description: "Autolink hashtag with accented character at the end" text: "#hashtagé" expected: "#hashtagé" - description: "Autolink hashtag with accented character in the middle" text: "#hashétag" expected: "#hashétag" - description: "Autolink hashtags in Korean" text: "What is #트위터 anyway?" expected: "What is #트위터 anyway?" - description: "Autolink hashtags in Russian" text: "What is #ашок anyway?" expected: "What is #ашок anyway?" 
- description: "Autolink a katakana hashtag preceded by a space and followed by a space" text: "カタカナ #カタカナ カタカナ" expected: "カタカナ #カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a space and followed by a bracket" text: "カタカナ #カタカナ」カタカナ" expected: "カタカナ #カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a space and followed by a edge" text: "カタカナ #カタカナ" expected: "カタカナ #カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a space" text: "カタカナ「#カタカナ カタカナ" expected: "カタカナ「#カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a bracket" text: "カタカナ「#カタカナ」カタカナ" expected: "カタカナ「#カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a bracket and followed by a edge" text: "カタカナ「#カタカナ" expected: "カタカナ「#カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a space" text: "#カタカナ カタカナ" expected: "#カタカナ カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a bracket" text: "#カタカナ」カタカナ" expected: "#カタカナ」カタカナ" - description: "Autolink a katakana hashtag preceded by a edge and followed by a edge" text: "#カタカナ" expected: "#カタカナ" - description: "Autolink a katakana hashtag with a voiced sounds mark followed by a space" text: "#ハッシュタグ テスト" expected: "#ハッシュタグ テスト" - description: "Autolink a katakana hashtag with a voiced sounds mark followed by numbers" text: "#ハッシュタグ123" expected: "#ハッシュタグ123" - description: "Autolink a katakana hashtag with another voiced sounds mark" text: "#パピプペポ" expected: "#パピプペポ" - description: "Autolink a kanji hashtag preceded by a space and followed by a space" text: "漢字 #漢字 漢字" expected: "漢字 #漢字 漢字" - description: "Autolink a kanji hashtag preceded by a space and followed by a bracket" text: "漢字 #漢字」漢字" expected: "漢字 #漢字」漢字" - description: "Autolink a kanji hashtag preceded by a space and followed by a edge" text: "漢字 #漢字" expected: "漢字 #漢字" - description: "Autolink a kanji 
hashtag preceded by a bracket and followed by a space" text: "漢字「#漢字 漢字" expected: "漢字「#漢字 漢字" - description: "Autolink a kanji hashtag preceded by a bracket and followed by a bracket" text: "漢字「#漢字」漢字" expected: "漢字「#漢字」漢字" - description: "Autolink a kanji hashtag preceded by a bracket and followed by a edge" text: "漢字「#漢字" expected: "漢字「#漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a space" text: "#漢字 漢字" expected: "#漢字 漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a bracket" text: "#漢字」漢字" expected: "#漢字」漢字" - description: "Autolink a kanji hashtag preceded by a edge and followed by a edge" text: "#漢字" expected: "#漢字" - description: "Autolink a kanji hashtag preceded by an ideographic comma, followed by an ideographic period" text: "これは、#大丈夫。" expected: "これは、#大丈夫。" - description: "Autolink a hiragana hashtag preceded by a space and followed by a space" text: "ひらがな #ひらがな ひらがな" expected: "ひらがな #ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a space and followed by a bracket" text: "ひらがな #ひらがな」ひらがな" expected: "ひらがな #ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by a space and followed by a edge" text: "ひらがな #ひらがな" expected: "ひらがな #ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a space" text: "ひらがな「#ひらがな ひらがな" expected: "ひらがな「#ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a bracket" text: "ひらがな「#ひらがな」ひらがな" expected: "ひらがな「#ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by a bracket and followed by a edge" text: "ひらがな「#ひらがな" expected: "ひらがな「#ひらがな" - description: "Autolink a hiragana hashtag preceded by a edge and followed by a space" text: "#ひらがな ひらがな" expected: "#ひらがな ひらがな" - description: "Autolink a hiragana hashtag preceded by a edge and followed by a bracket" text: "#ひらがな」ひらがな" expected: "#ひらがな」ひらがな" - description: "Autolink a hiragana hashtag preceded by 
a edge and followed by a edge" text: "#ひらがな" expected: "#ひらがな" - description: "Autolink a Kanji/Katakana mix hashtag" text: "日本語ハッシュタグ #日本語ハッシュタグ" expected: "日本語ハッシュタグ #日本語ハッシュタグ" - description: "DO NOT autolink a hashtag without a preceding space" text: "日本語ハッシュタグ#日本語ハッシュタグ" expected: "日本語ハッシュタグ#日本語ハッシュタグ" - description: "DO NOT include a punctuation in a hashtag" text: "#日本語ハッシュタグ。" expected: "#日本語ハッシュタグ。" - description: "Autolink a hashtag after a punctuation" text: "日本語ハッシュタグ。#日本語ハッシュタグ" expected: "日本語ハッシュタグ。#日本語ハッシュタグ" - description: "Autolink a hashtag with chouon" text: "長音ハッシュタグ。#サッカー" expected: "長音ハッシュタグ。#サッカー" - description: "Autolink a hashtag with half-width chouon" text: "長音ハッシュタグ。#サッカー" expected: "長音ハッシュタグ。#サッカー" - description: "Autolink a hashtag with half-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: "できましたよー!#日本語ハッシュタグ。" - description: "Autolink a hashtag with full-width # after full-width !" text: "できましたよー!#日本語ハッシュタグ。" expected: "できましたよー!#日本語ハッシュタグ。" - description: "Autolink a hashtag containing ideographic iteration mark" text: "#云々" expected: "#云々" - description: "Autolink multiple hashtags in multiple languages" text: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" expected: "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!" - description: "Autolink should allow for ş (U+015F) in a hashtag" text: "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş" expected: "Here’s a test tweet for you: #Ateş #qrşt #ştu " - description: "Autolink a hashtag with Latin extended character" text: "#mûǁae" expected: "#mûǁae" # Please be careful with changes to this test case - what looks like "á" is really a + U+0301, and many editors will silently convert this to U+00E1. 
- description: "Autolink hashtags with combining diacritics" text: "#táim #hag̃ua" expected: "#táim #hag̃ua" - description: "Autolink Arabic hashtag" text: "Arabic hashtag: #فارسی #لس_آنجلس" expected: "Arabic hashtag: #فارسی #لس_آنجلس" - description: "Autolink Thai hashtag" text: "Thai hashtag: #รายละเอียด" expected: "Thai hashtag: #รายละเอียด" urls: - description: "Autolink URL with pipe character" text: "text http://example.com/pipe|character?yes|pipe|character" expected: "text http://example.com/pipe|character?yes|pipe|character" - description: "Autolink trailing url" text: "text http://example.com" expected: "text http://example.com" - description: "Autolink url in mid-text" text: "text http://example.com more text" expected: "text http://example.com more text" - description: "Autolink url in Japanese text" text: "いまなにしてるhttp://example.comいまなにしてる" expected: "いまなにしてるhttp://example.comいまなにしてる" - description: "Autolink url surrounded by parentheses does not capture them" text: "text (http://example.com)" expected: "text (http://example.com)" - description: "Autolink url with path surrounded by parentheses does not capture them" text: "text (http://example.com/test)" expected: "text (http://example.com/test)" - description: "Autolink url with embedded parentheses" text: "text http://msdn.com/S(deadbeef)/page.htm" expected: "text http://msdn.com/S(deadbeef)/page.htm" - description: "Autolink url with embedded parentheses without linking surrounding parentheses" text: "text (URL in parentheses http://msdn.com/S(deadbeef))" expected: "text (URL in parentheses http://msdn.com/S(deadbeef))" - description: "Autolink Rdio #music url with double balanced nested parentheses" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/" - description: "Autolink Rdio #music url with double balanced nested parentheses without linking surrounding 
parentheses" text: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)" expected: "text (URL in parentheses https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited))/)" - description: "Autolink url followed by nested parentheses (without them)" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up(URL description with spaces and (parentheses))" - description: "Autolink url followed by completely unbalanced nested parentheses (without them)" text: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited" expected: "text https://rdio.com/artist/50_Cent/album/We_Up/track/We_Up_(Album_Version_(Edited" - description: "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" text: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" expected: "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx" - description: "Autolink url with balanced parens hiding XSS" text: 'text http://foo.com/("onclick="alert(1)")' expected: 'text http://foo.com/("onclick="alert(1)")' - description: "Autolink url should NOT capture unbalanced parens" text: "Parenthetically bad http://example.com/i_has_a_) thing" expected: "Parenthetically bad http://example.com/i_has_a_) thing" - description: "Autolink url containing unicode characters" text: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp" expected: "I enjoy Macintosh Brand computers: http://✪df.ws/ejp" - description: "Autolink url with .co. 
under TLD" text: "test http://www.example.co.jp" expected: "test http://www.example.co.jp" - description: "Autolink url with .sx TLD" text: "test http://www.example.sx" expected: "test http://www.example.sx" - description: "DO NOT Autolink url containing ! character in the domain" text: "badly formatted http://foo!bar.com" expected: "badly formatted http://foo!bar.com" - description: "DO NOT Autolink url containing _ character in the domain" text: "badly formatted http://foo_bar.com" expected: "badly formatted http://foo_bar.com" - description: "Autolink url preceded by :" text: "text:http://example.com" expected: "text:http://example.com" - description: "Autolink url followed by ? (without it)" text: "text http://example.com?" expected: "text http://example.com?" - description: "Autolink url followed by ! (without it)" text: "text http://example.com!" expected: "text http://example.com!" - description: "Autolink url followed by , (without it)" text: "text http://example.com," expected: "text http://example.com," - description: "Autolink url with path followed by a comma (wihout the comma)" text: "In http://example.com/test, Douglas explains 42." expected: "In http://example.com/test, Douglas explains 42." - description: "Autolink url followed by . (without it)" text: "text http://example.com." expected: "text http://example.com." 
- description: "Autolink url followed by : (without it)" text: "text http://example.com:" expected: "text http://example.com:" - description: "Autolink url followed by ; (without it)" text: "text http://example.com;" expected: "text http://example.com;" - description: "Autolink url followed by ] (without it)" text: "text http://example.com]" expected: "text http://example.com]" - description: "Autolink url followed by ) (without it)" text: "text http://example.com)" expected: "text http://example.com)" - description: "Autolink url followed by } (without it)" text: "text http://example.com}" expected: "text http://example.com}" - description: "Autolink url followed by = (without it)" text: "text http://example.com=" expected: "text http://example.com=" - description: "Autolink url followed by ' (without it)" text: "text http://example.com'" expected: "text http://example.com'" - description: "Autolink url preceded by /" text: "text /http://example.com" expected: "text /http://example.com" - description: "Autolink url preceded by !" 
text: "text !http://example.com" expected: "text !http://example.com" - description: "DO NOT Autolink url preceded by =" text: "text =http://example.com" expected: "text =http://example.com" - description: "Autolink url surrounded by double quotes" text: "text \"http://example.com\"" expected: "text \"http://example.com\"" - description: "DO NOT Autolink url preceded by @" text: "@http://example.com" expected: "@http://example.com" - description: "DO NOT Autolink domain in email address" text: "foo@bar.com" expected: "foo@bar.com" - description: "Autolink url embedded in link tag" text: "http://example.com" expected: "http://example.com" - description: "Autolink multiple urls" text: "http://example.com https://sslexample.com http://sub.example.com" expected: "http://example.com https://sslexample.com http://sub.example.com" - description: "Autolink url with long TLD" text: "http://example.mobi/path" expected: "http://example.mobi/path" - description: "Autolink url containing ending with #value (not as url + hashtag)" text: "http://foo.com/?#foo" expected: "http://foo.com/?#foo" - description: "DO NOT Autolink url without protocol (with www)" text: "www.example.biz" expected: "www.example.biz" - description: "DO NOT Autolink url without protocol (with WWW)" text: "WWW.EXAMPLE.BIZ" expected: "WWW.EXAMPLE.BIZ" - description: "DO NOT Autolink URL without protocol and without www (ending in .com)" text: "foo.com" expected: "foo.com" - description: "DO NOT Autolink URL without protocol and without www (ending in .org)" text: "foo.org" expected: "foo.org" - description: "DO NOT Autolink URL without protocol and without www (ending in .net)" text: "foo.net" expected: "foo.net" - description: "DO NOT Autolink URL without protocol and without www (ending in .gov)" text: "foo.gov" expected: "foo.gov" - description: "DO NOT Autolink URL without protocol and without www (ending in .edu)" text: "foo.edu" expected: "foo.edu" - description: "DO NOT Autolink URL without protocol 
and without www not ending in /.(edu|com|gov|org|net)/" text: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk" expected: "foo.it twitter.co.jp foo.commerce foo.nettastic foo.us foo.co.uk" - description: "Multiple URLs with different protocols but not without a protocol" text: "http://foo.com AND https://bar.com AND www.foobar.com" expected: "http://foo.com AND https://bar.com AND www.foobar.com" - description: "Autolink raw domain followed by domain only links the first" text: "See http://example.com example.com" expected: "See http://example.com example.com" - description: "Autolink url that includes @-sign and numeric dir under it" text: "http://www.flickr.com/photos/29674651@N00/4382024406" expected: "http://www.flickr.com/photos/29674651@N00/4382024406" - description: "Autolink url that includes @-sign and non-numeric dir under it" text: "http://www.flickr.com/photos/29674651@N00/foobar" expected: "http://www.flickr.com/photos/29674651@N00/foobar" - description: "Autolink url with a hashtag-looking fragment" text: "http://www.example.com/#answer" expected: "http://www.example.com/#answer" - description: "Autolink URL with only a domain followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL." expected: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL." - description: "Autolink URL with a path followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL." expected: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL." - description: "Autolink URL with a query followed by a period doesn't swallow the period." text: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL." 
expected: "I think it's proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL." - description: "Autolink URL with a hyphen in the domain name" text: "Czech out sweet deals at http://mrs.domain-dash.biz ok?" expected: "Czech out sweet deals at http://mrs.domain-dash.biz ok?" - description: "Autolink an IDN (punycode) domain and TLD" text: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/" expected: "See also: http://xn--80abe5aohbnkjb.xn--p1ai/" - description: "Autolink URL should NOT autolink www...foo" text: "Is www...foo a valid URL?" expected: "Is www...foo a valid URL?" - description: "Autolink URL should NOT autolink www.-foo.com" text: "Is www.-foo.com a valid URL?" expected: "Is www.-foo.com a valid URL?" - description: "Autolink URL should NOT autolink a domain with a valid dash but no protocol" text: "Is www.foo-bar.com a valid URL?" expected: "Is www.foo-bar.com a valid URL?" - description: "Autolink URL should autolink a domain with a valid dash and a protocol" text: "Is http://www.foo-bar.com a valid URL?" expected: "Is http://www.foo-bar.com a valid URL?" 
- description: "Autolink URL should link search urls (with &lang=, not ⟨)" text: "Check out http://search.twitter.com/#!/search?q=avro&lang=en" expected: "Check out http://search.twitter.com/#!/search?q=avro&lang=en" - description: "Autolink URL should link urls with very long paths" text: "Check out http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" expected: "Check out 
http://example.com/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - description: "Autolink URL should HTML escape the URL" text: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/" expected: "example: https://twitter.com/otm_m@\"onmousedown=\"alert('foo')\" style=background-color:yellow;color:yellow;\"/" - description: "Autolink URL should autolink a URL with a - or + at the end of the path" text: "Go to http://example.com/a+ or http://example.com/a-" expected: "Go to 
http://example.com/a+ or http://example.com/a-" - description: "Autolink URL should autolink a URL with a - or + at the end of the path and query parameters" text: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that" expected: "Go to http://example.com/a+?this=that or http://example.com/a-?this=that" - description: "Autolink URL should autolink URLs with longer paths ending in -" text: "Go to http://example.com/view/slug-url-?foo=bar" expected: "Go to http://example.com/view/slug-url-?foo=bar" - description: "Autolink URL should NOT link URLs with domains beginning in a space" text: "@user Try http:// example.com/path" expected: "@user Try http:// example.com/path" - description: "Autolink URL should NOT link URLs with domains beginning in a non-breaking space (U+00A0)" text: "@user Try http:// example.com/path" expected: "@user Try http:// example.com/path" - description: "Autolink URL should link paths containing accented characters" text: "See: http://example.com/café" expected: "See: http://example.com/café" - description: "Autolink URL should link paths containing Cyrillic characters" text: "Go to http://example.com/Русские_слова maybe?" expected: "Go to http://example.com/Русские_слова maybe?" 
- description: "Autolink URL should not link URL without protocol" text: "See: www.twitter.com or twitter.com/twitter" expected: "See: www.twitter.com or twitter.com/twitter" - description: "Autolink t.co URL followed by punctuation" text: "See: http://t.co/abcde's page" expected: "See: http://t.co/abcde's page" - description: "DO NOT autolink URL if preceded by $" text: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" expected: "$https://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA" cashtags: - description: "Autolink a cashtag" text: "$STOCK" expected: "$STOCK" - description: "Autolink a cashtag in text" text: "Text $STOCK text $symbol text" expected: "Text $STOCK text $symbol text" all: - description: "Autolink all does not break on URL with @" text: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you." expected: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you." 
- description: "Correctly handles URL followed directly by @user" text: "See: http://example.com/@user" expected: "See: http://example.com/@user" - description: "Correctly handles URL params containing @user" text: "See: http://example.com/?@user=@user" expected: "See: http://example.com/?@user=@user" - description: "Correctly handles URL with an @user followed by trailing /" text: "See: http://example.com/@user/" expected: "See: http://example.com/@user/" - description: "Does not allow an XSS after an @" text: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//" expected: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//" - description: "DO NOT autolink URLs if preceded by # or @" text: "#https://twitter.com @https://twitter.com" expected: "#https://twitter.com @https://twitter.com" - description: "Autolink url with a hashtag-looking fragment" text: "http://www.example.com/#answer" expected: "http://www.example.com/#answer" - description: "Autolink hashtag if followed by . and TLD" text: "#twitter.com #twitter.co.jp" expected: "#twitter.com #twitter.co.jp" - description: "Autolink @mention if followed by . and TLD" text: "@twitter.com @twitter.co.jp" expected: "@twitter.com @twitter.co.jp" - description: "Autolink a cashtag" text: "$STOCK" expected: "$STOCK" json: - description: "Do not autolink if JSON is empty." text: "This is a tweet with no entity." json: '{"hashtags":[], "urls":[], "user_mentions":[]}' expected: "This is a tweet with no entity." 
- description: "Autolink username" text: "text @username" json: '{"hashtags":[], "urls":[], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [5, 14]}]}' expected: "text @username" - description: "Autolink hashtag" text: "text #hashtag" json: '{"hashtags":[{"text":"hashtag", "indices":[5,13]}], "urls":[], "user_mentions":[]}' expected: "text #hashtag" - description: "Autolink URL" text: "text http://t.co/gksG6xlq" json: '{"hashtags":[], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[]}' expected: "text  http://twitter.com/ " - description: "Autolink all" text: "text http://t.co/gksG6xlq text #hashtag text @username" json: '{"hashtags":[{"text":"hashtag", "indices":[31,39]}], "urls":[{"url": "http://t.co/gksG6xlq", "expanded_url": "http://twitter.com/", "display_url": "twitter.com", "indices": [5, 25]}], "user_mentions":[{"screen_name": "username", "name": "@username", "id": 318686216, "id_str": "318686216", "indices": [45, 54]}]}' expected: "text  http://twitter.com/  text #hashtag text @username" twitter-text-1.14.7/test/twitter-text-conformance/Gemfile.lock000664 001751 001751 00000000245 13126461251 024447 0ustar00srudsrud000000 000000 GEM remote: https://rubygems.org/ specs: mini_portile (0.6.0) nokogiri (1.6.3.1) mini_portile (= 0.6.0) PLATFORMS ruby DEPENDENCIES nokogiri