twitter-text-rb-1.7.0/.gemtest
twitter-text-rb-1.7.0/.gitignore
*.gem
*.rbc
*.sw[a-p]
*.tmproj
*.tmproject
*.un~
*~
.DS_Store
.Spotlight-V100
.Trashes
._*
.bundle
.config
.directory
.elc
.emacs.desktop
.emacs.desktop.lock
.redcar
.yardoc
Desktop.ini
Gemfile.lock
Icon?
InstalledFiles
Session.vim
Thumbs.db
\#*\#
_yardoc
auto-save-list
coverage
doc
lib/bundler/man
pkg
pkg/*
rdoc
spec/reports
test/tmp
test/version_tmp
tmp
tmtags
tramp
twitter-text-rb-1.7.0/.gitmodules
[submodule "test/twitter-text-conformance"]
path = test/twitter-text-conformance
url = git://github.com/twitter/twitter-text-conformance.git
twitter-text-rb-1.7.0/.rspec
--color
--format=nested
twitter-text-rb-1.7.0/.travis.yml
language: ruby
rvm:
- 1.8.7
- 1.9.3
- 2.0.0
twitter-text-rb-1.7.0/Gemfile
source "http://rubygems.org"
# Specify the gem's dependencies in twitter-text.gemspec
gemspec
twitter-text-rb-1.7.0/LICENSE
Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this work except in compliance with the License.
You may obtain a copy of the License below, or at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
twitter-text-rb-1.7.0/README.rdoc
== twitter-text {Build Status}[http://travis-ci.org/twitter/twitter-text-rb] {Code Climate}[https://codeclimate.com/github/twitter/twitter-text-rb]
A gem that provides text processing routines for Twitter Tweets. The major
reason for this is to unify the various auto-linking and extraction tasks for
usernames, lists, hashtags and URLs.
== Extraction Examples
  # Extraction
  class MyClass
    include Twitter::Extractor
    usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
    # usernames = ["twitter", "jack"]
  end

  # Extraction with a block argument
  class MyClass
    include Twitter::Extractor
    extract_reply_screen_name("@twitter are you hiring?") do |username|
      # username = "twitter"
    end
  end
== Auto-linking Examples
  # Auto-link
  class MyClass
    include Twitter::Autolink
    html = auto_link("link @user, please #request")
  end

  # For Ruby on Rails you want to add this to app/helpers/application_helper.rb
  module ApplicationHelper
    include Twitter::Autolink
  end

  # Now the auto_link function is available in every view. So in index.html.erb:
  <%= auto_link("link @user, please #request") %>
=== Usernames
Username extraction and linking matches all valid Twitter usernames but does
not verify that the username is a valid Twitter account.
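A minimal sketch of username auto-linking (the defaults shown come from
Twitter::Autolink; the exact markup depends on the options you pass):

  include Twitter::Autolink
  auto_link_usernames_or_lists("reach out to @support")
  # => roughly: reach out to @<a class="tweet-url username"
  #    href="https://twitter.com/support" rel="nofollow">support</a>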
=== Lists
Auto-link and extract list names when they are written in @user/list-name
format.
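A quick sketch of list extraction (the entity shape follows the extractor's
hashes; indices are character offsets):

  include Twitter::Extractor
  extract_mentions_or_lists_with_indices("Reading @twitter/team")
  # => [{:screen_name => "twitter", :list_slug => "/team", :indices => [8, 21]}]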
=== Hashtags
Auto-link and extract hashtags, where a hashtag can contain most letters or
numbers but cannot be solely numbers and cannot contain punctuation.
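For example:

  include Twitter::Extractor
  extract_hashtags("I love #ruby and #rails")
  # => ["ruby", "rails"]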
=== URLs
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
a space to separate normal text from URLs, making it difficult to identify where
the URL ends and the text starts.
For this reason twitter-text currently does not support extracting or auto-linking
of URLs immediately followed by non-Latin characters.
Example: "http://twitter.com/は素晴らしい". The normal text is "は素晴らしい" and
is not part of the URL, even though it isn't space separated.
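Extraction of ordinary, space-delimited URLs works as usual; a brief sketch:

  include Twitter::Extractor
  extract_urls("check out http://example.com/foo")
  # => ["http://example.com/foo"]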
=== International
Special care has been taken to be sure that auto-linking and extraction work
in Tweets of all languages. This means that languages without spaces between
words should work equally well.
=== Hit Highlighting
Used to provide emphasis around the "hits" returned from the Search API; built
to work against text that has already been auto-linked.
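A short sketch using the default highlight tag (em):

  include Twitter::HitHighlighter
  hit_highlight("test hit here", [[5, 8]])
  # => "test <em>hit</em> here"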
=== Conformance
To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
  git submodule add git@github.com:twitter/twitter-text-conformance.git test/twitter-text-conformance/
  git submodule init
  git submodule update
=== Thanks
Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
* At Twitter …
* Matt Sanford - http://github.com/mzsanford
* Raffi Krikorian - http://github.com/r
* Ben Cherry - http://github.com/bcherry
* Patrick Ewing - http://github.com/hoverbird
* Jeff Smick - http://github.com/sprsquish
* Kenneth Kufluk - https://github.com/kennethkufluk
* Keita Fujii - https://github.com/keitaf
* Yoshimasa Niwa - https://github.com/niw
* Patches from the community …
* Jean-Philippe Bougie - http://github.com/jpbougie
* Erik Michaels-Ober - https://github.com/sferik
* Anyone who has filed an issue. It helps. Really.
=== Copyright and License
Copyright 2011 Twitter, Inc.
Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
twitter-text-rb-1.7.0/Rakefile
require 'bundler'
include Rake::DSL
Bundler::GemHelper.install_tasks
task :default => ['spec', 'test:conformance']
task :test => :spec
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
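# Fingerprint the conformance submodule by hashing every file under dir, so the
# :latest task below can tell whether `git pull` actually changed anything.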
def conformance_version(dir)
require 'digest'
Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
end
namespace :test do
namespace :conformance do
desc "Update conformance testing data"
task :update do
puts "Updating conformance data ... "
system("git submodule init") || raise("Failed to init submodule")
system("git submodule update") || raise("Failed to update submodule")
puts "Updating conformance data ... DONE"
end
desc "Change conformance test data to the lastest version"
task :latest => ['conformance:update'] do
current_dir = File.dirname(__FILE__)
submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
version_before = conformance_version(submodule_dir)
system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version")
system("cd #{current_dir}")
if conformance_version(submodule_dir) != version_before
system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files")
system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data")
puts "Upgraded conformance suite."
else
puts "No conformance suite changes."
end
end
desc "Run conformance test suite"
task :run do
ruby '-rubygems', "test/conformance_test.rb"
end
end
desc "Run conformance test suite"
task :conformance => ['conformance:latest', 'conformance:run'] do
end
end
require 'rdoc/task'
namespace :doc do
RDoc::Task.new do |rd|
rd.main = "README.rdoc"
rd.rdoc_dir = 'doc'
rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
end
end
desc "Run cruise control build"
task :cruise => [:spec, 'test:conformance'] do
end
twitter-text-rb-1.7.0/lib/twitter-text.rb
major, minor, patch = RUBY_VERSION.split('.')
$RUBY_1_9 = if major.to_i == 1 && minor.to_i < 9
# Ruby 1.8 KCODE check. Not needed on 1.9 and later.
raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'") unless $KCODE[0].chr =~ /u/i
false
else
true
end
%w(
deprecation
regex
rewriter
autolink
extractor
unicode
validation
hit_highlighter
).each do |name|
require "twitter-text/#{name}"
end
twitter-text-rb-1.7.0/lib/twitter-text/autolink.rb
# encoding: UTF-8
require 'set'
require 'twitter-text/hash_helper'
module Twitter
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
# usernames, lists, hashtags and URLs.
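# A short usage sketch (Autolink extends itself, so these can also be called as
# module functions; the exact markup produced depends on the options documented
# on each method):
#
#   include Twitter::Autolink
#   auto_link("Hello @jack, see #ruby at http://example.com")
#   # => HTML with <a> tags around the mention, hashtag and URL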
module Autolink extend self
# Default CSS class for auto-linked lists
DEFAULT_LIST_CLASS = "tweet-url list-slug".freeze
# Default CSS class for auto-linked usernames
DEFAULT_USERNAME_CLASS = "tweet-url username".freeze
# Default CSS class for auto-linked hashtags
DEFAULT_HASHTAG_CLASS = "tweet-url hashtag".freeze
# Default CSS class for auto-linked cashtags
DEFAULT_CASHTAG_CLASS = "tweet-url cashtag".freeze
# Default URL base for auto-linked usernames
DEFAULT_USERNAME_URL_BASE = "https://twitter.com/".freeze
# Default URL base for auto-linked lists
DEFAULT_LIST_URL_BASE = "https://twitter.com/".freeze
# Default URL base for auto-linked hashtags
DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%23".freeze
# Default URL base for auto-linked cashtags
DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%24".freeze
# Default attributes for invisible span tag
DEFAULT_INVISIBLE_TAG_ATTRS = "style='position:absolute;left:-9999px;'".freeze
DEFAULT_OPTIONS = {
:list_class => DEFAULT_LIST_CLASS,
:username_class => DEFAULT_USERNAME_CLASS,
:hashtag_class => DEFAULT_HASHTAG_CLASS,
:cashtag_class => DEFAULT_CASHTAG_CLASS,
:username_url_base => DEFAULT_USERNAME_URL_BASE,
:list_url_base => DEFAULT_LIST_URL_BASE,
:hashtag_url_base => DEFAULT_HASHTAG_URL_BASE,
:cashtag_url_base => DEFAULT_CASHTAG_URL_BASE,
:invisible_tag_attrs => DEFAULT_INVISIBLE_TAG_ATTRS
}.freeze
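# Auto-link a Tweet using the entity hashes from its JSON payload. A minimal,
# hypothetical example of the expected shape (string keys are symbolized
# internally; the "hashtags"/"urls"/"user_mentions" key names are illustrative):
#
#   json = {
#     "hashtags"      => [{"text" => "ruby", "indices" => [10, 15]}],
#     "urls"          => [],
#     "user_mentions" => []
#   }
#   auto_link_with_json("I'm using #ruby", json)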
def auto_link_with_json(text, json, options = {})
# concatenate entities
entities = json.values().flatten()
# map JSON entity to twitter-text entity
entities.each do |entity|
HashHelper.symbolize_keys!(entity)
# hashtag
entity[:hashtag] = entity[:text] if entity[:text]
end
auto_link_entities(text, entities, options)
end
def auto_link_entities(text, entities, options = {}, &block)
return text if entities.empty?
# NOTE deprecate these attributes not options keys in options hash, then use html_attrs
options = DEFAULT_OPTIONS.merge(options)
options[:html_attrs] = extract_html_attrs_from_options!(options)
options[:html_attrs][:rel] ||= "nofollow" unless options[:suppress_no_follow]
Twitter::Rewriter.rewrite_entities(text, entities) do |entity, chars|
if entity[:url]
link_to_url(entity, chars, options, &block)
elsif entity[:hashtag]
link_to_hashtag(entity, chars, options, &block)
elsif entity[:screen_name]
link_to_screen_name(entity, chars, options, &block)
elsif entity[:cashtag]
link_to_cashtag(entity, chars, options, &block)
end
end
end
# Add tags around the usernames, lists, hashtags and URLs in the provided text.
# The tags can be controlled with the following entries in the options hash:
# Also any elements in the options hash will be converted to HTML attributes
# and placed in the <a> tag.
#
# :url_class:: class to add to url tags
# :list_class:: class to add to list tags
# :username_class:: class to add to username tags
# :hashtag_class:: class to add to hashtag tags
# :cashtag_class:: class to add to cashtag tags
# :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
# :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
# :hashtag_url_base:: the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this.
# :cashtag_url_base:: the value for href attribute on cashtag links. The $cashtag (minus the $) will be appended at the end of this.
# :invisible_tag_attrs:: HTML attribute to add to invisible span tags
# :username_include_symbol:: place the @ symbol within username and list links
# :suppress_lists:: disable auto-linking to lists
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :url_target:: the value for target attribute on URL links.
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link(text, options = {}, &block)
auto_link_entities(text, Extractor.extract_entities_with_indices(text, :extract_url_without_protocol => false), options, &block)
end
# Add tags around the usernames and lists in the provided text. The
# tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and placed in the <a> tag.
#
# :list_class:: class to add to list tags
# :username_class:: class to add to username tags
# :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this.
# :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this.
# :username_include_symbol:: place the @ symbol within username and list links
# :suppress_lists:: disable auto-linking to lists
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_usernames_or_lists(text, options = {}, &block) # :yields: list_or_username
auto_link_entities(text, Extractor.extract_mentions_or_lists_with_indices(text), options, &block)
end
# Add tags around the hashtags in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and placed in the <a> tag.
#
# :hashtag_class:: class to add to hashtag tags
# :hashtag_url_base:: the value for href attribute. The hashtag text (minus the #) will be appended at the end of this.
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_hashtags(text, options = {}, &block) # :yields: hashtag_text
auto_link_entities(text, Extractor.extract_hashtags_with_indices(text), options, &block)
end
# Add tags around the cashtags in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and placed in the <a> tag.
#
# :cashtag_class:: class to add to cashtag tags
# :cashtag_url_base:: the value for href attribute. The cashtag text (minus the $) will be appended at the end of this.
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_cashtags(text, options = {}, &block) # :yields: cashtag_text
auto_link_entities(text, Extractor.extract_cashtags_with_indices(text), options, &block)
end
# Add tags around the URLs in the provided text.
# The tags can be controlled with the following entries in the options hash.
# Also any elements in the options hash will be converted to HTML attributes
# and placed in the <a> tag.
#
# :url_class:: class to add to url tags
# :invisible_tag_attrs:: HTML attribute to add to invisible span tags
# :suppress_no_follow:: do not add rel="nofollow" to auto-linked items
# :symbol_tag:: tag to apply around symbol (@, #, $) in username / hashtag / cashtag links
# :text_with_symbol_tag:: tag to apply around text part in username / hashtag / cashtag links
# :url_target:: the value for target attribute on URL links.
# :link_attribute_block:: function to modify the attributes of a link based on the entity. called with |entity, attributes| params, and should modify the attributes hash.
# :link_text_block:: function to modify the text of a link based on the entity. called with |entity, text| params, and should return a modified text.
def auto_link_urls(text, options = {}, &block)
auto_link_entities(text, Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false), options, &block)
end
# These methods are deprecated, will be removed in future.
extend Deprecation
# Deprecated: Please use auto_link_urls instead.
# Add tags around the URLs in the provided text.
# Any elements in the href_options hash will be converted to HTML attributes
# and placed in the <a> tag.
# Unless href_options contains :suppress_no_follow
# the rel="nofollow" attribute will be added.
alias :auto_link_urls_custom :auto_link_urls
deprecate :auto_link_urls_custom, :auto_link_urls
private
HTML_ENTITIES = {
'&' => '&amp;',
'>' => '&gt;',
'<' => '&lt;',
'"' => '&quot;',
"'" => '&#39;'
}
def html_escape(text)
text && text.to_s.gsub(/[&"'><]/) do |character|
HTML_ENTITIES[character]
end
end
# NOTE We will make this private in future.
public :html_escape
# Options which should not be passed as HTML attributes
OPTIONS_NOT_ATTRIBUTES = Set.new([
:url_class, :list_class, :username_class, :hashtag_class, :cashtag_class,
:username_url_base, :list_url_base, :hashtag_url_base, :cashtag_url_base,
:username_url_block, :list_url_block, :hashtag_url_block, :cashtag_url_block, :link_url_block,
:username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities,
:invisible_tag_attrs, :symbol_tag, :text_with_symbol_tag, :url_target,
:link_attribute_block, :link_text_block
]).freeze
def extract_html_attrs_from_options!(options)
html_attrs = {}
options.reject! do |key, value|
unless OPTIONS_NOT_ATTRIBUTES.include?(key)
html_attrs[key] = value
true
end
end
html_attrs
end
def url_entities_hash(url_entities)
(url_entities || {}).inject({}) do |entities, entity|
HashHelper.symbolize_keys!(entity)
entities[entity[:url]] = entity
entities
end
end
def link_to_url(entity, chars, options = {})
url = entity[:url]
href = if options[:link_url_block]
options[:link_url_block].call(url)
else
url
end
# NOTE Auto-linked URLs do not use default values for options such as
# url_class, but they do honor suppress_no_follow.
html_attrs = options[:html_attrs].dup
html_attrs[:class] = options[:url_class] if options.key?(:url_class)
# add target attribute only if :url_target is specified
html_attrs[:target] = options[:url_target] if options.key?(:url_target)
url_entities = url_entities_hash(options[:url_entities])
# use entity from urlEntities if available
url_entity = url_entities[url] || entity
link_text = if url_entity[:display_url]
html_attrs[:title] ||= url_entity[:expanded_url]
link_url_with_entity(url_entity, options)
else
html_escape(url)
end
link_to_text(entity, link_text, href, html_attrs, options)
end
def link_url_with_entity(entity, options)
display_url = entity[:display_url]
expanded_url = entity[:expanded_url]
invisible_tag_attrs = options[:invisible_tag_attrs] || DEFAULT_INVISIBLE_TAG_ATTRS
# Goal: If a user copies and pastes a tweet containing a t.co'ed link, the resulting paste
# should contain the full original URL (expanded_url), not the display URL.
#
# Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
# font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
# Elements with font-size:0 get copied even though they are not visible.
# Note that display:none doesn't work here. Elements with display:none don't get copied.
#
# Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we
# wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
# everything with the tco-ellipsis class.
#
# Exception: pic.twitter.com images, for which expanded_url = "https://twitter.com/#!/username/status/1234/photo/1".
# For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
# For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
display_url_sans_ellipses = display_url.gsub("…", "")
if expanded_url.include?(display_url_sans_ellipses)
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
preceding_ellipsis = /\A…/.match(display_url).to_s
following_ellipsis = /…\z/.match(display_url).to_s
# As an example: The user tweets "hi http://longdomainname.com/foo"
# This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
# This will get rendered as:
#
#   <span class='tco-ellipsis'>…<span style='font-size:0'>&nbsp;</span></span>   <!-- displayed, not copied -->
#   <span style='font-size:0'>http://longdomai</span>                            <!-- copied, not displayed -->
#   <span class='js-display-url'>nname.com/foo</span>                            <!-- displayed and copied -->
#   <span class='tco-ellipsis'><span style='font-size:0'>&nbsp;</span>…</span>   <!-- displayed, not copied -->
#
%(<span class="tco-ellipsis">#{preceding_ellipsis}<span #{invisible_tag_attrs}>&nbsp;</span></span>) <<
%(<span #{invisible_tag_attrs}>#{html_escape(before_display_url)}</span>) <<
%(<span class="js-display-url">#{html_escape(display_url_sans_ellipses)}</span>) <<
%(<span #{invisible_tag_attrs}>#{html_escape(after_display_url)}</span>) <<
%(<span class="tco-ellipsis"><span #{invisible_tag_attrs}>&nbsp;</span>#{following_ellipsis}</span>)
else
html_escape(display_url)
end
end
def link_to_hashtag(entity, chars, options = {})
hash = chars[entity[:indices].first]
hashtag = entity[:hashtag]
hashtag = yield(hashtag) if block_given?
hashtag_class = options[:hashtag_class]
if hashtag.match Twitter::Regex::REGEXEN[:rtl_chars]
hashtag_class += ' rtl'
end
href = if options[:hashtag_url_block]
options[:hashtag_url_block].call(hashtag)
else
"#{options[:hashtag_url_base]}#{hashtag}"
end
html_attrs = {
:class => hashtag_class,
# FIXME According to the conformance tests, the hash in the title should be
# half-width; this is probably a bug in the conformance data.
:title => "##{hashtag}"
}.merge(options[:html_attrs])
link_to_text_with_symbol(entity, hash, hashtag, href, html_attrs, options)
end
def link_to_cashtag(entity, chars, options = {})
dollar = chars[entity[:indices].first]
cashtag = entity[:cashtag]
cashtag = yield(cashtag) if block_given?
href = if options[:cashtag_url_block]
options[:cashtag_url_block].call(cashtag)
else
"#{options[:cashtag_url_base]}#{cashtag}"
end
html_attrs = {
:class => "#{options[:cashtag_class]}",
:title => "$#{cashtag}"
}.merge(options[:html_attrs])
link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
end
def link_to_screen_name(entity, chars, options = {})
name = "#{entity[:screen_name]}#{entity[:list_slug]}"
chunk = name
chunk = yield(name) if block_given?
name.downcase!
at = chars[entity[:indices].first]
html_attrs = options[:html_attrs].dup
if entity[:list_slug] && !entity[:list_slug].empty? && !options[:suppress_lists]
href = if options[:list_url_block]
options[:list_url_block].call(name)
else
"#{options[:list_url_base]}#{name}"
end
html_attrs[:class] ||= "#{options[:list_class]}"
else
href = if options[:username_url_block]
options[:username_url_block].call(chunk)
else
"#{options[:username_url_base]}#{name}"
end
html_attrs[:class] ||= "#{options[:username_class]}"
end
link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
end
def link_to_text_with_symbol(entity, symbol, text, href, attributes = {}, options = {})
tagged_symbol = options[:symbol_tag] ? "<#{options[:symbol_tag]}>#{symbol}</#{options[:symbol_tag]}>" : symbol
text = html_escape(text)
tagged_text = options[:text_with_symbol_tag] ? "<#{options[:text_with_symbol_tag]}>#{text}</#{options[:text_with_symbol_tag]}>" : text
if options[:username_include_symbol] || symbol !~ Twitter::Regex::REGEXEN[:at_signs]
"#{link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)}"
else
"#{tagged_symbol}#{link_to_text(entity, tagged_text, href, attributes, options)}"
end
end
def link_to_text(entity, text, href, attributes = {}, options = {})
attributes[:href] = href
options[:link_attribute_block].call(entity, attributes) if options[:link_attribute_block]
text = options[:link_text_block].call(entity, text) if options[:link_text_block]
%(<a#{tag_attrs(attributes)}>#{text}</a>)
end
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
def tag_attrs(attributes)
attributes.keys.sort_by{|k| k.to_s}.inject("") do |attrs, key|
value = attributes[key]
if BOOLEAN_ATTRIBUTES.include?(key)
value = value ? key : nil
end
unless value.nil?
value = case value
when Array
value.compact.join(" ")
else
value
end
attrs << %( #{html_escape(key)}="#{html_escape(value)}")
end
attrs
end
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/deprecation.rb
module Twitter
module Deprecation
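# Wraps a method so that calling it prints a deprecation warning and then
# delegates to the original implementation. A sketch of how it is used in this
# gem (see Autolink#auto_link_urls_custom):
#
#   extend Twitter::Deprecation
#   alias :old_name :current_name
#   deprecate :old_name, :current_name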
def deprecate(method, new_method = nil)
deprecated_method = :"deprecated_#{method}"
message = "Deprecation: `#{method}` is deprecated."
message << " Please use `#{new_method}` instead." if new_method
alias_method(deprecated_method, method)
define_method method do |*args, &block|
warn message
send(deprecated_method, *args, &block)
end
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/extractor.rb
# encoding: UTF-8
class String
# Helper function to count the character length by first converting to an
# array. This is needed because with unicode strings, the return value
# of length may be incorrect
def char_length
if respond_to? :codepoints
length
else
chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size
end
end
# Helper function to convert this string into an array of unicode characters.
def to_char_a
@to_char_a ||= if chars.kind_of?(Enumerable)
chars.to_a
else
char_array = []
0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') }
char_array
end
end
end
# Helper functions to return character offsets instead of byte offsets.
class MatchData
def char_begin(n)
if string.respond_to? :codepoints
self.begin(n)
else
string[0, self.begin(n)].char_length
end
end
def char_end(n)
if string.respond_to? :codepoints
self.end(n)
else
string[0, self.end(n)].char_length
end
end
end
module Twitter
# A module for including Tweet parsing in a class. This module provides functions for the extraction and processing
# of usernames, lists, URLs and hashtags.
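# A brief usage sketch (Extractor also extends itself, so the functions can be
# called as module methods):
#
#   include Twitter::Extractor
#   extract_hashtags("good morning #coffee")  # => ["coffee"]
#   extract_urls("see http://example.com")    # => ["http://example.com"]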
module Extractor extend self
# Remove overlapping entities.
# This returns a new array with no overlapping entities.
def remove_overlapping_entities(entities)
# sort by start index
entities = entities.sort_by{|entity| entity[:indices].first}
# remove duplicates
prev = nil
entities.reject!{|entity| (prev && prev[:indices].last > entity[:indices].first) || (prev = entity) && false}
entities
end
# Extracts all usernames, lists, hashtags and URLs in the Tweet text
# along with the indices for where the entity occurred.
# If the text is nil or contains no entity an empty array
# will be returned.
#
# If a block is given then it will be called for each entity.
def extract_entities_with_indices(text, options = {}, &block)
# extract all entities
entities = extract_urls_with_indices(text, options) +
extract_hashtags_with_indices(text, :check_url_overlap => false) +
extract_mentions_or_lists_with_indices(text) +
extract_cashtags_with_indices(text)
return [] if entities.empty?
entities = remove_overlapping_entities(entities)
entities.each(&block) if block_given?
entities
end
# Extracts a list of all usernames mentioned in the Tweet text. If the
# text is nil or contains no username mentions an empty array
# will be returned.
#
# If a block is given then it will be called for each username.
def extract_mentioned_screen_names(text, &block) # :yields: username
screen_names = extract_mentioned_screen_names_with_indices(text).map{|m| m[:screen_name]}
screen_names.each(&block) if block_given?
screen_names
end
# Extracts a list of all usernames mentioned in the Tweet text
# along with the indices for where the mention occurred. If the
# text is nil or contains no username mentions, an empty array
# will be returned.
#
# If a block is given, then it will be called with each username, the start
# index, and the end index in the text.
def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end
return [] unless text
possible_screen_names = []
extract_mentions_or_lists_with_indices(text) do |screen_name, list_slug, start_position, end_position|
next unless list_slug.empty?
possible_screen_names << {
:screen_name => screen_name,
:indices => [start_position, end_position]
}
end
if block_given?
possible_screen_names.each do |mention|
yield mention[:screen_name], mention[:indices].first, mention[:indices].last
end
end
possible_screen_names
end
# Extracts a list of all usernames or lists mentioned in the Tweet text
# along with the indices for where the mention occurred. If the
# text is nil or contains no username or list mentions, an empty array
# will be returned.
#
# If a block is given, then it will be called with each username, list slug, the start
# index, and the end index in the text. The list_slug will be an empty string
# if this is a username mention.
def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
return [] unless text =~ /[@＠]/
possible_entries = []
text.to_s.scan(Twitter::Regex[:valid_mention_or_list]) do |before, at, screen_name, list_slug|
match_data = $~
after = $'
unless after =~ Twitter::Regex[:end_mention_match]
start_position = match_data.char_begin(3) - 1
end_position = match_data.char_end(list_slug.nil? ? 3 : 4)
possible_entries << {
:screen_name => screen_name,
:list_slug => list_slug || "",
:indices => [start_position, end_position]
}
end
end
if block_given?
possible_entries.each do |mention|
yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last
end
end
possible_entries
end
# Extracts the username replied to in the Tweet text. If the
# text is nil or is not a reply nil will be returned.
#
# If a block is given then it will be called with the username replied to (if any)
def extract_reply_screen_name(text) # :yields: username
return nil unless text
possible_screen_name = text.match(Twitter::Regex[:valid_reply])
return unless possible_screen_name.respond_to?(:captures)
return if $' =~ Twitter::Regex[:end_mention_match]
screen_name = possible_screen_name.captures.first
yield screen_name if block_given?
screen_name
end
# Extracts a list of all URLs included in the Tweet text. If the
# text is nil or contains no URLs an empty array
# will be returned.
#
# If a block is given then it will be called for each URL.
def extract_urls(text, &block) # :yields: url
urls = extract_urls_with_indices(text).map{|u| u[:url]}
urls.each(&block) if block_given?
urls
end
# Extracts a list of all URLs included in the Tweet text along
# with the indices. If the text is nil or contains no
# URLs an empty array will be returned.
#
# If a block is given then it will be called for each URL.
def extract_urls_with_indices(text, options = {:extract_url_without_protocol => true}) # :yields: url, start, end
return [] unless text && (options[:extract_url_without_protocol] ? text.index(".") : text.index(":"))
urls = []
position = 0
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
valid_url_match_data = $~
start_position = valid_url_match_data.char_begin(3)
end_position = valid_url_match_data.char_end(3)
# If protocol is missing and domain contains non-ASCII characters,
# extract ASCII-only domains.
if !protocol
next if !options[:extract_url_without_protocol] || before =~ Twitter::Regex[:invalid_url_without_protocol_preceding_chars]
last_url = nil
last_url_invalid_match = nil
domain.scan(Twitter::Regex[:valid_ascii_domain]) do |ascii_domain|
last_url = {
:url => ascii_domain,
:indices => [start_position + $~.char_begin(0),
start_position + $~.char_end(0)]
}
last_url_invalid_match = ascii_domain =~ Twitter::Regex[:invalid_short_domain]
urls << last_url unless last_url_invalid_match
end
# no ASCII-only domain found. Skip the entire URL
next unless last_url
# last_url only contains domain. Need to add path and query if they exist.
if path
# last_url was not added. Add it to urls here.
urls << last_url if last_url_invalid_match
last_url[:url] = url.sub(domain, last_url[:url])
last_url[:indices][1] = end_position
end
else
# In the case of t.co URLs, don't allow additional path characters
if url =~ Twitter::Regex[:valid_tco_url]
url = $&
end_position = start_position + url.char_length
end
urls << {
:url => url,
:indices => [start_position, end_position]
}
end
end
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
urls
end
# Extracts a list of all hashtags included in the Tweet text. If the
# text is nil or contains no hashtags an empty array
# will be returned. The array returned will not include the leading #
# character.
#
# If a block is given then it will be called for each hashtag.
def extract_hashtags(text, &block) # :yields: hashtag_text
hashtags = extract_hashtags_with_indices(text).map{|h| h[:hashtag]}
hashtags.each(&block) if block_given?
hashtags
end
# Extracts a list of all hashtags included in the Tweet text. If the
# text is nil or contains no hashtags an empty array
# will be returned. The array returned will not include the leading #
# character.
#
# If a block is given then it will be called for each hashtag.
def extract_hashtags_with_indices(text, options = {:check_url_overlap => true}) # :yields: hashtag_text, start, end
return [] unless text =~ /[#＃]/
tags = []
text.scan(Twitter::Regex[:valid_hashtag]) do |before, hash, hash_text|
match_data = $~
start_position = match_data.char_begin(2)
end_position = match_data.char_end(3)
after = $'
unless after =~ Twitter::Regex[:end_hashtag_match]
tags << {
:hashtag => hash_text,
:indices => [start_position, end_position]
}
end
end
if options[:check_url_overlap]
# extract URLs
urls = extract_urls_with_indices(text)
unless urls.empty?
tags.concat(urls)
# remove duplicates
tags = remove_overlapping_entities(tags)
# remove URL entities
tags.reject!{|entity| !entity[:hashtag] }
end
end
tags.each{|tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last} if block_given?
tags
end
# Extracts a list of all cashtags included in the Tweet text. If the
# text is nil or contains no cashtags an empty array
# will be returned. The array returned will not include the leading $
# character.
#
# If a block is given then it will be called for each cashtag.
def extract_cashtags(text, &block) # :yields: cashtag_text
cashtags = extract_cashtags_with_indices(text).map{|h| h[:cashtag]}
cashtags.each(&block) if block_given?
cashtags
end
# Extracts a list of all cashtags included in the Tweet text. If the
# text is nil or contains no cashtags an empty array
# will be returned. The array returned will not include the leading $
# character.
#
# If a block is given then it will be called for each cashtag.
def extract_cashtags_with_indices(text) # :yields: cashtag_text, start, end
return [] unless text =~ /\$/
tags = []
text.scan(Twitter::Regex[:valid_cashtag]) do |before, dollar, cash_text|
match_data = $~
start_position = match_data.char_begin(2)
end_position = match_data.char_end(3)
tags << {
:cashtag => cash_text,
:indices => [start_position, end_position]
}
end
tags.each{|tag| yield tag[:cashtag], tag[:indices].first, tag[:indices].last} if block_given?
tags
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/hash_helper.rb
module Twitter
module HashHelper
# Return a new hash with all keys converted to symbols, as long as
# they respond to +to_sym+.
#
# { 'name' => 'Rob', 'years' => '28' }.symbolize_keys
# #=> { :name => "Rob", :years => "28" }
def self.symbolize_keys(hash)
hash.dup.symbolize_keys!
end
# Destructively convert all keys to symbols, as long as they respond
# to +to_sym+. Same as +symbolize_keys+, but modifies +self+.
def self.symbolize_keys!(hash)
hash.keys.each do |key|
hash[(key.to_sym rescue key) || key] = hash.delete(key)
end
hash
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/hit_highlighter.rb
module Twitter
# Module for doing "hit highlighting" on tweets that have been auto-linked already.
# Useful with the results returned from the Search API.
module HitHighlighter extend self
# Default Tag used for hit highlighting
DEFAULT_HIGHLIGHT_TAG = "em"
# Add tags around the hits provided in the text. The
# hits should be an array of (start, end) index pairs, relative to the original
# text, before auto-linking (but the text may already be auto-linked if desired)
#
# The tags can be overridden using the :tag option. For example:
#
# irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong')
# => "test hit here"
def hit_highlight(text, hits = [], options = {})
if hits.empty?
return text
end
tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
tags = ["<" + tag_name + ">", "" + tag_name + ">"]
chunks = text.split(/[<>]/)
result = []
chunk_index, chunk = 0, chunks[0]
chunk_chars = chunk.to_s.to_char_a
prev_chunks_len = 0
chunk_cursor = 0
start_in_chunk = false
for hit, index in hits.flatten.each_with_index do
tag = tags[index % 2]
placed = false
until chunk.nil? || hit < prev_chunks_len + chunk.length do
result << chunk_chars[chunk_cursor..-1]
if start_in_chunk && hit == prev_chunks_len + chunk_chars.length
result << tag
placed = true
end
# correctly handle highlights that end on the final character.
if tag_text = chunks[chunk_index+1]
result << "<#{tag_text}>"
end
prev_chunks_len += chunk_chars.length
chunk_cursor = 0
chunk_index += 2
chunk = chunks[chunk_index]
chunk_chars = chunk.to_s.to_char_a
start_in_chunk = false
end
if !placed && !chunk.nil?
hit_spot = hit - prev_chunks_len
result << chunk_chars[chunk_cursor...hit_spot] << tag
chunk_cursor = hit_spot
if index % 2 == 0
start_in_chunk = true
else
start_in_chunk = false
end
placed = true
end
# ultimate fallback, hits that run off the end get a closing tag
if !placed
result << tag
end
end
if chunk
if chunk_cursor < chunk_chars.length
result << chunk_chars[chunk_cursor..-1]
end
(chunk_index+1).upto(chunks.length-1).each do |index|
result << (index.even? ? chunks[index] : "<#{chunks[index]}>")
end
end
result.flatten.join
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/regex.rb
# encoding: UTF-8
module Twitter
# A collection of regular expressions for parsing Tweet text. The regular expression
# list is frozen at load time to ensure immutability. These regular expressions are
# used throughout the Twitter classes. Special care has been taken to make
# sure these regular expressions work with Tweets in all languages.
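# The compiled patterns are exposed through the REGEXEN hash; an illustrative use:
#
#   "#ruby" =~ Twitter::Regex::REGEXEN[:valid_hashtag]  # => 0 (match at offset 0)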
class Regex
REGEXEN = {} # :nodoc:
def self.regex_range(from, to = nil) # :nodoc:
if $RUBY_1_9
if to
"\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}"
else
"\\u{#{from.to_s(16).rjust(4, '0')}}"
end
else
if to
[from].pack('U') + '-' + [to].pack('U')
else
[from].pack('U')
end
end
end
# Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
# to access both the list of characters and a pattern suitable for use with String#split
# Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
UNICODE_SPACES = [
(0x0009..0x000D).to_a, # White_Space # Cc [5] ..
0x0020, # White_Space # Zs SPACE
0x0085, # White_Space # Cc
0x00A0, # White_Space # Zs NO-BREAK SPACE
0x1680, # White_Space # Zs OGHAM SPACE MARK
0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
0x2028, # White_Space # Zl LINE SEPARATOR
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
].flatten.map{|c| [c].pack('U*')}.freeze
REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o
# Character not allowed in Tweets
INVALID_CHARACTERS = [
0xFFFE, 0xFEFF, # BOM
0xFFFF, # Special
0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
].map{|cp| [cp].pack('U') }.freeze
REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o
major, minor, patch = RUBY_VERSION.split('.')
if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE))
REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/
else
# This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius.
REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/")
end
# Latin accented characters
# Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
# Also excludes 0xf7, the division sign
LATIN_ACCENTS = [
regex_range(0xc0, 0xd6),
regex_range(0xd8, 0xf6),
regex_range(0xf8, 0xff),
regex_range(0x0100, 0x024f),
regex_range(0x0253, 0x0254),
regex_range(0x0256, 0x0257),
regex_range(0x0259),
regex_range(0x025b),
regex_range(0x0263),
regex_range(0x0268),
regex_range(0x026f),
regex_range(0x0272),
regex_range(0x0289),
regex_range(0x028b),
regex_range(0x02bb),
regex_range(0x0300, 0x036f),
regex_range(0x1e00, 0x1eff)
].join('').freeze
RTL_CHARACTERS = [
regex_range(0x0600,0x06FF),
regex_range(0x0750,0x077F),
regex_range(0x0590,0x05FF),
regex_range(0xFE70,0xFEFF)
].join('').freeze
NON_LATIN_HASHTAG_CHARS = [
# Cyrillic (Russian, Ukrainian, etc.)
regex_range(0x0400, 0x04ff), # Cyrillic
regex_range(0x0500, 0x0527), # Cyrillic Supplement
regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
regex_range(0xa640, 0xa69f), # Cyrillic Extended B
regex_range(0x0591, 0x05bf), # Hebrew
regex_range(0x05c1, 0x05c2),
regex_range(0x05c4, 0x05c5),
regex_range(0x05c7),
regex_range(0x05d0, 0x05ea),
regex_range(0x05f0, 0x05f4),
regex_range(0xfb12, 0xfb28), # Hebrew Presentation Forms
regex_range(0xfb2a, 0xfb36),
regex_range(0xfb38, 0xfb3c),
regex_range(0xfb3e),
regex_range(0xfb40, 0xfb41),
regex_range(0xfb43, 0xfb44),
regex_range(0xfb46, 0xfb4f),
regex_range(0x0610, 0x061a), # Arabic
regex_range(0x0620, 0x065f),
regex_range(0x066e, 0x06d3),
regex_range(0x06d5, 0x06dc),
regex_range(0x06de, 0x06e8),
regex_range(0x06ea, 0x06ef),
regex_range(0x06fa, 0x06fc),
regex_range(0x06ff),
regex_range(0x0750, 0x077f), # Arabic Supplement
regex_range(0x08a0), # Arabic Extended A
regex_range(0x08a2, 0x08ac),
regex_range(0x08e4, 0x08fe),
regex_range(0xfb50, 0xfbb1), # Arabic Pres. Forms A
regex_range(0xfbd3, 0xfd3d),
regex_range(0xfd50, 0xfd8f),
regex_range(0xfd92, 0xfdc7),
regex_range(0xfdf0, 0xfdfb),
regex_range(0xfe70, 0xfe74), # Arabic Pres. Forms B
regex_range(0xfe76, 0xfefc),
regex_range(0x200c, 0x200c), # Zero-Width Non-Joiner
regex_range(0x0e01, 0x0e3a), # Thai
regex_range(0x0e40, 0x0e4e), # Hangul (Korean)
regex_range(0x1100, 0x11ff), # Hangul Jamo
regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
regex_range(0xAC00, 0xD7AF), # Hangul Syllables
regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
].join('').freeze
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
CJ_HASHTAG_CHARACTERS = [
regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
regex_range(0x2F800, 0x2FA1F), regex_range(0x3003), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
].join('').freeze
PUNCTUATION_CHARS = '!"#$%&\'()*+,-./:;<=>?@\[\]^_\`{|}~'
SPACE_CHARS = " \t\n\x0B\f\r"
CTRL_CHARS = "\x00-\x1F\x7F"
# A hashtag must contain latin characters, numbers and underscores, but not all numbers.
HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
HASHTAG_ALPHANUMERIC = /[a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
HASHTAG_BOUNDARY = /\A|\z|[^&a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/o
HASHTAG = /(#{HASHTAG_BOUNDARY})(#|＃)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io
REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
# Used in Extractor for final filtering
REGEXEN[:end_hashtag_match] = /\A(?:[#＃]|:\/\/)/o
REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_!#\$%&*@＠]|^|[rR][tT]:?)/o
REGEXEN[:at_signs] = /[@＠]/
REGEXEN[:valid_mention_or_list] = /
(#{REGEXEN[:valid_mention_preceding_chars]}) # $1: Preceding character
(#{REGEXEN[:at_signs]}) # $2: At mark
([a-zA-Z0-9_]{1,20}) # $3: Screen name
(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})? # $4: List (optional)
/ox
REGEXEN[:valid_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
# Used in Extractor for final filtering
REGEXEN[:end_mention_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
# URL related hash regex collection
REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@＠$#＃#{INVALID_CHARACTERS.join('')}]|^)/io
REGEXEN[:invalid_url_without_protocol_preceding_chars] = /[-_.\/]$/
DOMAIN_VALID_CHARS = "[^#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-z@]|$))/i
REGEXEN[:valid_ccTLD] = %r{
(?:
(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|
th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
(?=[^0-9a-z@]|$)
)
}ix
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i
REGEXEN[:valid_domain] = /(?:
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
)/iox
# This is used in Extractor
REGEXEN[:valid_ascii_domain] = /
(?:(?:[A-Za-z0-9\-_]|#{REGEXEN[:latin_accents]})+\.)+
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
/iox
# This is used in Extractor for stricter t.co URL extraction
REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/[a-z0-9]+/i
# This is used in Extractor to filter out unwanted URLs.
REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
REGEXEN[:valid_port_number] = /[0-9]+/
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|@#{LATIN_ACCENTS}]/io
# Allow URL paths to contain up to two nested levels of balanced parens
# 1. Used in Wikipedia URLs like /Primer_(film)
# 2. Used in IIS sessions like /S(dfd346)/
# 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
REGEXEN[:valid_url_balanced_parens] = /
\(
(?:
#{REGEXEN[:valid_general_url_path_chars]}+
|
# allow one nested level of balanced parentheses
(?:
#{REGEXEN[:valid_general_url_path_chars]}*
\(
#{REGEXEN[:valid_general_url_path_chars]}+
\)
#{REGEXEN[:valid_general_url_path_chars]}*
)
)
\)
/iox
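    # Illustrative matches (a sketch):
    #   REGEXEN[:valid_url_balanced_parens] =~ "(film)"                    # => 0 (as in /Primer_(film))
    #   REGEXEN[:valid_url_balanced_parens] =~ "(Album_Version_(Edited))"  # => 0 (one nested level)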
    # Valid end-of-path characters (so /foo. does not gobble the period).
# 1. Allow = for empty URL parameters and other URL-join artifacts
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
REGEXEN[:valid_url_path] = /(?:
(?:
#{REGEXEN[:valid_general_url_path_chars]}*
(?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
#{REGEXEN[:valid_url_path_ending_chars]}
)|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
)/iox
REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@]/i
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
REGEXEN[:valid_url] = %r{
( # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
(https?:\/\/)? # $4 Protocol (optional)
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
)
)
}iox
REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
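    # Illustrative match (a sketch): in "I like $TWTR and $BRK.A" both cashtags match;
    # for the first match $1 => " ", $2 => "$", $3 => "TWTR".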
# These URL validation pattern strings are based on the ABNF from RFC 3986
REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i
REGEXEN[:validate_url_pct_encoded] = /(?:%[0-9a-f]{2})/i
REGEXEN[:validate_url_sub_delims] = /[!$&'()*+,;=]/i
REGEXEN[:validate_url_pchar] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}|
[:\|@]
)/iox
REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
REGEXEN[:validate_url_userinfo] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}|
:
)*/iox
REGEXEN[:validate_url_dec_octet] = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i
REGEXEN[:validate_url_ipv4] =
/(?:#{REGEXEN[:validate_url_dec_octet]}(?:\.#{REGEXEN[:validate_url_dec_octet]}){3})/iox
# Punting on real IPv6 validation for now
REGEXEN[:validate_url_ipv6] = /(?:\[[a-f0-9:\.]+\])/i
# Also punting on IPvFuture for now
REGEXEN[:validate_url_ip] = /(?:
#{REGEXEN[:validate_url_ipv4]}|
#{REGEXEN[:validate_url_ipv6]}
)/iox
# This is more strict than the rfc specifies
REGEXEN[:validate_url_subdomain_segment] = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain_segment] = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain_tld] = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i
REGEXEN[:validate_url_domain] = /(?:(?:#{REGEXEN[:validate_url_subdomain_segment]}\.)*
(?:#{REGEXEN[:validate_url_domain_segment]}\.)
#{REGEXEN[:validate_url_domain_tld]})/iox
REGEXEN[:validate_url_host] = /(?:
#{REGEXEN[:validate_url_ip]}|
#{REGEXEN[:validate_url_domain]}
)/iox
# Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
REGEXEN[:validate_url_unicode_subdomain_segment] =
/(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain_segment] =
/(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain_tld] =
/(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix
REGEXEN[:validate_url_unicode_domain] = /(?:(?:#{REGEXEN[:validate_url_unicode_subdomain_segment]}\.)*
(?:#{REGEXEN[:validate_url_unicode_domain_segment]}\.)
#{REGEXEN[:validate_url_unicode_domain_tld]})/iox
REGEXEN[:validate_url_unicode_host] = /(?:
#{REGEXEN[:validate_url_ip]}|
#{REGEXEN[:validate_url_unicode_domain]}
)/iox
REGEXEN[:validate_url_port] = /[0-9]{1,5}/
REGEXEN[:validate_url_unicode_authority] = %r{
(?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo
(#{REGEXEN[:validate_url_unicode_host]}) # $2 host
(?::(#{REGEXEN[:validate_url_port]}))? # $3 port
}iox
REGEXEN[:validate_url_authority] = %r{
(?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo
(#{REGEXEN[:validate_url_host]}) # $2 host
(?::(#{REGEXEN[:validate_url_port]}))? # $3 port
}iox
REGEXEN[:validate_url_path] = %r{(/#{REGEXEN[:validate_url_pchar]}*)*}i
REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
# Modified version of RFC 3986 Appendix B
REGEXEN[:validate_url_unencoded] = %r{
\A # Full URL
(?:
([^:/?#]+):// # $1 Scheme
)?
([^/?#]*) # $2 Authority
([^?#]*) # $3 Path
(?:
\?([^#]*) # $4 Query
)?
(?:
\#(.*) # $5 Fragment
)?\Z
}ix
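    # Illustrative captures (a sketch) for "http://example.com/path?q=1#frag":
    #   $1 => "http", $2 => "example.com", $3 => "/path", $4 => "q=1", $5 => "frag"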
REGEXEN[:rtl_chars] = /[#{RTL_CHARACTERS}]/io
REGEXEN.each_pair{|k,v| v.freeze }
# Return the regular expression for a given key. If the key
    # is not a known symbol, nil is returned.
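    # e.g. Twitter::Regex[:valid_hashtag] returns the hashtag pattern defined above,
    #      while Twitter::Regex[:no_such_key] returns nil (illustrative; :no_such_key is a hypothetical key).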
def self.[](key)
REGEXEN[key]
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/rewriter.rb 0000664 0000000 0000000 00000003625 12240766003 0022234 0 ustar 00root root 0000000 0000000 module Twitter
  # A module that provides base methods for rewriting usernames, lists, hashtags and URLs.
module Rewriter extend self
def rewrite_entities(text, entities)
chars = text.to_s.to_char_a
# sort by start index
entities = entities.sort_by{|entity| entity[:indices].first}
result = []
last_index = entities.inject(0) do |last_index, entity|
result << chars[last_index...entity[:indices].first]
result << yield(entity, chars)
entity[:indices].last
end
result << chars[last_index..-1]
result.flatten.join
end
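    # Illustrative usage of rewrite_entities (a sketch; the entity hash below is hand-built,
    # not produced by the Extractor):
    #   Twitter::Rewriter.rewrite_entities("hi @alice", [{:indices => [3, 9]}]) do |entity, chars|
    #     "<b>#{chars[entity[:indices].first...entity[:indices].last].join}</b>"
    #   end
    #   # => "hi <b>@alice</b>"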
# These methods are deprecated, will be removed in future.
extend Deprecation
def rewrite(text, options = {})
      [:hashtags, :urls, :usernames_or_lists].inject(text) do |text, key|
options[key] ? send(:"rewrite_#{key}", text, &options[key]) : text
end
end
deprecate :rewrite, :rewrite_entities
def rewrite_usernames_or_lists(text)
entities = Extractor.extract_mentions_or_lists_with_indices(text)
rewrite_entities(text, entities) do |entity, chars|
at = chars[entity[:indices].first]
list_slug = entity[:list_slug]
list_slug = nil if list_slug.empty?
yield(at, entity[:screen_name], list_slug)
end
end
deprecate :rewrite_usernames_or_lists, :rewrite_entities
def rewrite_hashtags(text)
entities = Extractor.extract_hashtags_with_indices(text)
rewrite_entities(text, entities) do |entity, chars|
hash = chars[entity[:indices].first]
yield(hash, entity[:hashtag])
end
end
deprecate :rewrite_hashtags, :rewrite_entities
def rewrite_urls(text)
entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
rewrite_entities(text, entities) do |entity, chars|
yield(entity[:url])
end
end
deprecate :rewrite_urls, :rewrite_entities
end
end
twitter-text-rb-1.7.0/lib/twitter-text/unicode.rb 0000664 0000000 0000000 00000001624 12240766003 0022014 0 ustar 00root root 0000000 0000000 module Twitter
# This module lazily defines constants of the form Uxxxx for all Unicode
# codepoints from U0000 to U10FFFF. The value of each constant is the
# UTF-8 string for the codepoint.
# Examples:
# copyright = Unicode::U00A9
# euro = Unicode::U20AC
# infinity = Unicode::U221E
#
module Unicode
CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
def self.const_missing(name)
# Check that the constant name is of the right form: U0000 to U10FFFF
if name.to_s =~ CODEPOINT_REGEX
# Convert the codepoint to an immutable UTF-8 string,
# define a real constant for that value and return the value
#p name, name.class
const_set(name, [$1.to_i(16)].pack("U").freeze)
else # Raise an error for constants that are not Unicode.
raise NameError, "Uninitialized constant: Unicode::#{name}"
end
end
end
end
twitter-text-rb-1.7.0/lib/twitter-text/validation.rb 0000664 0000000 0000000 00000010771 12240766003 0022523 0 ustar 00root root 0000000 0000000 require 'unf'
module Twitter
module Validation extend self
MAX_LENGTH = 140
DEFAULT_TCO_URL_LENGTHS = {
:short_url_length => 22,
:short_url_length_https => 23,
:characters_reserved_per_media => 22
}.freeze
    # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC form
# (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
# string no matter which actual form was transmitted. For example:
#
# U+0065 Latin Small Letter E
# + U+0301 Combining Acute Accent
# ----------
    #   = 2 codepoints (2 characters), displayed as é (1 visual glyph)
    #   … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
#
# The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
#
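    # Illustrative values (a sketch):
    #   tweet_length("caf" + [0x0065, 0x0301].pack("U*"))  # => 4  (NFC folds e + combining accent into one character)
    #   tweet_length("http://example.com")                 # => 22 (URLs count as :short_url_length, not their raw length)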
def tweet_length(text, options = {})
options = DEFAULT_TCO_URL_LENGTHS.merge(options)
length = text.to_nfc.unpack("U*").length
Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
length += start_position - end_position
length += url.downcase =~ /^https:\/\// ? options[:short_url_length_https] : options[:short_url_length]
end
length
end
# Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
# before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
# will allow quicker feedback.
#
# Returns false if this text is valid. Otherwise one of the following Symbols will be returned:
#
# :too_long:: if the text is too long
# :empty:: if the text is nil or empty
# :invalid_characters:: if the text contains non-Unicode or any of the disallowed Unicode characters
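    # Illustrative values (a sketch):
    #   tweet_invalid?("")         # => :empty
    #   tweet_invalid?("a" * 141)  # => :too_long
    #   tweet_invalid?("hello")    # => false (valid)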
def tweet_invalid?(text)
return :empty if !text || text.empty?
begin
return :too_long if tweet_length(text) > MAX_LENGTH
return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
rescue ArgumentError => e
# non-Unicode value.
return :invalid_characters
end
return false
end
def valid_tweet_text?(text)
!tweet_invalid?(text)
end
def valid_username?(username)
return false if !username || username.empty?
extracted = Twitter::Extractor.extract_mentioned_screen_names(username)
# Should extract the username minus the @ sign, hence the [1..-1]
extracted.size == 1 && extracted.first == username[1..-1]
end
VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
def valid_list?(username_list)
match = username_list.match(VALID_LIST_RE)
# Must have matched and had nothing before or after
!!(match && match[1] == "" && match[4] && !match[4].empty?)
end
def valid_hashtag?(hashtag)
return false if !hashtag || hashtag.empty?
extracted = Twitter::Extractor.extract_hashtags(hashtag)
# Should extract the hashtag minus the # sign, hence the [1..-1]
extracted.size == 1 && extracted.first == hashtag[1..-1]
end
def valid_url?(url, unicode_domains=true, require_protocol=true)
return false if !url || url.empty?
url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
return false unless (url_parts && url_parts.to_s == url)
scheme, authority, path, query, fragment = url_parts.captures
return false unless ((!require_protocol ||
(valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
valid_match?(path, Twitter::Regex[:validate_url_path]) &&
valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) ||
(!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority]))
end
private
def valid_match?(string, regex, optional=false)
return (string && string.match(regex) && $~.to_s == string) unless optional
!(string && (!string.match(regex) || $~.to_s != string))
end
end
end
twitter-text-rb-1.7.0/script/ 0000775 0000000 0000000 00000000000 12240766003 0016110 5 ustar 00root root 0000000 0000000 twitter-text-rb-1.7.0/script/destroy 0000775 0000000 0000000 00000000560 12240766003 0017530 0 ustar 00root root 0000000 0000000 #!/usr/bin/env ruby
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
begin
require 'rubigen'
rescue LoadError
require 'rubygems'
require 'rubigen'
end
require 'rubigen/scripts/destroy'
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)
twitter-text-rb-1.7.0/script/generate 0000775 0000000 0000000 00000000562 12240766003 0017633 0 ustar 00root root 0000000 0000000 #!/usr/bin/env ruby
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
begin
require 'rubigen'
rescue LoadError
require 'rubygems'
require 'rubigen'
end
require 'rubigen/scripts/generate'
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)
twitter-text-rb-1.7.0/spec/ 0000775 0000000 0000000 00000000000 12240766003 0015536 5 ustar 00root root 0000000 0000000 twitter-text-rb-1.7.0/spec/autolinking_spec.rb 0000664 0000000 0000000 00000072715 12240766003 0021435 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
class TestAutolink
include Twitter::Autolink
end
describe Twitter::Autolink do
def original_text; end
def url; end
describe "auto_link_custom" do
before do
@autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
end
describe "username autolinking" do
context "username preceded by a space" do
def original_text; "hello @jacob"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('jacob')
end
end
context "username at beginning of line" do
def original_text; "@jacob you're cool"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('jacob')
end
end
context "username preceded by word character" do
def original_text; "meet@the beach"; end
it "should not be linked" do
Nokogiri::HTML(@autolinked_text).search('a').should be_empty
end
end
context "username preceded by non-word character" do
def original_text; "great.@jacob"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('jacob')
end
end
context "username containing non-word characters" do
def original_text; "@zach&^$%^"; end
it "should not be linked" do
@autolinked_text.should link_to_screen_name('zach')
end
end
context "username over twenty characters" do
def original_text
@twenty_character_username = "zach" * 5
"@" + @twenty_character_username + "1"
end
it "should not be linked" do
@autolinked_text.should link_to_screen_name(@twenty_character_username)
end
end
context "username followed by japanese" do
def original_text; "@jacobの"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('jacob')
end
end
context "username preceded by japanese" do
def original_text; "あ@matz"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('matz')
end
end
context "username surrounded by japanese" do
def original_text; "あ@yoshimiの"; end
it "should be linked" do
@autolinked_text.should link_to_screen_name('yoshimi')
end
end
context "username using full-width at-sign" do
def original_text
"#{[0xFF20].pack('U')}jacob"
end
it "should be linked" do
@autolinked_text.should link_to_screen_name('jacob')
end
end
end
describe "list path autolinking" do
context "when List is not available" do
it "should not be linked" do
@autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
@autolinked_text.should_not link_to_list_path('jacob/my-list')
@autolinked_text.should include('my-list')
end
end
context "slug preceded by a space" do
def original_text; "hello @jacob/my-list"; end
it "should be linked" do
@autolinked_text.should link_to_list_path('jacob/my-list')
end
end
context "username followed by a slash but no list" do
def original_text; "hello @jacob/ my-list"; end
it "should NOT be linked" do
@autolinked_text.should_not link_to_list_path('jacob/my-list')
@autolinked_text.should link_to_screen_name('jacob')
end
end
context "empty username followed by a list" do
def original_text; "hello @/my-list"; end
it "should NOT be linked" do
Nokogiri::HTML(@autolinked_text).search('a').should be_empty
end
end
context "list slug at beginning of line" do
def original_text; "@jacob/my-list"; end
it "should be linked" do
@autolinked_text.should link_to_list_path('jacob/my-list')
end
end
context "username preceded by alpha-numeric character" do
def original_text; "meet@the/beach"; end
it "should not be linked" do
Nokogiri::HTML(@autolinked_text).search('a').should be_empty
end
end
context "username preceded by non-word character" do
def original_text; "great.@jacob/my-list"; end
it "should be linked" do
@autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list")
@autolinked_text.should link_to_list_path('jacob/my-list')
end
end
context "username containing non-word characters" do
def original_text; "@zach/test&^$%^"; end
it "should be linked" do
@autolinked_text.should link_to_list_path('zach/test')
end
end
context "username over twenty characters" do
def original_text
@twentyfive_character_list = "jack/" + ("a" * 25)
"@#{@twentyfive_character_list}12345"
end
it "should be linked" do
@autolinked_text.should link_to_list_path(@twentyfive_character_list)
end
end
end
describe "hashtag autolinking" do
context "with an all numeric hashtag" do
def original_text; "#123"; end
it "should not be linked" do
@autolinked_text.should_not have_autolinked_hashtag('#123')
end
end
context "with a hashtag with alphanumeric characters" do
def original_text; "#ab1d"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag('#ab1d')
end
end
context "with a hashtag with underscores" do
def original_text; "#a_b_c_d"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag(original_text)
end
end
context "with a hashtag that is preceded by a word character" do
def original_text; "ab#cd"; end
it "should not be linked" do
@autolinked_text.should_not have_autolinked_hashtag(original_text)
end
end
context "with a page anchor in a url" do
def original_text; "Here's my url: http://foobar.com/#home"; end
it "should not link the hashtag" do
@autolinked_text.should_not have_autolinked_hashtag('#home')
end
it "should link the url" do
@autolinked_text.should have_autolinked_url('http://foobar.com/#home')
end
end
context "with a hashtag that starts with a number but has word characters" do
def original_text; "#2ab"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag(original_text)
end
end
context "with multiple valid hashtags" do
def original_text; "I'm frickin' awesome #ab #cd #ef"; end
it "links each hashtag" do
@autolinked_text.should have_autolinked_hashtag('#ab')
@autolinked_text.should have_autolinked_hashtag('#cd')
@autolinked_text.should have_autolinked_hashtag('#ef')
end
end
context "with a hashtag preceded by a ." do
def original_text; "ok, great.#abc"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag('#abc')
end
end
context "with a hashtag preceded by a &" do
def original_text; "nbsp;"; end
it "should not be linked" do
@autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
end
end
context "with a hashtag that ends in an !" do
def original_text; "#great!"; end
it "should be linked, but should not include the !" do
@autolinked_text.should have_autolinked_hashtag('#great')
end
end
context "with a hashtag followed by Japanese" do
def original_text; "#twj_devの"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag('#twj_devの')
end
end
context "with a hashtag preceded by a full-width space" do
def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
end
end
context "with a hashtag followed by a full-width space" do
def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
it "should be linked" do
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
end
end
context "with a hashtag using full-width hash" do
def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
it "should be linked" do
link = Nokogiri::HTML(@autolinked_text).search('a')
(link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev'
end
end
context "with a hashtag containing an accented latin character" do
def original_text
# the hashtag is #éhashtag
"##{[0x00e9].pack('U')}hashtag"
end
it "should be linked" do
@autolinked_text.should == "#éhashtag"
end
end
end
describe "URL autolinking" do
def url; "http://www.google.com"; end
context "when embedded in plain text" do
def original_text; "On my search engine #{url} I found good links."; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "when surrounded by Japanese;" do
def original_text; "いまなにしてる#{url}いまなにしてる"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with a path surrounded by parentheses;" do
def original_text; "I found a neatness (#{url})"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
context "when the URL ends with a slash;" do
def url; "http://www.google.com/"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "when the URL has a path;" do
def url; "http://www.google.com/fsdfasdf"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
end
context "when path contains parens" do
def original_text; "I found a neatness (#{url})"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
context "wikipedia" do
def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "IIS session" do
def url; "http://msdn.com/S(deadbeef)/page.htm"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "unbalanced parens" do
def url; "http://example.com/i_has_a_("; end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://example.com/i_has_a_")
end
end
context "balanced parens with a double quote inside" do
def url; "http://foo.com/foo_(\")_bar" end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://foo.com/foo_")
end
end
context "balanced parens hiding XSS" do
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
it "should be linked" do
@autolinked_text.should have_autolinked_url("http://x.xx.com/")
end
end
end
context "when preceded by a :" do
def original_text; "Check this out @hoverbird:#{url}"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with a URL ending in allowed punctuation" do
it "does not consume ending punctuation" do
matcher = TestAutolink.new
%w| ? ! , . : ; ] ) } = \ ' |.each do |char|
matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
end
end
end
context "with a URL preceded in forbidden characters" do
it "should be linked" do
matcher = TestAutolink.new
%w| \ ' / ! = |.each do |char|
matcher.auto_link("#{char}#{url}").should have_autolinked_url(url)
end
end
end
context "when embedded in a link tag" do
def original_text; "#{url}"; end
it "should be linked" do
@autolinked_text.should have_autolinked_url(url)
end
end
context "with multiple URLs" do
def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
it "should autolink each one" do
@autolinked_text.should have_autolinked_url('http://www.links.org')
@autolinked_text.should have_autolinked_url('http://www.foo.org')
end
end
context "with multiple URLs in different formats" do
def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
it "should autolink each one, in the proper order" do
@autolinked_text.should have_autolinked_url('http://foo.com')
@autolinked_text.should have_autolinked_url('https://bar.com')
@autolinked_text.should have_autolinked_url('http://mail.foobar.org')
end
end
context "with a URL having a long TLD" do
def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
it "should autolink it" do
@autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
end
end
context "with a url lacking the protocol" do
def original_text; "I like www.foobar.com dudes"; end
it "does not link at all" do
link = Nokogiri::HTML(@autolinked_text).search('a')
link.should be_empty
end
end
context "with a @ in a URL" do
context "with XSS attack" do
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
it "should not allow XSS follwing @" do
@autolinked_text.should have_autolinked_url('http://x.xx.com/')
end
end
context "with a username not followed by a /" do
def original_text; 'http://example.com/@foobar'; end
it "should link url" do
@autolinked_text.should have_autolinked_url('http://example.com/@foobar')
end
end
context "with a username followed by a /" do
def original_text; 'http://example.com/@foobar/'; end
it "should not link the username but link full url" do
@autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
@autolinked_text.should_not link_to_screen_name('foobar')
end
end
end
context "regex engine quirks" do
context "does not spiral out of control on repeated periods" do
def original_text; "Test a ton of periods http://example.com/path.........................................."; end
it "should autolink" do
@autolinked_text.should have_autolinked_url('http://example.com/path')
end
end
context "does not spiral out of control on repeated dashes" do
def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end
it "should autolink" do
@autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188')
end
end
end
end
describe "Autolink all" do
before do
@linker = TestAutolink.new
end
it "should allow url/hashtag overlap" do
auto_linked = @linker.auto_link("https://twitter.com/#search")
auto_linked.should have_autolinked_url('https://twitter.com/#search')
end
it "should not add invalid option in HTML tags" do
auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
auto_linked.should have_autolinked_url('https://twitter.com/')
auto_linked.should_not include('hashtag_class')
auto_linked.should_not include('hashtag_classname')
end
it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do
auto_linked = @linker.auto_link("#{[0x10400].pack('U')} #hashtag #{[0x10400].pack('U')} @mention #{[0x10400].pack('U')} http://twitter.com/")
auto_linked.should have_autolinked_hashtag('#hashtag')
auto_linked.should link_to_screen_name('mention')
auto_linked.should have_autolinked_url('http://twitter.com/')
end
end
end
describe "autolinking options" do
before do
@linker = TestAutolink.new
end
it "should show display_url when :url_entities provided" do
linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{
"url" => "http://t.co/0JG5Mcq",
"display_url" => "blog.twitter.com/2011/05/twitte…",
"expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
"indices" => [
84,
103
]
}])
html = Nokogiri::HTML(linked)
html.search('a').should_not be_empty
html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
html.search('span[@style="position:absolute;left:-9999px;"]').size.should == 4
end
it "should accept invisible_tag_attrs option" do
linked = @linker.auto_link("http://t.co/0JG5Mcq",
{
:url_entities => [{
"url" => "http://t.co/0JG5Mcq",
"display_url" => "blog.twitter.com/2011/05/twitte…",
"expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
"indices" => [
0,
19
]
}],
:invisible_tag_attrs => "style='dummy;'"
})
html = Nokogiri::HTML(linked)
html.search('span[@style="dummy;"]').size.should == 4
end
it "should show display_url if available in entity" do
linked = @linker.auto_link_entities("http://t.co/0JG5Mcq",
[{
:url => "http://t.co/0JG5Mcq",
:display_url => "blog.twitter.com/2011/05/twitte…",
:expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
:indices => [0, 19]
}]
)
html = Nokogiri::HTML(linked)
html.search('a').should_not be_empty
html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
end
it "should apply :class as a CSS class" do
linked = @linker.auto_link("http://example.com/", :class => 'myclass')
linked.should have_autolinked_url('http://example.com/')
linked.should match(/myclass/)
end
it "should apply :url_class only on URL" do
linked = @linker.auto_link("http://twitter.com")
linked.should have_autolinked_url('http://twitter.com')
linked.should_not match(/class/)
linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass')
linked.should have_autolinked_url('http://twitter.com')
linked.should match(/class=\"testClass\"/)
linked = @linker.auto_link("#hash @tw", :url_class => 'testClass')
linked.should match(/class=\"tweet-url hashtag\"/)
linked.should match(/class=\"tweet-url username\"/)
linked.should_not match(/class=\"testClass\"/)
end
it "should add rel=nofollow by default" do
linked = @linker.auto_link("http://example.com/")
linked.should have_autolinked_url('http://example.com/')
linked.should match(/nofollow/)
end
it "should include the '@' symbol in a username when passed :username_include_symbol" do
linked = @linker.auto_link("@user", :username_include_symbol => true)
linked.should link_to_screen_name('user', '@user')
end
it "should include the '@' symbol in a list when passed :username_include_symbol" do
linked = @linker.auto_link("@user/list", :username_include_symbol => true)
linked.should link_to_list_path('user/list', '@user/list')
end
it "should not add rel=nofollow when passed :suppress_no_follow" do
linked = @linker.auto_link("http://example.com/", :suppress_no_follow => true)
linked.should have_autolinked_url('http://example.com/')
linked.should_not match(/nofollow/)
end
it "should not add a target attribute by default" do
linked = @linker.auto_link("http://example.com/")
linked.should have_autolinked_url('http://example.com/')
linked.should_not match(/target=/)
end
it "should respect the :target option" do
linked = @linker.auto_link("http://example.com/", :target => 'mywindow')
linked.should have_autolinked_url('http://example.com/')
linked.should match(/target="mywindow"/)
end
it "should customize href by username_url_block option" do
linked = @linker.auto_link("@test", :username_url_block => lambda{|a| "dummy"})
linked.should have_autolinked_url('dummy', 'test')
end
it "should customize href by list_url_block option" do
linked = @linker.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"})
linked.should have_autolinked_url('dummy', 'test/list')
end
it "should customize href by hashtag_url_block option" do
linked = @linker.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"})
linked.should have_autolinked_url('dummy', '#hashtag')
end
it "should customize href by cashtag_url_block option" do
linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"})
linked.should have_autolinked_url('dummy', '$CASH')
end
it "should customize href by link_url_block option" do
linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"})
linked.should have_autolinked_url('dummy', 'http://example.com/')
end
it "should modify link attributes by link_attribute_block" do
linked = @linker.auto_link("#hash @mention",
:link_attribute_block => lambda{|entity, attributes|
attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag]
}
)
      linked.should match(/<a[^>]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/)
      linked.should_not match(/<a[^>]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/)
linked.should_not match(/link_attribute_block/i)
linked = @linker.auto_link("@mention http://twitter.com/",
:link_attribute_block => lambda{|entity, attributes|
attributes["dummy-url-attr"] = entity[:url] if entity[:url]
}
)
      linked.should_not match(/<a[^>]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/)
      linked.should match(/<a[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"/)
end
it "should modify link text by link_text_block" do
linked = @linker.auto_link("#hash @mention",
:link_text_block => lambda{|entity, text|
entity[:hashtag] ? "#replaced" : "pre_#{text}_post"
}
)
      linked.should match(/<a[^>]+>#replaced<\/a>/)
      linked.should match(/<a[^>]+>pre_mention_post<\/a>/)
linked = @linker.auto_link("#hash @mention", {
:link_text_block => lambda{|entity, text|
"pre_#{text}_post"
},
:symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true
})
      linked.should match(/<a[^>]+>pre_<s>#<\/s><b>hash<\/b>_post<\/a>/)
      linked.should match(/<a[^>]+>pre_<s>@<\/s><b>mention<\/b>_post<\/a>/)
end
it "should apply :url_target only to auto-linked URLs" do
auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'})
auto_linked.should have_autolinked_hashtag('#hashtag')
auto_linked.should link_to_screen_name('mention')
auto_linked.should have_autolinked_url('http://test.com/')
      auto_linked.should_not match(/<a[^>]+hashtag[^>]+target[^>]+>/)
      auto_linked.should_not match(/<a[^>]+username[^>]+target[^>]+>/)
      auto_linked.should match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/)
end
end
describe "link_url_with_entity" do
before do
@linker = TestAutolink.new
end
it "should use display_url and expanded_url" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "twitter.com",
:expanded_url => "http://twitter.com/"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "http://twitter.com/";
end
it "should correctly handle display_url ending with '…'" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "twitter.com…",
:expanded_url => "http://twitter.com/abcdefg"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "http://twitter.com/abcdefg…";
end
it "should correctly handle display_url starting with '…'" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "…tter.com/abcdefg",
:expanded_url => "http://twitter.com/abcdefg"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "…http://twitter.com/abcdefg";
end
it "should not create spans if display_url and expanded_url are on different domains" do
@linker.send(:link_url_with_entity,
{
:url => "http://t.co/abcde",
:display_url => "pic.twitter.com/xyz",
:expanded_url => "http://twitter.com/foo/statuses/123/photo/1"},
{:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "pic.twitter.com/xyz"
end
end
describe "symbol_tag" do
before do
@linker = TestAutolink.new
end
it "should put :symbol_tag around symbol" do
@linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true}).should match(/@<\/s>mention/)
@linker.auto_link("#hash", {:symbol_tag => 's'}).should match(/#<\/s>hash/)
result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true})
      result.should match(/<b>@<\/b>mention/)
      result.should match(/<b>#<\/b>hash/)
      result.should match(/<b>\$<\/b>CASH/)
end
it "should put :text_with_symbol_tag around text" do
result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'})
      result.should match(/<b>mention<\/b>/)
      result.should match(/<b>hash<\/b>/)
      result.should match(/<b>CASH<\/b>/)
end
it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do
result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true})
      result.should match(/<s>@<\/s><b>mention<\/b>/)
      result.should match(/<s>#<\/s><b>hash<\/b>/)
      result.should match(/<s>\$<\/s><b>CASH<\/b>/)
end
end
describe "html_escape" do
before do
@linker = TestAutolink.new
end
it "should escape html entities properly" do
@linker.html_escape("&").should == "&"
@linker.html_escape(">").should == ">"
@linker.html_escape("<").should == "<"
@linker.html_escape("\"").should == """
@linker.html_escape("'").should == "'"
@linker.html_escape("&<>\"").should == "&<>""
@linker.html_escape("
").should == "<div>"
@linker.html_escape("a&b").should == "a&b"
@linker.html_escape("twitter & friends").should == "<a href="https://twitter.com" target="_blank">twitter & friends</a>"
@linker.html_escape("&").should == "&"
@linker.html_escape(nil).should == nil
end
end
end
twitter-text-rb-1.7.0/spec/extractor_spec.rb 0000664 0000000 0000000 00000032227 12240766003 0021116 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
class TestExtractor
include Twitter::Extractor
end
describe Twitter::Extractor do
before do
@extractor = TestExtractor.new
end
describe "mentions" do
context "single screen name alone " do
it "should be linked" do
@extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
end
it "should be linked with _" do
@extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
end
it "should be linked if numeric" do
@extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
end
end
context "multiple screen names" do
it "should both be linked" do
@extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
end
end
context "screen names embedded in text" do
it "should be linked in Latin text" do
@extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
end
it "should be linked in Japanese text" do
@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
end
it "should ignore mentions preceded by !, @, #, $, %, & or *" do
invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
invalid_chars.each do |c|
@extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
end
end
end
it "should accept a block arugment and call it in order" do
needed = ["alice", "bob"]
@extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
sn.should == needed.shift
end
needed.should == []
end
end
describe "mentions with indices" do
context "single screen name alone " do
it "should be linked and the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}]
end
it "should be linked with _ and the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}]
end
it "should be linked if numeric and the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}]
end
end
context "multiple screen names" do
it "should both be linked with the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should ==
[{:screen_name => "alice", :indices => [0, 6]},
{:screen_name => "bob", :indices => [7, 11]}]
end
it "should be linked with the correct indices even when repeated" do
@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should ==
[{:screen_name => "alice", :indices => [0, 6]},
{:screen_name => "alice", :indices => [7, 13]},
{:screen_name => "bob", :indices => [14, 18]}]
end
end
context "screen names embedded in text" do
it "should be linked in Latin text with the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}]
end
it "should be linked in Japanese text with the correct indices" do
@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}]
end
end
it "should accept a block arugment and call it in order" do
needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}]
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
data = needed.shift
sn.should == data[:screen_name]
start_index.should == data[:indices].first
end_index.should == data[:indices].last
end
needed.should == []
end
it "should extract screen name in text with supplementary character" do
@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").should == [{:screen_name => "alice", :indices => [2, 8]}]
end
end
describe "replies" do
context "should be extracted from" do
it "should extract from lone name" do
@extractor.extract_reply_screen_name("@alice").should == "alice"
end
it "should extract from the start" do
@extractor.extract_reply_screen_name("@alice reply text").should == "alice"
end
it "should extract preceded by a space" do
@extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
end
it "should extract preceded by a full-width space" do
@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
end
end
context "should not be extracted from" do
it "should not be extracted when preceded by text" do
@extractor.extract_reply_screen_name("reply @alice text").should == nil
end
it "should not be extracted when preceded by puctuation" do
%w(. / _ - + # ! @).each do |punct|
@extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
end
end
end
context "should accept a block arugment" do
it "should call the block on match" do
@extractor.extract_reply_screen_name("@alice") do |sn|
sn.should == "alice"
end
end
it "should not call the block on no match" do
calls = 0
@extractor.extract_reply_screen_name("not a reply") do |sn|
calls += 1
end
calls.should == 0
end
end
end
describe "urls" do
describe "matching URLS" do
TestUrls::VALID.each do |url|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
@extractor.extract_urls(url).first.should include(url)
end
it "should match the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
@extractor.extract_urls(text).first.should include(url)
end
end
end
describe "invalid URLS" do
it "does not link urls with invalid domains" do
@extractor.extract_urls("http://tld-too-short.x").should == []
end
end
describe "t.co URLS" do
TestUrls::TCO.each do |url|
it "should only extract the t.co URL from the URL #{url}" do
extracted_urls = @extractor.extract_urls(url)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url.should_not == url
extracted_url.should == url[0...20]
end
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
extracted_urls = @extractor.extract_urls(text)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url.should_not == url
extracted_url.should == url[0...20]
end
end
end
end
describe "urls with indices" do
describe "matching URLS" do
TestUrls::VALID.each do |url|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
extracted_urls = @extractor.extract_urls_with_indices(url)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url[:url].should include(url)
extracted_url[:indices].first.should == 0
extracted_url[:indices].last.should == url.chars.to_a.size
end
it "should match the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
extracted_urls = @extractor.extract_urls_with_indices(text)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url[:url].should include(url)
extracted_url[:indices].first.should == 11
extracted_url[:indices].last.should == 11 + url.chars.to_a.size
end
end
it "should extract URL in text with supplementary character" do
@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").should == [{:url => "http://twitter.com", :indices => [2, 20]}]
end
end
describe "invalid URLS" do
it "does not link urls with invalid domains" do
@extractor.extract_urls_with_indices("http://tld-too-short.x").should == []
end
end
describe "t.co URLS" do
TestUrls::TCO.each do |url|
it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
extracted_urls = @extractor.extract_urls_with_indices(url)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url[:url].should_not include(url)
extracted_url[:url].should include(url[0...20])
extracted_url[:indices].first.should == 0
extracted_url[:indices].last.should == 20
end
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
extracted_urls = @extractor.extract_urls_with_indices(text)
extracted_urls.size.should == 1
extracted_url = extracted_urls.first
extracted_url[:url].should_not include(url)
extracted_url[:url].should include(url[0...20])
extracted_url[:indices].first.should == 11
extracted_url[:indices].last.should == 31
end
end
end
end
describe "hashtags" do
context "extracts latin/numeric hashtags" do
%w(text text123 123text).each do |hashtag|
it "should extract ##{hashtag}" do
@extractor.extract_hashtags("##{hashtag}").should == [hashtag]
end
it "should extract ##{hashtag} within text" do
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
end
end
end
context "international hashtags" do
context "should allow accents" do
%w(mañana café münchen).each do |hashtag|
it "should extract ##{hashtag}" do
@extractor.extract_hashtags("##{hashtag}").should == [hashtag]
end
it "should extract ##{hashtag} within text" do
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
end
end
it "should not allow the multiplication character" do
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == ["pre"]
end
it "should not allow the division character" do
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == ["pre"]
end
end
end
it "should not extract numeric hashtags" do
@extractor.extract_hashtags("#1234").should == []
end
it "should extract hashtag followed by punctuations" do
@extractor.extract_hashtags("#test1: #test2; #test3\"").should == ["test1", "test2" ,"test3"]
end
end
describe "hashtags with indices" do
def match_hashtag_in_text(hashtag, text, offset = 0)
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
extracted_hashtags.size.should == 1
extracted_hashtag = extracted_hashtags.first
extracted_hashtag[:hashtag].should == hashtag
extracted_hashtag[:indices].first.should == offset
extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1
end
def not_match_hashtag_in_text(text)
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
extracted_hashtags.size.should == 0
end
context "extracts latin/numeric hashtags" do
%w(text text123 123text).each do |hashtag|
it "should extract ##{hashtag}" do
match_hashtag_in_text(hashtag, "##{hashtag}")
end
it "should extract ##{hashtag} within text" do
match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
end
end
end
context "international hashtags" do
context "should allow accents" do
%w(mañana café münchen).each do |hashtag|
it "should extract ##{hashtag}" do
match_hashtag_in_text(hashtag, "##{hashtag}")
end
it "should extract ##{hashtag} within text" do
match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
end
end
it "should not allow the multiplication character" do
match_hashtag_in_text("pre", "#pre#{[0xd7].pack('U')}post", 0)
end
it "should not allow the division character" do
match_hashtag_in_text("pre", "#pre#{[0xf7].pack('U')}post", 0)
end
end
end
it "should not extract numeric hashtags" do
not_match_hashtag_in_text("#1234")
end
it "should extract hashtag in text with supplementary character" do
match_hashtag_in_text("hashtag", "#{[0x10400].pack('U')} #hashtag", 2)
end
end
end
twitter-text-rb-1.7.0/spec/hithighlighter_spec.rb 0000664 0000000 0000000 00000006055 12240766003 0022106 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
class TestHitHighlighter
include Twitter::HitHighlighter
end
describe Twitter::HitHighlighter do
describe "highlight" do
before do
@highlighter = TestHitHighlighter.new
end
context "with options" do
before do
@original = "Testing this hit highliter"
@hits = [[13,16]]
end
it "should default to tags" do
@highlighter.hit_highlight(@original, @hits).should == "Testing this hit highliter"
end
it "should allow tag override" do
        @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
end
end
context "without links" do
before do
@original = "Hey! this is a test tweet"
end
it "should return original when no hits are provided" do
@highlighter.hit_highlight(@original).should == @original
end
it "should highlight one hit" do
@highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! this is a test tweet"
end
it "should highlight two hits" do
@highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! this is a test tweet"
end
it "should correctly highlight first-word hits" do
@highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "Hey! this is a test tweet"
end
it "should correctly highlight last-word hits" do
@highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test tweet"
end
end
context "with links" do
it "should highlight with a single link" do
@highlighter.hit_highlight("@bcherry this was a test tweet", [[9, 13]]).should == "@bcherrythis was a test tweet"
end
it "should highlight with link at the end" do
@highlighter.hit_highlight("test test test", [[5, 9]]).should == "test testtest"
end
it "should highlight with a link at the beginning" do
@highlighter.hit_highlight("test test test", [[5, 9]]).should == "testtest test"
end
it "should highlight an entire link" do
@highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test"
end
it "should highlight within a link" do
@highlighter.hit_highlight("test test test", [[6, 8]]).should == "test test test"
end
it "should highlight around a link" do
@highlighter.hit_highlight("test test test", [[3, 11]]).should == "test test test"
end
it "should fail gracefully with bad hits" do
@highlighter.hit_highlight("test test", [[5, 20]]).should == "test test"
end
it "should not mess up with touching tags" do
@highlighter.hit_highlight("foofoo", [[3,6]]).should == "foofoo"
end
end
end
end
twitter-text-rb-1.7.0/spec/regex_spec.rb 0000664 0000000 0000000 00000002132 12240766003 0020205 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
describe "Twitter::Regex regular expressions" do
describe "matching URLS" do
TestUrls::VALID.each do |url|
it "should match the URL #{url}" do
url.should match_autolink_expression
end
it "should match the URL #{url} when it's embedded in other text" do
text = "Sweet url: #{url} I found. #awesome"
url.should match_autolink_expression_in(text)
end
end
end
describe "invalid URLS" do
it "does not link urls with invalid characters" do
TestUrls::INVALID.each {|url| url.should_not match_autolink_expression}
end
end
describe "matching List names" do
it "should match if less than 25 characters" do
name = "Shuffleboard Community"
name.length.should < 25
name.should match(Twitter::Regex::REGEXEN[:list_name])
end
it "should not match if greater than 25 characters" do
name = "Most Glorious Shady Meadows Shuffleboard Community"
name.length.should > 25
name.should match(Twitter::Regex[:list_name])
end
end
end
twitter-text-rb-1.7.0/spec/rewriter_spec.rb 0000664 0000000 0000000 00000040002 12240766003 0020734 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
describe Twitter::Rewriter do
def original_text; end
def url; end
def block(*args)
if Array === @block_args
unless Array === @block_args.first
@block_args = [@block_args]
end
@block_args << args
else
@block_args = args
end
"[rewritten]"
end
describe "rewrite usernames" do #{{{
before do
@rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
end
context "username preceded by a space" do
def original_text; "hello @jacob"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "hello [rewritten]"
end
end
context "username at beginning of line" do
def original_text; "@jacob you're cool"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "[rewritten] you're cool"
end
end
context "username preceded by word character" do
def original_text; "meet@the beach"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "meet@the beach"
end
end
context "username preceded by non-word character" do
def original_text; "great.@jacob"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "great.[rewritten]"
end
end
context "username containing non-word characters" do
def original_text; "@jacob&^$%^"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "[rewritten]&^$%^"
end
end
context "username over twenty characters" do
def original_text
@twenty_character_username = "zach" * 5
"@" + @twenty_character_username + "1"
end
it "should be rewritten" do
@block_args.should == ["@", @twenty_character_username, nil]
@rewritten_text.should == "[rewritten]1"
end
end
context "username followed by japanese" do
def original_text; "@jacobの"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "[rewritten]の"
end
end
context "username preceded by japanese" do
def original_text; "あ@jacob"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "あ[rewritten]"
end
end
context "username surrounded by japanese" do
def original_text; "あ@jacobの"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "あ[rewritten]の"
end
end
context "username using full-width at-sign" do
def original_text
"#{[0xFF20].pack('U')}jacob"
end
it "should be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "[rewritten]"
end
end
end #}}}
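# Taken together, the examples above pin down the block contract for mentions:
# the block receives (at_sign, username, list_slug), with list_slug nil for a
# bare mention, and its return value replaces the whole "@username" match.
# A hedged sketch of what a caller might do with that contract (the href format
# mirrors the one asserted by the matchers in spec_helper.rb):
#
#   Twitter::Rewriter.rewrite_usernames_or_lists("hello @jacob") do |at, username, slug|
#     %(<a href="https://twitter.com/#{username}">#{at}#{username}#{slug}</a>)
#   end
#   # => %(hello <a href="https://twitter.com/jacob">@jacob</a>)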
describe "rewrite lists" do #{{{
before do
@rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
end
context "slug preceded by a space" do
def original_text; "hello @jacob/my-list"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", "/my-list"]
@rewritten_text.should == "hello [rewritten]"
end
end
context "username followed by a slash but no list" do
def original_text; "hello @jacob/ my-list"; end
it "should not be rewritten" do
@block_args.should == ["@", "jacob", nil]
@rewritten_text.should == "hello [rewritten]/ my-list"
end
end
context "empty username followed by a list" do
def original_text; "hello @/my-list"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "hello @/my-list"
end
end
context "list slug at beginning of line" do
def original_text; "@jacob/my-list"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", "/my-list"]
@rewritten_text.should == "[rewritten]"
end
end
context "username preceded by alpha-numeric character" do
def original_text; "meet@jacob/my-list"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "meet@jacob/my-list"
end
end
context "username preceded by non-word character" do
def original_text; "great.@jacob/my-list"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", "/my-list"]
@rewritten_text.should == "great.[rewritten]"
end
end
context "username containing non-word characters" do
def original_text; "@jacob/my-list&^$%^"; end
it "should be rewritten" do
@block_args.should == ["@", "jacob", "/my-list"]
@rewritten_text.should == "[rewritten]&^$%^"
end
end
context "username over twenty characters" do
def original_text
@twentyfive_character_list = "a" * 25
"@jacob/#{@twentyfive_character_list}12345"
end
it "should be rewritten" do
@block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"]
@rewritten_text.should == "[rewritten]12345"
end
end
end #}}}
describe "rewrite hashtags" do #{{{
before do
@rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block))
end
context "with an all numeric hashtag" do
def original_text; "#123"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "#123"
end
end
context "with a hashtag with alphanumeric characters" do
def original_text; "#ab1d"; end
it "should be rewritten" do
@block_args.should == ["#", "ab1d"]
@rewritten_text.should == "[rewritten]"
end
end
context "with a hashtag with underscores" do
def original_text; "#a_b_c_d"; end
it "should be rewritten" do
@block_args.should == ["#", "a_b_c_d"]
@rewritten_text.should == "[rewritten]"
end
end
context "with a hashtag that is preceded by a word character" do
def original_text; "ab#cd"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "ab#cd"
end
end
context "with a hashtag that starts with a number but has word characters" do
def original_text; "#2ab"; end
it "should be rewritten" do
@block_args.should == ["#", "2ab"]
@rewritten_text.should == "[rewritten]"
end
end
context "with multiple valid hashtags" do
def original_text; "I'm frickin' awesome #ab #cd #ef"; end
it "rewrites each hashtag" do
@block_args.should == [["#", "ab"], ["#", "cd"], ["#", "ef"]]
@rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]"
end
end
context "with a hashtag preceded by a ." do
def original_text; "ok, great.#abc"; end
it "should be rewritten" do
@block_args.should == ["#", "abc"]
@rewritten_text.should == "ok, great.[rewritten]"
end
end
context "with a hashtag preceded by a &" do
def original_text; "nbsp;"; end
it "should not be rewritten" do
@block_args.should be_nil
@rewritten_text.should == "nbsp;"
end
end
context "with a hashtag that ends in an !" do
def original_text; "#great!"; end
it "should be rewritten, but should not include the !" do
@block_args.should == ["#", "great"];
@rewritten_text.should == "[rewritten]!"
end
end
context "with a hashtag followed by Japanese" do
def original_text; "#twj_devの"; end
it "should be rewritten" do
@block_args.should == ["#", "twj_devの"];
@rewritten_text.should == "[rewritten]"
end
end
context "with a hashtag preceded by a full-width space" do
def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
it "should be rewritten" do
@block_args.should == ["#", "twj_dev"];
@rewritten_text.should == " [rewritten]"
end
end
context "with a hashtag followed by a full-width space" do
def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
it "should be rewritten" do
@block_args.should == ["#", "twj_dev"];
@rewritten_text.should == "[rewritten] "
end
end
context "with a hashtag using full-width hash" do
def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
it "should be rewritten" do
@block_args.should == ["#", "twj_dev"];
@rewritten_text.should == "[rewritten]"
end
end
context "with a hashtag containing an accented latin character" do
def original_text
# the hashtag is #éhashtag
"##{[0x00e9].pack('U')}hashtag"
end
it "should be rewritten" do
@block_args.should == ["#", "éhashtag"];
@rewritten_text.should == "[rewritten]"
end
end
end #}}}
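# The hashtag block receives (hash_char, hashtag_text) and, as above, its return
# value replaces the full "#hashtag" match. A hedged sketch of a caller turning
# hashtags into search links (the href mirrors the one asserted in spec_helper.rb):
#
#   Twitter::Rewriter.rewrite_hashtags("I love #ruby") do |hash, hashtag|
#     %(<a href="https://twitter.com/#!/search?q=%23#{hashtag}">#{hash}#{hashtag}</a>)
#   end
#   # => %(I love <a href="https://twitter.com/#!/search?q=%23ruby">#ruby</a>)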
describe "rewrite urls" do #{{{
def url; "http://www.google.com"; end
before do
@rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block))
end
context "when embedded in plain text" do
def original_text; "On my search engine #{url} I found good links."; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "On my search engine [rewritten] I found good links."
end
end
context "when surrounded by Japanese;" do
def original_text; "いまなにしてる#{url}いまなにしてる"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる"
end
end
context "with a path surrounded by parentheses;" do
def original_text; "I found a neatness (#{url})"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
context "when the URL ends with a slash;" do
def url; "http://www.google.com/"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
end
context "when the URL has a path;" do
def url; "http://www.google.com/fsdfasdf"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
end
end
context "when path contains parens" do
def original_text; "I found a neatness (#{url})"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
context "wikipedia" do
def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
end
context "IIS session" do
def url; "http://msdn.com/S(deadbeef)/page.htm"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "I found a neatness ([rewritten])"
end
end
context "unbalanced parens" do
def url; "http://example.com/i_has_a_("; end
it "should be rewritten" do
@block_args.should == ["http://example.com/i_has_a_"];
@rewritten_text.should == "I found a neatness ([rewritten]()"
end
end
context "balanced parens with a double quote inside" do
def url; "http://foo.bar.com/foo_(\")_bar" end
it "should be rewritten" do
@block_args.should == ["http://foo.bar.com/foo_"];
@rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
end
end
context "balanced parens hiding XSS" do
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
it "should be rewritten" do
@block_args.should == ["http://x.xx.com/"];
@rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
end
end
end
context "when preceded by a :" do
def original_text; "Check this out @hoverbird:#{url}"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "Check this out @hoverbird:[rewritten]"
end
end
context "with a URL ending in allowed punctuation" do
it "does not consume ending punctuation" do
%w| ? ! , . : ; ] ) } = \ ' |.each do |char|
Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |url|
url.should == url; "[rewritten]"
end.should == "[rewritten]#{char}"
end
end
end
context "with a URL preceded in forbidden characters" do
it "should be rewritten" do
%w| \ ' / ! = |.each do |char|
Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |url|
"[rewritten]" # should not be called here.
end.should == "#{char}[rewritten]"
end
end
end
context "when embedded in a link tag" do
def original_text; "#{url}"; end
it "should be rewritten" do
@block_args.should == [url];
@rewritten_text.should == "[rewritten]"
end
end
context "with multiple URLs" do
def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
it "should autolink each one" do
@block_args.should == [["http://www.links.org"], ["http://www.foo.org"]];
@rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]"
end
end
context "with multiple URLs in different formats" do
def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
it "should autolink each one, in the proper order" do
@block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]];
@rewritten_text.should == "[rewritten] [rewritten] [rewritten]"
end
end
context "with a URL having a long TLD" do
def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
it "should autolink it" do
@block_args.should == ["http://golem.mobi/0912/71607.html"]
@rewritten_text.should == "Yahoo integriert Facebook [rewritten]"
end
end
context "with a url lacking the protocol" do
def original_text; "I like www.foobar.com dudes"; end
it "does not link at all" do
@block_args.should be_nil
@rewritten_text.should == "I like www.foobar.com dudes"
end
end
context "with a @ in a URL" do
context "with XSS attack" do
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
it "should not allow XSS follwing @" do
@block_args.should == ["http://x.xx.com/"]
@rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
end
end
context "with a username not followed by a /" do
def original_text; "http://example.com/@foobar"; end
it "should link url" do
@block_args.should == ["http://example.com/@foobar"]
@rewritten_text.should == "[rewritten]"
end
end
context "with a username followed by a /" do
def original_text; "http://example.com/@foobar/"; end
it "should not link the username but link full url" do
@block_args.should == ["http://example.com/@foobar/"]
@rewritten_text.should == "[rewritten]"
end
end
end
end #}}}
end
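# A small sketch of the URL contract exercised above: the block is yielded each
# extracted URL and its return value is spliced back into the text. The guard
# keeps it inert under the spec runner; it is meant as an illustration of the
# API rather than a standalone script.
if __FILE__ == $0
  html = Twitter::Rewriter.rewrite_urls("Reading http://t.co/nwcLTFF now") do |url|
    %(<a href="#{url}">#{url}</a>)
  end
  puts html # => Reading <a href="http://t.co/nwcLTFF">http://t.co/nwcLTFF</a> now
end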
# vim: foldmethod=marker
twitter-text-rb-1.7.0/spec/spec_helper.rb 0000664 0000000 0000000 00000007602 12240766003 0020361 0 ustar 00root root 0000000 0000000 $TESTING=true
# Ruby 1.8 encoding check
major, minor, patch = RUBY_VERSION.split('.')
if major.to_i == 1 && minor.to_i < 9
$KCODE='u'
end
$:.push File.join(File.dirname(__FILE__), '..', 'lib')
require 'nokogiri'
require 'json'
require 'simplecov'
SimpleCov.start do
add_group 'Libraries', 'lib'
end
require File.expand_path('../../lib/twitter-text', __FILE__)
require File.expand_path('../test_urls', __FILE__)
RSpec.configure do |config|
config.include TestUrls
end
RSpec::Matchers.define :match_autolink_expression do
match do |string|
!Twitter::Extractor.extract_urls(string).empty?
end
end
RSpec::Matchers.define :match_autolink_expression_in do |text|
match do |url|
@match_data = Twitter::Regex[:valid_url].match(text)
@match_data && @match_data.to_s.strip == url
end
failure_message_for_should do |url|
"Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'"
end
end
RSpec::Matchers.define :have_autolinked_url do |url, inner_text|
match do |text|
@link = Nokogiri::HTML(text).search("a[@href='#{url}']")
@link &&
@link.inner_text &&
(inner_text && @link.inner_text == inner_text) || (!inner_text && @link.inner_text == url)
end
failure_message_for_should do |text|
"Expected url '#{url}'#{", inner_text '#{inner_text}'" if inner_text} to be autolinked in '#{text}'"
end
end
RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text|
expected = inner_text ? inner_text : screen_name
match do |text|
@link = Nokogiri::HTML(text).search("a.username")
@link &&
@link.inner_text == expected &&
"https://twitter.com/#{screen_name}".downcase.should == @link.first['href']
end
failure_message_for_should do |text|
if @link.first
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not."
else
"Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found."
end
end
failure_message_for_should_not do |text|
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does."
end
description do
"contain a link with the name and href pointing to the expected screen_name"
end
end
RSpec::Matchers.define :link_to_list_path do |list_path, inner_text|
expected = inner_text ? inner_text : list_path
match do |text|
@link = Nokogiri::HTML(text).search("a.list-slug")
@link &&
@link.inner_text == expected &&
"https://twitter.com/#{list_path}".downcase.should == @link.first['href']
end
failure_message_for_should do |text|
if @link.first
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not."
else
"Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found."
end
end
failure_message_for_should_not do |text|
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does."
end
description do
"contain a link with the list title and an href pointing to the list path"
end
end
RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
match do |text|
@link = Nokogiri::HTML(text).search("a[@href='https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']")
@link &&
@link.inner_text &&
@link.inner_text == hashtag
end
failure_message_for_should do |text|
if @link.first
"Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}"
else
"Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found."
end
end
failure_message_for_should_not do |text|
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does."
end
end
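# A hedged note on usage rather than runnable code: these matchers are meant to
# be called from inside the examples in this directory, roughly like
#
#   auto_link("hello @jacob").should link_to_screen_name("jacob")
#   auto_link("#ruby rocks").should have_autolinked_hashtag("#ruby")
#   "http://t.co/nwcLTFF".should match_autolink_expression
#
# where auto_link comes from including Twitter::Autolink in the spec.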
twitter-text-rb-1.7.0/spec/test_urls.rb 0000664 0000000 0000000 00000005006 12240766003 0020110 0 ustar 00root root 0000000 0000000 # encoding: utf-8
module TestUrls
VALID = [
"http://google.com",
"http://foobar.com/#",
"http://google.com/#foo",
"http://google.com/#search?q=iphone%20-filter%3Alinks",
"http://twitter.com/#search?q=iphone%20-filter%3Alinks",
"http://somedomain.com/index.php?path=/abc/def/",
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
"http://somehost.com:3000",
"http://xo.com/~matthew+%-x",
"http://en.wikipedia.org/wiki/Primer_(film)",
"http://www.ams.org/bookstore-getitem/item=mbk-59",
"http://chilp.it/?77e8fd",
"http://tell.me/why",
"http://longtlds.info",
"http://✪df.ws/ejp",
"http://日本.com",
"http://search.twitter.com/search?q=avro&lang=en",
"http://mrs.domain-dash.biz",
"http://x.com/has/one/char/domain",
"http://t.co/nwcLTFF",
"http://sub_domain-dash.twitter.com",
"http://a.b.cd",
"http://a_b.c-d.com",
"http://a-b.b.com",
"http://twitter-dash.com",
"http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx",
"www.foobar.com",
"WWW.FOOBAR.COM",
"www.foobar.co.jp",
"http://t.co",
"t.co/nwcLTFF"
] unless defined?(TestUrls::VALID)
INVALID = [
"http://no-tld",
"http://tld-too-short.x",
"http://-doman_dash.com",
"http://_leadingunderscore.twitter.com",
"http://trailingunderscore_.twitter.com",
"http://-leadingdash.twitter.com",
"http://trailingdash-.twitter.com",
"http://-leadingdash.com",
"http://trailingdash-.com",
"http://no_underscores.com",
"http://test.c_o_m",
"http://test.c-o-m",
"http://twitt#{[0x202A].pack('U')}er.com",
"http://twitt#{[0x202B].pack('U')}er.com",
"http://twitt#{[0x202C].pack('U')}er.com",
"http://twitt#{[0x202D].pack('U')}er.com",
"http://twitt#{[0x202E].pack('U')}er.com"
] unless defined?(TestUrls::INVALID)
TCO = [
"http://t.co/P53cv5yO!",
"http://t.co/fQJmiPGg***",
"http://t.co/pbY2NfTZ's",
"http://t.co/2vYHpAc5;",
"http://t.co/ulYGBYSo:",
"http://t.co/GeT4bSiw=win",
"http://t.co/8MkmHU0k+fun",
"http://t.co/TKLp64dY.yes,",
"http://t.co/8vuO27cI$$",
"http://t.co/rPYTvdA8/",
"http://t.co/WvtMw5ku%",
"http://t.co/8t7G3ddS#",
"http://t.co/nfHNJDV2/#!",
"http://t.co/gK6NOXHs[good]",
"http://t.co/dMrT0o1Y]bad",
"http://t.co/FNkPfmii-",
"http://t.co/sMgS3pjI_oh",
"http://t.co/F8Dq3Plb~",
"http://t.co/ivvH58vC&help",
"http://t.co/iUBL15zD|NZ5KYLQ8"
] unless defined?(TestUrls::TCO)
end
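# A small runnable sketch (only when this file is executed directly): feed a few
# of the fixture URLs above through the extractor the specs exercise. The require
# mirrors the one in spec_helper.rb and assumes the gem's runtime dependencies
# are installed.
if __FILE__ == $0
  require File.expand_path('../../lib/twitter-text', __FILE__)
  TestUrls::VALID.first(5).each do |url|
    puts "#{url} -> #{Twitter::Extractor.extract_urls(url).inspect}"
  end
end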
twitter-text-rb-1.7.0/spec/twitter_text_spec.rb 0000664 0000000 0000000 00000000631 12240766003 0021643 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
major, minor, patch = RUBY_VERSION.split('.')
if major.to_i == 1 && minor.to_i < 9
describe "base" do
before do
$KCODE = 'NONE'
end
after do
$KCODE = 'u'
end
it "should raise with invalid KCODE on Ruby < 1.9" do
lambda do
require 'twitter-text'
end.should raise_error
end
end
end
twitter-text-rb-1.7.0/spec/unicode_spec.rb 0000664 0000000 0000000 00000001665 12240766003 0020533 0 ustar 00root root 0000000 0000000 # encoding: utf-8
require File.dirname(__FILE__) + '/spec_helper'
describe Twitter::Unicode do
it "should lazy-init constants" do
Twitter::Unicode.const_defined?(:UFEB6).should == false
Twitter::Unicode::UFEB6.should_not be_nil
Twitter::Unicode::UFEB6.should be_kind_of(String)
Twitter::Unicode.const_defined?(:UFEB6).should == true
end
it "should return corresponding character" do
Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U')
end
it "should allow lowercase notation" do
Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6
Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6
end
it "should allow underscore notation" do
Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6
Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6
end
it "should raise on invalid codepoints" do
lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError)
end
end
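# These examples document that the codepoint constants are generated on demand
# (presumably via const_missing): a constant does not exist until it is first
# referenced, and each one resolves to the single packed UTF-8 character, e.g.
# Twitter::Unicode::UFEB6 == [0xfeb6].pack('U').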
twitter-text-rb-1.7.0/spec/validation_spec.rb 0000664 0000000 0000000 00000003023 12240766003 0021225 0 ustar 00root root 0000000 0000000 # encoding: binary
require File.dirname(__FILE__) + '/spec_helper'
class TestValidation
include Twitter::Validation
end
describe Twitter::Validation do
it "should disallow invalid BOM character" do
TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters
TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters
end
it "should disallow invalid U+FFFF character" do
TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters
end
it "should disallow direction change characters" do
[0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters
end
end
it "should disallow non-Unicode" do
TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters
end
it "should allow <= 140 combined accent characters" do
char = [0x65, 0x0301].pack('U*')
TestValidation.new.tweet_invalid?(char * 139).should == false
TestValidation.new.tweet_invalid?(char * 140).should == false
TestValidation.new.tweet_invalid?(char * 141).should == :too_long
end
it "should allow <= 140 multi-byte characters" do
char = [ 0x1d106 ].pack('U')
TestValidation.new.tweet_invalid?(char * 139).should == false
TestValidation.new.tweet_invalid?(char * 140).should == false
TestValidation.new.tweet_invalid?(char * 141).should == :too_long
end
end
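# A small sketch of the validation entry points exercised here and in
# test/conformance_test.rb. The guard keeps it inert under the spec runner; it
# is meant as an illustration of the API rather than a standalone script.
if __FILE__ == $0
  validator = TestValidation.new
  puts validator.tweet_length("Hello, Twitter!")               # length as Twitter counts it
  puts validator.tweet_invalid?("just a normal tweet").inspect # false when every check passes
end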
twitter-text-rb-1.7.0/test/ 0000775 0000000 0000000 00000000000 12240766003 0015563 5 ustar 00root root 0000000 0000000 twitter-text-rb-1.7.0/test/conformance_test.rb 0000664 0000000 0000000 00000014245 12240766003 0021447 0 ustar 00root root 0000000 0000000 require 'multi_json'
require 'nokogiri'
require 'test/unit'
require 'yaml'
# Ruby 1.8 encoding check
major, minor, patch = RUBY_VERSION.split('.')
if major.to_i == 1 && minor.to_i < 9
$KCODE='u'
end
require File.expand_path('../../lib/twitter-text', __FILE__)
class ConformanceTest < Test::Unit::TestCase
include Twitter::Extractor
include Twitter::Autolink
include Twitter::HitHighlighter
include Twitter::Validation
private
%w(description expected text json hits).each do |key|
define_method key.to_sym do
@test_info[key]
end
end
def assert_equal_without_attribute_order(expected, actual, failure_message = nil)
assert_block(build_message(failure_message, "<?> expected but was\n<?>", expected, actual)) do
equal_nodes?(Nokogiri::HTML(expected).root, Nokogiri::HTML(actual).root)
end
end
def equal_nodes?(expected, actual)
return false unless expected.name == actual.name
return false unless ordered_attributes(expected) == ordered_attributes(actual)
return false if expected.text? && actual.text? && expected.content != actual.content
expected.children.each_with_index do |child, index|
return false unless equal_nodes?(child, actual.children[index])
end
true
end
def ordered_attributes(element)
element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort
end
CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../twitter-text-conformance", __FILE__)
def self.def_conformance_test(file, test_type, &block)
yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file))
raise "No such test suite: #{test_type.to_s}" unless yaml["tests"][test_type.to_s]
yaml["tests"][test_type.to_s].each do |test_info|
name = :"test_#{test_type} #{test_info['description']}"
define_method name do
@test_info = test_info
instance_eval(&block)
end
end
end
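# def_conformance_test reads one suite (e.g. tests -> urls) out of a shared YAML
# file and defines a separate Test::Unit test method per entry, so each
# conformance case shows up as its own named test. The block runs with
# @test_info set, which is what the description/expected/text/json/hits
# accessors above read from.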
public
# Extractor Conformance
def_conformance_test("extract.yml", :replies) do
assert_equal expected, extract_reply_screen_name(text), description
end
def_conformance_test("extract.yml", :mentions) do
assert_equal expected, extract_mentioned_screen_names(text), description
end
def_conformance_test("extract.yml", :mentions_with_indices) do
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
assert_equal e, extract_mentioned_screen_names_with_indices(text), description
end
def_conformance_test("extract.yml", :mentions_or_lists_with_indices) do
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
assert_equal e, extract_mentions_or_lists_with_indices(text), description
end
def_conformance_test("extract.yml", :urls) do
assert_equal expected, extract_urls(text), description
expected.each do |expected_url|
assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
end
end
def_conformance_test("extract.yml", :urls_with_indices) do
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
assert_equal e, extract_urls_with_indices(text), description
end
def_conformance_test("extract.yml", :hashtags) do
assert_equal expected, extract_hashtags(text), description
end
def_conformance_test("extract.yml", :hashtags_with_indices) do
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
assert_equal e, extract_hashtags_with_indices(text), description
end
def_conformance_test("extract.yml", :cashtags) do
assert_equal expected, extract_cashtags(text), description
end
def_conformance_test("extract.yml", :cashtags_with_indices) do
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
assert_equal e, extract_cashtags_with_indices(text), description
end
# Autolink Conformance
def_conformance_test("autolink.yml", :usernames) do
assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :lists) do
assert_equal_without_attribute_order expected, auto_link_usernames_or_lists(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :urls) do
assert_equal_without_attribute_order expected, auto_link_urls(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :hashtags) do
assert_equal_without_attribute_order expected, auto_link_hashtags(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :cashtags) do
assert_equal_without_attribute_order expected, auto_link_cashtags(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :all) do
assert_equal_without_attribute_order expected, auto_link(text, :suppress_no_follow => true), description
end
def_conformance_test("autolink.yml", :json) do
assert_equal_without_attribute_order expected, auto_link_with_json(text, MultiJson.load(json), :suppress_no_follow => true), description
end
# HitHighlighter Conformance
def_conformance_test("hit_highlighting.yml", :plain_text) do
assert_equal expected, hit_highlight(text, hits), description
end
def_conformance_test("hit_highlighting.yml", :with_links) do
assert_equal expected, hit_highlight(text, hits), description
end
# Validation Conformance
def_conformance_test("validate.yml", :tweets) do
assert_equal expected, valid_tweet_text?(text), description
end
def_conformance_test("validate.yml", :usernames) do
assert_equal expected, valid_username?(text), description
end
def_conformance_test("validate.yml", :lists) do
assert_equal expected, valid_list?(text), description
end
def_conformance_test("validate.yml", :urls) do
assert_equal expected, valid_url?(text), description
end
def_conformance_test("validate.yml", :urls_without_protocol) do
assert_equal expected, valid_url?(text, true, false), description
end
def_conformance_test("validate.yml", :hashtags) do
assert_equal expected, valid_hashtag?(text), description
end
def_conformance_test("validate.yml", :lengths) do
assert_equal expected, tweet_length(text), description
end
end
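# A hedged note on running these tests: the YAML fixtures are expected under
# test/twitter-text-conformance (or wherever the CONFORMANCE_DIR environment
# variable points); once they are present, running this file directly, e.g.
# `ruby test/conformance_test.rb`, should let Test::Unit pick up every
# generated test method.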
twitter-text-rb-1.7.0/test/twitter-text-conformance/ 0000775 0000000 0000000 00000000000 12240766003 0022537 5 ustar 00root root 0000000 0000000 twitter-text-rb-1.7.0/twitter-text.gemspec 0000664 0000000 0000000 00000002421 12240766003 0020634 0 ustar 00root root 0000000 0000000 # encoding: utf-8
Gem::Specification.new do |s|
s.name = "twitter-text"
s.version = "1.7.0"
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com", "jkoval@twitter.com"]
s.homepage = "http://twitter.com"
s.description = s.summary = "A gem that provides text handling for Twitter"
s.license = "Apache 2.0"
s.platform = Gem::Platform::RUBY
s.has_rdoc = true
s.summary = "Twitter text handling library"
s.add_development_dependency "multi_json", "~> 1.3"
s.add_development_dependency "nokogiri", "~> 1.5.10"
s.add_development_dependency "rake"
s.add_development_dependency "rdoc"
s.add_development_dependency "rspec"
s.add_development_dependency "simplecov"
s.add_runtime_dependency "unf", "~> 0.1.0"
s.files = `git ls-files`.split("\n")
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
s.require_paths = ["lib"]
end
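# A hedged consumer sketch (not part of the gemspec): an application would
# typically add `gem 'twitter-text', '~> 1.7.0'` to its Gemfile and then do
# something like
#
#   require 'twitter-text'
#   class Renderer
#     include Twitter::Autolink
#   end
#   Renderer.new.auto_link("hello @jacob #ruby http://t.co/nwcLTFF")
#
# which returns the text with the mention, hashtag and URL wrapped in anchor tags.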