htmlentities-4.3.1/0000755000004100000410000000000011665456537014302 5ustar www-datawww-datahtmlentities-4.3.1/test/0000755000004100000410000000000011665456537015261 5ustar www-datawww-datahtmlentities-4.3.1/test/encoding_test.rb0000644000004100000410000000572011665456537020437 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::EncodingTest < Test::Unit::TestCase def setup @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) } end def assert_encode(expected, input, *args) @entities.each do |coder| assert_equal expected, coder.encode(input, *args) end end def test_should_encode_basic_entities assert_encode '&', '&', :basic assert_encode '"', '"' assert_encode '<', '<', :basic assert_encode '<', '<' end def test_should_encode_basic_entities_to_decimal assert_encode '&', '&', :decimal assert_encode '"', '"', :decimal assert_encode '<', '<', :decimal assert_encode '>', '>', :decimal assert_encode ''', "'", :decimal end def test_should_encode_basic_entities_to_hexadecimal assert_encode '&', '&', :hexadecimal assert_encode '"', '"', :hexadecimal assert_encode '<', '<', :hexadecimal assert_encode '>', '>', :hexadecimal assert_encode ''', "'", :hexadecimal end def test_should_encode_extended_named_entities assert_encode '±', '±', :named assert_encode 'ð', 'ð', :named assert_encode 'Œ', 'Œ', :named assert_encode 'œ', 'œ', :named end def test_should_encode_decimal_entities assert_encode '“', '“', :decimal assert_encode '…', '…', :decimal end def test_should_encode_hexadecimal_entities assert_encode '−', '−', :hexadecimal assert_encode '—', '—', :hexadecimal end def test_should_encode_text_using_mix_of_entities assert_encode( '"bientôt" & 文字', '"bientôt" & 文字', :basic, :named, :hexadecimal ) assert_encode( '"bientôt" & 文字', '"bientôt" & 文字', :basic, :named, :decimal ) end def test_should_sort_commands_when_encoding_using_mix_of_entities assert_encode( '"bientôt" & 文字', '"bientôt" & 文字', :named, :hexadecimal, :basic ) assert_encode( '"bientôt" & 文字', '"bientôt" & 文字', :decimal, :named, :basic ) end def test_should_detect_illegal_encoding_command assert_raise HTMLEntities::InstructionError do HTMLEntities.new.encode('foo', :bar, :baz) end end def test_should_not_encode_normal_ASCII assert_encode '`', '`' assert_encode ' ', ' ' end def test_should_double_encode_existing_entity assert_encode '&amp;', '&' end def test_should_not_mutate_string_being_encoded original = "<£" input = original.dup HTMLEntities.new.encode(input, :basic, :decimal) assert_equal original, input end def test_should_ducktype_parameter_to_string_before_encoding obj = Object.new def obj.to_s; "foo"; end assert_encode "foo", obj end end htmlentities-4.3.1/test/expanded_test.rb0000644000004100000410000001024211665456537020434 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::ExpandedTest < Test::Unit::TestCase attr_reader :html_entities def setup @html_entities = HTMLEntities.new(:expanded) end TEST_ENTITIES_SET = [ ['sub', 0x2282, "xhtml", nil, "⊂", ], ['sup', 0x2283, "xhtml", nil, "⊃", ], ['nsub', 0x2284, "xhtml", nil, "⊄", ], ['subE', 0x2286, nil, "skip", "⊆", ], ['sube', 0x2286, "xhtml", nil, "⊆", ], ['supE', 0x2287, nil, "skip", "⊇", ], ['supe', 0x2287, "xhtml", nil, "⊇", ], ['bottom', 0x22a5, nil, "skip", "⊥", ], ['perp', 0x22a5, "xhtml", nil, "⊥", ], ['models', 0x22a7, nil, nil, "⊧", ], ['vDash', 0x22a8, nil, nil, "⊨", ], ['Vdash', 0x22a9, nil, nil, "⊩", ], ['Vvdash', 0x22aa, nil, nil, "⊪", ], ['nvdash', 0x22ac, nil, nil, "⊬", ], ['nvDash', 0x22ad, nil, nil, "⊭", ], ['nVdash', 0x22ae, nil, nil, "⊮", ], ['nsubE', 0x2288, nil, nil, "⊈", ], ['nsube', 0x2288, nil, "skip", "⊈", ], ['nsupE', 0x2289, nil, nil, "⊉", ], ['nsupe', 0x2289, nil, "skip", "⊉", ], ['subnE', 0x228a, nil, nil, "⊊", ], ['subne', 0x228a, nil, "skip", "⊊", ], ['vsubnE', 0x228a, nil, "skip", "⊊", ], ['vsubne', 0x228a, nil, "skip", "⊊", ], ['nsc', 0x2281, nil, nil, "⊁", ], ['nsup', 0x2285, nil, nil, "⊅", ], ['b.alpha', 0x03b1, nil, "skip", "α", ], ['b.beta', 0x03b2, nil, "skip", "β", ], ['b.chi', 0x03c7, nil, "skip", "χ", ], ['b.Delta', 0x0394, nil, "skip", "Δ", ], ] def test_should_encode_apos_entity assert_equal "'", html_entities.encode("'", :named) # note: the normal ' 0x0027, not ʼ 0x02BC end def test_should_decode_apos_entity assert_equal "é'", html_entities.decode("é'") end def test_should_decode_dotted_entity assert_equal "Θ", html_entities.decode("&b.Theta;") end def test_should_encode_from_test_set TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded| next if skip assert_equal "&#{ent};", html_entities.encode(decoded, :named) end end def test_should_decode_from_test_set TEST_ENTITIES_SET.each do |ent, _, _, _, decoded| assert_equal decoded, html_entities.decode("&#{ent};") end end def test_should_round_trip_preferred_entities TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded| next if skip assert_equal "&#{ent};", html_entities.encode(html_entities.decode("&#{ent};"), :named) assert_equal decoded, html_entities.decode(html_entities.encode(decoded, :named)) end end def test_should_not_round_trip_decoding_skipped_entities TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded| next unless skip assert_not_equal "&#{ent};", html_entities.encode(html_entities.decode("&#{ent};"), :named) end end def test_should_round_trip_encoding_skipped_entities TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded| next unless skip assert_equal decoded, html_entities.decode(html_entities.encode(decoded, :named)) end end def test_should_treat_all_xhtml1_named_entities_as_xhtml_does xhtml_encoder = HTMLEntities.new(:xhtml1) HTMLEntities::MAPPINGS['xhtml1'].each do |ent, decoded| assert_equal xhtml_encoder.decode("&#{ent};"), html_entities.decode("&#{ent};") assert_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named) end end def test_should_not_agree_with_xhtml1_when_not_in_xhtml xhtml_encoder = HTMLEntities.new(:xhtml1) TEST_ENTITIES_SET.each do |ent, _, xhtml1, skip, decoded| next if xhtml1 || skip assert_not_equal xhtml_encoder.decode("&#{ent};"), html_entities.decode("&#{ent};") assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named) end end end htmlentities-4.3.1/test/common.rb0000644000004100000410000000026611665456537017102 0ustar www-datawww-data$:.unshift(File.dirname(__FILE__) + '/../lib') require 'test/unit' require 'htmlentities' ENCODING_AWARE_RUBY = "1.9".respond_to?(:encoding) $KCODE = 'u' unless ENCODING_AWARE_RUBY htmlentities-4.3.1/test/ruby_1_9_test.rb0000644000004100000410000000377711665456537020314 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) if ENCODING_AWARE_RUBY class HTMLEntities::Ruby19Test < Test::Unit::TestCase def test_should_encode_ascii_to_ascii s = "".encode(Encoding::US_ASCII) assert_equal Encoding::US_ASCII, s.encoding t = HTMLEntities.new.encode(s) assert_equal "<elan>", t assert_equal Encoding::US_ASCII, t.encoding end def test_should_encode_utf8_to_utf8_if_needed s = "<élan>" assert_equal Encoding::UTF_8, s.encoding t = HTMLEntities.new.encode(s) assert_equal "<élan>", t assert_equal Encoding::UTF_8, t.encoding end def test_should_encode_utf8_to_ascii_if_possible s = "" assert_equal Encoding::UTF_8, s.encoding t = HTMLEntities.new.encode(s) assert_equal "<elan>", t assert_equal Encoding::US_ASCII, t.encoding end def test_should_encode_other_encoding_to_utf8 s = "<élan>".encode(Encoding::ISO_8859_1) assert_equal Encoding::ISO_8859_1, s.encoding t = HTMLEntities.new.encode(s) assert_equal "<élan>", t assert_equal Encoding::UTF_8, t.encoding end def test_should_decode_ascii_to_utf8 s = "<élan>".encode(Encoding::US_ASCII) assert_equal Encoding::US_ASCII, s.encoding t = HTMLEntities.new.decode(s) assert_equal "<élan>", t assert_equal Encoding::UTF_8, t.encoding end def test_should_decode_utf8_to_utf8 s = "<élan>".encode(Encoding::UTF_8) assert_equal Encoding::UTF_8, s.encoding t = HTMLEntities.new.decode(s) assert_equal "<élan>", t assert_equal Encoding::UTF_8, t.encoding end def test_should_decode_other_encoding_to_utf8 s = "<élan>".encode(Encoding::ISO_8859_1) assert_equal Encoding::ISO_8859_1, s.encoding t = HTMLEntities.new.decode(s) assert_equal "<élan>", t assert_equal Encoding::UTF_8, t.encoding end end end htmlentities-4.3.1/test/entities_test.rb0000644000004100000410000000101311665456537020464 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::EntitiesTest < Test::Unit::TestCase def test_should_raise_exception_when_unknown_flavor_specified assert_raises HTMLEntities::UnknownFlavor do HTMLEntities.new('foo') end end def test_should_allow_symbol_for_flavor assert_nothing_raised do HTMLEntities.new(:xhtml1) end end def test_should_allow_upper_case_flavor assert_nothing_raised do HTMLEntities.new('XHTML1') end end end htmlentities-4.3.1/test/ruby_1_8_test.rb0000644000004100000410000000070211665456537020274 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) unless ENCODING_AWARE_RUBY class HTMLEntities::Ruby18Test < Test::Unit::TestCase # Reported by Benoit Larroque def test_should_encode_without_error_when_KCODE_is_not_UTF_8 kcode = $KCODE $KCODE = "n" coder = HTMLEntities.new text = [8212].pack('U') assert_equal "—", coder.encode(text, :decimal) $KCODE = kcode end end end htmlentities-4.3.1/test/decoding_test.rb0000644000004100000410000000521511665456537020424 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::DecodingTest < Test::Unit::TestCase def setup @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) } end def assert_decode(expected, input) @entities.each do |coder| assert_equal expected, coder.decode(input) end end def test_should_decode_basic_entities assert_decode '&', '&' assert_decode '<', '<' assert_decode '"', '"' end def test_should_decode_extended_named_entities assert_decode '±', '±' assert_decode 'ð', 'ð' assert_decode 'Œ', 'Œ' assert_decode 'œ', 'œ' end def test_should_decode_decimal_entities assert_decode '“', '“' assert_decode '…', '…' assert_decode ' ', ' ' end def test_should_decode_hexadecimal_entities assert_decode '−', '−' assert_decode '—', '—' assert_decode '`', '`' assert_decode '`', '`' end def test_should_not_mutate_string_being_decoded original = "<£" input = original.dup HTMLEntities.new.decode(input) assert_equal original, input end def test_should_decode_text_with_mix_of_entities # Just a random headline - I needed something with accented letters. assert_decode( 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France', 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France' ) assert_decode( '"bientôt" & 文字', '"bientôt" & 文字' ) end def test_should_decode_empty_string assert_decode '', '' end def test_should_skip_unknown_entity assert_decode '&bogus;', '&bogus;' end def test_should_decode_double_encoded_entity_once assert_decode '&', '&amp;' end # Faults found and patched by Moonwolf def test_should_decode_full_hexadecimal_range (0..127).each do |codepoint| assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};" end end # Reported by Dallas DeVries and Johan Duflost def test_should_decode_named_entities_reported_as_missing_in_3_0_1 assert_decode [178].pack('U'), '²' assert_decode [8226].pack('U'), '•' assert_decode [948].pack('U'), 'δ' end # Reported by ckruse def test_should_decode_only_first_element_in_masked_entities input = '&#3346;' expected = 'ഒ' assert_decode expected, input end def test_should_ducktype_parameter_to_string_before_encoding obj = Object.new def obj.to_s; "foo"; end assert_decode "foo", obj end end htmlentities-4.3.1/test/xhtml1_test.rb0000644000004100000410000000104511665456537020062 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::XHTML1Test < Test::Unit::TestCase attr_reader :html_entities def setup @html_entities = HTMLEntities.new('xhtml1') end def test_should_encode_apos_entity assert_equal "'", html_entities.encode("'", :basic) end def test_should_decode_apos_entity assert_equal "é'", html_entities.decode("é'") end def test_should_not_decode_dotted_entity assert_equal "&b.Theta;", html_entities.decode("&b.Theta;") end end htmlentities-4.3.1/test/interoperability_test.rb0000644000004100000410000000073311665456537022235 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) if ENV["RUN_INTEROPERABILITY_TESTS"] class HTMLEntities::InteroperabilityTest < Test::Unit::TestCase def test_should_encode_active_support_safe_buffer require 'active_support' string = "

This is a test

" buffer = ActiveSupport::SafeBuffer.new(string) coder = HTMLEntities.new assert_equal coder.encode(string, :named), coder.encode(buffer, :named) end end end htmlentities-4.3.1/test/html4_test.rb0000644000004100000410000000106711665456537017701 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTML4Test < Test::Unit::TestCase attr_reader :html_entities def setup @html_entities = HTMLEntities.new('html4') end # Found by Marcos Kuhns def test_should_not_encode_apos_entity assert_equal "'", html_entities.encode("'", :basic) end def test_should_not_decode_apos_entity assert_equal "é'", html_entities.decode("é'") end def test_should_not_decode_dotted_entity assert_equal "&b.Theta;", html_entities.decode("&b.Theta;") end end htmlentities-4.3.1/test/roundtrip_test.rb0000644000004100000410000000431211665456537020673 0ustar www-datawww-data# encoding: UTF-8 require File.expand_path("../common", __FILE__) class HTMLEntities::RoundtripTest < Test::Unit::TestCase attr_reader :xhtml1_entities, :html4_entities def setup @xhtml1_entities = HTMLEntities.new('xhtml1') @html4_entities = HTMLEntities.new('html4') end def test_should_roundtrip_xhtml1_entities_via_named_encoding each_mapping 'xhtml1' do |name, string| assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :named)) end end def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding each_mapping 'xhtml1' do |name, string| assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named)) end end def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding each_mapping 'xhtml1' do |name, string| assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal)) end end def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding each_mapping 'xhtml1' do |name, string| assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal)) end end def test_should_roundtrip_html4_entities_via_named_encoding each_mapping 'html4' do |name, string| assert_equal string, html4_entities.decode(html4_entities.encode(string, :named)) end end def test_should_roundtrip_html4_entities_via_basic_and_named_encoding each_mapping 'html4' do |name, string| assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named)) end end def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding each_mapping 'html4' do |name, string| assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal)) end end def test_should_roundtrip_html4_entities_via_hexadecimal_encoding each_mapping 'html4' do |name, string| assert_equal string, html4_entities.decode(html4_entities.encode(string, :hexadecimal)) end end def each_mapping(flavor) HTMLEntities::MAPPINGS[flavor].each do |name, codepoint| yield name, [codepoint].pack('U') end end end htmlentities-4.3.1/History.txt0000644000004100000410000000545711665456537016517 0ustar www-datawww-data== 4.3.1 (2011-11-30) * Fix bug when working with Rails 3/Ruby 1.8.7 * Make character encoding behaviour consistent in Ruby 1.9.2/1.9.3 == 4.3.0 (2011-03-29) * Use Ruby 1.9's encoding support where available. * Deprecated HTMLEntities.encode_entities/decode_entities interface is now removed. == 4.2.4 (2011-01-30) * Fix issue where double-escaped entities were not correctly escaped. Bug reported by Christian Kruse. == 4.2.3 (2011-01-07) * Additional entities from Junya Ishihara. * Performance improvements. == 4.2.1 (2010-04-05) * Fixed error on Ruby 1.8.x when $KCODE was not set to "UTF8". Thanks to Benoit Larroque for the bug report. == 4.2.0 (2009-08-24) * Added benchmarking code and improved performance. == 4.1.0 (2009-08-15) * Now works with Ruby 1.9.1 and JRuby. * Reverted lazy loading of entity mappings as this is not thread-safe. * Finally removed the deprecated String#encode_entities and #decode_entities methods. == 4.0.1 (2008-06-03) * Added :expanded charset -- the ~1000 SGML entities from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/SGML.TXT (extra code by Philip (flip) Kromer , entity info from John Cowan #) == 4.0.0 (2007-03-15) * New instantiation-based interface (but legacy interface is preserved for compatibility. * Handles HTML4 as well as XHTML1 (the former lacks the ' entity). * Encodes basic entities numerically when :basic isn't specified and :decimal or :hexadecimal is. * Performs a maximum of two gsub passes instead of three when encoding, which should be more efficient on long strings. == 3.1.0 (2007-01-19) * Now understands all the entities referred to in the XHTML 1.0 DTD (253 entities compared with 131 in version 3.0.1). * Calls to_s on parameters to play nicely with Rails 1.2.1. * Entity mapping data is now lazily loaded. == 3.0.1 (2005-04-08) * Improved documentation. == 3.0.0 (2005-04-08) * Changed licence to MIT due to confusion with previous 'Fair' licence (my intention was to be liberal, not obscure). * Moved basic functionality out of String class; for previous behaviour, require 'htmlentities/string'. * Changed version numbering scheme. * Now available as a Gem. == 2.2 (2005-11-07) * Important bug fixes -- thanks to Moonwolf. * Decoding hexadecimal entities now accepts 'f' as a hex digit. (D'oh!) * Decimal decoding edge cases addressed. * Test cases added. == 2.1 (2005-10-31) * Removed some unnecessary code in basic entity encoding. * Improved handling of encoding: commands are now automatically sorted, so the user doesn't have to worry about their order. * Now using setup.rb. * Tests moved to separate file. == 2.0 (2005-08-23) * Added encoding to entities. * Decoding interface unchanged. * Fixed a bug with handling high codepoints. == 1.0 (2005-08-03) * Initial release. * Decoding only. htmlentities-4.3.1/COPYING.txt0000644000004100000410000000206711665456537016160 0ustar www-datawww-data== Licence (MIT) Copyright (c) 2005-2009 Paul Battley Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. htmlentities-4.3.1/metadata.yml0000644000004100000410000000345711665456537016616 0ustar www-datawww-data--- !ruby/object:Gem::Specification name: htmlentities version: !ruby/object:Gem::Version prerelease: version: 4.3.1 platform: ruby authors: - Paul Battley autorequire: bindir: bin cert_chain: [] date: 2011-11-30 00:00:00 Z dependencies: [] description: email: pbattley@gmail.com executables: [] extensions: [] extra_rdoc_files: - History.txt - COPYING.txt files: - lib/htmlentities/encoder.rb - lib/htmlentities/decoder.rb - lib/htmlentities/mappings/xhtml1.rb - lib/htmlentities/mappings/expanded.rb - lib/htmlentities/mappings/html4.rb - lib/htmlentities/flavors.rb - lib/htmlentities/version.rb - lib/htmlentities.rb - test/html4_test.rb - test/ruby_1_8_test.rb - test/roundtrip_test.rb - test/encoding_test.rb - test/common.rb - test/xhtml1_test.rb - test/decoding_test.rb - test/ruby_1_9_test.rb - test/expanded_test.rb - test/entities_test.rb - test/interoperability_test.rb - perf/benchmark.rb - perf/performance.rb - perf/profile.rb - History.txt - COPYING.txt homepage: https://github.com/threedaymonk/htmlentities licenses: [] post_install_message: rdoc_options: [] require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version version: "0" required_rubygems_version: !ruby/object:Gem::Requirement none: false requirements: - - ">=" - !ruby/object:Gem::Version version: "0" requirements: [] rubyforge_project: rubygems_version: 1.8.11 signing_key: specification_version: 3 summary: A module for encoding and decoding (X)HTML entities. test_files: - test/html4_test.rb - test/ruby_1_8_test.rb - test/roundtrip_test.rb - test/encoding_test.rb - test/xhtml1_test.rb - test/decoding_test.rb - test/ruby_1_9_test.rb - test/expanded_test.rb - test/entities_test.rb - test/interoperability_test.rb htmlentities-4.3.1/perf/0000755000004100000410000000000011665456537015236 5ustar www-datawww-datahtmlentities-4.3.1/perf/benchmark.rb0000644000004100000410000000051711665456537017520 0ustar www-datawww-data# encoding: UTF-8 $KCODE = 'u' unless "1.9".respond_to?(:encoding) require File.expand_path("../performance", __FILE__) require "benchmark" job = HTMLEntitiesJob.new job.all(100) # Warm up to give JRuby a fair shake. Benchmark.benchmark do |b| b.report("Encoding"){ job.encode(100) } b.report("Decoding"){ job.decode(100) } end htmlentities-4.3.1/perf/profile.rb0000644000004100000410000000054311665456537017225 0ustar www-datawww-data# encoding: UTF-8 $KCODE = 'u' unless "1.9".respond_to?(:encoding) require File.join(File.dirname(__FILE__), "performance") require "profiler" job = HTMLEntitiesJob.new puts "Encoding" Profiler__::start_profile job.encode(1) Profiler__::print_profile($stdout) puts "Decoding" Profiler__::start_profile job.decode(1) Profiler__::print_profile($stdout) htmlentities-4.3.1/perf/performance.rb0000644000004100000410000000126211665456537020065 0ustar www-datawww-data# encoding: UTF-8 $KCODE = 'u' unless "1.9".respond_to?(:encoding) $:.unshift(File.join(File.dirname(__FILE__), "..", "lib")) require "htmlentities" class HTMLEntitiesJob def initialize @coder = HTMLEntities.new @decoded = File.read(File.join(File.dirname(__FILE__), "sample")) @encoded = @coder.encode(@decoded, :basic, :named, :hexadecimal) end def encode(cycles) cycles.times do @coder.encode(@decoded, :basic, :named, :hexadecimal) @coder.encode(@decoded, :basic, :named, :decimal) end end def decode(cycles) cycles.times do @coder.decode(@encoded) end end def all(cycles) encode(cycles) decode(cycles) end end htmlentities-4.3.1/lib/0000755000004100000410000000000011665456537015050 5ustar www-datawww-datahtmlentities-4.3.1/lib/htmlentities.rb0000644000004100000410000000545511665456537020117 0ustar www-datawww-data# encoding: UTF-8 require 'htmlentities/flavors' require 'htmlentities/encoder' require 'htmlentities/decoder' require 'htmlentities/version' # # HTML entity encoding and decoding for Ruby # class HTMLEntities UnknownFlavor = Class.new(RuntimeError) # # Create a new HTMLEntities coder for the specified flavor. # Available flavors are 'html4', 'expanded' and 'xhtml1' (the default). # # The only difference in functionality between html4 and xhtml1 is in the # handling of the apos (apostrophe) named entity, which is not defined in # HTML4. # # 'expanded' includes a large number of additional SGML entities drawn from # ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/SGML.TXT # it "maps SGML character entities from various public sets (namely, ISOamsa, # ISOamsb, ISOamsc, ISOamsn, ISOamso, ISOamsr, ISObox, ISOcyr1, ISOcyr2, # ISOdia, ISOgrk1, ISOgrk2, ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, # ISOpub, ISOtech, HTMLspecial, HTMLsymbol) to corresponding Unicode # characters." (sgml.txt). # # 'expanded' is a strict superset of the XHTML entities: every xhtml named # entity encodes and decodes the same under :expanded as under :xhtml1 # def initialize(flavor='xhtml1') @flavor = flavor.to_s.downcase raise UnknownFlavor, "Unknown flavor #{flavor}" unless FLAVORS.include?(@flavor) end # # Decode entities in a string into their UTF-8 # equivalents. The string should already be in UTF-8 encoding. # # Unknown named entities will not be converted # def decode(source) (@decoder ||= Decoder.new(@flavor)).decode(source) end # # Encode codepoints into their corresponding entities. Various operations # are possible, and may be specified in order: # # :basic :: Convert the five XML entities ('"<>&) # :named :: Convert non-ASCII characters to their named HTML 4.01 equivalent # :decimal :: Convert non-ASCII characters to decimal entities (e.g. Ӓ) # :hexadecimal :: Convert non-ASCII characters to hexadecimal entities (e.g. # ካ) # # You can specify the commands in any order, but they will be executed in # the order listed above to ensure that entity ampersands are not # clobbered and that named entities are replaced before numeric ones. # # If no instructions are specified, :basic will be used. # # Examples: # encode(str) - XML-safe # encode(str, :basic, :decimal) - XML-safe and 7-bit clean # encode(str, :basic, :named, :decimal) - 7-bit clean, with all # non-ASCII characters replaced with their named entity where possible, and # decimal equivalents otherwise. # # Note: It is the program's responsibility to ensure that the source # contains valid UTF-8 before calling this method. # def encode(source, *instructions) Encoder.new(@flavor, instructions).encode(source) end end htmlentities-4.3.1/lib/htmlentities/0000755000004100000410000000000011665456537017561 5ustar www-datawww-datahtmlentities-4.3.1/lib/htmlentities/version.rb0000644000004100000410000000021611665456537021572 0ustar www-datawww-dataclass HTMLEntities module VERSION #:nodoc: MAJOR = 4 MINOR = 3 TINY = 1 STRING = [MAJOR, MINOR, TINY].join('.') end end htmlentities-4.3.1/lib/htmlentities/decoder.rb0000644000004100000410000000203411665456537021512 0ustar www-datawww-dataclass HTMLEntities class Decoder #:nodoc: def initialize(flavor) @flavor = flavor @map = HTMLEntities::MAPPINGS[@flavor] @entity_regexp = entity_regexp end def decode(source) prepare(source).gsub(@entity_regexp){ if $1 && codepoint = @map[$1] [codepoint].pack('U') elsif $2 [$2.to_i(10)].pack('U') elsif $3 [$3.to_i(16)].pack('U') else $& end } end private if "1.9".respond_to?(:encoding) def prepare(string) #:nodoc: string.to_s.encode(Encoding::UTF_8) end else def prepare(string) #:nodoc: string.to_s end end def entity_regexp key_lengths = @map.keys.map{ |k| k.length } entity_name_pattern = if @flavor == 'expanded' '(?:b\.)?[a-z][a-z0-9]' else '[a-z][a-z0-9]' end /&(?:(#{entity_name_pattern}{#{key_lengths.min - 1},#{key_lengths.max - 1}})|#([0-9]{1,7})|#x([0-9a-f]{1,6}));/i end end end htmlentities-4.3.1/lib/htmlentities/encoder.rb0000644000004100000410000000653111665456537021532 0ustar www-datawww-dataclass HTMLEntities InstructionError = Class.new(RuntimeError) class Encoder #:nodoc: INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal] def initialize(flavor, instructions) @flavor = flavor instructions = [:basic] if instructions.empty? validate_instructions(instructions) build_basic_entity_encoder(instructions) build_extended_entity_encoder(instructions) end def encode(source) post_process( prepare(source). gsub(basic_entity_regexp){ |match| encode_basic(match) }. gsub(extended_entity_regexp){ |match| encode_extended(match) } ) end private if "1.9".respond_to?(:encoding) def prepare(string) #:nodoc: string.to_s.encode(Encoding::UTF_8) end def post_process(string) if string.encoding != Encoding::ASCII && string.match(/\A[\x01-\x7F]*\z/) string.encode(Encoding::ASCII) else string end end else def prepare(string) #:nodoc: string.to_s end def post_process(string) string end end def basic_entity_regexp @basic_entity_regexp ||= @flavor.match(/^html/) ? /[<>"&]/ : /[<>'"&]/ end def extended_entity_regexp @extended_entity_regexp ||= ( options = [nil] if encoding_aware? pattern = '[^\u{20}-\u{7E}]' else pattern = '[^\x20-\x7E]' options << "U" end pattern << "|'" if @flavor == 'html4' Regexp.new(pattern, *options) ) end def validate_instructions(instructions) unknown_instructions = instructions - INSTRUCTIONS if unknown_instructions.any? raise InstructionError, "unknown encode_entities command(s): #{unknown_instructions.inspect}" end if (instructions.include?(:decimal) && instructions.include?(:hexadecimal)) raise InstructionError, "hexadecimal and decimal encoding are mutually exclusive" end end def build_basic_entity_encoder(instructions) if instructions.include?(:basic) || instructions.include?(:named) method = :encode_named elsif instructions.include?(:decimal) method = :encode_decimal elsif instructions.include?(:hexadecimal) method = :encode_hexadecimal end instance_eval "def encode_basic(char)\n#{method}(char)\nend" end def build_extended_entity_encoder(instructions) definition = "def encode_extended(char)\n" ([:named, :decimal, :hexadecimal] & instructions).each do |encoder| definition << "encoded = encode_#{encoder}(char)\n" definition << "return encoded if encoded\n" end definition << "char\n" definition << "end" instance_eval definition end def encode_named(char) cp = char.unpack('U')[0] (e = reverse_map[cp]) && "&#{e};" end def encode_decimal(char) "&##{char.unpack('U')[0]};" end def encode_hexadecimal(char) "&#x#{char.unpack('U')[0].to_s(16)};" end def reverse_map @reverse_map ||= ( skips = HTMLEntities::SKIP_DUP_ENCODINGS[@flavor] map = HTMLEntities::MAPPINGS[@flavor] uniqmap = skips ? map.reject{|ent,hx| skips.include? ent} : map uniqmap.invert ) end def encoding_aware? "1.9".respond_to?(:encoding) end end end htmlentities-4.3.1/lib/htmlentities/flavors.rb0000644000004100000410000000042011665456537021556 0ustar www-datawww-dataclass HTMLEntities FLAVORS = %w[html4 xhtml1 expanded] MAPPINGS = {} unless defined? MAPPINGS SKIP_DUP_ENCODINGS = {} unless defined? SKIP_DUP_ENCODINGS end HTMLEntities::FLAVORS.each do |flavor| require "htmlentities/mappings/#{flavor}" end htmlentities-4.3.1/lib/htmlentities/mappings/0000755000004100000410000000000011665456537021377 5ustar www-datawww-datahtmlentities-4.3.1/lib/htmlentities/mappings/expanded.rb0000644000004100000410000025015211665456537023521 0ustar www-datawww-data# encoding: UTF-8 class HTMLEntities # # This table added by Philip (flip) Kromer # using the mapping by John Cowan (25 July 1997) at # ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/SGML.TXT # # The following table maps SGML character entities from various # public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso, # ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2, # ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech, # HTMLspecial, HTMLsymbol) to corresponding Unicode characters. # # The table has five tab-separated fields: # :bare => SGML character entity name # :hex => Unicode 2.0 character code # :entity => SGML character entity # :type => SGML public entity set # :udesc => Unicode 2.0 character name (UPPER CASE) # # Entries which don't have Unicode equivalents have "0x????" for # :hex and a lower case :udesc (from the public entity set DTD). # # For reasons I (flip) don't understand, the source file mapped # ' to 0x02BC rather than its XML definition of 0x027. I've # added a line specifying 0x027; the 'original' is commented out. # http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references # # The mapping is not reversible, because many distinctions are # unified away in Unicode, particularly between mathematical # symbols. To make it reversible, one symbol was arbitrarily chosen # to encode from hex using these rules: # # * if it's also an XHTML 1.0 entity, use its XHTML reverse mapping. # * otherwise, just use the first entity encountered, # * avoiding the &b.foo; type entities # # The table is sorted case-blind by SGML character entity name. # # The contents of this table are drawn from various sources, and are # in the public domain. # MAPPINGS['expanded'] = { 'Aacgr' => 0x0386, # Ά GREEK CAPITAL LETTER ALPHA WITH TONOS 'aacgr' => 0x03ac, # ά GREEK SMALL LETTER ALPHA WITH TONOS 'Aacute' => 0x00c1, # Á xhtml LATIN CAPITAL LETTER A WITH ACUTE 'aacute' => 0x00e1, # á xhtml LATIN SMALL LETTER A WITH ACUTE 'Abreve' => 0x0102, # Ă LATIN CAPITAL LETTER A WITH BREVE 'abreve' => 0x0103, # ă LATIN SMALL LETTER A WITH BREVE 'acaron' => 0x01ce, # ǎ LATIN SMALL LETTER A WITH CARON 'Acirc' => 0x00c2, #  xhtml LATIN CAPITAL LETTER A WITH CIRCUMFLEX 'acirc' => 0x00e2, # â xhtml LATIN SMALL LETTER A WITH CIRCUMFLEX 'acute' => 0x00b4, # ´ xhtml ACUTE ACCENT 'Acy' => 0x0410, # А CYRILLIC CAPITAL LETTER A 'acy' => 0x0430, # а CYRILLIC SMALL LETTER A 'AElig' => 0x00c6, # Æ xhtml LATIN CAPITAL LETTER AE 'aelig' => 0x00e6, # æ xhtml LATIN SMALL LETTER AE 'Agr' => 0x0391, # Α dup skip GREEK CAPITAL LETTER ALPHA 'agr' => 0x03b1, # α dup skip GREEK SMALL LETTER ALPHA 'Agrave' => 0x00c0, # À xhtml LATIN CAPITAL LETTER A WITH GRAVE 'agrave' => 0x00e0, # à xhtml LATIN SMALL LETTER A WITH GRAVE 'alefsym' => 0x2135, # ℵ dup xhtml ALEF SYMBOL 'aleph' => 0x2135, # ℵ dup skip ALEF SYMBOL 'Alpha' => 0x0391, # Α dup xhtml GREEK CAPITAL LETTER ALPHA 'alpha' => 0x03b1, # α dup xhtml GREEK SMALL LETTER ALPHA 'Amacr' => 0x0100, # Ā LATIN CAPITAL LETTER A WITH MACRON 'amacr' => 0x0101, # ā LATIN SMALL LETTER A WITH MACRON 'amalg' => 0x2210, # ∐ dup N-ARY COPRODUCT 'amp' => 0x0026, # & xhtml AMPERSAND 'and' => 0x2227, # ∧ xhtml LOGICAL AND 'ang' => 0x2220, # ∠ xhtml ANGLE 'ang90' => 0x221f, # ∟ RIGHT ANGLE 'angmsd' => 0x2221, # ∡ MEASURED ANGLE 'angsph' => 0x2222, # ∢ SPHERICAL ANGLE 'angst' => 0x212b, # Å ANGSTROM SIGN 'Aogon' => 0x0104, # Ą LATIN CAPITAL LETTER A WITH OGONEK 'aogon' => 0x0105, # ą LATIN SMALL LETTER A WITH OGONEK 'ap' => 0x2248, # ≈ dup skip ALMOST EQUAL TO 'ape' => 0x224a, # ≊ ALMOST EQUAL OR EQUAL TO 'apolhook' => 0x0105, # ą LATIN SMALL LETTER A WITH OGONEK 'apos' => 0x0027, # ' xhtml MODIFIER LETTER APOSTROPHE 'Aring' => 0x00c5, # Å xhtml LATIN CAPITAL LETTER A WITH RING ABOVE 'aring' => 0x00e5, # å xhtml LATIN SMALL LETTER A WITH RING ABOVE 'ast' => 0x002a, # * ASTERISK 'asymp' => 0x2248, # ≈ dup xhtml ALMOST EQUAL TO 'Atilde' => 0x00c3, # à xhtml LATIN CAPITAL LETTER A WITH TILDE 'atilde' => 0x00e3, # ã xhtml LATIN SMALL LETTER A WITH TILDE 'Auml' => 0x00c4, # Ä xhtml LATIN CAPITAL LETTER A WITH DIAERESIS 'auml' => 0x00e4, # ä xhtml LATIN SMALL LETTER A WITH DIAERESIS 'b.alpha' => 0x03b1, # α dup skip GREEK SMALL LETTER ALPHA 'b.beta' => 0x03b2, # β dup skip GREEK SMALL LETTER BETA 'b.chi' => 0x03c7, # χ dup skip GREEK SMALL LETTER CHI 'b.Delta' => 0x0394, # Δ dup skip GREEK CAPITAL LETTER DELTA 'b.delta' => 0x03b4, # δ dup skip GREEK SMALL LETTER DELTA 'b.epsi' => 0x03b5, # ε dup skip GREEK SMALL LETTER EPSILON 'b.epsis' => 0x03b5, # ε dup skip GREEK SMALL LETTER EPSILON 'b.epsiv' => 0x03b5, # ε dup skip GREEK SMALL LETTER EPSILON 'b.eta' => 0x03b7, # η dup skip GREEK SMALL LETTER ETA 'b.Gamma' => 0x0393, # Γ dup skip GREEK CAPITAL LETTER GAMMA 'b.gamma' => 0x03b3, # γ dup skip GREEK SMALL LETTER GAMMA 'b.gammad' => 0x03dc, # Ϝ dup skip GREEK LETTER DIGAMMA 'b.iota' => 0x03b9, # ι dup skip GREEK SMALL LETTER IOTA 'b.kappa' => 0x03ba, # κ dup skip GREEK SMALL LETTER KAPPA 'b.kappav' => 0x03f0, # ϰ dup skip GREEK KAPPA SYMBOL 'b.Lambda' => 0x039b, # Λ dup skip GREEK CAPITAL LETTER LAMDA 'b.lambda' => 0x03bb, # λ dup skip GREEK SMALL LETTER LAMDA 'b.mu' => 0x03bc, # μ dup skip GREEK SMALL LETTER MU 'b.nu' => 0x03bd, # ν dup skip GREEK SMALL LETTER NU 'b.Omega' => 0x03a9, # Ω dup skip GREEK CAPITAL LETTER OMEGA 'b.omega' => 0x03ce, # ώ dup skip GREEK SMALL LETTER OMEGA WITH TONOS 'b.Phi' => 0x03a6, # Φ dup skip GREEK CAPITAL LETTER PHI 'b.phis' => 0x03c6, # φ dup skip GREEK SMALL LETTER PHI 'b.phiv' => 0x03d5, # ϕ dup skip GREEK PHI SYMBOL 'b.Pi' => 0x03a0, # Π dup skip GREEK CAPITAL LETTER PI 'b.pi' => 0x03c0, # π dup skip GREEK SMALL LETTER PI 'b.piv' => 0x03d6, # ϖ dup skip GREEK PI SYMBOL 'b.Psi' => 0x03a8, # Ψ dup skip GREEK CAPITAL LETTER PSI 'b.psi' => 0x03c8, # ψ dup skip GREEK SMALL LETTER PSI 'b.rho' => 0x03c1, # ρ dup skip GREEK SMALL LETTER RHO 'b.rhov' => 0x03f1, # ϱ dup skip GREEK RHO SYMBOL 'b.Sigma' => 0x03a3, # Σ dup skip GREEK CAPITAL LETTER SIGMA 'b.sigma' => 0x03c3, # σ dup skip GREEK SMALL LETTER SIGMA 'b.sigmav' => 0x03c2, # ς dup skip GREEK SMALL LETTER FINAL SIGMA 'b.tau' => 0x03c4, # τ dup skip GREEK SMALL LETTER TAU 'b.Theta' => 0x0398, # Θ dup skip GREEK CAPITAL LETTER THETA 'b.thetas' => 0x03b8, # θ dup skip GREEK SMALL LETTER THETA 'b.thetav' => 0x03d1, # ϑ dup skip GREEK THETA SYMBOL 'b.Upsi' => 0x03a5, # Υ dup skip GREEK CAPITAL LETTER UPSILON 'b.upsi' => 0x03c5, # υ dup skip GREEK SMALL LETTER UPSILON 'b.Xi' => 0x039e, # Ξ dup skip GREEK CAPITAL LETTER XI 'b.xi' => 0x03be, # ξ dup skip GREEK SMALL LETTER XI 'b.zeta' => 0x03b6, # ζ dup skip GREEK SMALL LETTER ZETA 'barwed' => 0x22bc, # ⊼ NAND 'Barwed' => 0x2306, # ⌆ PERSPECTIVE 'bcong' => 0x224c, # ≌ ALL EQUAL TO 'Bcy' => 0x0411, # Б CYRILLIC CAPITAL LETTER BE 'bcy' => 0x0431, # б CYRILLIC SMALL LETTER BE 'bdquo' => 0x201e, # „ dup xhtml DOUBLE LOW-9 QUOTATION MARK 'becaus' => 0x2235, # ∵ BECAUSE 'bepsi' => 0x220d, # ∍ SMALL CONTAINS AS MEMBER 'bernou' => 0x212c, # ℬ SCRIPT CAPITAL B 'Beta' => 0x0392, # Β dup xhtml GREEK CAPITAL LETTER BETA 'beta' => 0x03b2, # β dup xhtml GREEK SMALL LETTER BETA 'beth' => 0x2136, # ℶ BET SYMBOL 'Bgr' => 0x0392, # Β dup skip GREEK CAPITAL LETTER BETA 'bgr' => 0x03b2, # β dup skip GREEK SMALL LETTER BETA 'blank' => 0x2423, # ␣ OPEN BOX 'blk12' => 0x2592, # ▒ MEDIUM SHADE 'blk14' => 0x2591, # ░ LIGHT SHADE 'blk34' => 0x2593, # ▓ DARK SHADE 'block' => 0x2588, # █ FULL BLOCK 'bottom' => 0x22a5, # ⊥ dup skip UP TACK 'bowtie' => 0x22c8, # ⋈ BOWTIE 'boxdl' => 0x2510, # ┐ BOX DRAWINGS LIGHT DOWN AND LEFT 'boxdL' => 0x2555, # ╕ BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 'boxDl' => 0x2556, # ╖ BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 'boxDL' => 0x2557, # ╗ BOX DRAWINGS DOUBLE DOWN AND LEFT 'boxdr' => 0x250c, # ┌ BOX DRAWINGS LIGHT DOWN AND RIGHT 'boxdR' => 0x2552, # ╒ BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 'boxDr' => 0x2553, # ╓ BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 'boxDR' => 0x2554, # ╔ BOX DRAWINGS DOUBLE DOWN AND RIGHT 'boxh' => 0x2500, # ─ BOX DRAWINGS LIGHT HORIZONTAL 'boxH' => 0x2550, # ═ BOX DRAWINGS DOUBLE HORIZONTAL 'boxhd' => 0x252c, # ┬ BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 'boxHd' => 0x2564, # ╤ BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 'boxhD' => 0x2565, # ╥ BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 'boxHD' => 0x2566, # ╦ BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 'boxhu' => 0x2534, # ┴ BOX DRAWINGS LIGHT UP AND HORIZONTAL 'boxHu' => 0x2567, # ╧ BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 'boxhU' => 0x2568, # ╨ BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 'boxHU' => 0x2569, # ╩ BOX DRAWINGS DOUBLE UP AND HORIZONTAL 'boxul' => 0x2518, # ┘ BOX DRAWINGS LIGHT UP AND LEFT 'boxuL' => 0x255b, # ╛ BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 'boxUl' => 0x255c, # ╜ BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 'boxUL' => 0x255d, # ╝ BOX DRAWINGS DOUBLE UP AND LEFT 'boxur' => 0x2514, # └ BOX DRAWINGS LIGHT UP AND RIGHT 'boxuR' => 0x2558, # ╘ BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 'boxUr' => 0x2559, # ╙ BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 'boxUR' => 0x255a, # ╚ BOX DRAWINGS DOUBLE UP AND RIGHT 'boxv' => 0x2502, # │ BOX DRAWINGS LIGHT VERTICAL 'boxV' => 0x2551, # ║ BOX DRAWINGS DOUBLE VERTICAL 'boxvh' => 0x253c, # ┼ BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 'boxvH' => 0x256a, # ╪ BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 'boxVh' => 0x256b, # ╫ BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 'boxVH' => 0x256c, # ╬ BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 'boxvl' => 0x2524, # ┤ BOX DRAWINGS LIGHT VERTICAL AND LEFT 'boxvL' => 0x2561, # ╡ BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 'boxVl' => 0x2562, # ╢ BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 'boxVL' => 0x2563, # ╣ BOX DRAWINGS DOUBLE VERTICAL AND LEFT 'boxvr' => 0x251c, # ├ BOX DRAWINGS LIGHT VERTICAL AND RIGHT 'boxvR' => 0x255e, # ╞ BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 'boxVr' => 0x255f, # ╟ BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 'boxVR' => 0x2560, # ╠ BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 'bprime' => 0x2035, # ‵ REVERSED PRIME 'breve' => 0x02d8, # ˘ BREVE 'brvbar' => 0x00a6, # ¦ xhtml BROKEN BAR 'bsim' => 0x223d, # ∽ REVERSED TILDE 'bsime' => 0x22cd, # ⋍ REVERSED TILDE EQUALS 'bsol' => 0x005c, # \ dup REVERSE SOLIDUS 'bull' => 0x2022, # • xhtml BULLET 'bump' => 0x224e, # ≎ GEOMETRICALLY EQUIVALENT TO 'bumpe' => 0x224f, # ≏ DIFFERENCE BETWEEN 'Cacute' => 0x0106, # Ć LATIN CAPITAL LETTER C WITH ACUTE 'cacute' => 0x0107, # ć LATIN SMALL LETTER C WITH ACUTE 'cap' => 0x2229, # ∩ xhtml INTERSECTION 'Cap' => 0x22d2, # ⋒ DOUBLE INTERSECTION 'caret' => 0x2041, # ⁁ CARET INSERTION POINT 'caron' => 0x02c7, # ˇ CARON 'Ccaron' => 0x010c, # Č LATIN CAPITAL LETTER C WITH CARON 'ccaron' => 0x010d, # č LATIN SMALL LETTER C WITH CARON 'Ccedil' => 0x00c7, # Ç xhtml LATIN CAPITAL LETTER C WITH CEDILLA 'ccedil' => 0x00e7, # ç xhtml LATIN SMALL LETTER C WITH CEDILLA 'Ccirc' => 0x0108, # Ĉ LATIN CAPITAL LETTER C WITH CIRCUMFLEX 'ccirc' => 0x0109, # ĉ LATIN SMALL LETTER C WITH CIRCUMFLEX 'Cdot' => 0x010a, # Ċ LATIN CAPITAL LETTER C WITH DOT ABOVE 'cdot' => 0x010b, # ċ LATIN SMALL LETTER C WITH DOT ABOVE 'cedil' => 0x00b8, # ¸ xhtml CEDILLA 'cent' => 0x00a2, # ¢ xhtml CENT SIGN 'CHcy' => 0x0427, # Ч CYRILLIC CAPITAL LETTER CHE 'chcy' => 0x0447, # ч CYRILLIC SMALL LETTER CHE 'check' => 0x2713, # ✓ CHECK MARK 'Chi' => 0x03a7, # Χ dup xhtml GREEK CAPITAL LETTER CHI 'chi' => 0x03c7, # χ dup xhtml GREEK SMALL LETTER CHI 'cir' => 0x25cb, # ○ dup WHITE CIRCLE 'circ' => 0x02c6, # ˆ xhtml MODIFIER LETTER CIRCUMFLEX ACCENT 'cire' => 0x2257, # ≗ RING EQUAL TO 'clubs' => 0x2663, # ♣ xhtml BLACK CLUB SUIT 'Codot' => 0x010a, # Ċ LATIN CAPITAL LETTER C WITH DOT ABOVE 'colon' => 0x003a, # : COLON 'colone' => 0x2254, # ≔ COLON EQUALS 'comma' => 0x002c, # , COMMA 'commat' => 0x0040, # @ COMMERCIAL AT 'comp' => 0x2201, # ∁ COMPLEMENT 'compfn' => 0x2218, # ∘ RING OPERATOR 'cong' => 0x2245, # ≅ xhtml APPROXIMATELY EQUAL TO 'conint' => 0x222e, # ∮ CONTOUR INTEGRAL 'coprod' => 0x2210, # ∐ dup skip N-ARY COPRODUCT 'copy' => 0x00a9, # © xhtml COPYRIGHT SIGN 'copysr' => 0x2117, # ℗ SOUND RECORDING COPYRIGHT 'crarr' => 0x21b5, # ↵ xhtml DOWNWARDS ARROW WITH CORNER LEFTWARDS 'cross' => 0x2717, # ✗ BALLOT X 'cuepr' => 0x22de, # ⋞ EQUAL TO OR PRECEDES 'cuesc' => 0x22df, # ⋟ EQUAL TO OR SUCCEEDS 'cularr' => 0x21b6, # ↶ ANTICLOCKWISE TOP SEMICIRCLE ARROW 'cup' => 0x222a, # ∪ xhtml UNION 'Cup' => 0x22d3, # ⋓ DOUBLE UNION 'cupre' => 0x227c, # ≼ dup PRECEDES OR EQUAL TO 'curarr' => 0x21b7, # ↷ CLOCKWISE TOP SEMICIRCLE ARROW 'curren' => 0x00a4, # ¤ xhtml CURRENCY SIGN 'cuvee' => 0x22ce, # ⋎ CURLY LOGICAL OR 'cuwed' => 0x22cf, # ⋏ CURLY LOGICAL AND 'dagger' => 0x2020, # † xhtml DAGGER 'Dagger' => 0x2021, # ‡ xhtml DOUBLE DAGGER 'daleth' => 0x2138, # ℸ DALET SYMBOL 'darr' => 0x2193, # ↓ xhtml DOWNWARDS ARROW 'dArr' => 0x21d3, # ⇓ xhtml DOWNWARDS DOUBLE ARROW 'darr2' => 0x21ca, # ⇊ DOWNWARDS PAIRED ARROWS 'dash' => 0x2010, # ‐ HYPHEN 'dashv' => 0x22a3, # ⊣ LEFT TACK 'dblac' => 0x02dd, # ˝ DOUBLE ACUTE ACCENT 'Dcaron' => 0x010e, # Ď LATIN CAPITAL LETTER D WITH CARON 'dcaron' => 0x010f, # ď LATIN SMALL LETTER D WITH CARON 'dcross' => 0x0111, # đ LATIN SMALL LETTER D WITH STROKE 'Dcy' => 0x0414, # Д CYRILLIC CAPITAL LETTER DE 'dcy' => 0x0434, # д CYRILLIC SMALL LETTER DE 'deg' => 0x00b0, # ° xhtml DEGREE SIGN 'Delta' => 0x0394, # Δ dup xhtml GREEK CAPITAL LETTER DELTA 'delta' => 0x03b4, # δ dup xhtml GREEK SMALL LETTER DELTA 'Dgr' => 0x0394, # Δ dup skip GREEK CAPITAL LETTER DELTA 'dgr' => 0x03b4, # δ dup skip GREEK SMALL LETTER DELTA 'dharl' => 0x21c3, # ⇃ DOWNWARDS HARPOON WITH BARB LEFTWARDS 'dharr' => 0x21c2, # ⇂ DOWNWARDS HARPOON WITH BARB RIGHTWARDS 'diam' => 0x22c4, # ⋄ DIAMOND OPERATOR 'diams' => 0x2666, # ♦ xhtml BLACK DIAMOND SUIT 'die' => 0x00a8, # ¨ dup skip DIAERESIS 'divide' => 0x00f7, # ÷ xhtml DIVISION SIGN 'divonx' => 0x22c7, # ⋇ DIVISION TIMES 'DJcy' => 0x0402, # Ђ CYRILLIC CAPITAL LETTER DJE 'djcy' => 0x0452, # ђ CYRILLIC SMALL LETTER DJE 'dlarr' => 0x2199, # ↙ SOUTH WEST ARROW 'dlcorn' => 0x231e, # ⌞ BOTTOM LEFT CORNER 'dlcrop' => 0x230d, # ⌍ BOTTOM LEFT CROP 'dollar' => 0x0024, # $ DOLLAR SIGN 'Dot' => 0x00a8, # ¨ dup skip DIAERESIS 'dot' => 0x02d9, # ˙ DOT ABOVE 'DotDot' => 0x20dc, # ⃜ COMBINING FOUR DOTS ABOVE 'drarr' => 0x2198, # ↘ SOUTH EAST ARROW 'drcorn' => 0x231f, # ⌟ BOTTOM RIGHT CORNER 'drcrop' => 0x230c, # ⌌ BOTTOM RIGHT CROP 'DScy' => 0x0405, # Ѕ CYRILLIC CAPITAL LETTER DZE 'dscy' => 0x0455, # ѕ CYRILLIC SMALL LETTER DZE 'Dstrok' => 0x0110, # Đ LATIN CAPITAL LETTER D WITH STROKE 'dstrok' => 0x0111, # đ LATIN SMALL LETTER D WITH STROKE 'dsubdot' => 0x1e0d, # ḍ LATIN SMALL LETTER D WITH DOT BELOW 'dtri' => 0x25bf, # ▿ WHITE DOWN-POINTING SMALL TRIANGLE 'dtrif' => 0x25be, # ▾ BLACK DOWN-POINTING SMALL TRIANGLE 'DZcy' => 0x040f, # Џ CYRILLIC CAPITAL LETTER DZHE 'dzcy' => 0x045f, # џ CYRILLIC SMALL LETTER DZHE 'Eacgr' => 0x0388, # Έ GREEK CAPITAL LETTER EPSILON WITH TONOS 'eacgr' => 0x03ad, # έ GREEK SMALL LETTER EPSILON WITH TONOS 'Eacute' => 0x00c9, # É xhtml LATIN CAPITAL LETTER E WITH ACUTE 'eacute' => 0x00e9, # é xhtml LATIN SMALL LETTER E WITH ACUTE 'Ecaron' => 0x011a, # Ě LATIN CAPITAL LETTER E WITH CARON 'ecaron' => 0x011b, # ě LATIN SMALL LETTER E WITH CARON 'ecir' => 0x2256, # ≖ RING IN EQUAL TO 'Ecirc' => 0x00ca, # Ê xhtml LATIN CAPITAL LETTER E WITH CIRCUMFLEX 'ecirc' => 0x00ea, # ê xhtml LATIN SMALL LETTER E WITH CIRCUMFLEX 'ecolon' => 0x2255, # ≕ EQUALS COLON 'Ecy' => 0x042d, # Э CYRILLIC CAPITAL LETTER E 'ecy' => 0x044d, # э CYRILLIC SMALL LETTER E 'Edot' => 0x0116, # Ė LATIN CAPITAL LETTER E WITH DOT ABOVE 'edot' => 0x0117, # ė LATIN SMALL LETTER E WITH DOT ABOVE 'eDot' => 0x2251, # ≑ GEOMETRICALLY EQUAL TO 'EEacgr' => 0x0389, # Ή GREEK CAPITAL LETTER ETA WITH TONOS 'eeacgr' => 0x03ae, # ή GREEK SMALL LETTER ETA WITH TONOS 'EEgr' => 0x0397, # Η dup skip GREEK CAPITAL LETTER ETA 'eegr' => 0x03b7, # η dup skip GREEK SMALL LETTER ETA 'efDot' => 0x2252, # ≒ APPROXIMATELY EQUAL TO OR THE IMAGE OF 'Egr' => 0x0395, # Ε dup skip GREEK CAPITAL LETTER EPSILON 'egr' => 0x03b5, # ε dup skip GREEK SMALL LETTER EPSILON 'Egrave' => 0x00c8, # È xhtml LATIN CAPITAL LETTER E WITH GRAVE 'egrave' => 0x00e8, # è xhtml LATIN SMALL LETTER E WITH GRAVE 'egs' => 0x22dd, # ⋝ EQUAL TO OR GREATER-THAN 'ell' => 0x2113, # ℓ SCRIPT SMALL L 'els' => 0x22dc, # ⋜ EQUAL TO OR LESS-THAN 'Emacr' => 0x0112, # Ē LATIN CAPITAL LETTER E WITH MACRON 'emacr' => 0x0113, # ē LATIN SMALL LETTER E WITH MACRON 'empty' => 0x2205, # ∅ xhtml EMPTY SET 'emsp' => 0x2003, #   xhtml EM SPACE 'emsp13' => 0x2004, #   THREE-PER-EM SPACE 'emsp14' => 0x2005, #   FOUR-PER-EM SPACE 'ENG' => 0x014a, # Ŋ LATIN CAPITAL LETTER ENG 'eng' => 0x014b, # ŋ LATIN SMALL LETTER ENG 'ensp' => 0x2002, #   xhtml EN SPACE 'Eogon' => 0x0118, # Ę LATIN CAPITAL LETTER E WITH OGONEK 'eogon' => 0x0119, # ę LATIN SMALL LETTER E WITH OGONEK 'epolhook' => 0x0119, # ę LATIN SMALL LETTER E WITH OGONEK 'epsi' => 0x03b5, # ε dup skip GREEK SMALL LETTER EPSILON 'Epsilon' => 0x0395, # Ε dup xhtml GREEK CAPITAL LETTER EPSILON 'epsilon' => 0x03b5, # ε dup xhtml GREEK SMALL LETTER EPSILON 'epsis' => 0x220a, # ∊ SMALL ELEMENT OF 'equals' => 0x003d, # = EQUALS SIGN 'equiv' => 0x2261, # ≡ xhtml IDENTICAL TO 'erDot' => 0x2253, # ≓ IMAGE OF OR APPROXIMATELY EQUAL TO 'esdot' => 0x2250, # ≐ APPROACHES THE LIMIT 'Eta' => 0x0397, # Η dup xhtml GREEK CAPITAL LETTER ETA 'eta' => 0x03b7, # η dup xhtml GREEK SMALL LETTER ETA 'ETH' => 0x00d0, # Ð xhtml LATIN CAPITAL LETTER ETH 'eth' => 0x00f0, # ð xhtml LATIN SMALL LETTER ETH 'Euml' => 0x00cb, # Ë xhtml LATIN CAPITAL LETTER E WITH DIAERESIS 'euml' => 0x00eb, # ë xhtml LATIN SMALL LETTER E WITH DIAERESIS 'excl' => 0x0021, # ! EXCLAMATION MARK 'exist' => 0x2203, # ∃ xhtml THERE EXISTS 'Fcy' => 0x0424, # Ф CYRILLIC CAPITAL LETTER EF 'fcy' => 0x0444, # ф CYRILLIC SMALL LETTER EF 'female' => 0x2640, # ♀ FEMALE SIGN 'ffilig' => 0xfb03, # ffi LATIN SMALL LIGATURE FFI 'fflig' => 0xfb00, # ff LATIN SMALL LIGATURE FF 'ffllig' => 0xfb04, # ffl LATIN SMALL LIGATURE FFL 'filig' => 0xfb01, # fi LATIN SMALL LIGATURE FI 'flat' => 0x266d, # ♭ MUSIC FLAT SIGN 'fllig' => 0xfb02, # fl LATIN SMALL LIGATURE FL 'fnof' => 0x0192, # ƒ xhtml LATIN SMALL LETTER F WITH HOOK 'forall' => 0x2200, # ∀ xhtml FOR ALL 'fork' => 0x22d4, # ⋔ PITCHFORK 'frac12' => 0x00bd, # ½ dup xhtml VULGAR FRACTION ONE HALF 'frac13' => 0x2153, # ⅓ VULGAR FRACTION ONE THIRD 'frac14' => 0x00bc, # ¼ xhtml VULGAR FRACTION ONE QUARTER 'frac15' => 0x2155, # ⅕ VULGAR FRACTION ONE FIFTH 'frac16' => 0x2159, # ⅙ VULGAR FRACTION ONE SIXTH 'frac18' => 0x215b, # ⅛ VULGAR FRACTION ONE EIGHTH 'frac23' => 0x2154, # ⅔ VULGAR FRACTION TWO THIRDS 'frac25' => 0x2156, # ⅖ VULGAR FRACTION TWO FIFTHS 'frac34' => 0x00be, # ¾ xhtml VULGAR FRACTION THREE QUARTERS 'frac35' => 0x2157, # ⅗ VULGAR FRACTION THREE FIFTHS 'frac38' => 0x215c, # ⅜ VULGAR FRACTION THREE EIGHTHS 'frac45' => 0x2158, # ⅘ VULGAR FRACTION FOUR FIFTHS 'frac56' => 0x215a, # ⅚ VULGAR FRACTION FIVE SIXTHS 'frac58' => 0x215d, # ⅝ VULGAR FRACTION FIVE EIGHTHS 'frac78' => 0x215e, # ⅞ VULGAR FRACTION SEVEN EIGHTHS 'frasl' => 0x2044, # ⁄ xhtml FRACTION SLASH 'frown' => 0x2322, # ⌢ dup FROWN 'gacute' => 0x01f5, # ǵ LATIN SMALL LETTER G WITH ACUTE 'Gamma' => 0x0393, # Γ dup xhtml GREEK CAPITAL LETTER GAMMA 'gamma' => 0x03b3, # γ dup xhtml GREEK SMALL LETTER GAMMA 'gammad' => 0x03dc, # Ϝ dup GREEK LETTER DIGAMMA 'Gbreve' => 0x011e, # Ğ LATIN CAPITAL LETTER G WITH BREVE 'gbreve' => 0x011f, # ğ LATIN SMALL LETTER G WITH BREVE 'Gcedil' => 0x0122, # Ģ LATIN CAPITAL LETTER G WITH CEDILLA 'gcedil' => 0x0123, # ģ LATIN SMALL LETTER G WITH CEDILLA 'Gcirc' => 0x011c, # Ĝ LATIN CAPITAL LETTER G WITH CIRCUMFLEX 'gcirc' => 0x011d, # ĝ LATIN SMALL LETTER G WITH CIRCUMFLEX 'Gcy' => 0x0413, # Г CYRILLIC CAPITAL LETTER GHE 'gcy' => 0x0433, # г CYRILLIC SMALL LETTER GHE 'Gdot' => 0x0120, # Ġ LATIN CAPITAL LETTER G WITH DOT ABOVE 'gdot' => 0x0121, # ġ LATIN SMALL LETTER G WITH DOT ABOVE 'ge' => 0x2265, # ≥ dup xhtml GREATER-THAN OR EQUAL TO 'gE' => 0x2267, # ≧ GREATER-THAN OVER EQUAL TO 'gel' => 0x22db, # ⋛ GREATER-THAN EQUAL TO OR LESS-THAN 'ges' => 0x2265, # ≥ dup skip GREATER-THAN OR EQUAL TO 'Gg' => 0x22d9, # ⋙ VERY MUCH GREATER-THAN 'Ggr' => 0x0393, # Γ dup skip GREEK CAPITAL LETTER GAMMA 'ggr' => 0x03b3, # γ dup skip GREEK SMALL LETTER GAMMA 'gimel' => 0x2137, # ℷ GIMEL SYMBOL 'GJcy' => 0x0403, # Ѓ CYRILLIC CAPITAL LETTER GJE 'gjcy' => 0x0453, # ѓ CYRILLIC SMALL LETTER GJE 'gl' => 0x2277, # ≷ GREATER-THAN OR LESS-THAN 'gnE' => 0x2269, # ≩ dup GREATER-THAN BUT NOT EQUAL TO 'gne' => 0x2269, # ≩ dup skip GREATER-THAN BUT NOT EQUAL TO 'gnsim' => 0x22e7, # ⋧ GREATER-THAN BUT NOT EQUIVALENT TO 'godot' => 0x0121, # ġ LATIN SMALL LETTER G WITH DOT ABOVE 'grave' => 0x0060, # ` GRAVE ACCENT 'gsdot' => 0x22d7, # ⋗ GREATER-THAN WITH DOT 'gsim' => 0x2273, # ≳ GREATER-THAN OR EQUIVALENT TO 'gt' => 0x003e, # > xhtml GREATER-THAN SIGN 'Gt' => 0x226b, # ≫ MUCH GREATER-THAN 'gvnE' => 0x2269, # ≩ dup skip GREATER-THAN BUT NOT EQUAL TO 'hairsp' => 0x200a, #   HAIR SPACE 'half' => 0x00bd, # ½ dup skip VULGAR FRACTION ONE HALF 'hamilt' => 0x210b, # ℋ SCRIPT CAPITAL H 'HARDcy' => 0x042a, # Ъ CYRILLIC CAPITAL LETTER HARD SIGN 'hardcy' => 0x044a, # ъ CYRILLIC SMALL LETTER HARD SIGN 'harr' => 0x2194, # ↔ dup xhtml LEFT RIGHT ARROW 'hArr' => 0x21d4, # ⇔ dup xhtml LEFT RIGHT DOUBLE ARROW 'harrw' => 0x21ad, # ↭ LEFT RIGHT WAVE ARROW 'Hcirc' => 0x0124, # Ĥ LATIN CAPITAL LETTER H WITH CIRCUMFLEX 'hcirc' => 0x0125, # ĥ LATIN SMALL LETTER H WITH CIRCUMFLEX 'hearts' => 0x2665, # ♥ xhtml BLACK HEART SUIT 'hellip' => 0x2026, # … dup xhtml HORIZONTAL ELLIPSIS 'horbar' => 0x2015, # ― HORIZONTAL BAR 'Hstrok' => 0x0126, # Ħ LATIN CAPITAL LETTER H WITH STROKE 'hstrok' => 0x0127, # ħ LATIN SMALL LETTER H WITH STROKE 'Hsubdot' => 0x1e25, # Ḥ LATIN CAPITAL LETTER H WITH DOT BELOW 'hsubdot' => 0x1e25, # ḥ LATIN SMALL LETTER H WITH DOT BELOW 'hybull' => 0x2043, # ⁃ HYPHEN BULLET 'hyphen' => 0x002d, # - HYPHEN-MINUS 'Iacgr' => 0x038a, # Ί GREEK CAPITAL LETTER IOTA WITH TONOS 'iacgr' => 0x03af, # ί GREEK SMALL LETTER IOTA WITH TONOS 'Iacute' => 0x00cd, # Í xhtml LATIN CAPITAL LETTER I WITH ACUTE 'iacute' => 0x00ed, # í xhtml LATIN SMALL LETTER I WITH ACUTE 'Icirc' => 0x00ce, # Î xhtml LATIN CAPITAL LETTER I WITH CIRCUMFLEX 'icirc' => 0x00ee, # î xhtml LATIN SMALL LETTER I WITH CIRCUMFLEX 'Icy' => 0x0418, # И CYRILLIC CAPITAL LETTER I 'icy' => 0x0438, # и CYRILLIC SMALL LETTER I 'idiagr' => 0x0390, # ΐ GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 'Idigr' => 0x03aa, # Ϊ GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 'idigr' => 0x03ca, # ϊ GREEK SMALL LETTER IOTA WITH DIALYTIKA 'Idot' => 0x0130, # İ LATIN CAPITAL LETTER I WITH DOT ABOVE 'idotless' => 0x0131, # ı LATIN SMALL LETTER DOTLESS I 'IEcy' => 0x0415, # Е CYRILLIC CAPITAL LETTER IE 'iecy' => 0x0435, # е CYRILLIC SMALL LETTER IE 'iexcl' => 0x00a1, # ¡ xhtml INVERTED EXCLAMATION MARK 'iff' => 0x21d4, # ⇔ dup skip LEFT RIGHT DOUBLE ARROW 'Igr' => 0x0399, # Ι dup skip GREEK CAPITAL LETTER IOTA 'igr' => 0x03b9, # ι dup skip GREEK SMALL LETTER IOTA 'Igrave' => 0x00cc, # Ì xhtml LATIN CAPITAL LETTER I WITH GRAVE 'igrave' => 0x00ec, # ì xhtml LATIN SMALL LETTER I WITH GRAVE 'IJlig' => 0x0132, # IJ LATIN CAPITAL LIGATURE IJ 'ijlig' => 0x0133, # ij LATIN SMALL LIGATURE IJ 'Imacr' => 0x012a, # Ī LATIN CAPITAL LETTER I WITH MACRON 'imacr' => 0x012b, # ī LATIN SMALL LETTER I WITH MACRON 'image' => 0x2111, # ℑ xhtml BLACK-LETTER CAPITAL I 'incare' => 0x2105, # ℅ CARE OF 'infin' => 0x221e, # ∞ xhtml INFINITY 'inodot' => 0x0131, # ı dup LATIN SMALL LETTER DOTLESS I 'inodot' => 0x0131, # ı dup LATIN SMALL LETTER DOTLESS I 'int' => 0x222b, # ∫ xhtml INTEGRAL 'intcal' => 0x22ba, # ⊺ INTERCALATE 'IOcy' => 0x0401, # Ё CYRILLIC CAPITAL LETTER IO 'iocy' => 0x0451, # ё CYRILLIC SMALL LETTER IO 'Iodot' => 0x0130, # İ LATIN CAPITAL LETTER I WITH DOT ABOVE 'Iogon' => 0x012e, # Į LATIN CAPITAL LETTER I WITH OGONEK 'iogon' => 0x012f, # į LATIN SMALL LETTER I WITH OGONEK 'Iota' => 0x0399, # Ι dup xhtml GREEK CAPITAL LETTER IOTA 'iota' => 0x03b9, # ι dup xhtml GREEK SMALL LETTER IOTA 'iquest' => 0x00bf, # ¿ xhtml INVERTED QUESTION MARK 'isin' => 0x2208, # ∈ xhtml ELEMENT OF 'Itilde' => 0x0128, # Ĩ LATIN CAPITAL LETTER I WITH TILDE 'itilde' => 0x0129, # ĩ LATIN SMALL LETTER I WITH TILDE 'Iukcy' => 0x0406, # І CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 'iukcy' => 0x0456, # і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 'Iuml' => 0x00cf, # Ï xhtml LATIN CAPITAL LETTER I WITH DIAERESIS 'iuml' => 0x00ef, # ï xhtml LATIN SMALL LETTER I WITH DIAERESIS 'Jcirc' => 0x0134, # Ĵ LATIN CAPITAL LETTER J WITH CIRCUMFLEX 'jcirc' => 0x0135, # ĵ LATIN SMALL LETTER J WITH CIRCUMFLEX 'Jcy' => 0x0419, # Й CYRILLIC CAPITAL LETTER SHORT I 'jcy' => 0x0439, # й CYRILLIC SMALL LETTER SHORT I 'Jsercy' => 0x0408, # Ј CYRILLIC CAPITAL LETTER JE 'jsercy' => 0x0458, # ј CYRILLIC SMALL LETTER JE 'Jukcy' => 0x0404, # Є CYRILLIC CAPITAL LETTER UKRAINIAN IE 'jukcy' => 0x0454, # є CYRILLIC SMALL LETTER UKRAINIAN IE 'Kappa' => 0x039a, # Κ dup xhtml GREEK CAPITAL LETTER KAPPA 'kappa' => 0x03ba, # κ dup xhtml GREEK SMALL LETTER KAPPA 'kappav' => 0x03f0, # ϰ dup GREEK KAPPA SYMBOL 'Kcedil' => 0x0136, # Ķ LATIN CAPITAL LETTER K WITH CEDILLA 'kcedil' => 0x0137, # ķ LATIN SMALL LETTER K WITH CEDILLA 'Kcy' => 0x041a, # К CYRILLIC CAPITAL LETTER KA 'kcy' => 0x043a, # к CYRILLIC SMALL LETTER KA 'Kgr' => 0x039a, # Κ dup skip GREEK CAPITAL LETTER KAPPA 'kgr' => 0x03ba, # κ dup skip GREEK SMALL LETTER KAPPA 'kgreen' => 0x0138, # ĸ LATIN SMALL LETTER KRA 'KHcy' => 0x0425, # Х CYRILLIC CAPITAL LETTER HA 'khcy' => 0x0445, # х CYRILLIC SMALL LETTER HA 'KHgr' => 0x03a7, # Χ dup skip GREEK CAPITAL LETTER CHI 'khgr' => 0x03c7, # χ dup skip GREEK SMALL LETTER CHI 'KJcy' => 0x040c, # Ќ CYRILLIC CAPITAL LETTER KJE 'kjcy' => 0x045c, # ќ CYRILLIC SMALL LETTER KJE 'ksubdot' => 0x1e33, # ḳ LATIN SMALL LETTER K WITH DOT BELOW 'lAarr' => 0x21da, # ⇚ LEFTWARDS TRIPLE ARROW 'Lacute' => 0x0139, # Ĺ LATIN CAPITAL LETTER L WITH ACUTE 'lacute' => 0x013a, # ĺ LATIN SMALL LETTER L WITH ACUTE 'lagran' => 0x2112, # ℒ SCRIPT CAPITAL L 'Lambda' => 0x039b, # Λ dup xhtml GREEK CAPITAL LETTER LAMDA 'lambda' => 0x03bb, # λ dup xhtml GREEK SMALL LETTER LAMDA 'lang' => 0x2329, # 〈 xhtml LEFT-POINTING ANGLE BRACKET 'laquo' => 0x00ab, # « xhtml LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 'larr' => 0x2190, # ← xhtml LEFTWARDS ARROW 'Larr' => 0x219e, # ↞ LEFTWARDS TWO HEADED ARROW 'lArr' => 0x21d0, # ⇐ dup xhtml LEFTWARDS DOUBLE ARROW 'larr2' => 0x21c7, # ⇇ LEFTWARDS PAIRED ARROWS 'larrhk' => 0x21a9, # ↩ LEFTWARDS ARROW WITH HOOK 'larrlp' => 0x21ab, # ↫ LEFTWARDS ARROW WITH LOOP 'larrtl' => 0x21a2, # ↢ LEFTWARDS ARROW WITH TAIL 'Lcaron' => 0x013d, # Ľ LATIN CAPITAL LETTER L WITH CARON 'lcaron' => 0x013e, # ľ LATIN SMALL LETTER L WITH CARON 'Lcedil' => 0x013b, # Ļ LATIN CAPITAL LETTER L WITH CEDILLA 'lcedil' => 0x013c, # ļ LATIN SMALL LETTER L WITH CEDILLA 'lceil' => 0x2308, # ⌈ xhtml LEFT CEILING 'lcub' => 0x007b, # { LEFT CURLY BRACKET 'Lcy' => 0x041b, # Л CYRILLIC CAPITAL LETTER EL 'lcy' => 0x043b, # л CYRILLIC SMALL LETTER EL 'ldot' => 0x22d6, # ⋖ LESS-THAN WITH DOT 'ldquo' => 0x201c, # “ dup xhtml LEFT DOUBLE QUOTATION MARK 'ldquor' => 0x201e, # „ dup skip DOUBLE LOW-9 QUOTATION MARK 'le' => 0x2264, # ≤ dup xhtml LESS-THAN OR EQUAL TO 'lE' => 0x2266, # ≦ LESS-THAN OVER EQUAL TO 'leg' => 0x22da, # ⋚ LESS-THAN EQUAL TO OR GREATER-THAN 'les' => 0x2264, # ≤ dup skip LESS-THAN OR EQUAL TO 'lfloor' => 0x230a, # ⌊ xhtml LEFT FLOOR 'lg' => 0x2276, # ≶ LESS-THAN OR GREATER-THAN 'Lgr' => 0x039b, # Λ dup skip GREEK CAPITAL LETTER LAMDA 'lgr' => 0x03bb, # λ dup skip GREEK SMALL LETTER LAMDA 'lhard' => 0x21bd, # ↽ LEFTWARDS HARPOON WITH BARB DOWNWARDS 'lharu' => 0x21bc, # ↼ LEFTWARDS HARPOON WITH BARB UPWARDS 'lhblk' => 0x2584, # ▄ LOWER HALF BLOCK 'LJcy' => 0x0409, # Љ CYRILLIC CAPITAL LETTER LJE 'ljcy' => 0x0459, # љ CYRILLIC SMALL LETTER LJE 'Ll' => 0x22d8, # ⋘ VERY MUCH LESS-THAN 'Lmidot' => 0x013f, # Ŀ LATIN CAPITAL LETTER L WITH MIDDLE DOT 'lmidot' => 0x0140, # ŀ LATIN SMALL LETTER L WITH MIDDLE DOT 'lnE' => 0x2268, # ≨ dup LESS-THAN BUT NOT EQUAL TO 'lne' => 0x2268, # ≨ dup skip LESS-THAN BUT NOT EQUAL TO 'lnsim' => 0x22e6, # ⋦ LESS-THAN BUT NOT EQUIVALENT TO 'lowast' => 0x2217, # ∗ xhtml ASTERISK OPERATOR 'lowbar' => 0x005f, # _ LOW LINE 'loz' => 0x25ca, # ◊ xhtml LOZENGE 'lozf' => 0x2726, # ✦ BLACK FOUR POINTED STAR 'lpar' => 0x0028, # ( LEFT PARENTHESIS 'lrarr2' => 0x21c6, # ⇆ LEFTWARDS ARROW OVER RIGHTWARDS ARROW 'lrhar2' => 0x21cb, # ⇋ LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON 'lrm' => 0x200e, # ‎ xhtml LEFT-TO-RIGHT MARK 'lsaquo' => 0x2039, # ‹ xhtml SINGLE LEFT-POINTING ANGLE QUOTATION MARK 'lsh' => 0x21b0, # ↰ UPWARDS ARROW WITH TIP LEFTWARDS 'lsim' => 0x2272, # ≲ LESS-THAN OR EQUIVALENT TO 'Lslash' => 0x0141, # Ł LATIN CAPITAL LETTER L WITH STROKE 'lslash' => 0x0142, # ł LATIN SMALL LETTER L WITH STROKE 'lsqb' => 0x005b, # [ LEFT SQUARE BRACKET 'lsquo' => 0x2018, # ‘ dup xhtml LEFT SINGLE QUOTATION MARK 'lsquor' => 0x201a, # ‚ dup skip SINGLE LOW-9 QUOTATION MARK 'Lstrok' => 0x0141, # Ł LATIN CAPITAL LETTER L WITH STROKE 'lstrok' => 0x0142, # ł LATIN SMALL LETTER L WITH STROKE 'lt' => 0x003c, # < xhtml LESS-THAN SIGN 'Lt' => 0x226a, # ≪ MUCH LESS-THAN 'lthree' => 0x22cb, # ⋋ LEFT SEMIDIRECT PRODUCT 'ltimes' => 0x22c9, # ⋉ LEFT NORMAL FACTOR SEMIDIRECT PRODUCT 'ltri' => 0x25c3, # ◃ WHITE LEFT-POINTING SMALL TRIANGLE 'ltrie' => 0x22b4, # ⊴ NORMAL SUBGROUP OF OR EQUAL TO 'ltrif' => 0x25c2, # ◂ BLACK LEFT-POINTING SMALL TRIANGLE 'lvnE' => 0x2268, # ≨ dup skip LESS-THAN BUT NOT EQUAL TO 'macr' => 0x00af, # ¯ xhtml MACRON 'male' => 0x2642, # ♂ MALE SIGN 'malt' => 0x2720, # ✠ MALTESE CROSS 'map' => 0x21a6, # ↦ RIGHTWARDS ARROW FROM BAR 'marker' => 0x25ae, # ▮ BLACK VERTICAL RECTANGLE 'Mcy' => 0x041c, # М CYRILLIC CAPITAL LETTER EM 'mcy' => 0x043c, # м CYRILLIC SMALL LETTER EM 'mdash' => 0x2014, # — xhtml EM DASH 'Mgr' => 0x039c, # Μ dup skip GREEK CAPITAL LETTER MU 'mgr' => 0x03bc, # μ dup skip GREEK SMALL LETTER MU 'micro' => 0x00b5, # µ xhtml MICRO SIGN 'mid' => 0x2223, # ∣ DIVIDES 'middot' => 0x00b7, # · xhtml MIDDLE DOT 'minus' => 0x2212, # − xhtml MINUS SIGN 'minusb' => 0x229f, # ⊟ SQUARED MINUS 'mldr' => 0x2026, # … dup skip HORIZONTAL ELLIPSIS 'mnplus' => 0x2213, # ∓ MINUS-OR-PLUS SIGN 'models' => 0x22a7, # ⊧ MODELS 'msubdot' => 0x1e43, # ṃ LATIN SMALL LETTER M WITH DOT BELOW 'Mu' => 0x039c, # Μ dup xhtml GREEK CAPITAL LETTER MU 'mu' => 0x03bc, # μ dup xhtml GREEK SMALL LETTER MU 'mumap' => 0x22b8, # ⊸ MULTIMAP 'nabla' => 0x2207, # ∇ xhtml NABLA 'Nacute' => 0x0143, # Ń LATIN CAPITAL LETTER N WITH ACUTE 'nacute' => 0x0144, # ń LATIN SMALL LETTER N WITH ACUTE 'nap' => 0x2249, # ≉ NOT ALMOST EQUAL TO 'napos' => 0x0149, # ʼn LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 'natur' => 0x266e, # ♮ MUSIC NATURAL SIGN 'nbsp' => 0x00a0, #   xhtml NO-BREAK SPACE 'Ncaron' => 0x0147, # Ň LATIN CAPITAL LETTER N WITH CARON 'ncaron' => 0x0148, # ň LATIN SMALL LETTER N WITH CARON 'Ncedil' => 0x0145, # Ņ LATIN CAPITAL LETTER N WITH CEDILLA 'ncedil' => 0x0146, # ņ LATIN SMALL LETTER N WITH CEDILLA 'ncong' => 0x2247, # ≇ NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO 'Ncy' => 0x041d, # Н CYRILLIC CAPITAL LETTER EN 'ncy' => 0x043d, # н CYRILLIC SMALL LETTER EN 'ndash' => 0x2013, # – xhtml EN DASH 'ne' => 0x2260, # ≠ xhtml NOT EQUAL TO 'nearr' => 0x2197, # ↗ NORTH EAST ARROW 'nequiv' => 0x2262, # ≢ NOT IDENTICAL TO 'nexist' => 0x2204, # ∄ THERE DOES NOT EXIST 'nge' => 0x2271, # ≱ dup NEITHER GREATER-THAN NOR EQUAL TO 'nges' => 0x2271, # ≱ dup skip NEITHER GREATER-THAN NOR EQUAL TO 'Ngr' => 0x039d, # Ν dup skip GREEK CAPITAL LETTER NU 'ngr' => 0x03bd, # ν dup skip GREEK SMALL LETTER NU 'ngt' => 0x226f, # ≯ NOT GREATER-THAN 'nharr' => 0x21ae, # ↮ LEFT RIGHT ARROW WITH STROKE 'nhArr' => 0x21ce, # ⇎ LEFT RIGHT DOUBLE ARROW WITH STROKE 'ni' => 0x220b, # ∋ xhtml CONTAINS AS MEMBER 'NJcy' => 0x040a, # Њ CYRILLIC CAPITAL LETTER NJE 'njcy' => 0x045a, # њ CYRILLIC SMALL LETTER NJE 'nlarr' => 0x219a, # ↚ LEFTWARDS ARROW WITH STROKE 'nlArr' => 0x21cd, # ⇍ LEFTWARDS DOUBLE ARROW WITH STROKE 'nldr' => 0x2025, # ‥ TWO DOT LEADER 'nle' => 0x2270, # ≰ dup NEITHER LESS-THAN NOR EQUAL TO 'nles' => 0x2270, # ≰ dup skip NEITHER LESS-THAN NOR EQUAL TO 'nlt' => 0x226e, # ≮ NOT LESS-THAN 'nltri' => 0x22ea, # ⋪ NOT NORMAL SUBGROUP OF 'nltrie' => 0x22ec, # ⋬ NOT NORMAL SUBGROUP OF OR EQUAL TO 'nmid' => 0x2224, # ∤ DOES NOT DIVIDE 'nodot' => 0x1e45, # ṅ LATIN SMALL LETTER N WITH DOT ABOVE 'not' => 0x00ac, # ¬ xhtml NOT SIGN 'notin' => 0x2209, # ∉ xhtml NOT AN ELEMENT OF 'npar' => 0x2226, # ∦ dup NOT PARALLEL TO 'npr' => 0x2280, # ⊀ DOES NOT PRECEDE 'npre' => 0x22e0, # ⋠ DOES NOT PRECEDE OR EQUAL 'nrarr' => 0x219b, # ↛ RIGHTWARDS ARROW WITH STROKE 'nrArr' => 0x21cf, # ⇏ RIGHTWARDS DOUBLE ARROW WITH STROKE 'nrtri' => 0x22eb, # ⋫ DOES NOT CONTAIN AS NORMAL SUBGROUP 'nrtrie' => 0x22ed, # ⋭ DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL 'nsc' => 0x2281, # ⊁ DOES NOT SUCCEED 'nsce' => 0x22e1, # ⋡ DOES NOT SUCCEED OR EQUAL 'nsim' => 0x2241, # ≁ NOT TILDE 'nsime' => 0x2244, # ≄ NOT ASYMPTOTICALLY EQUAL TO 'nspar' => 0x2226, # ∦ dup skip NOT PARALLEL TO 'nsub' => 0x2284, # ⊄ xhtml NOT A SUBSET OF 'nsubdot' => 0x1e47, # ṇ LATIN SMALL LETTER N WITH DOT BELOW 'nsubE' => 0x2288, # ⊈ dup NEITHER A SUBSET OF NOR EQUAL TO 'nsube' => 0x2288, # ⊈ dup skip NEITHER A SUBSET OF NOR EQUAL TO 'nsup' => 0x2285, # ⊅ NOT A SUPERSET OF 'nsupE' => 0x2289, # ⊉ dup NEITHER A SUPERSET OF NOR EQUAL TO 'nsupe' => 0x2289, # ⊉ dup skip NEITHER A SUPERSET OF NOR EQUAL TO 'Ntilde' => 0x00d1, # Ñ xhtml LATIN CAPITAL LETTER N WITH TILDE 'ntilde' => 0x00f1, # ñ xhtml LATIN SMALL LETTER N WITH TILDE 'Nu' => 0x039d, # Ν dup xhtml GREEK CAPITAL LETTER NU 'nu' => 0x03bd, # ν dup xhtml GREEK SMALL LETTER NU 'num' => 0x0023, # # NUMBER SIGN 'numero' => 0x2116, # № NUMERO SIGN 'numsp' => 0x2007, #   FIGURE SPACE 'nvdash' => 0x22ac, # ⊬ DOES NOT PROVE 'nvDash' => 0x22ad, # ⊭ NOT TRUE 'nVdash' => 0x22ae, # ⊮ DOES NOT FORCE 'nVDash' => 0x22af, # ⊯ NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE 'nwarr' => 0x2196, # ↖ NORTH WEST ARROW 'Oacgr' => 0x038c, # Ό GREEK CAPITAL LETTER OMICRON WITH TONOS 'oacgr' => 0x03cc, # ό GREEK SMALL LETTER OMICRON WITH TONOS 'Oacute' => 0x00d3, # Ó xhtml LATIN CAPITAL LETTER O WITH ACUTE 'oacute' => 0x00f3, # ó xhtml LATIN SMALL LETTER O WITH ACUTE 'oast' => 0x229b, # ⊛ CIRCLED ASTERISK OPERATOR 'ocir' => 0x229a, # ⊚ CIRCLED RING OPERATOR 'Ocirc' => 0x00d4, # Ô xhtml LATIN CAPITAL LETTER O WITH CIRCUMFLEX 'ocirc' => 0x00f4, # ô xhtml LATIN SMALL LETTER O WITH CIRCUMFLEX 'Ocy' => 0x041e, # О CYRILLIC CAPITAL LETTER O 'ocy' => 0x043e, # о CYRILLIC SMALL LETTER O 'odash' => 0x229d, # ⊝ CIRCLED DASH 'Odblac' => 0x0150, # Ő LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 'odblac' => 0x0151, # ő LATIN SMALL LETTER O WITH DOUBLE ACUTE 'odot' => 0x2299, # ⊙ CIRCLED DOT OPERATOR 'OElig' => 0x0152, # Œ xhtml LATIN CAPITAL LIGATURE OE 'oelig' => 0x0153, # œ xhtml LATIN SMALL LIGATURE OE 'ogon' => 0x02db, # ˛ OGONEK 'Ogr' => 0x039f, # Ο dup skip GREEK CAPITAL LETTER OMICRON 'ogr' => 0x03bf, # ο dup skip GREEK SMALL LETTER OMICRON 'Ograve' => 0x00d2, # Ò xhtml LATIN CAPITAL LETTER O WITH GRAVE 'ograve' => 0x00f2, # ò xhtml LATIN SMALL LETTER O WITH GRAVE 'OHacgr' => 0x038f, # Ώ GREEK CAPITAL LETTER OMEGA WITH TONOS 'ohacgr' => 0x03ce, # ώ dup GREEK SMALL LETTER OMEGA WITH TONOS 'OHgr' => 0x03a9, # Ω dup skip GREEK CAPITAL LETTER OMEGA 'ohgr' => 0x03c9, # ω dup skip GREEK SMALL LETTER OMEGA 'ohm' => 0x2126, # Ω OHM SIGN 'olarr' => 0x21ba, # ↺ ANTICLOCKWISE OPEN CIRCLE ARROW 'oline' => 0x203e, # ‾ xhtml OVERLINE 'Omacr' => 0x014c, # Ō LATIN CAPITAL LETTER O WITH MACRON 'omacr' => 0x014d, # ō LATIN SMALL LETTER O WITH MACRON 'Omega' => 0x03a9, # Ω dup xhtml GREEK CAPITAL LETTER OMEGA 'omega' => 0x03c9, # ω dup xhtml GREEK SMALL LETTER OMEGA 'Omicron' => 0x039f, # Ο dup xhtml GREEK CAPITAL LETTER OMICRON 'omicron' => 0x03bf, # ο dup xhtml GREEK SMALL LETTER OMICRON 'ominus' => 0x2296, # ⊖ CIRCLED MINUS 'oplus' => 0x2295, # ⊕ xhtml CIRCLED PLUS 'or' => 0x2228, # ∨ xhtml LOGICAL OR 'orarr' => 0x21bb, # ↻ CLOCKWISE OPEN CIRCLE ARROW 'order' => 0x2134, # ℴ SCRIPT SMALL O 'ordf' => 0x00aa, # ª xhtml FEMININE ORDINAL INDICATOR 'ordm' => 0x00ba, # º xhtml MASCULINE ORDINAL INDICATOR 'oS' => 0x24c8, # Ⓢ CIRCLED LATIN CAPITAL LETTER S 'Oslash' => 0x00d8, # Ø xhtml LATIN CAPITAL LETTER O WITH STROKE 'oslash' => 0x00f8, # ø xhtml LATIN SMALL LETTER O WITH STROKE 'osol' => 0x2298, # ⊘ CIRCLED DIVISION SLASH 'Otilde' => 0x00d5, # Õ xhtml LATIN CAPITAL LETTER O WITH TILDE 'otilde' => 0x00f5, # õ xhtml LATIN SMALL LETTER O WITH TILDE 'otimes' => 0x2297, # ⊗ xhtml CIRCLED TIMES 'Ouml' => 0x00d6, # Ö xhtml LATIN CAPITAL LETTER O WITH DIAERESIS 'ouml' => 0x00f6, # ö xhtml LATIN SMALL LETTER O WITH DIAERESIS 'par' => 0x2225, # ∥ dup PARALLEL TO 'para' => 0x00b6, # ¶ xhtml PILCROW SIGN 'part' => 0x2202, # ∂ xhtml PARTIAL DIFFERENTIAL 'Pcy' => 0x041f, # П CYRILLIC CAPITAL LETTER PE 'pcy' => 0x043f, # п CYRILLIC SMALL LETTER PE 'percnt' => 0x0025, # % PERCENT SIGN 'period' => 0x002e, # . FULL STOP 'permil' => 0x2030, # ‰ xhtml PER MILLE SIGN 'perp' => 0x22a5, # ⊥ dup xhtml UP TACK 'Pgr' => 0x03a0, # Π dup skip GREEK CAPITAL LETTER PI 'pgr' => 0x03c0, # π dup skip GREEK SMALL LETTER PI 'PHgr' => 0x03a6, # Φ dup skip GREEK CAPITAL LETTER PHI 'phgr' => 0x03c6, # φ dup skip GREEK SMALL LETTER PHI 'Phi' => 0x03a6, # Φ dup xhtml GREEK CAPITAL LETTER PHI 'phi' => 0x03c6, # φ dup xhtml GREEK SMALL LETTER PHI 'phis' => 0x03c6, # φ dup skip GREEK SMALL LETTER PHI 'phiv' => 0x03d5, # ϕ dup GREEK PHI SYMBOL 'phmmat' => 0x2133, # ℳ SCRIPT CAPITAL M 'phone' => 0x260e, # ☎ BLACK TELEPHONE 'Pi' => 0x03a0, # Π dup xhtml GREEK CAPITAL LETTER PI 'pi' => 0x03c0, # π dup xhtml GREEK SMALL LETTER PI 'piv' => 0x03d6, # ϖ dup xhtml GREEK PI SYMBOL 'planck' => 0x210f, # ℏ PLANCK CONSTANT OVER TWO PI 'plus' => 0x002b, # + PLUS SIGN 'plusb' => 0x229e, # ⊞ SQUARED PLUS 'plusdo' => 0x2214, # ∔ DOT PLUS 'plusmn' => 0x00b1, # ± xhtml PLUS-MINUS SIGN 'pound' => 0x00a3, # £ xhtml POUND SIGN 'pr' => 0x227a, # ≺ PRECEDES 'pre' => 0x227c, # ≼ dup skip PRECEDES OR EQUAL TO 'prime' => 0x2032, # ′ dup xhtml PRIME 'Prime' => 0x2033, # ″ xhtml DOUBLE PRIME 'prnsim' => 0x22e8, # ⋨ PRECEDES BUT NOT EQUIVALENT TO 'prod' => 0x220f, # ∏ xhtml N-ARY PRODUCT 'prop' => 0x221d, # ∝ dup xhtml PROPORTIONAL TO 'prsim' => 0x227e, # ≾ PRECEDES OR EQUIVALENT TO 'PSgr' => 0x03a8, # Ψ dup skip GREEK CAPITAL LETTER PSI 'psgr' => 0x03c8, # ψ dup skip GREEK SMALL LETTER PSI 'Psi' => 0x03a8, # Ψ dup xhtml GREEK CAPITAL LETTER PSI 'psi' => 0x03c8, # ψ dup xhtml GREEK SMALL LETTER PSI 'puncsp' => 0x2008, #   PUNCTUATION SPACE 'quest' => 0x003f, # ? QUESTION MARK 'quot' => 0x0022, # " xhtml QUOTATION MARK 'rAarr' => 0x21db, # ⇛ RIGHTWARDS TRIPLE ARROW 'Racute' => 0x0154, # Ŕ LATIN CAPITAL LETTER R WITH ACUTE 'racute' => 0x0155, # ŕ LATIN SMALL LETTER R WITH ACUTE 'radic' => 0x221a, # √ xhtml SQUARE ROOT 'rang' => 0x232a, # 〉 xhtml RIGHT-POINTING ANGLE BRACKET 'raquo' => 0x00bb, # » xhtml RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 'rarr' => 0x2192, # → xhtml RIGHTWARDS ARROW 'Rarr' => 0x21a0, # ↠ RIGHTWARDS TWO HEADED ARROW 'rArr' => 0x21d2, # ⇒ dup xhtml RIGHTWARDS DOUBLE ARROW 'rarr2' => 0x21c9, # ⇉ RIGHTWARDS PAIRED ARROWS 'rarrhk' => 0x21aa, # ↪ RIGHTWARDS ARROW WITH HOOK 'rarrlp' => 0x21ac, # ↬ RIGHTWARDS ARROW WITH LOOP 'rarrtl' => 0x21a3, # ↣ RIGHTWARDS ARROW WITH TAIL 'rarrw' => 0x219d, # ↝ RIGHTWARDS WAVE ARROW 'Rcaron' => 0x0158, # Ř LATIN CAPITAL LETTER R WITH CARON 'rcaron' => 0x0159, # ř LATIN SMALL LETTER R WITH CARON 'Rcedil' => 0x0156, # Ŗ LATIN CAPITAL LETTER R WITH CEDILLA 'rcedil' => 0x0157, # ŗ LATIN SMALL LETTER R WITH CEDILLA 'rceil' => 0x2309, # ⌉ xhtml RIGHT CEILING 'rcub' => 0x007d, # } RIGHT CURLY BRACKET 'Rcy' => 0x0420, # Р CYRILLIC CAPITAL LETTER ER 'rcy' => 0x0440, # р CYRILLIC SMALL LETTER ER 'rdquo' => 0x201d, # ” xhtml RIGHT DOUBLE QUOTATION MARK 'rdquor' => 0x201c, # “ dup skip LEFT DOUBLE QUOTATION MARK 'real' => 0x211c, # ℜ xhtml BLACK-LETTER CAPITAL R 'rect' => 0x25ad, # ▭ WHITE RECTANGLE 'reg' => 0x00ae, # ® xhtml REGISTERED SIGN 'rfloor' => 0x230b, # ⌋ xhtml RIGHT FLOOR 'Rgr' => 0x03a1, # Ρ dup skip GREEK CAPITAL LETTER RHO 'rgr' => 0x03c1, # ρ dup skip GREEK SMALL LETTER RHO 'rhard' => 0x21c1, # ⇁ RIGHTWARDS HARPOON WITH BARB DOWNWARDS 'rharu' => 0x21c0, # ⇀ RIGHTWARDS HARPOON WITH BARB UPWARDS 'Rho' => 0x03a1, # Ρ dup xhtml GREEK CAPITAL LETTER RHO 'rho' => 0x03c1, # ρ dup xhtml GREEK SMALL LETTER RHO 'rhov' => 0x03f1, # ϱ dup GREEK RHO SYMBOL 'ring' => 0x02da, # ˚ RING ABOVE 'rlarr2' => 0x21c4, # ⇄ RIGHTWARDS ARROW OVER LEFTWARDS ARROW 'rlhar2' => 0x21cc, # ⇌ RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON 'rlm' => 0x200f, # ‏ xhtml RIGHT-TO-LEFT MARK 'rpar' => 0x0029, # ) RIGHT PARENTHESIS 'rsaquo' => 0x203a, # › xhtml SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 'rsh' => 0x21b1, # ↱ UPWARDS ARROW WITH TIP RIGHTWARDS 'rsqb' => 0x005d, # ] RIGHT SQUARE BRACKET 'rsquo' => 0x2019, # ’ xhtml RIGHT SINGLE QUOTATION MARK 'rsquor' => 0x2018, # ‘ dup skip LEFT SINGLE QUOTATION MARK 'Rsubdot' => 0x1e5a, # Ṛ LATIN CAPITAL LETTER R WITH DOT BELOW 'rsubdot' => 0x1e5b, # ṛ LATIN SMALL LETTER R WITH DOT BELOW 'rthree' => 0x22cc, # ⋌ RIGHT SEMIDIRECT PRODUCT 'rtimes' => 0x22ca, # ⋊ RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT 'rtri' => 0x25b9, # ▹ WHITE RIGHT-POINTING SMALL TRIANGLE 'rtrie' => 0x22b5, # ⊵ CONTAINS AS NORMAL SUBGROUP OR EQUAL TO 'rtrif' => 0x25b8, # ▸ BLACK RIGHT-POINTING SMALL TRIANGLE 'rx' => 0x211e, # ℞ PRESCRIPTION TAKE 'Sacute' => 0x015a, # Ś LATIN CAPITAL LETTER S WITH ACUTE 'sacute' => 0x015b, # ś LATIN SMALL LETTER S WITH ACUTE 'samalg' => 0x2210, # ∐ dup skip N-ARY COPRODUCT 'sbquo' => 0x201a, # ‚ dup xhtml SINGLE LOW-9 QUOTATION MARK 'sbsol' => 0x005c, # \ dup skip REVERSE SOLIDUS 'sc' => 0x227b, # ≻ SUCCEEDS 'Scaron' => 0x0160, # Š xhtml LATIN CAPITAL LETTER S WITH CARON 'scaron' => 0x0161, # š xhtml LATIN SMALL LETTER S WITH CARON 'sccue' => 0x227d, # ≽ dup SUCCEEDS OR EQUAL TO 'sce' => 0x227d, # ≽ dup skip SUCCEEDS OR EQUAL TO 'Scedil' => 0x015e, # Ş LATIN CAPITAL LETTER S WITH CEDILLA 'scedil' => 0x015f, # ş LATIN SMALL LETTER S WITH CEDILLA 'Scirc' => 0x015c, # Ŝ LATIN CAPITAL LETTER S WITH CIRCUMFLEX 'scirc' => 0x015d, # ŝ LATIN SMALL LETTER S WITH CIRCUMFLEX 'scnsim' => 0x22e9, # ⋩ SUCCEEDS BUT NOT EQUIVALENT TO 'scsim' => 0x227f, # ≿ SUCCEEDS OR EQUIVALENT TO 'Scy' => 0x0421, # С CYRILLIC CAPITAL LETTER ES 'scy' => 0x0441, # с CYRILLIC SMALL LETTER ES 'sdot' => 0x22c5, # ⋅ xhtml DOT OPERATOR 'sdotb' => 0x22a1, # ⊡ SQUARED DOT OPERATOR 'sect' => 0x00a7, # § xhtml SECTION SIGN 'semi' => 0x003b, # ; SEMICOLON 'setmn' => 0x2216, # ∖ dup SET MINUS 'sext' => 0x2736, # ✶ SIX POINTED BLACK STAR 'sfgr' => 0x03c2, # ς dup skip GREEK SMALL LETTER FINAL SIGMA 'sfrown' => 0x2322, # ⌢ dup skip FROWN 'Sgr' => 0x03a3, # Σ dup skip GREEK CAPITAL LETTER SIGMA 'sgr' => 0x03c3, # σ dup skip GREEK SMALL LETTER SIGMA 'sharp' => 0x266f, # ♯ MUSIC SHARP SIGN 'SHCHcy' => 0x0429, # Щ CYRILLIC CAPITAL LETTER SHCHA 'shchcy' => 0x0449, # щ CYRILLIC SMALL LETTER SHCHA 'SHcy' => 0x0428, # Ш CYRILLIC CAPITAL LETTER SHA 'shcy' => 0x0448, # ш CYRILLIC SMALL LETTER SHA 'shy' => 0x00ad, # ­ xhtml SOFT HYPHEN 'Sigma' => 0x03a3, # Σ dup xhtml GREEK CAPITAL LETTER SIGMA 'sigma' => 0x03c3, # σ dup xhtml GREEK SMALL LETTER SIGMA 'sigmaf' => 0x03c2, # ς dup xhtml GREEK SMALL LETTER FINAL SIGMA 'sigmav' => 0x03c2, # ς dup skip GREEK SMALL LETTER FINAL SIGMA 'sim' => 0x223c, # ∼ dup xhtml TILDE OPERATOR 'sime' => 0x2243, # ≃ ASYMPTOTICALLY EQUAL TO 'smile' => 0x2323, # ⌣ dup SMILE 'SOFTcy' => 0x042c, # Ь CYRILLIC CAPITAL LETTER SOFT SIGN 'softcy' => 0x044c, # ь CYRILLIC SMALL LETTER SOFT SIGN 'sol' => 0x002f, # / SOLIDUS 'spades' => 0x2660, # ♠ xhtml BLACK SPADE SUIT 'spar' => 0x2225, # ∥ dup skip PARALLEL TO 'sqcap' => 0x2293, # ⊓ SQUARE CAP 'sqcup' => 0x2294, # ⊔ SQUARE CUP 'sqsub' => 0x228f, # ⊏ SQUARE IMAGE OF 'sqsube' => 0x2291, # ⊑ SQUARE IMAGE OF OR EQUAL TO 'sqsup' => 0x2290, # ⊐ SQUARE ORIGINAL OF 'sqsupe' => 0x2292, # ⊒ SQUARE ORIGINAL OF OR EQUAL TO 'squ' => 0x25a1, # □ dup WHITE SQUARE 'square' => 0x25a1, # □ dup skip WHITE SQUARE 'squf' => 0x25aa, # ▪ BLACK SMALL SQUARE 'ssetmn' => 0x2216, # ∖ dup skip SET MINUS 'ssmile' => 0x2323, # ⌣ dup skip SMILE 'sstarf' => 0x22c6, # ⋆ STAR OPERATOR 'ssubdot' => 0x1e63, # ṣ LATIN SMALL LETTER S WITH DOT BELOW 'star' => 0x2606, # ☆ WHITE STAR 'starf' => 0x2605, # ★ BLACK STAR 'sub' => 0x2282, # ⊂ xhtml SUBSET OF 'Sub' => 0x22d0, # ⋐ DOUBLE SUBSET 'subE' => 0x2286, # ⊆ dup skip SUBSET OF OR EQUAL TO 'sube' => 0x2286, # ⊆ dup xhtml SUBSET OF OR EQUAL TO 'subnE' => 0x228a, # ⊊ dup SUBSET OF WITH NOT EQUAL TO 'subne' => 0x228a, # ⊊ dup skip SUBSET OF WITH NOT EQUAL TO 'sum' => 0x2211, # ∑ xhtml N-ARY SUMMATION 'sung' => 0x266a, # ♪ EIGHTH NOTE 'sup' => 0x2283, # ⊃ xhtml SUPERSET OF 'Sup' => 0x22d1, # ⋑ DOUBLE SUPERSET 'sup1' => 0x00b9, # ¹ xhtml SUPERSCRIPT ONE 'sup2' => 0x00b2, # ² xhtml SUPERSCRIPT TWO 'sup3' => 0x00b3, # ³ xhtml SUPERSCRIPT THREE 'supE' => 0x2287, # ⊇ dup skip SUPERSET OF OR EQUAL TO 'supe' => 0x2287, # ⊇ dup xhtml SUPERSET OF OR EQUAL TO 'supnE' => 0x228b, # ⊋ dup SUPERSET OF WITH NOT EQUAL TO 'supne' => 0x228b, # ⊋ dup skip SUPERSET OF WITH NOT EQUAL TO 'szlig' => 0x00df, # ß xhtml LATIN SMALL LETTER SHARP S 'target' => 0x2316, # ⌖ POSITION INDICATOR 'Tau' => 0x03a4, # Τ dup xhtml GREEK CAPITAL LETTER TAU 'tau' => 0x03c4, # τ dup xhtml GREEK SMALL LETTER TAU 'Tcaron' => 0x0164, # Ť LATIN CAPITAL LETTER T WITH CARON 'tcaron' => 0x0165, # ť LATIN SMALL LETTER T WITH CARON 'Tcedil' => 0x0162, # Ţ LATIN CAPITAL LETTER T WITH CEDILLA 'tcedil' => 0x0163, # ţ LATIN SMALL LETTER T WITH CEDILLA 'Tcy' => 0x0422, # Т CYRILLIC CAPITAL LETTER TE 'tcy' => 0x0442, # т CYRILLIC SMALL LETTER TE 'tdot' => 0x20db, # ⃛ COMBINING THREE DOTS ABOVE 'telrec' => 0x2315, # ⌕ TELEPHONE RECORDER 'Tgr' => 0x03a4, # Τ dup skip GREEK CAPITAL LETTER TAU 'tgr' => 0x03c4, # τ dup skip GREEK SMALL LETTER TAU 'there4' => 0x2234, # ∴ xhtml THEREFORE 'Theta' => 0x0398, # Θ dup xhtml GREEK CAPITAL LETTER THETA 'theta' => 0x03b8, # θ dup xhtml GREEK SMALL LETTER THETA 'thetas' => 0x03b8, # θ dup skip GREEK SMALL LETTER THETA 'thetasym' => 0x03d1, # ϑ dup xhtml GREEK THETA SYMBOL 'thetav' => 0x03d1, # ϑ dup skip GREEK THETA SYMBOL 'THgr' => 0x0398, # Θ dup skip GREEK CAPITAL LETTER THETA 'thgr' => 0x03b8, # θ dup skip GREEK SMALL LETTER THETA 'thinsp' => 0x2009, #   xhtml THIN SPACE 'thkap' => 0x2248, # ≈ dup skip ALMOST EQUAL TO 'thksim' => 0x223c, # ∼ dup skip TILDE OPERATOR 'THORN' => 0x00de, # Þ xhtml LATIN CAPITAL LETTER THORN 'thorn' => 0x00fe, # þ xhtml LATIN SMALL LETTER THORN 'tilde' => 0x02dc, # ˜ xhtml SMALL TILDE 'times' => 0x00d7, # × xhtml MULTIPLICATION SIGN 'timesb' => 0x22a0, # ⊠ SQUARED TIMES 'top' => 0x22a4, # ⊤ DOWN TACK 'tprime' => 0x2034, # ‴ TRIPLE PRIME 'trade' => 0x2122, # ™ xhtml TRADE MARK SIGN 'trie' => 0x225c, # ≜ DELTA EQUAL TO 'TScy' => 0x0426, # Ц CYRILLIC CAPITAL LETTER TSE 'tscy' => 0x0446, # ц CYRILLIC SMALL LETTER TSE 'TSHcy' => 0x040b, # Ћ CYRILLIC CAPITAL LETTER TSHE 'tshcy' => 0x045b, # ћ CYRILLIC SMALL LETTER TSHE 'Tstrok' => 0x0166, # Ŧ LATIN CAPITAL LETTER T WITH STROKE 'tstrok' => 0x0167, # ŧ LATIN SMALL LETTER T WITH STROKE 'tsubdot' => 0x1e6d, # ṭ LATIN SMALL LETTER T WITH DOT BELOW 'twixt' => 0x226c, # ≬ BETWEEN 'Uacgr' => 0x038e, # Ύ GREEK CAPITAL LETTER UPSILON WITH TONOS 'uacgr' => 0x03cd, # ύ GREEK SMALL LETTER UPSILON WITH TONOS 'Uacute' => 0x00da, # Ú xhtml LATIN CAPITAL LETTER U WITH ACUTE 'uacute' => 0x00fa, # ú xhtml LATIN SMALL LETTER U WITH ACUTE 'uarr' => 0x2191, # ↑ xhtml UPWARDS ARROW 'uArr' => 0x21d1, # ⇑ xhtml UPWARDS DOUBLE ARROW 'uarr2' => 0x21c8, # ⇈ UPWARDS PAIRED ARROWS 'Ubrcy' => 0x040e, # Ў CYRILLIC CAPITAL LETTER SHORT U 'ubrcy' => 0x045e, # ў CYRILLIC SMALL LETTER SHORT U 'Ubreve' => 0x016c, # Ŭ LATIN CAPITAL LETTER U WITH BREVE 'ubreve' => 0x016d, # ŭ LATIN SMALL LETTER U WITH BREVE 'Ucirc' => 0x00db, # Û xhtml LATIN CAPITAL LETTER U WITH CIRCUMFLEX 'ucirc' => 0x00fb, # û xhtml LATIN SMALL LETTER U WITH CIRCUMFLEX 'Ucy' => 0x0423, # У CYRILLIC CAPITAL LETTER U 'ucy' => 0x0443, # у CYRILLIC SMALL LETTER U 'Udblac' => 0x0170, # Ű LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 'udblac' => 0x0171, # ű LATIN SMALL LETTER U WITH DOUBLE ACUTE 'udiagr' => 0x03b0, # ΰ GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 'Udigr' => 0x03ab, # Ϋ GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 'udigr' => 0x03cb, # ϋ GREEK SMALL LETTER UPSILON WITH DIALYTIKA 'Ugr' => 0x03a5, # Υ dup skip GREEK CAPITAL LETTER UPSILON 'ugr' => 0x03c5, # υ dup skip GREEK SMALL LETTER UPSILON 'Ugrave' => 0x00d9, # Ù xhtml LATIN CAPITAL LETTER U WITH GRAVE 'ugrave' => 0x00f9, # ù xhtml LATIN SMALL LETTER U WITH GRAVE 'uharl' => 0x21bf, # ↿ UPWARDS HARPOON WITH BARB LEFTWARDS 'uharr' => 0x21be, # ↾ UPWARDS HARPOON WITH BARB RIGHTWARDS 'uhblk' => 0x2580, # ▀ UPPER HALF BLOCK 'ulcorn' => 0x231c, # ⌜ TOP LEFT CORNER 'ulcrop' => 0x230f, # ⌏ TOP LEFT CROP 'Umacr' => 0x016a, # Ū LATIN CAPITAL LETTER U WITH MACRON 'umacr' => 0x016b, # ū LATIN SMALL LETTER U WITH MACRON 'uml' => 0x00a8, # ¨ dup xhtml DIAERESIS 'Uogon' => 0x0172, # Ų LATIN CAPITAL LETTER U WITH OGONEK 'uogon' => 0x0173, # ų LATIN SMALL LETTER U WITH OGONEK 'uplus' => 0x228e, # ⊎ MULTISET UNION 'Upsi' => 0x03a5, # Υ dup skip GREEK CAPITAL LETTER UPSILON 'upsi' => 0x03c5, # υ dup skip GREEK SMALL LETTER UPSILON 'upsih' => 0x03d2, # ϒ xhtml GREEK UPSILON WITH HOOK SYMBOL 'Upsilon' => 0x03a5, # Υ dup xhtml GREEK CAPITAL LETTER UPSILON 'upsilon' => 0x03c5, # υ dup xhtml GREEK SMALL LETTER UPSILON 'urcorn' => 0x231d, # ⌝ TOP RIGHT CORNER 'urcrop' => 0x230e, # ⌎ TOP RIGHT CROP 'Uring' => 0x016e, # Ů LATIN CAPITAL LETTER U WITH RING ABOVE 'uring' => 0x016f, # ů LATIN SMALL LETTER U WITH RING ABOVE 'Utilde' => 0x0168, # Ũ LATIN CAPITAL LETTER U WITH TILDE 'utilde' => 0x0169, # ũ LATIN SMALL LETTER U WITH TILDE 'utri' => 0x25b5, # ▵ WHITE UP-POINTING SMALL TRIANGLE 'utrif' => 0x25b4, # ▴ BLACK UP-POINTING SMALL TRIANGLE 'Uuml' => 0x00dc, # Ü xhtml LATIN CAPITAL LETTER U WITH DIAERESIS 'uuml' => 0x00fc, # ü xhtml LATIN SMALL LETTER U WITH DIAERESIS 'varr' => 0x2195, # ↕ UP DOWN ARROW 'vArr' => 0x21d5, # ⇕ UP DOWN DOUBLE ARROW 'Vcy' => 0x0412, # В CYRILLIC CAPITAL LETTER VE 'vcy' => 0x0432, # в CYRILLIC SMALL LETTER VE 'vdash' => 0x22a2, # ⊢ RIGHT TACK 'vDash' => 0x22a8, # ⊨ TRUE 'Vdash' => 0x22a9, # ⊩ FORCES 'veebar' => 0x22bb, # ⊻ XOR 'vellip' => 0x22ee, # ⋮ VERTICAL ELLIPSIS 'verbar' => 0x007c, # | VERTICAL LINE 'Verbar' => 0x2016, # ‖ DOUBLE VERTICAL LINE 'vltri' => 0x22b2, # ⊲ NORMAL SUBGROUP OF 'vprime' => 0x2032, # ′ dup skip PRIME 'vprop' => 0x221d, # ∝ dup skip PROPORTIONAL TO 'vrtri' => 0x22b3, # ⊳ CONTAINS AS NORMAL SUBGROUP 'vsubnE' => 0x228a, # ⊊ dup skip SUBSET OF WITH NOT EQUAL TO 'vsubne' => 0x228a, # ⊊ dup skip SUBSET OF WITH NOT EQUAL TO 'vsupnE' => 0x228b, # ⊋ dup skip SUPERSET OF WITH NOT EQUAL TO 'vsupne' => 0x228b, # ⊋ dup skip SUPERSET OF WITH NOT EQUAL TO 'Vvdash' => 0x22aa, # ⊪ TRIPLE VERTICAL BAR RIGHT TURNSTILE 'Wcirc' => 0x0174, # Ŵ LATIN CAPITAL LETTER W WITH CIRCUMFLEX 'wcirc' => 0x0175, # ŵ LATIN SMALL LETTER W WITH CIRCUMFLEX 'wedgeq' => 0x2259, # ≙ ESTIMATES 'weierp' => 0x2118, # ℘ xhtml SCRIPT CAPITAL P 'wreath' => 0x2240, # ≀ WREATH PRODUCT 'xcirc' => 0x25cb, # ○ dup skip WHITE CIRCLE 'xdtri' => 0x25bd, # ▽ WHITE DOWN-POINTING TRIANGLE 'Xgr' => 0x039e, # Ξ dup skip GREEK CAPITAL LETTER XI 'xgr' => 0x03be, # ξ dup skip GREEK SMALL LETTER XI 'xhArr' => 0x2194, # ↔ dup skip LEFT RIGHT ARROW 'xharr' => 0x2194, # ↔ dup skip LEFT RIGHT ARROW 'Xi' => 0x039e, # Ξ dup xhtml GREEK CAPITAL LETTER XI 'xi' => 0x03be, # ξ dup xhtml GREEK SMALL LETTER XI 'xlArr' => 0x21d0, # ⇐ dup skip LEFTWARDS DOUBLE ARROW 'xrArr' => 0x21d2, # ⇒ dup skip RIGHTWARDS DOUBLE ARROW 'xutri' => 0x25b3, # △ WHITE UP-POINTING TRIANGLE 'Yacute' => 0x00dd, # Ý xhtml LATIN CAPITAL LETTER Y WITH ACUTE 'yacute' => 0x00fd, # ý xhtml LATIN SMALL LETTER Y WITH ACUTE 'YAcy' => 0x042f, # Я CYRILLIC CAPITAL LETTER YA 'yacy' => 0x044f, # я CYRILLIC SMALL LETTER YA 'Ycirc' => 0x0176, # Ŷ LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 'ycirc' => 0x0177, # ŷ LATIN SMALL LETTER Y WITH CIRCUMFLEX 'Ycy' => 0x042b, # Ы CYRILLIC CAPITAL LETTER YERU 'ycy' => 0x044b, # ы CYRILLIC SMALL LETTER YERU 'yen' => 0x00a5, # ¥ xhtml YEN SIGN 'YIcy' => 0x0407, # Ї CYRILLIC CAPITAL LETTER YI 'yicy' => 0x0457, # ї CYRILLIC SMALL LETTER YI 'YUcy' => 0x042e, # Ю CYRILLIC CAPITAL LETTER YU 'yucy' => 0x044e, # ю CYRILLIC SMALL LETTER YU 'yuml' => 0x00ff, # ÿ xhtml LATIN SMALL LETTER Y WITH DIAERESIS 'Yuml' => 0x0178, # Ÿ xhtml LATIN CAPITAL LETTER Y WITH DIAERESIS 'Zacute' => 0x0179, # Ź LATIN CAPITAL LETTER Z WITH ACUTE 'zacute' => 0x017a, # ź LATIN SMALL LETTER Z WITH ACUTE 'Zcaron' => 0x017d, # Ž LATIN CAPITAL LETTER Z WITH CARON 'zcaron' => 0x017e, # ž LATIN SMALL LETTER Z WITH CARON 'Zcy' => 0x0417, # З CYRILLIC CAPITAL LETTER ZE 'zcy' => 0x0437, # з CYRILLIC SMALL LETTER ZE 'Zdot' => 0x017b, # Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE 'zdot' => 0x017c, # ż LATIN SMALL LETTER Z WITH DOT ABOVE 'Zeta' => 0x0396, # Ζ dup xhtml GREEK CAPITAL LETTER ZETA 'zeta' => 0x03b6, # ζ dup xhtml GREEK SMALL LETTER ZETA 'Zgr' => 0x0396, # Ζ dup skip GREEK CAPITAL LETTER ZETA 'zgr' => 0x03b6, # ζ dup skip GREEK SMALL LETTER ZETA 'ZHcy' => 0x0416, # Ж CYRILLIC CAPITAL LETTER ZHE 'zhcy' => 0x0436, # ж CYRILLIC SMALL LETTER ZHE 'Zodot' => 0x017b, # Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE 'zodot' => 0x017c, # ż LATIN SMALL LETTER Z WITH DOT ABOVE 'Zsubdot' => 0x1e92, # Ẓ LATIN CAPITAL LETTER Z WITH DOT BELOW 'zsubdot' => 0x1e93, # ẓ LATIN SMALL LETTER Z WITH DOT BELOW 'zwj' => 0x200d, # ‍ xhtml ZERO WIDTH JOINER 'zwnj' => 0x200c, # ‌ xhtml ZERO WIDTH NON-JOINER 'euro' => 0x20ac, # € xhtml EURO SIGN } SKIP_DUP_ENCODINGS['expanded'] = %w[ ap thkap rsquor aleph lsquor square rdquor ldquor b.kappav b.rhov mldr xlArr die Dot xrArr iff les ges vprime lne lvnE gne gvnE nles nges half xcirc pre sce Agr Bgr subE b.Gamma Ggr supE b.Delta Dgr nsube nsupe Egr Zgr subne vsubnE vsubne EEgr supne vsupnE vsupne b.Theta THgr Igr Kgr b.Lambda Lgr Mgr Ngr b.Xi Xgr Ogr b.Pi Pgr sfrown Rgr ssmile b.Sigma Sgr Tgr b.Upsi Ugr Upsi b.Phi PHgr KHgr b.Psi PSgr b.Omega OHgr coprod samalg sbsol ssetmn agr b.alpha bottom b.beta bgr b.gamma ggr b.delta dgr b.epsi b.epsis b.epsiv egr epsi b.zeta zgr vprop b.eta eegr b.thetas thetas thgr b.iota igr b.kappa kgr b.lambda lgr xhArr xharr b.mu mgr b.nu ngr b.xi xgr spar ogr nspar b.pi pgr b.rho rgr b.sigmav sfgr sigmav b.sigma sgr b.tau tgr b.upsi ugr upsi b.phis phgr phis b.chi khgr b.psi psgr ohgr b.omega b.thetav thetav b.phiv thksim b.piv b.gammad ] end htmlentities-4.3.1/lib/htmlentities/mappings/html4.rb0000644000004100000410000001175411665456537022764 0ustar www-datawww-data# encoding: UTF-8 class HTMLEntities MAPPINGS['html4'] = { 'Aacute' => 193, 'aacute' => 225, 'Acirc' => 194, 'acirc' => 226, 'acute' => 180, 'AElig' => 198, 'aelig' => 230, 'Agrave' => 192, 'agrave' => 224, 'alefsym' => 8501, 'Alpha' => 913, 'alpha' => 945, 'amp' => 38, 'and' => 8743, 'ang' => 8736, 'Aring' => 197, 'aring' => 229, 'asymp' => 8776, 'Atilde' => 195, 'atilde' => 227, 'Auml' => 196, 'auml' => 228, 'bdquo' => 8222, 'Beta' => 914, 'beta' => 946, 'brvbar' => 166, 'bull' => 8226, 'cap' => 8745, 'Ccedil' => 199, 'ccedil' => 231, 'cedil' => 184, 'cent' => 162, 'Chi' => 935, 'chi' => 967, 'circ' => 710, 'clubs' => 9827, 'cong' => 8773, 'copy' => 169, 'crarr' => 8629, 'cup' => 8746, 'curren' => 164, 'Dagger' => 8225, 'dagger' => 8224, 'dArr' => 8659, 'darr' => 8595, 'deg' => 176, 'Delta' => 916, 'delta' => 948, 'diams' => 9830, 'divide' => 247, 'Eacute' => 201, 'eacute' => 233, 'Ecirc' => 202, 'ecirc' => 234, 'Egrave' => 200, 'egrave' => 232, 'empty' => 8709, 'emsp' => 8195, 'ensp' => 8194, 'Epsilon' => 917, 'epsilon' => 949, 'equiv' => 8801, 'Eta' => 919, 'eta' => 951, 'ETH' => 208, 'eth' => 240, 'Euml' => 203, 'euml' => 235, 'euro' => 8364, 'exist' => 8707, 'fnof' => 402, 'forall' => 8704, 'frac12' => 189, 'frac14' => 188, 'frac34' => 190, 'frasl' => 8260, 'Gamma' => 915, 'gamma' => 947, 'ge' => 8805, 'gt' => 62, 'hArr' => 8660, 'harr' => 8596, 'hearts' => 9829, 'hellip' => 8230, 'Iacute' => 205, 'iacute' => 237, 'Icirc' => 206, 'icirc' => 238, 'iexcl' => 161, 'Igrave' => 204, 'igrave' => 236, 'image' => 8465, 'infin' => 8734, 'int' => 8747, 'Iota' => 921, 'iota' => 953, 'iquest' => 191, 'isin' => 8712, 'Iuml' => 207, 'iuml' => 239, 'Kappa' => 922, 'kappa' => 954, 'Lambda' => 923, 'lambda' => 955, 'lang' => 9001, 'laquo' => 171, 'lArr' => 8656, 'larr' => 8592, 'lceil' => 8968, 'ldquo' => 8220, 'le' => 8804, 'lfloor' => 8970, 'lowast' => 8727, 'loz' => 9674, 'lrm' => 8206, 'lsaquo' => 8249, 'lsquo' => 8216, 'lt' => 60, 'macr' => 175, 'mdash' => 8212, 'micro' => 181, 'middot' => 183, 'minus' => 8722, 'Mu' => 924, 'mu' => 956, 'nabla' => 8711, 'nbsp' => 160, 'ndash' => 8211, 'ne' => 8800, 'ni' => 8715, 'not' => 172, 'notin' => 8713, 'nsub' => 8836, 'Ntilde' => 209, 'ntilde' => 241, 'Nu' => 925, 'nu' => 957, 'Oacute' => 211, 'oacute' => 243, 'Ocirc' => 212, 'ocirc' => 244, 'OElig' => 338, 'oelig' => 339, 'Ograve' => 210, 'ograve' => 242, 'oline' => 8254, 'Omega' => 937, 'omega' => 969, 'Omicron' => 927, 'omicron' => 959, 'oplus' => 8853, 'or' => 8744, 'ordf' => 170, 'ordm' => 186, 'Oslash' => 216, 'oslash' => 248, 'Otilde' => 213, 'otilde' => 245, 'otimes' => 8855, 'Ouml' => 214, 'ouml' => 246, 'para' => 182, 'part' => 8706, 'permil' => 8240, 'perp' => 8869, 'Phi' => 934, 'phi' => 966, 'Pi' => 928, 'pi' => 960, 'piv' => 982, 'plusmn' => 177, 'pound' => 163, 'Prime' => 8243, 'prime' => 8242, 'prod' => 8719, 'prop' => 8733, 'Psi' => 936, 'psi' => 968, 'quot' => 34, 'radic' => 8730, 'rang' => 9002, 'raquo' => 187, 'rArr' => 8658, 'rarr' => 8594, 'rceil' => 8969, 'rdquo' => 8221, 'real' => 8476, 'reg' => 174, 'rfloor' => 8971, 'Rho' => 929, 'rho' => 961, 'rlm' => 8207, 'rsaquo' => 8250, 'rsquo' => 8217, 'sbquo' => 8218, 'Scaron' => 352, 'scaron' => 353, 'sdot' => 8901, 'sect' => 167, 'shy' => 173, 'Sigma' => 931, 'sigma' => 963, 'sigmaf' => 962, 'sim' => 8764, 'spades' => 9824, 'sub' => 8834, 'sube' => 8838, 'sum' => 8721, 'sup' => 8835, 'sup1' => 185, 'sup2' => 178, 'sup3' => 179, 'supe' => 8839, 'szlig' => 223, 'Tau' => 932, 'tau' => 964, 'there4' => 8756, 'Theta' => 920, 'theta' => 952, 'thetasym' => 977, 'thinsp' => 8201, 'THORN' => 222, 'thorn' => 254, 'tilde' => 732, 'times' => 215, 'trade' => 8482, 'Uacute' => 218, 'uacute' => 250, 'uArr' => 8657, 'uarr' => 8593, 'Ucirc' => 219, 'ucirc' => 251, 'Ugrave' => 217, 'ugrave' => 249, 'uml' => 168, 'upsih' => 978, 'Upsilon' => 933, 'upsilon' => 965, 'Uuml' => 220, 'uuml' => 252, 'weierp' => 8472, 'Xi' => 926, 'xi' => 958, 'Yacute' => 221, 'yacute' => 253, 'yen' => 165, 'Yuml' => 376, 'yuml' => 255, 'Zeta' => 918, 'zeta' => 950, 'zwj' => 8205, 'zwnj' => 8204 } end htmlentities-4.3.1/lib/htmlentities/mappings/xhtml1.rb0000644000004100000410000001177711665456537023156 0ustar www-datawww-data# encoding: UTF-8 class HTMLEntities MAPPINGS['xhtml1'] = { 'Aacute' => 193, 'aacute' => 225, 'Acirc' => 194, 'acirc' => 226, 'acute' => 180, 'AElig' => 198, 'aelig' => 230, 'Agrave' => 192, 'agrave' => 224, 'alefsym' => 8501, 'Alpha' => 913, 'alpha' => 945, 'amp' => 38, 'and' => 8743, 'ang' => 8736, 'apos' => 39, 'Aring' => 197, 'aring' => 229, 'asymp' => 8776, 'Atilde' => 195, 'atilde' => 227, 'Auml' => 196, 'auml' => 228, 'bdquo' => 8222, 'Beta' => 914, 'beta' => 946, 'brvbar' => 166, 'bull' => 8226, 'cap' => 8745, 'Ccedil' => 199, 'ccedil' => 231, 'cedil' => 184, 'cent' => 162, 'Chi' => 935, 'chi' => 967, 'circ' => 710, 'clubs' => 9827, 'cong' => 8773, 'copy' => 169, 'crarr' => 8629, 'cup' => 8746, 'curren' => 164, 'Dagger' => 8225, 'dagger' => 8224, 'dArr' => 8659, 'darr' => 8595, 'deg' => 176, 'Delta' => 916, 'delta' => 948, 'diams' => 9830, 'divide' => 247, 'Eacute' => 201, 'eacute' => 233, 'Ecirc' => 202, 'ecirc' => 234, 'Egrave' => 200, 'egrave' => 232, 'empty' => 8709, 'emsp' => 8195, 'ensp' => 8194, 'Epsilon' => 917, 'epsilon' => 949, 'equiv' => 8801, 'Eta' => 919, 'eta' => 951, 'ETH' => 208, 'eth' => 240, 'Euml' => 203, 'euml' => 235, 'euro' => 8364, 'exist' => 8707, 'fnof' => 402, 'forall' => 8704, 'frac12' => 189, 'frac14' => 188, 'frac34' => 190, 'frasl' => 8260, 'Gamma' => 915, 'gamma' => 947, 'ge' => 8805, 'gt' => 62, 'hArr' => 8660, 'harr' => 8596, 'hearts' => 9829, 'hellip' => 8230, 'Iacute' => 205, 'iacute' => 237, 'Icirc' => 206, 'icirc' => 238, 'iexcl' => 161, 'Igrave' => 204, 'igrave' => 236, 'image' => 8465, 'infin' => 8734, 'int' => 8747, 'Iota' => 921, 'iota' => 953, 'iquest' => 191, 'isin' => 8712, 'Iuml' => 207, 'iuml' => 239, 'Kappa' => 922, 'kappa' => 954, 'Lambda' => 923, 'lambda' => 955, 'lang' => 9001, 'laquo' => 171, 'lArr' => 8656, 'larr' => 8592, 'lceil' => 8968, 'ldquo' => 8220, 'le' => 8804, 'lfloor' => 8970, 'lowast' => 8727, 'loz' => 9674, 'lrm' => 8206, 'lsaquo' => 8249, 'lsquo' => 8216, 'lt' => 60, 'macr' => 175, 'mdash' => 8212, 'micro' => 181, 'middot' => 183, 'minus' => 8722, 'Mu' => 924, 'mu' => 956, 'nabla' => 8711, 'nbsp' => 160, 'ndash' => 8211, 'ne' => 8800, 'ni' => 8715, 'not' => 172, 'notin' => 8713, 'nsub' => 8836, 'Ntilde' => 209, 'ntilde' => 241, 'Nu' => 925, 'nu' => 957, 'Oacute' => 211, 'oacute' => 243, 'Ocirc' => 212, 'ocirc' => 244, 'OElig' => 338, 'oelig' => 339, 'Ograve' => 210, 'ograve' => 242, 'oline' => 8254, 'Omega' => 937, 'omega' => 969, 'Omicron' => 927, 'omicron' => 959, 'oplus' => 8853, 'or' => 8744, 'ordf' => 170, 'ordm' => 186, 'Oslash' => 216, 'oslash' => 248, 'Otilde' => 213, 'otilde' => 245, 'otimes' => 8855, 'Ouml' => 214, 'ouml' => 246, 'para' => 182, 'part' => 8706, 'permil' => 8240, 'perp' => 8869, 'Phi' => 934, 'phi' => 966, 'Pi' => 928, 'pi' => 960, 'piv' => 982, 'plusmn' => 177, 'pound' => 163, 'Prime' => 8243, 'prime' => 8242, 'prod' => 8719, 'prop' => 8733, 'Psi' => 936, 'psi' => 968, 'quot' => 34, 'radic' => 8730, 'rang' => 9002, 'raquo' => 187, 'rArr' => 8658, 'rarr' => 8594, 'rceil' => 8969, 'rdquo' => 8221, 'real' => 8476, 'reg' => 174, 'rfloor' => 8971, 'Rho' => 929, 'rho' => 961, 'rlm' => 8207, 'rsaquo' => 8250, 'rsquo' => 8217, 'sbquo' => 8218, 'Scaron' => 352, 'scaron' => 353, 'sdot' => 8901, 'sect' => 167, 'shy' => 173, 'Sigma' => 931, 'sigma' => 963, 'sigmaf' => 962, 'sim' => 8764, 'spades' => 9824, 'sub' => 8834, 'sube' => 8838, 'sum' => 8721, 'sup' => 8835, 'sup1' => 185, 'sup2' => 178, 'sup3' => 179, 'supe' => 8839, 'szlig' => 223, 'Tau' => 932, 'tau' => 964, 'there4' => 8756, 'Theta' => 920, 'theta' => 952, 'thetasym' => 977, 'thinsp' => 8201, 'THORN' => 222, 'thorn' => 254, 'tilde' => 732, 'times' => 215, 'trade' => 8482, 'Uacute' => 218, 'uacute' => 250, 'uArr' => 8657, 'uarr' => 8593, 'Ucirc' => 219, 'ucirc' => 251, 'Ugrave' => 217, 'ugrave' => 249, 'uml' => 168, 'upsih' => 978, 'Upsilon' => 933, 'upsilon' => 965, 'Uuml' => 220, 'uuml' => 252, 'weierp' => 8472, 'Xi' => 926, 'xi' => 958, 'Yacute' => 221, 'yacute' => 253, 'yen' => 165, 'Yuml' => 376, 'yuml' => 255, 'Zeta' => 918, 'zeta' => 950, 'zwj' => 8205, 'zwnj' => 8204 } end