String-Tagged-0.16000755001750001750 013454124632 12624 5ustar00leoleo000000000000String-Tagged-0.16/Build.PL000444001750001750 65313454124632 14241 0ustar00leoleo000000000000use strict; use warnings; use Module::Build; my $build = Module::Build->new( module_name => 'String::Tagged', requires => { }, configure_requires => { 'Module::Build' => '0.4004', # test_requires }, test_requires => { 'Test::Identity' => 0, 'Test::More' => '0.88', # done_testing }, license => 'perl', create_license => 1, create_readme => 1, ); $build->create_build_script; String-Tagged-0.16/Changes000444001750001750 620413454124632 14256 0ustar00leoleo000000000000Revision history for String-Tagged 0.16 2019-04-12 16:03:33 [CHANGES] * Print a more helpful message when ->sprintf encounters undef * A few docs improvements * Render linefeed as "." in ->debug_sprintf so line wrapping works [BUGFIXES] * Ensure ->substr preserves tags that are anchored both before and after 0.15 2017-10-02 16:09:35 [CHANGES] * Added ->from_sprintf constructor and ->sprintf convenience wrapper method 0.14 2017/04/24 15:46:45 [BUGFIXES] * Ensure that ->get_tag_extent can see non-initial tags (RT120691) 0.13 2017/03/16 17:59:47 [CHANGES] * Define a String::Tagged::Formatting spec name for monospace text * Updated documentation style to =head2 barename 0.12 2014/11/17 16:14:04 [BUGFIXES] * Fix for ->get_tag_at ignoring tags in some situations (RT100392) * Fix for ->substr for length of copied tags that start elsewhere than offset 0 in the copied chunk (RT100409) 0.11 2014/11/14 17:40:35 [CHANGES] * Added ->split method * Allow ->apply_tag/->unapply_tag/->delete_tag to take an Extent object instead of two integers for position * Added ->clone method with tag set restriction and conversion * Document the String::Tagged::Formatting spec [BUGFIXES] * Set the overload 'fallback' key 0.10 2014/09/08 17:48:53 [CHANGES] * Have ->apply_tagged return the object itself, for chaining * Have ->substr return a String::Tagged; add ->plain_substr for plain perl strings * Added ->matches [BUGFIXES] * Ensure ->get_tags_at at nonzero index works (RT98700) 0.09 2014/07/31 20:28:59 [CHANGES] * Have apply_tag, unapply_tag, delete_tag accessors return the object itself, so they're nice for chaining constructors 0.08 CHANGES: * Efficiency updates to improve the performance of common append operations 0.07 CHANGES: * Respect subclassing in ->concat and . operator * Added ->new_tagged convenience constructor * Added 'only' and 'except' filters to iteration methods 0.06 CHANGES: * Use Test::Identity to work around recent behavioural change in Test::More when comparing object references 0.05 CHANGES: * Allow use of ->new() as a clone constructor * Copy tags if ->set_substr/insert/append are passed a String::Tagged * Define . and .= operator overloads 0.04 CHANGES: * Added ->merge_tags() method * Created terminal colours/attributes example * Various small fixes to keep CPANTS happy 0.03 CHANGES: * use warnings BUGFIXES: * Ensure that, of multiple tags that start at the same position, the shortest one wins. 0.02 CHANGES: * New 'extent' API - methods to return extent objects * Added ->get_tag_extent() and ->get_tag_missing_extent() 0.01 First version, released on an unsuspecting world. String-Tagged-0.16/LICENSE000444001750001750 4376213454124632 14022 0ustar00leoleo000000000000This software is copyright (c) 2019 by Paul Evans . This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2019 by Paul Evans . This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2019 by Paul Evans . This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End String-Tagged-0.16/MANIFEST000444001750001750 72213454124632 14073 0ustar00leoleo000000000000Build.PL Changes examples/demo-show.pl lib/String/Tagged.pm lib/String/Tagged/Formatting.pod LICENSE MANIFEST This list of files META.json META.yml README t/00use.t t/01plain.t t/02tags-conststr.t t/03tags-iter-limit.t t/04tags-appendstr.t t/05tags-delete.t t/06tags-substr.t t/07tags-range.t t/10debugprint.t t/11clone.t t/20merge-tags.t t/21merge-tags-anchors.t t/30appendinsert.t t/31matches.t t/32split.t t/33sprintf.t t/40operators.t t/50subclass.t t/99pod.t String-Tagged-0.16/META.json000444001750001750 172013454124632 14402 0ustar00leoleo000000000000{ "abstract" : "string buffers with value tags on extents", "author" : [ "Paul Evans " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.4224", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "String-Tagged", "prereqs" : { "configure" : { "requires" : { "Module::Build" : "0.4004" } }, "test" : { "requires" : { "Test::Identity" : "0", "Test::More" : "0.88" } } }, "provides" : { "String::Tagged" : { "file" : "lib/String/Tagged.pm", "version" : "0.16" } }, "release_status" : "stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ] }, "version" : "0.16", "x_serialization_backend" : "JSON::PP version 4.00" } String-Tagged-0.16/META.yml000444001750001750 120613454124632 14231 0ustar00leoleo000000000000--- abstract: 'string buffers with value tags on extents' author: - 'Paul Evans ' build_requires: Test::Identity: '0' Test::More: '0.88' configure_requires: Module::Build: '0.4004' dynamic_config: 1 generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: String-Tagged provides: String::Tagged: file: lib/String/Tagged.pm version: '0.16' resources: license: http://dev.perl.org/licenses/ version: '0.16' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' String-Tagged-0.16/README000444001750001750 5024113454124632 13663 0ustar00leoleo000000000000NAME String::Tagged - string buffers with value tags on extents SYNOPSIS use String::Tagged; my $st = String::Tagged->new( "An important message" ); $st->apply_tag( 3, 9, bold => 1 ); $st->iter_substr_nooverlap( sub { my ( $substring, %tags ) = @_; print $tags{bold} ? "$substring" : $substring; } ); DESCRIPTION This module implements an object class, instances of which store a (mutable) string buffer that supports tags. A tag is a name/value pair that applies to some non-empty extent of the underlying string. The types of tag names ought to be strings, or at least values that are well-behaved as strings, as the names will often be used as the keys in hashes or applied to the eq operator. The types of tag values are not restricted - any scalar will do. This could be a simple integer or string, ARRAY or HASH reference, or even a CODE reference containing an event handler of some kind. Tags may be arbitrarily overlapped. Any given offset within the string has in effect, a set of uniquely named tags. Tags of different names are independent. For tags of the same name, only the latest, shortest tag takes effect. For example, consider a string with three tags represented here: Here is my string with tags [-------------------------] foo => 1 [-------] foo => 2 [---] bar => 3 Every character in this string has a tag named foo. The value of this tag is 2 for the words my and string and the space inbetween, and 1 elsewhere. Additionally, the words is and my and the space between them also have the tag bar with a value 3. Since String::Tagged does not understand the significance of the tag values it therefore cannot detect if two neighbouring tags really contain the same semantic idea. Consider the following string: A string with words [-------] type => "message" [--------] type => "message" This string contains two tags. String::Tagged will treat this as two different tag values as far as iter_tags_nooverlap is concerned, even though get_tag_at yields the same value for the type tag at any position in the string. The merge_tags method may be used to merge tag extents of tags that should be considered as equal. NAMING I spent a lot of time considering the name for this module. It seems that a number of people across a number of languages all created similar functionallity, though named very differently. For the benefit of keyword-based search tools and similar, here's a list of some other names this sort of object might be known by: * Extents * Overlays * Attribute or attributed strings * Markup * Out-of-band data CONSTRUCTOR new $st = String::Tagged->new( $str ) Returns a new instance of a String::Tagged object. It will contain no tags. If the optional $str argument is supplied, the string buffer will be initialised from this value. If $str is a String::Tagged object then it will be cloned, as if calling the clone method on it. new_tagged $st = String::Tagged->new_tagged( $str, %tags ) Shortcut for creating a new String::Tagged object with the given tags applied to the entire length. The tags will not be anchored at either end. clone (class) $new = String::Tagged->clone( $orig, %opts ) Returns a new instance of String::Tagged made by cloning the original, subject to the options provided. The returned instance will be in the requested class, which need not match the class of the original. The following options are recognised: only_tags => ARRAY If present, gives an ARRAY reference containing tag names. Only those tags named here will be copied; others will be ignored. except_tags => ARRAY If present, gives an ARRAY reference containing tag names. All tags will be copied except those named here. convert_tags => HASH If present, gives a HASH reference containing tag conversion functions. For any tags in the original to be copied whose names appear in the hash, the name and value are passed into the corresponding function, which should return an even-sized key/value list giving a tag, or a list of tags, to apply to the new clone. my @new_tags = $convert_tags->{$orig_name}->( $orig_name, $orig_value ) # Where @new_tags is ( $new_name, $new_value, $new_name_2, $new_value_2, ... ) As a further convenience, if the value for a given tag name is a plain string instead of a code reference, it gives the new name for the tag, and will be applied with its existing value. If only_tags is being used too, then the source names of any tags to be converted must also be listed there, or they will not be copied. clone (instance) $new = $orig->clone( %args ) Called as an instance (rather than a class) method, the newly-cloned instance is returned in the same class as the original. from_sprintf $str = String::Tagged->from_sprintf( $format, @args ) Since version 0.15. Returns a new instance of a String::Tagged object, initialised by formatting the supplied arguments using the supplied format. The $format string is similar to that supported by the core sprintf operator, though a few features such as out-of-order argument indexing and vector formatting are missing. This format string may be a plain perl string, or an instance of String::Tagged. In the latter case, any tags within it are preserved in the result. In the case of a %s conversion, the value of the argument consumed may itself be a String::Tagged instance. In this case it will be appended to the returned object, preserving any tags within it. All other conversions are handled individually by the core sprintf operator and appended to the result. METHODS str $str = $st->str $str = "$st" Returns the plain string contained within the object. This method is also called for stringification; so the String::Tagged object can be used in a plain string interpolation such as my $message = String::Tagged->new( "Hello world" ); print "My message is $message\n"; length $len = $st->length $len = length( $st ) Returns the length of the plain string. Because stringification works on this object class, the normal core length function works correctly on it. substr $str = $st->substr( $start, $len ) Returns a String::Tagged instance representing a section from within the given string, containing all the same tags at the same conceptual positions. plain_substr $str = $st->plain_substr( $start, $len ) Returns as a plain perl string, the substring at the given position. This will be the same string data as returned by substr, only as a plain string without the tags apply_tag $st->apply_tag( $start, $len, $name, $value ) Apply the named tag value to the given extent. The tag will start on the character at the $start index, and continue for the next $len characters. If $start is given as -1, the tag will be considered to start "before" the actual string. If $len is given as -1, the tag will be considered to end "after" end of the actual string. These special limits are used by set_substr when deciding whether to move a tag boundary. The start of any tag that starts "before" the string is never moved, even if more text is inserted at the beginning. Similarly, a tag which ends "after" the end of the string, will continue to the end even if more text is appended. This method returns the $st object. $st->apply_tag( $e, $name, $value ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. The new tag will apply at the given extent. unapply_tag $st->unapply_tag( $start, $len, $name ) Unapply the named tag value from the given extent. If the tag extends beyond this extent, then any partial fragment of the tag will be left in the string. This method returns the $st object. $st->unapply_tag( $e, $name ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. delete_tag $st->delete_tag( $start, $len, $name ) Delete the named tag within the given extent. Entire tags are removed, even if they extend beyond this extent. This method returns the $st object. $st->delete_tag( $e, $name ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. merge_tags $st->merge_tags( $eqsub ) Merge neighbouring or overlapping tags of the same name and equal values. For each pair of tags of the same name that apply on neighbouring or overlapping extents, the $eqsub callback is called, as $equal = $eqsub->( $name, $value_a, $value_b ) If this function returns true then the tags are merged. The equallity test function is free to perform any comparison of the values that may be relevant to the application; for example it may deeply compare referred structures and check for equivalence in some application-defined manner. In this case, the first tag of a pair is retained, the second is deleted. This may be relevant if the tag value is a reference to some object. iter_extents $st->iter_extents( $callback, %opts ) Iterate the tags stored in the string. For each tag, the CODE reference in $callback is invoked once, being passed an extent object that represents the extent of the tag. $callback->( $extent, $tagname, $tagvalue ) Options passed in %opts may include: start => INT Start at the given position; defaults to 0. end => INT End after the given position; defaults to end of string. This option overrides len. len => INT End after the given length beyond the start position; defaults to end of string. This option only applies if end is not given. only => ARRAY Select only the tags named in the given ARRAY reference. except => ARRAY Select all the tags except those named in the given ARRAY reference. iter_tags $st->iter_tags( $callback, %opts ) Iterate the tags stored in the string. For each tag, the CODE reference in $callback is invoked once, being passed the start point and length of the tag. $callback->( $start, $length, $tagname, $tagvalue ) Options passed in %opts are the same as for iter_extents. iter_extents_nooverlap $st->iter_extents_nooverlap( $callback, %opts ) Iterate non-overlapping extents of tags stored in the string. The CODE reference in $callback is invoked for each extent in the string where no tags change. The entire set of tags active in that extent is given to the callback. Because the extent covers possibly-multiple tags, it will not define the anchor_before and anchor_after flags. $callback->( $extent, %tags ) The callback will be invoked over the entire length of the string, including any extents with no tags applied. Options may be passed in %opts to control the range of the string iterated over, in the same way as the iter_extents method. If the only or except filters are applied, then only the tags that survive filtering will be present in the %tags hash. Tags that are excluded by the filtering will not be present, nor will their bounds be used to split the string into extents. iter_tags_nooverlap $st->iter_tags_nooverlap( $callback, %opts ) Iterate extents of the string using iter_extents_nooverlap, but passing the start and length of each extent to the callback instead of the extent object. $callback->( $start, $length, %tags ) Options may be passed in %opts to control the range of the string iterated over, in the same way as the iter_extents method. iter_substr_nooverlap $st->iter_substr_nooverlap( $callback, %opts ) Iterate extents of the string using iter_extents_nooverlap, but passing the substring of data instead of the extent object. $callback->( $substr, %tags ) Options may be passed in %opts to control the range of the string iterated over, in the same way as the iter_extents method. tagnames @names = $st->tagnames Returns the set of tag names used in the string, in no particular order. get_tags_at $tags = $st->get_tags_at( $pos ) Returns a HASH reference of all the tag values active at the given position. get_tag_at $value = $st->get_tag_at( $pos, $name ) Returns the value of the named tag at the given position, or undef if the tag is not applied there. get_tag_extent $extent = $st->get_tag_extent( $pos, $name ) If the named tag applies to the given position, returns the extent of the tag at that position. If it does not, undef is returned. If an extent is returned it will define the anchor_before and anchor_after flags if appropriate. get_tag_missing_extent $extent = $st->get_tag_missing_extent( $pos, $name ) If the named tag does not apply at the given position, returns the extent of the string around that position that does not have the tag. If it does exist, undef is returned. If an extent is returned it will not define the anchor_before and anchor_after flags, as these do not make sense for the range in which a tag is absent. set_substr $st->set_substr( $start, $len, $newstr ) Modifies a extent of the underlying plain string to that given. The extents of tags in the string are adjusted to cope with the modified region, and the adjustment in length. Tags entirely before the replaced extent remain unchanged. Tags entirely within the replaced extent are deleted. Tags entirely after the replaced extent are moved by appropriate amount to ensure they still apply to the same characters as before. Tags that start before and end after the extent remain, and have their lengths suitably adjusted. Tags that span just the start or end of the extent, but not both, are truncated, so as to remove the part of the tag applied on the modified extent but preserving that applied outside. If $newstr is a String::Tagged object, then its tags will be applied to $st as appropriate. Edge-anchored tags in $newstr will not be extended through $st, though they will apply as edge-anchored if they now sit at the edge of the new string. insert $st->insert( $start, $newstr ) Insert the given string at the given position. A shortcut around set_substr. If $newstr is a String::Tagged object, then its tags will be applied to $st as appropriate. If $start is 0, any before-anchored tags in will become before-anchored in $st. append $st->append( $newstr ) $st .= $newstr Append to the underlying plain string. A shortcut around set_substr. If $newstr is a String::Tagged object, then its tags will be applied to $st as appropriate. Any after-anchored tags in will become after-anchored in $st. append_tagged $st->append_tagged( $newstr, %tags ) Append to the underlying plain string, and apply the given tags to the newly-inserted extent. Returns $st itself so that the method may be easily chained. concat $ret = $st->concat( $other ) $ret = $st . $other Returns a new String::Tagged containing the two strings concatenated together, preserving any tags present. This method overloads normal string concatenation operator, so expressions involving String::Tagged values retain their tags. This method or operator tries to respect subclassing; preferring to return a new object of a subclass if either argument or operand is a subclass of String::Tagged. If they are both subclasses, it will prefer the type of the invocant or first operand. matches @subs = $st->matches( $regexp ) Returns a list of substrings (as String::Tagged instances) for every non-overlapping match of the given $regexp. This could be used, for example, to build a formatted string from a formatted template containing variable expansions: my $template = ... my %vars = ... my $ret = String::Tagged->new; foreach my $m ( $template->matches( qr/\$\w+|[^$]+/ ) ) { if( $m =~ m/^\$(\w+)$/ ) { $ret->append_tagged( $vars{$1}, %{ $m->get_tags_at( 0 ) } ); } else { $ret->append( $m ); } } This iterates segments of the template containing variables expansions starting with a $ symbol, and replaces them with values from the %vars hash, careful to preserve all the formatting tags from the original template string. split @parts = $st->split( $regexp, $limit ) Returns a list of substrings by applying the regexp to the string content; similar to the core perl split function. If $limit is supplied, the method will stop at that number of elements, returning the entire remainder of the input string as the final element. If the $regexp contains a capture group then the content of the first one will be added to the return list as well. sprintf $ret = $st->sprintf( @args ) Since version 0.15. Returns a new string by using the given instance as the format string for a "from_sprintf" constructor call. The returned instance will be of the same class as the invocant. debug_sprintf $ret = $st->debug_sprintf Returns a representation of the string data and all the tags, suitable for debug printing or other similar use. This is a format such as is given in the DESCRIPTION section above. The output will consist of a number of lines, the first containing the plain underlying string, then one line per tag. The line shows the extent of the tag given by [---] markers, or a | in the special case of a tag covering only a single character. Special markings of < and > indicate tags which are "before" or "after" anchored. For example: Hello, world [---] word => 1 <[----------]> everywhere => 1 | space => 1 Extent Objects These objects represent a range of characters within the containing String::Tagged object. The range they represent is fixed at the time of creation. If the containing string is modified by a call to set_substr then the effect on the extent object is not defined. These objects should be considered as relatively short-lived - used briefly for the purpose of querying the result of an operation, then discarded soon after. $extent->string Returns the containing String::Tagged object. $extent->start Returns the start index of the extent. This is the index of the first character within the extent. $extent->end Returns the end index of the extent. This is the index of the first character beyond the end of the extent. $extent->anchor_before True if this extent begins "before" the start of the string. Only certain methods return extents with this flag defined. $extent->anchor_after True if this extent ends "after" the end of the string. Only certain methods return extents with this flag defined. $extent->length Returns the number of characters within the extent. $extent->substr Returns the substring contained by the extent. $extent->plain_substr Returns the substring of the underlying plain string buffer contained by the extent. TODO * There are likely variations on the rules for set_substr that could equally apply to some uses of tagged strings. Consider whether the behaviour of modification is chosen per-method, per-tag, or per-string. * Consider how to implement a clone from one tag format to another which wants to merge multiple different source tags together into a single new one. AUTHOR Paul Evans String-Tagged-0.16/examples000755001750001750 013454124632 14442 5ustar00leoleo000000000000String-Tagged-0.16/examples/demo-show.pl000444001750001750 242113454124632 17035 0ustar00leoleo000000000000#!/usr/bin/perl -w use strict; use String::Tagged; my $CSI = "\e["; while( my $line = ) { my $str = String::Tagged->new( $line ); # Every capital letter red pos $line = 0; while( $line =~ m/[A-Z]/g ) { $str->apply_tag( $-[0], 1, fg => 1 ); } # Punctuation green pos $line = 0; while( $line =~ m/[[:punct:]]/g ) { $str->apply_tag( $-[0], 1, fg => 2 ); } # Numbers blue pos $line = 0; while( $line =~ m/\d+/g ) { $str->apply_tag( $-[0], $+[0]-$-[0], fg => 4 ); } # Underline whole words pos $line = 0; while( $line =~ m/\S+/g ) { $str->apply_tag( $-[0], $+[0]-$-[0], u => 1 ); } print STDERR $str->debug_sprintf; my %pen; $str->iter_substr_nooverlap( sub { my ( $substr, %tags ) = @_; my @SGR; if( defined( my $fg = $tags{fg} ) ) { push @SGR, $fg+30; $pen{fg} = $fg; } elsif( exists $pen{fg} ) { push @SGR, 39; delete $pen{fg}; } if( $tags{u} and !$pen{u} ) { push @SGR, 4; $pen{u} = 1; } elsif( !$tags{u} and $pen{u} ) { push @SGR, 24; delete $pen{u}; } print "${CSI}".join(";", @SGR)."m" if @SGR; print $substr; } ); print "${CSI}m\n"; } String-Tagged-0.16/lib000755001750001750 013454124632 13372 5ustar00leoleo000000000000String-Tagged-0.16/lib/String000755001750001750 013454124632 14640 5ustar00leoleo000000000000String-Tagged-0.16/lib/String/Tagged.pm000444001750001750 12407013454124632 16572 0ustar00leoleo000000000000# You may distribute under the terms of either the GNU General Public License # or the Artistic License (the same terms as Perl itself) # # (C) Paul Evans, 2008-2017 -- leonerd@leonerd.org.uk package String::Tagged; use strict; use warnings; our $VERSION = '0.16'; use Scalar::Util qw( blessed ); use constant FLAG_ANCHOR_BEFORE => 0x01; use constant FLAG_ANCHOR_AFTER => 0x02; use constant DEBUG => 0; # Since we're providing overloading, we should set fallback by default use overload fallback => 1; =head1 NAME C - string buffers with value tags on extents =head1 SYNOPSIS use String::Tagged; my $st = String::Tagged->new( "An important message" ); $st->apply_tag( 3, 9, bold => 1 ); $st->iter_substr_nooverlap( sub { my ( $substring, %tags ) = @_; print $tags{bold} ? "$substring" : $substring; } ); =head1 DESCRIPTION This module implements an object class, instances of which store a (mutable) string buffer that supports tags. A tag is a name/value pair that applies to some non-empty extent of the underlying string. The types of tag names ought to be strings, or at least values that are well-behaved as strings, as the names will often be used as the keys in hashes or applied to the C operator. The types of tag values are not restricted - any scalar will do. This could be a simple integer or string, ARRAY or HASH reference, or even a CODE reference containing an event handler of some kind. Tags may be arbitrarily overlapped. Any given offset within the string has in effect, a set of uniquely named tags. Tags of different names are independent. For tags of the same name, only the latest, shortest tag takes effect. For example, consider a string with three tags represented here: Here is my string with tags [-------------------------] foo => 1 [-------] foo => 2 [---] bar => 3 Every character in this string has a tag named C. The value of this tag is 2 for the words C and C and the space inbetween, and 1 elsewhere. Additionally, the words C and C and the space between them also have the tag C with a value 3. Since C does not understand the significance of the tag values it therefore cannot detect if two neighbouring tags really contain the same semantic idea. Consider the following string: A string with words [-------] type => "message" [--------] type => "message" This string contains two tags. C will treat this as two different tag values as far as C is concerned, even though C yields the same value for the C tag at any position in the string. The C method may be used to merge tag extents of tags that should be considered as equal. =head1 NAMING I spent a lot of time considering the name for this module. It seems that a number of people across a number of languages all created similar functionallity, though named very differently. For the benefit of keyword-based search tools and similar, here's a list of some other names this sort of object might be known by: =over 4 =item * Extents =item * Overlays =item * Attribute or attributed strings =item * Markup =item * Out-of-band data =back =cut =head1 CONSTRUCTOR =cut =head2 new $st = String::Tagged->new( $str ) Returns a new instance of a C object. It will contain no tags. If the optional C<$str> argument is supplied, the string buffer will be initialised from this value. If C<$str> is a C object then it will be cloned, as if calling the C method on it. =cut sub new { my $class = shift; my ( $str ) = @_; return $class->clone( $str ) if blessed $str and $str->isa( __PACKAGE__ ); $str = "" unless defined $str; return bless { str => "$str", tags => [], }, $class; } =head2 new_tagged $st = String::Tagged->new_tagged( $str, %tags ) Shortcut for creating a new C object with the given tags applied to the entire length. The tags will not be anchored at either end. =cut sub new_tagged { my $class = shift; my ( $str, %tags ) = @_; my $self = $class->new( $str ); my $length = $self->length; $self->apply_tag( 0, $length, $_ => $tags{$_} ) for keys %tags; return $self; } =head2 clone (class) $new = String::Tagged->clone( $orig, %opts ) Returns a new instance of C made by cloning the original, subject to the options provided. The returned instance will be in the requested class, which need not match the class of the original. The following options are recognised: =over 4 =item only_tags => ARRAY If present, gives an ARRAY reference containing tag names. Only those tags named here will be copied; others will be ignored. =item except_tags => ARRAY If present, gives an ARRAY reference containing tag names. All tags will be copied except those named here. =item convert_tags => HASH If present, gives a HASH reference containing tag conversion functions. For any tags in the original to be copied whose names appear in the hash, the name and value are passed into the corresponding function, which should return an even-sized key/value list giving a tag, or a list of tags, to apply to the new clone. my @new_tags = $convert_tags->{$orig_name}->( $orig_name, $orig_value ) # Where @new_tags is ( $new_name, $new_value, $new_name_2, $new_value_2, ... ) As a further convenience, if the value for a given tag name is a plain string instead of a code reference, it gives the new name for the tag, and will be applied with its existing value. If C is being used too, then the source names of any tags to be converted must also be listed there, or they will not be copied. =back =head2 clone (instance) $new = $orig->clone( %args ) Called as an instance (rather than a class) method, the newly-cloned instance is returned in the same class as the original. =cut sub clone { my ( $class, $orig ) = blessed $_[0] ? ( ref $_[0], shift ) : ( shift, shift ); my %opts = @_; my $only = exists $opts{only_tags} ? { map { $_ => 1 } @{ $opts{only_tags} } } : undef; my $except = exists $opts{except_tags} ? { map { $_ => 1 } @{ $opts{except_tags} } } : undef; my $convert = $opts{convert_tags}; my $new = $class->new( $orig->str ); $orig->iter_extents( sub { my ( $e, $tn, $tv ) = @_; return if $only and not $only->{$tn}; return if $except and $except->{$tn}; my @tags; if( $convert and my $c = $convert->{$tn} ) { if( ref $c eq "CODE" ) { @tags = $c->( $tn, $tv ); } else { @tags = ( $c, $tv ); } } else { @tags = ( $tn, $tv ); } while( @tags ) { $new->apply_tag( $e, shift @tags, shift @tags ); } }); return $new; } sub _mkextent { my $self = shift; my ( $start, $end, $flags ) = @_; $flags &= (FLAG_ANCHOR_BEFORE|FLAG_ANCHOR_AFTER); return bless [ $self, $start, $end, $flags ], 'String::Tagged::Extent'; } =head2 from_sprintf $str = String::Tagged->from_sprintf( $format, @args ) I Returns a new instance of a C object, initialised by formatting the supplied arguments using the supplied format. The C<$format> string is similar to that supported by the core C operator, though a few features such as out-of-order argument indexing and vector formatting are missing. This format string may be a plain perl string, or an instance of C. In the latter case, any tags within it are preserved in the result. In the case of a C<%s> conversion, the value of the argument consumed may itself be a C instance. In this case it will be appended to the returned object, preserving any tags within it. All other conversions are handled individually by the core C operator and appended to the result. =cut sub from_sprintf { my $class = shift; my ( $format, @args ) = @_; # Clone the format string into the candidate return value, and then # repeatedly replace %... expansions with their required value using # ->set_substr, so that embedded tags in the format will behave sensibly. my $ret = ( blessed $format and $format->isa( __PACKAGE__ ) ) ? $class->clone( $format ) : $class->new( $format ); my $pos = 0; while( $pos < length $ret ) { my $str = "$ret"; pos( $str ) = $pos; my $replacement; if( $str =~ m/\G[^%]+/gc ) { # A literal span $pos = $+[0]; next; } elsif( $str =~ m/\G%%/gc ) { # A literal %% conversion $replacement = "%"; } elsif( $str =~ m/\G%([-]?)(\d+|\*)?(?:\.(\d+|\*))?s/gc ) { # A string my ( $flags, $width, $precision ) = ( $1, $2, $3 ); $width = shift @args if defined $width and $width eq "*"; $precision = shift @args if defined $precision and $precision eq "*"; my $arg = shift @args; defined $arg or do { warnings::warnif( uninitialized => "Use of ininitialized value in String::Tagged->from_sprintf" ); $arg = ""; }; if( defined $precision ) { if( blessed $arg and $arg->isa( __PACKAGE__ ) ) { $arg = $arg->substr( 0, $precision ); } else { $arg = substr $arg, 0, $precision; } } my $leftalign = $flags =~ m/-/; my $padding = defined $width ? $width - length $arg : 0; $padding = 0 if $padding < 0; $replacement = ""; $replacement .= " " x $padding if !$leftalign; $replacement .= $arg; $replacement .= " " x $padding if $leftalign; } elsif( $str =~ m/\G%(.*?)([cduoxefgXEGbBpaAiDUOF])/gc ) { # Another conversion format my ( $template, $flags ) = ( $2, $1 ); my $argc = 1; $argc += ( () = $flags =~ m/\*/g ); $replacement = sprintf "%$flags$template", @args[0..$argc-1]; splice @args, 0, $argc; } elsif( $str =~ m/\G%(.*?)([a-zA-Z])/gc ) { warn "Unrecognised sprintf conversion %$2"; } else { # must be at EOF now last; } my $templatelen = $+[0] - $-[0]; $ret->set_substr( $-[0], $templatelen, $replacement ); $pos += length( $replacement ); } return $ret; } =head1 METHODS =cut =head2 str $str = $st->str $str = "$st" Returns the plain string contained within the object. This method is also called for stringification; so the C object can be used in a plain string interpolation such as my $message = String::Tagged->new( "Hello world" ); print "My message is $message\n"; =cut use overload '""' => 'str'; sub str { my $self = shift; return $self->{str}; } =head2 length $len = $st->length $len = length( $st ) Returns the length of the plain string. Because stringification works on this object class, the normal core C function works correctly on it. =cut sub length { my $self = shift; return CORE::length $self->{str}; } =head2 substr $str = $st->substr( $start, $len ) Returns a C instance representing a section from within the given string, containing all the same tags at the same conceptual positions. =cut sub substr { my $self = shift; my ( $start, $len ) = @_; my $end = $start + $len; my $ret = ( ref $self )->new( CORE::substr( $self->{str}, $start, $len ) ); my $tags = $self->{tags}; foreach my $t ( @$tags ) { my ( $ts, $te, $tn, $tv, $tf ) = @$t; next if $te < $start; last if $ts >= $end; $_ -= $start for $ts, $te; next if $te <= 0; $ts = -1 if $ts < 0 or $tf & FLAG_ANCHOR_BEFORE; $te = -1 if $te > $end or $tf & FLAG_ANCHOR_AFTER; $ret->apply_tag( $ts, $te == -1 ? -1 : $te - $ts, $tn => $tv ); } return $ret; } =head2 plain_substr $str = $st->plain_substr( $start, $len ) Returns as a plain perl string, the substring at the given position. This will be the same string data as returned by C, only as a plain string without the tags =cut sub plain_substr { my $self = shift; my ( $start, $len ) = @_; return CORE::substr( $self->{str}, $start, $len ); } sub _cmp_tags { my ( $as, $ae ) = @$a; my ( $bs, $be ) = @$b; # Sort by start first; shortest first return $as <=> $bs || $ae <=> $be; } sub _assert_sorted { my $self = shift; my $tags = $self->{tags}; # If fewer than 2 tags, must be sorted return if @$tags < 2; my $prev = $tags->[0]; for( my $i = 1; $i < @$tags; $i++ ) { my $here = $tags->[$i]; local ( $a, $b ) = ( $prev, $here ); if( _cmp_tags() <= 0 ) { $prev = $here; next; } print STDERR "Tag order violation at i=$i\n"; print STDERR "[@{[ $i - 1 ]}] = [ $tags->[$i-1]->[0], $tags->[$i-1]->[1] ]\n"; print STDERR "[@{[ $i ]}] = [ $tags->[$i]->[0], $tags->[$i]->[1] ]\n"; die "Assert failure"; } } sub _insert_tag { my $self = shift; my ( $start, $end, $name, $value, $flags ) = @_; my $tags = $self->{tags}; my $newtag = [ $start, $end, $name => $value, $flags ]; # Specialcase - if there's no tags yet, just push it if( @$tags == 0 ) { push @$tags, $newtag; return; } local $a = $newtag; # Two more special cases - it's quite likely we're either inserting an # 'everywhere' tag, or appending one to the end. Check the endpoints first local $b; $b = $tags->[0]; if( _cmp_tags() <= 0 ) { unshift @$tags, $newtag; return; } $b = $tags->[-1]; if( _cmp_tags() >= 0 ) { push @$tags, $newtag; return; } my $range_start = 0; my $range_end = $#$tags; my $inspos; while( $range_end > $range_start ) { my $i = int( ( $range_start + $range_end ) / 2 ); $b = $tags->[$i]; my $cmp = _cmp_tags; if( $cmp > 0 ) { $range_start = $i + 1; } elsif( $cmp < 0 ) { $range_end = $i; # open interval } else { $inspos = $i; last; } if( $range_start == $range_end ) { $inspos = $range_start; last; } } $inspos = $range_end unless defined $inspos; $inspos = 0 if $inspos < 0; $inspos = @$tags if $inspos > @$tags; splice @$tags, $inspos, 0, $newtag; $self->_assert_sorted if DEBUG; } =head2 apply_tag $st->apply_tag( $start, $len, $name, $value ) Apply the named tag value to the given extent. The tag will start on the character at the C<$start> index, and continue for the next C<$len> characters. If C<$start> is given as -1, the tag will be considered to start "before" the actual string. If C<$len> is given as -1, the tag will be considered to end "after" end of the actual string. These special limits are used by C when deciding whether to move a tag boundary. The start of any tag that starts "before" the string is never moved, even if more text is inserted at the beginning. Similarly, a tag which ends "after" the end of the string, will continue to the end even if more text is appended. This method returns the C<$st> object. $st->apply_tag( $e, $name, $value ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. The new tag will apply at the given extent. =cut sub apply_tag { my $self = shift; my ( $start, $end ); my $flags = 0; if( blessed $_[0] ) { my $e = shift; $start = $e->start; $end = $e->end; $flags |= FLAG_ANCHOR_BEFORE if $e->anchor_before; $flags |= FLAG_ANCHOR_AFTER if $e->anchor_after; } else { $start = shift; my $len = shift; my $strlen = $self->length; if( $start < 0 ) { $start = 0; $flags |= FLAG_ANCHOR_BEFORE; } if( $len == -1 ) { $end = $strlen; $flags |= FLAG_ANCHOR_AFTER; } else { $end = $start + $len; $end = $strlen if $end > $strlen; } } my ( $name, $value ) = @_; $self->_insert_tag( $start, $end, $name, $value, $flags ); return $self; } sub _remove_tag { my $self = shift; my $keepends = shift; my ( $start, $end ); if( blessed $_[0] ) { my $e = shift; $start = $e->start; $end = $e->end; } else { $start = shift; $end = $start + shift; } my ( $name ) = @_; my $tags = $self->{tags}; my $have_added = 0; # Can't foreach() because we modify $i for( my $i = 0; $i < @$tags; $i++ ) { my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] }; next if $te <= $start; last if $ts >= $end; next if $tn ne $name; if( $keepends and $end < $te ) { $self->_insert_tag( $end, $te, $tn, $tv, $tf & ~FLAG_ANCHOR_BEFORE ); $have_added = 1; } splice @$tags, $i, 1; if( $keepends and $ts < $start ) { $self->_insert_tag( $ts, $start, $tn, $tv, $tf & ~FLAG_ANCHOR_AFTER ); $have_added = 1; } else { $i--; } } if( DEBUG && $have_added ) { $self->_assert_sorted; } return $self; } =head2 unapply_tag $st->unapply_tag( $start, $len, $name ) Unapply the named tag value from the given extent. If the tag extends beyond this extent, then any partial fragment of the tag will be left in the string. This method returns the C<$st> object. $st->unapply_tag( $e, $name ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. =cut sub unapply_tag { my $self = shift; return $self->_remove_tag( 1, @_ ); } =head2 delete_tag $st->delete_tag( $start, $len, $name ) Delete the named tag within the given extent. Entire tags are removed, even if they extend beyond this extent. This method returns the C<$st> object. $st->delete_tag( $e, $name ) Alternatively, an existing extent object can be passed as the first argument instead of two integers. =cut sub delete_tag { my $self = shift; return $self->_remove_tag( 0, @_ ); } =head2 merge_tags $st->merge_tags( $eqsub ) Merge neighbouring or overlapping tags of the same name and equal values. For each pair of tags of the same name that apply on neighbouring or overlapping extents, the C<$eqsub> callback is called, as $equal = $eqsub->( $name, $value_a, $value_b ) If this function returns true then the tags are merged. The equallity test function is free to perform any comparison of the values that may be relevant to the application; for example it may deeply compare referred structures and check for equivalence in some application-defined manner. In this case, the first tag of a pair is retained, the second is deleted. This may be relevant if the tag value is a reference to some object. =cut sub merge_tags { my $self = shift; my ( $eqsub ) = @_; my $tags = $self->{tags}; # Can't foreach() because we modify @$tags OUTER: for( my $i = 0; $i < @$tags; $i++ ) { my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] }; for( my $j = $i+1; $j < @$tags; $j++ ) { my ( $t2s, $t2e, $t2n, $t2v, $t2f ) = @{ $tags->[$j] }; last if $t2s > $te; next unless $t2s <= $te; next unless $t2n eq $tn; last unless $eqsub->( $tn, $tv, $t2v ); # Need to delete the tag at $j, extend the end of the tag at $i, and # possibly move $i later splice @$tags, $j, 1, (); $j--; $te = $tags->[$i][1] = $t2e; $tags->[$i][4] |= FLAG_ANCHOR_AFTER if $t2f & FLAG_ANCHOR_AFTER; local $a = $tags->[$i]; if( local $b = $tags->[$i+1] and _cmp_tags() > 0 ) { my $newpos = $i+1; while( local $b = $tags->[$newpos ] and _cmp_tags() <= 0 ) { $newpos++; } splice @$tags, $newpos, 0, splice @$tags, $i, 1, (); redo OUTER; } } } } =head2 iter_extents $st->iter_extents( $callback, %opts ) Iterate the tags stored in the string. For each tag, the CODE reference in C<$callback> is invoked once, being passed an extent object that represents the extent of the tag. $callback->( $extent, $tagname, $tagvalue ) Options passed in C<%opts> may include: =over 4 =item start => INT Start at the given position; defaults to 0. =item end => INT End after the given position; defaults to end of string. This option overrides C. =item len => INT End after the given length beyond the start position; defaults to end of string. This option only applies if C is not given. =item only => ARRAY Select only the tags named in the given ARRAY reference. =item except => ARRAY Select all the tags except those named in the given ARRAY reference. =back =cut sub iter_extents { my $self = shift; my ( $callback, %opts ) = @_; my $start = exists $opts{start} ? $opts{start} : 0; my $end = exists $opts{end} ? $opts{end} : exists $opts{len} ? $start + $opts{len} : $self->length; my $only = exists $opts{only} ? { map { $_ => 1 } @{ $opts{only} } } : undef; my $except = exists $opts{except} ? { map { $_ => 1 } @{ $opts{except} } } : undef; my $tags = $self->{tags}; foreach my $t ( @$tags ) { my ( $ts, $te, $tn, $tv, $tf ) = @$t; next if $te < $start; last if $ts >= $end; next if $only and !$only->{$tn}; next if $except and $except->{$tn}; $callback->( $self->_mkextent( $ts, $te, $tf ), $tn, $tv ); } } =head2 iter_tags $st->iter_tags( $callback, %opts ) Iterate the tags stored in the string. For each tag, the CODE reference in C<$callback> is invoked once, being passed the start point and length of the tag. $callback->( $start, $length, $tagname, $tagvalue ) Options passed in C<%opts> are the same as for C. =cut sub iter_tags { my $self = shift; my ( $callback, %opts ) = @_; $self->iter_extents( sub { my ( $e, $tn, $tv ) = @_; $callback->( $e->start, $e->length, $tn, $tv ); }, %opts ); } =head2 iter_extents_nooverlap $st->iter_extents_nooverlap( $callback, %opts ) Iterate non-overlapping extents of tags stored in the string. The CODE reference in C<$callback> is invoked for each extent in the string where no tags change. The entire set of tags active in that extent is given to the callback. Because the extent covers possibly-multiple tags, it will not define the C and C flags. $callback->( $extent, %tags ) The callback will be invoked over the entire length of the string, including any extents with no tags applied. Options may be passed in C<%opts> to control the range of the string iterated over, in the same way as the C method. If the C or C filters are applied, then only the tags that survive filtering will be present in the C<%tags> hash. Tags that are excluded by the filtering will not be present, nor will their bounds be used to split the string into extents. =cut sub iter_extents_nooverlap { my $self = shift; my ( $callback, %opts ) = @_; my $start = exists $opts{start} ? $opts{start} : 0; my $end = exists $opts{end} ? $opts{end} : exists $opts{len} ? $start + $opts{len} : $self->length; my $only = exists $opts{only} ? { map { $_ => 1 } @{ $opts{only} } } : undef; my $except = exists $opts{except} ? { map { $_ => 1 } @{ $opts{except} } } : undef; my $tags = $self->{tags}; my @active; # ARRAY of [ $ts, $te, $tn, $tv ] my $pos = $start; foreach my $t ( @$tags ) { my ( $ts, $te, $tn, $tv ) = @$t; next if $te < $start; last if $ts >= $end; next if $only and !$only->{$tn}; next if $except and $except->{$tn}; while( $pos < $ts ) { my %activetags; my %tagends; my $rangeend = $ts; foreach ( @active ) { my ( undef, $e, $n, $v ) = @$_; $e < $rangeend and $rangeend = $e; next if $tagends{$n} and $tagends{$n} < $e; $activetags{$n} = $v; $tagends{$n} = $e; } $callback->( $self->_mkextent( $pos, $rangeend, 0 ), %activetags ); $pos = $rangeend; @active = grep { $_->[1] > $pos } @active; } push @active, [ $ts, $te, $tn, $tv ]; } while( $pos < $end ) { my %activetags; my %tagends; my $rangeend = $end; foreach ( @active ) { my ( undef, $e, $n, $v ) = @$_; $e < $rangeend and $rangeend = $e; next if $tagends{$n} and $tagends{$n} < $e; $activetags{$n} = $v; $tagends{$n} = $e; } $callback->( $self->_mkextent( $pos, $rangeend, 0 ), %activetags ); $pos = $rangeend; @active = grep { $_->[1] > $pos } @active; } } =head2 iter_tags_nooverlap $st->iter_tags_nooverlap( $callback, %opts ) Iterate extents of the string using C, but passing the start and length of each extent to the callback instead of the extent object. $callback->( $start, $length, %tags ) Options may be passed in C<%opts> to control the range of the string iterated over, in the same way as the C method. =cut sub iter_tags_nooverlap { my $self = shift; my ( $callback, %opts ) = @_; $self->iter_extents_nooverlap( sub { my ( $e, %tags ) = @_; $callback->( $e->start, $e->length, %tags ); }, %opts ); } =head2 iter_substr_nooverlap $st->iter_substr_nooverlap( $callback, %opts ) Iterate extents of the string using C, but passing the substring of data instead of the extent object. $callback->( $substr, %tags ) Options may be passed in C<%opts> to control the range of the string iterated over, in the same way as the C method. =cut sub iter_substr_nooverlap { my $self = shift; my ( $callback, %opts ) = @_; $self->iter_extents_nooverlap( sub { my ( $e, %tags ) = @_; $callback->( $e->plain_substr, %tags ); }, %opts, ); } =head2 tagnames @names = $st->tagnames Returns the set of tag names used in the string, in no particular order. =cut sub tagnames { my $self = shift; my $tags = $self->{tags}; my %tags; foreach my $t ( @$tags ) { $tags{$t->[2]}++; } keys %tags; } =head2 get_tags_at $tags = $st->get_tags_at( $pos ) Returns a HASH reference of all the tag values active at the given position. =cut sub get_tags_at { my $self = shift; my ( $pos ) = @_; my $tags = $self->{tags}; my %tags; # TODO: turn this into a binary search foreach my $t ( @$tags ) { my ( $ts, $te, $tn, $tv ) = @$t; last if $ts > $pos; next if $te <= $pos; $tags{$tn} = $tv; } return \%tags; } =head2 get_tag_at $value = $st->get_tag_at( $pos, $name ) Returns the value of the named tag at the given position, or C if the tag is not applied there. =cut sub get_tag_at { my $self = shift; my ( $pos, $name ) = @_; my $tags = $self->{tags}; my $value; foreach my $t ( @$tags ) { my ( $ts, $te, $tn, $tv ) = @$t; last if $ts > $pos; next if $te <= $pos; $value = $tv if $tn eq $name; } return $value; } =head2 get_tag_extent $extent = $st->get_tag_extent( $pos, $name ) If the named tag applies to the given position, returns the extent of the tag at that position. If it does not, C is returned. If an extent is returned it will define the C and C flags if appropriate. =cut sub get_tag_extent { my $self = shift; my ( $pos, $name ) = @_; my $tags = $self->{tags}; my ( $start, $end, $flags ); foreach my $t ( @$tags ) { my ( $ts, $te, $tn, undef, $tf ) = @$t; last if $ts > $pos; next if $te <= $pos; next unless $tn eq $name; $start = $ts; $end = $te; $flags = $tf; } if( defined $start ) { return $self->_mkextent( $start, $end, $flags ); } else { return undef; } } =head2 get_tag_missing_extent $extent = $st->get_tag_missing_extent( $pos, $name ) If the named tag does not apply at the given position, returns the extent of the string around that position that does not have the tag. If it does exist, C is returned. If an extent is returned it will not define the C and C flags, as these do not make sense for the range in which a tag is absent. =cut sub get_tag_missing_extent { my $self = shift; my ( $pos, $name ) = @_; my $tags = $self->{tags}; my $start = 0; foreach my $t ( @$tags ) { my ( $ts, $te, $tn ) = @$t; next unless $tn eq $name; if( $ts <= $pos and $te > $pos ) { return undef; } if( $ts > $pos ) { return $self->_mkextent( $start, $ts, 0 ); } $start = $te; } return $self->_mkextent( $start, $self->length, 0 ); } =head2 set_substr $st->set_substr( $start, $len, $newstr ) Modifies a extent of the underlying plain string to that given. The extents of tags in the string are adjusted to cope with the modified region, and the adjustment in length. Tags entirely before the replaced extent remain unchanged. Tags entirely within the replaced extent are deleted. Tags entirely after the replaced extent are moved by appropriate amount to ensure they still apply to the same characters as before. Tags that start before and end after the extent remain, and have their lengths suitably adjusted. Tags that span just the start or end of the extent, but not both, are truncated, so as to remove the part of the tag applied on the modified extent but preserving that applied outside. If C<$newstr> is a C object, then its tags will be applied to C<$st> as appropriate. Edge-anchored tags in C<$newstr> will not be extended through C<$st>, though they will apply as edge-anchored if they now sit at the edge of the new string. =cut sub set_substr { my $self = shift; my ( $start, $len, $new ) = @_; my $limit = $self->length; $start = $limit if $start > $limit; $len = ( $limit - $start ) if $len > ( $limit - $start ); CORE::substr( $self->{str}, $start, $len ) = $new; my $oldend = $start + $len; my $newend = $start + CORE::length( $new ); my $delta = $newend - $oldend; # Positions after $oldend have now moved up $delta places my $tags = $self->{tags}; my $i = 0; for( ; $i < @$tags; $i++ ) { # In this loop we'll handle tags that start before the deleted section my $t = $tags->[$i]; my ( $ts, $te, undef, undef, $tf ) = @$t; last if $ts >= $start and not( $tf & FLAG_ANCHOR_BEFORE ); # Two cases: # A: Tag spans entirely outside deleted section - stretch/compress it # We may have to collapse it to nothing, so delete it # B: Tag starts before but ends within deleted section - truncate it # Plus a case we don't care about # Tag starts and ends entirely before the deleted section - ignore it if( $te > $oldend or ( $te == $oldend and $tf & FLAG_ANCHOR_AFTER ) ) { # Case A $t->[1] += $delta; if( $t->[0] == $t->[1] ) { splice @$tags, $i, 1, (); $i--; next; } } elsif( $te > $start ) { # Case B $t->[1] = $start; } } for( ; $i < @$tags; $i++ ) { my $t = $tags->[$i]; my ( $ts, $te ) = @$t; # In this loop we'll handle tags that start within the deleted section last if $ts >= $oldend; # Two cases # C: Tag contained entirely within deleted section - delete it # D: Tag starts within but ends after the deleted section - truncate it if( $te <= $oldend ) { # Case C splice @$tags, $i, 1; $i--; next; } else { # Case D $t->[0] = $newend; $t->[1] += $delta; } } for( ; $i < @$tags; $i++ ) { my $t = $tags->[$i]; my ( $ts, $te, undef, undef, $tf ) = @$t; # In this loop we'll handle tags that start after the deleted section # One case # E: Tag starts and ends after the deleted section - move it $t->[0] += $delta unless $tf & FLAG_ANCHOR_BEFORE; $t->[1] += $delta; # If we've not moved the start (because it was FLAG_ANCHOR_BEFORE), we # might now have an ordering constraint violation. Better fix it. local $b = $t; foreach my $new_i ( reverse 0 .. $i-1 ) { local $a = $tags->[$new_i]; last if _cmp_tags() <= 0; splice @$tags, $new_i, 0, splice @$tags, $i, 1, (); last; } } if( blessed $new and $new->isa( __PACKAGE__ ) ) { my $atstart = $start == 0; my $atend = $newend == $self->length; $new->iter_extents( sub { my ( $e, $tn, $tv ) = @_; $self->apply_tag( ( $atstart && $e->anchor_before ) ? -1 : $start + $e->start, ( $atend && $e->anchor_after ) ? -1 : $e->length, $tn, $tv ); } ); } $self->_assert_sorted if DEBUG; return $self; } =head2 insert $st->insert( $start, $newstr ) Insert the given string at the given position. A shortcut around C. If C<$newstr> is a C object, then its tags will be applied to C<$st> as appropriate. If C<$start> is 0, any before-anchored tags in will become before-anchored in C<$st>. =cut sub insert { my $self = shift; my ( $at, $new ) = @_; $self->set_substr( $at, 0, $new ); } =head2 append $st->append( $newstr ) $st .= $newstr Append to the underlying plain string. A shortcut around C. If C<$newstr> is a C object, then its tags will be applied to C<$st> as appropriate. Any after-anchored tags in will become after-anchored in C<$st>. =cut use overload '.=' => 'append'; sub append { my $self = shift; my ( $new ) = @_; return $self->set_substr( $self->length, 0, $new ) if blessed $new and $new->isa( __PACKAGE__ ); # Optimised version $self->{str} .= $new; my $newend = $self->length; my $tags = $self->{tags}; my $i = 0; # Adjust boundaries of ANCHOR_AFTER tags for( ; $i < @$tags; $i++ ) { my $t = $tags->[$i]; $t->[1] = $newend if $t->[4] & FLAG_ANCHOR_AFTER; } return $self; } =head2 append_tagged $st->append_tagged( $newstr, %tags ) Append to the underlying plain string, and apply the given tags to the newly-inserted extent. Returns C<$st> itself so that the method may be easily chained. =cut sub append_tagged { my $self = shift; my ( $new, %tags ) = @_; my $start = $self->length; my $len = CORE::length( $new ); $self->append( $new ); $self->apply_tag( $start, $len, $_, $tags{$_} ) for keys %tags; return $self; } =head2 concat $ret = $st->concat( $other ) $ret = $st . $other Returns a new C containing the two strings concatenated together, preserving any tags present. This method overloads normal string concatenation operator, so expressions involving C values retain their tags. This method or operator tries to respect subclassing; preferring to return a new object of a subclass if either argument or operand is a subclass of C. If they are both subclasses, it will prefer the type of the invocant or first operand. =cut use overload '.' => 'concat'; sub concat { my $self = shift; my ( $other, $swap ) = @_; # Try to find the "higher" subclass my $class = ( ref $self eq __PACKAGE__ and blessed $other and $other->isa( __PACKAGE__ ) ) ? ref $other : ref $self; my $ret = $class->new( $self ); return $ret->insert( 0, $other ) if $swap; return $ret->append( $other ); } =head2 matches @subs = $st->matches( $regexp ) Returns a list of substrings (as C instances) for every non-overlapping match of the given C<$regexp>. This could be used, for example, to build a formatted string from a formatted template containing variable expansions: my $template = ... my %vars = ... my $ret = String::Tagged->new; foreach my $m ( $template->matches( qr/\$\w+|[^$]+/ ) ) { if( $m =~ m/^\$(\w+)$/ ) { $ret->append_tagged( $vars{$1}, %{ $m->get_tags_at( 0 ) } ); } else { $ret->append( $m ); } } This iterates segments of the template containing variables expansions starting with a C<$> symbol, and replaces them with values from the C<%vars> hash, careful to preserve all the formatting tags from the original template string. =cut sub matches { my $self = shift; my ( $re ) = @_; my $plain = $self->str; my @ret; while( $plain =~ m/$re/g ) { push @ret, $self->substr( $-[0], $+[0] - $-[0] ); } return @ret; } =head2 split @parts = $st->split( $regexp, $limit ) Returns a list of substrings by applying the regexp to the string content; similar to the core perl C function. If C<$limit> is supplied, the method will stop at that number of elements, returning the entire remainder of the input string as the final element. If the C<$regexp> contains a capture group then the content of the first one will be added to the return list as well. =cut sub split { my $self = shift; my ( $re, $limit ) = @_; my $plain = $self->str; my $prev = 0; my @ret; while( $plain =~ m/$re/g ) { push @ret, $self->substr( $prev, $-[0]-$prev ); push @ret, $self->substr( $-[1], $+[1]-$-[1] ) if @- > 1; $prev = $+[0]; last if defined $limit and @ret == $limit-1; } if( CORE::length $plain > $prev ) { push @ret, $self->substr( $prev, CORE::length( $plain ) - $prev ); } return @ret; } =head2 sprintf $ret = $st->sprintf( @args ) I Returns a new string by using the given instance as the format string for a L constructor call. The returned instance will be of the same class as the invocant. =cut sub sprintf { my $self = shift; return ( ref $self )->from_sprintf( $self, @_ ); } =head2 debug_sprintf $ret = $st->debug_sprintf Returns a representation of the string data and all the tags, suitable for debug printing or other similar use. This is a format such as is given in the DESCRIPTION section above. The output will consist of a number of lines, the first containing the plain underlying string, then one line per tag. The line shows the extent of the tag given by C<[---]> markers, or a C<|> in the special case of a tag covering only a single character. Special markings of C> and C> indicate tags which are "before" or "after" anchored. For example: Hello, world [---] word => 1 <[----------]> everywhere => 1 | space => 1 =cut sub debug_sprintf { my $self = shift; my $str = $self->str; my $len = CORE::length( $str ); my $maxnamelen = 0; my $ret = " " . ( $str =~ s/\n/./gr ) . "\n"; $self->iter_tags( sub { my ( undef, undef, $name, undef ) = @_; CORE::length( $name ) > $maxnamelen and $maxnamelen = CORE::length( $name ); } ); foreach my $t ( @{ $self->{tags} } ) { my ( $ts, $te, $tn, $tv, $tf ) = @$t; $ret .= ( $tf & FLAG_ANCHOR_BEFORE ) ? " <" : " "; $ret .= " " x $ts; my $tl = $te - $ts; if( $tl == 0 ) { $ret .= ""; } elsif( $tl == 1 ) { $ret .= "|"; } else { $ret .= "[" . ( "-" x ( $tl - 2 ) ) . "]"; } $ret .= " " x ( $len - $te ); $ret .= ( $tf & FLAG_ANCHOR_AFTER ) ? "> " : " "; $ret .= CORE::sprintf "%-*s => %s\n", $maxnamelen, $tn, $tv; } return $ret; } =head1 Extent Objects These objects represent a range of characters within the containing C object. The range they represent is fixed at the time of creation. If the containing string is modified by a call to C then the effect on the extent object is not defined. These objects should be considered as relatively short-lived - used briefly for the purpose of querying the result of an operation, then discarded soon after. =cut package # hide from CPAN indexer String::Tagged::Extent; =head2 $extent->string Returns the containing C object. =cut sub string { shift->[0] } =head2 $extent->start Returns the start index of the extent. This is the index of the first character within the extent. =cut sub start { shift->[1] } =head2 $extent->end Returns the end index of the extent. This is the index of the first character beyond the end of the extent. =cut sub end { shift->[2] } =head2 $extent->anchor_before True if this extent begins "before" the start of the string. Only certain methods return extents with this flag defined. =cut sub anchor_before { shift->[3] & String::Tagged::FLAG_ANCHOR_BEFORE; } =head2 $extent->anchor_after True if this extent ends "after" the end of the string. Only certain methods return extents with this flag defined. =cut sub anchor_after { shift->[3] & String::Tagged::FLAG_ANCHOR_AFTER; } =head2 $extent->length Returns the number of characters within the extent. =cut sub length { my $self = shift; $self->end - $self->start; } =head2 $extent->substr Returns the substring contained by the extent. =cut sub substr { my $self = shift; $self->string->substr( $self->start, $self->length ); } =head2 $extent->plain_substr Returns the substring of the underlying plain string buffer contained by the extent. =cut sub plain_substr { my $self = shift; $self->string->plain_substr( $self->start, $self->length ); } =head1 TODO =over 4 =item * There are likely variations on the rules for C that could equally apply to some uses of tagged strings. Consider whether the behaviour of modification is chosen per-method, per-tag, or per-string. =item * Consider how to implement a clone from one tag format to another which wants to merge multiple different source tags together into a single new one. =back =head1 AUTHOR Paul Evans =cut 0x55AA; String-Tagged-0.16/lib/String/Tagged000755001750001750 013454124632 16033 5ustar00leoleo000000000000String-Tagged-0.16/lib/String/Tagged/Formatting.pod000444001750001750 657613454124632 21024 0ustar00leoleo000000000000=head1 NAME C - an API specification for simple formatted strings =head1 DESCRIPTION A primary use case of L is to allow storage of a text string with associated formatting data. As there are a growing number of subclasses on CPAN that attempt to do this, a common specification is emerging to allow interoperability between them. This will allow interchange between formats from different sources, display or rendering, and so on. Primarily this specification consists of the names and meanings of a set of tags that a conforming string should supply, though it also suggests a pair of methods useful for converting between different types of object and the standard formatting. Specific implementations may not be able to represent all of the tags of course; this specification only gives the suggested way to represent those formatting styles that the implementation actually understands. =head1 TAGS =head2 bold, under, italic, strike, blink, monospace Tags with boolean values indicating bold, underline, italic, strikethrough, blinking and monospaced font. =head2 reverse Tag with boolean value indicating reverse video; i.e. the effect of swapping foreground and background colours. This effect is common on terminal-based string systems, but is unlikely to be found elsewhere. =head2 fg, bg Tags with L instances giving foreground and background colours. The use of a C instance allows specific implementations to be able to represent their own colour space, while still supporting an easy conversion to the colour spaces used by others. =head1 METHODS The following methods should be provided on conforming implementations, to indicate their support of this specification and to allow easy conversion from and to it. =head2 as_formatting $fmt = $st->as_formatting Called on an existing instance of the class, returns a C instance (or some subclass thereof) containing only the tags and values defined by this specification. This method may simply return the original instance if the tags natively used by it already fit this specification, or it may return a newly-constructed instance by converting its own tag formats. Use of the C method with C and possibly a C map should be able to implement this in most cases. =head2 new_from_formatting $st = String::Tagged::Subclass->new_from_formatting( $fmt ) Called as a class method on the target class type, returns a new instance of that class constructed to represent the formatting contained in the C<$fmt> instance, which should contain only the tags given in this specification. If the class natively uses tags as per this specification, this can be a trivial clone, otherwise some conversion will need to be performed. Use of the C method with C and possibly a C map should be able to implement this in most cases. =head1 KNOWN IMPLEMENTATIONS =over 4 =item * L - Contains a pair of functions to convert a formatted I message body to and from this format. =item * L - parse or build IRC formatted messages and convert bidirectionally to this format. =item * L - build terminal control sequences for message formatting and convert bidirectionally to this format. =back =head1 AUTHOR Paul Evans String-Tagged-0.16/t000755001750001750 013454124632 13067 5ustar00leoleo000000000000String-Tagged-0.16/t/00use.t000444001750001750 15013454124632 14321 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use_ok( "String::Tagged" ); done_testing; String-Tagged-0.16/t/01plain.t000444001750001750 140613454124632 14656 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); is( $str->str, "Hello, world", 'Plain string accessor' ); is( $str->length, 12, 'Plain string length' ); is( length($str), 12, 'length() str also works' ); is( $str->plain_substr( 0, 5 ), "Hello", 'Plain substring accessor' ); isa_ok( $str->substr( 0, 5 ), "String::Tagged", 'Tagged substring accessor' ); $str->set_substr( 7, 5, "planet" ); is( $str->str, "Hello, planet", "After set_substr" ); is( $str->length, 13, 'String length after set_substr' ); $str->insert( 7, "lovely " ); is( $str->str, "Hello, lovely planet", 'After insert' ); $str->append( "!" ); is( $str->str, "Hello, lovely planet!", 'After append' ); done_testing; String-Tagged-0.16/t/02tags-conststr.t000444001750001750 1527513454124632 16420 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use Test::Identity; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); is_deeply( [ $str->tagnames ], [], 'No tags defined initially' ); identical( $str->apply_tag( 0, 12, message => 1 ), $str, '->apply_tag returns $str' ); is_deeply( [ $str->tagnames ], [qw( message )], 'message tag now defined' ); my @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], ], 'tags list after apply message' ); my @extents; $str->iter_extents( sub { push @extents, $_[0] } ); is( scalar @extents, 1, 'one extent from iter_extents' ); my $e = $extents[0]; can_ok( $e, qw( string start length end substr ) ); identical( $e->string, $str, '$e->string' ); is( $e->start, 0, '$e->start' ); is( $e->length, 12, '$e->length' ); is( $e->end, 12, '$e->end' ); is( $e->plain_substr, "Hello, world", '$e->plain_substr' ); { my $sub = $e->substr; isa_ok( $sub, "String::Tagged", '$e->substr' ); my @tags; $sub->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ] ], '$e->substr->iter_tags' ); } is_deeply( $str->get_tags_at( 0 ), { message => 1 }, 'tags at pos 0' ); is( $str->get_tag_at( 0, "message" ), 1, 'message tag is 1 at pos 0' ); $str->apply_tag( 6, 1, space => 1 ); is_deeply( [ sort $str->tagnames ], [qw( message space )], 'space tag now also defined' ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], [ 6, 1, space => 1 ], ], 'tags list after apply space' ); undef @extents; $str->iter_extents( sub { push @extents, $_[0] } ); is( scalar @extents, 2, 'two extent from iter_extents' ); is( $extents[0]->plain_substr, "Hello, world", '$e[0]->substr' ); is( $extents[1]->plain_substr, " ", '$e[1]->substr' ); sub fetch_tags { my ( $start, $len, %tags ) = @_; push @tags, [ $start, $len, map { $_ => $tags{$_} } sort keys %tags ] } undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 6, message => 1 ], [ 6, 1, message => 1, space => 1 ], [ 7, 5, message => 1 ], ], 'tags list non-overlapping after apply space' ); my @substrs; sub fetch_substrs { my ( $substr, %tags ) = @_; push @substrs, [ $substr, map { $_ => $tags{$_} } sort keys %tags ] } $str->iter_substr_nooverlap( \&fetch_substrs ); is_deeply( \@substrs, [ [ "Hello,", message => 1 ], [ " ", message => 1, space => 1 ], [ "world", message => 1 ], ], 'substrs non-overlapping after apply space' ); $str->apply_tag( 0, 1, capital => 1 ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 1, capital => 1, message => 1 ], [ 1, 5, message => 1 ], [ 6, 1, message => 1, space => 1 ], [ 7, 5, message => 1 ], ], 'tags list non-overlapping after apply space' ); undef @substrs; $str->iter_substr_nooverlap( \&fetch_substrs ); is_deeply( \@substrs, [ [ "H", capital => 1, message => 1 ], [ "ello,", message => 1 ], [ " ", message => 1, space => 1 ], [ "world", message => 1 ], ], 'substrs non-overlapping after apply space' ); $str = String::Tagged->new( "my BIG message" ); $str->apply_tag( 0, 14, size => 1 ); $str->apply_tag( 3, 3, size => 2 ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 3, size => 1 ], [ 3, 3, size => 2 ], [ 6, 8, size => 1 ], ], 'tags list with overridden tag' ); $str->apply_tag( 0, 1, size => 3 ); $str->apply_tag( 3, 1, size => 4 ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 1, size => 3 ], [ 1, 2, size => 1 ], [ 3, 1, size => 4 ], [ 4, 2, size => 2 ], [ 6, 8, size => 1 ], ], 'tags list with overridden tag at BOS' ); $str = String::Tagged->new( "BEGIN middle END" ); $str->apply_tag( -1, -1, everywhere => 1 ); $str->apply_tag( -1, 5, begin => 1 ); $str->apply_tag( 13, -1, end => 1); undef @extents; $str->iter_extents( sub { my ( $e ) = @_; push @extents, [ $e->substr, $e->start, $e->end, $e->anchor_before?1:0, $e->anchor_after?1:0 ]; } ); is_deeply( \@extents, [ [ "BEGIN", 0, 5, 1, 0 ], [ "BEGIN middle END", 0, 16, 1, 1 ], [ "END", 13, 16, 0, 1 ] ], 'extent objects contain start/end/anchor_before/anchor_after' ); is_deeply( $str->get_tags_at( 0 ), { everywhere => 1, begin => 1 }, 'tags at pos 0 of edge-anchored' ); is( $str->get_tag_at( 0, "everywhere" ), 1, 'everywhere tag is 1 at pos 0 of edge-anchored' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, begin => 1, everywhere => 1 ], [ 5, 8, everywhere => 1 ], [ 13, 3, end => 1, everywhere => 1 ], ], 'tags list with edge-anchored tags' ); # RT98700 { my $str = String::Tagged->new( "Hello" ); $str->apply_tag( 1, 1, one => 1 ); $str->apply_tag( 4, 1, four => 4 ); is_deeply( $str->get_tags_at( 1 ), { one => 1 }, '->get_tags_at( 1 )' ); is_deeply( $str->get_tags_at( 4 ), { four => 4 }, '->get_tags_at( 4 )' ); } my $str2 = String::Tagged->new( $str ); is( $str2->str, "BEGIN middle END", 'constructor clones string' ); undef @extents; $str2->iter_extents( sub { my ( $e ) = @_; push @extents, [ $e->substr, $e->start, $e->end, $e->anchor_before?1:0, $e->anchor_after?1:0 ]; } ); is_deeply( \@extents, [ [ "BEGIN", 0, 5, 1, 0 ], [ "BEGIN middle END", 0, 16, 1, 1 ], [ "END", 13, 16, 0, 1 ] ], 'constructor clones tags' ); $str = String::Tagged->new_tagged( "sample", foo => 1 ); is( $str->str, "sample", '->str from ->new_tagged' ); is_deeply( $str->get_tags_at( 0 ), { foo => 1 }, 'tags at pos 0 from ->new_tagged' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 6, foo => 1 ] ], 'tags list from ->new_tagged' ); # get_tag_at (RT100392) { my $str = String::Tagged->new( "abcd" ); $str->apply_tag( $_, 1, some => 13 ) for 0 .. $str->length - 1; my $v = $str->get_tag_at( 2, "some" ); is( $v, 13, "get_tag_at retrieved value" ); } done_testing; String-Tagged-0.16/t/03tags-iter-limit.t000444001750001750 650513454124632 16575 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "A string with BOLD and ITAL tags" ); $str->apply_tag( -1, -1, message => 1 ); $str->apply_tag( 14, 4, bold => 1 ); $str->apply_tag( 23, 4, italic => 1 ); my @tags; undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] }, start => 20 ); is_deeply( \@tags, [ [ 0, 32, message => 1 ], [ 23, 4, italic => 1 ], ], 'tags list with start offset' ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] }, end => 20 ); is_deeply( \@tags, [ [ 0, 32, message => 1 ], [ 14, 4, bold => 1 ], ], 'tags list with end limit' ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] }, only => [qw( message )] ); is_deeply( \@tags, [ [ 0, 32, message => 1 ], ], 'tags list with only (message)' ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] }, except => [qw( message )] ); is_deeply( \@tags, [ [ 14, 4, bold => 1 ], [ 23, 4, italic => 1 ], ], 'tags list with except (message)' ); sub fetch_tags { my ( $start, $len, %tags ) = @_; push @tags, [ $start, $len, map { $_ => $tags{$_} } sort keys %tags ] } undef @tags; $str->iter_tags_nooverlap( \&fetch_tags, start => 20 ); is_deeply( \@tags, [ [ 20, 3, message => 1 ], [ 23, 4, italic => 1, message => 1 ], [ 27, 5, message => 1 ], ], 'tags list non-overlapping with start offset' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags, end => 20 ); is_deeply( \@tags, [ [ 0, 14, message => 1 ], [ 14, 4, bold => 1, message => 1 ], [ 18, 2, message => 1 ], ], 'tags list non-overlapping with end limit' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags, only => [qw( message )] ); is_deeply( \@tags, [ [ 0, 32, message => 1 ], ], 'tags list non-overlapping with only limit' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags, except => [qw( message )] ); is_deeply( \@tags, [ [ 0, 14 ], [ 14, 4, bold => 1 ], [ 18, 5 ], [ 23, 4, italic => 1 ], [ 27, 5 ], ], 'tags list non-overlapping with except limit' ); my @substrs; sub fetch_substrs { my ( $substr, %tags ) = @_; push @substrs, [ $substr, map { $_ => $tags{$_} } sort keys %tags ] } undef @substrs; $str->iter_substr_nooverlap( \&fetch_substrs, start => 20 ); is_deeply( \@substrs, [ [ "nd ", message => 1 ], [ "ITAL", italic => 1, message => 1 ], [ " tags", message => 1 ], ], 'substrs non-overlapping with start offset' ); undef @substrs; $str->iter_substr_nooverlap( \&fetch_substrs, end => 20 ); is_deeply( \@substrs, [ [ "A string with ", message => 1 ], [ "BOLD", bold => 1, message => 1 ], [ " a", message => 1 ], ], 'substrs non-overlapping with start offset' ); done_testing; String-Tagged-0.16/t/04tags-appendstr.t000444001750001750 353613454124632 16520 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use Test::Identity; use String::Tagged; my $str = String::Tagged->new(); is_deeply( [ $str->tagnames ], [], 'No tags defined initially' ); identical( scalar $str->apply_tag( -1, -1, everywhere => 1 ), $str, '->apply_tag returns $str' ); identical( scalar $str->append_tagged( "Hello", word => "greeting" ), $str, '->append_tagged returns $str' ); is( $str->str, "Hello", 'str after first append' ); is_deeply( [ sort $str->tagnames ], [qw( everywhere word )], 'tagnames after first append' ); is_deeply( $str->get_tags_at( 0 ), { word => "greeting", everywhere => 1 }, 'tags at pos 0' ); is( $str->get_tag_at( 0, "word" ), "greeting", 'word tag at pos 0' ); my @tags; sub fetch_tags { my ( $start, $len, %tags ) = @_; push @tags, [ $start, $len, map { $_ => $tags{$_} } sort keys %tags ] } $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, everywhere => 1, word => "greeting" ], ], 'tags list after first append' ); $str->append_tagged( ", " ); # No tags is( $str->str, "Hello, ", 'str after second append' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, everywhere => 1, word => "greeting" ], [ 5, 2, everywhere => 1 ], ], 'tags list after second append' ); $str->append_tagged( "world", word => "target" ); is( $str->str, "Hello, world", 'str after third append' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, everywhere => 1, word => "greeting" ], [ 5, 2, everywhere => 1 ], [ 7, 5, everywhere => 1, word => "target" ], ], 'tags list after third append' ); done_testing; String-Tagged-0.16/t/05tags-delete.t000444001750001750 314113454124632 15753 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use Test::Identity; use String::Tagged; my $str = String::Tagged->new( "My message is here" ); $str->apply_tag( -1, -1, message => 1 ); my @tags; $str->iter_tags_nooverlap( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 18, message => 1 ], ], 'tags list initially' ); identical( $str->delete_tag( 3, 4, 'message' ), $str, '->delete_tag returns $str' ); undef @tags; $str->iter_tags_nooverlap( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 18 ], ], 'tags list after delete' ); $str->apply_tag( -1, -1, message => 1 ); identical( $str->unapply_tag( 3, 4, 'message' ), $str, '->unapply_tag returns $str' ); undef @tags; $str->iter_tags_nooverlap( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 3, message => 1 ], [ 3, 4, ], [ 7, 11, message => 1 ], ], 'tags list after unapply' ); $str->unapply_tag( 3, 7, 'message' ); undef @tags; $str->iter_tags_nooverlap( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 3, message => 1 ], [ 3, 7, ], [ 10, 8, message => 1 ], ], 'tags list after second unapply' ); $str->unapply_tag( 0, 5, 'message' ); undef @tags; $str->iter_tags_nooverlap( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 10, ], [ 10, 8, message => 1 ], ], 'tags list after third unapply' ); done_testing; String-Tagged-0.16/t/06tags-substr.t000444001750001750 606113454124632 16040 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 1, 1, e => 1 ); $str->apply_tag( -1, -1, message => 1 ); my @tags; sub fetch_tags { my ( $start, $len, %tags ) = @_; push @tags, [ $start, $len, map { $_ => $tags{$_} } sort keys %tags ] } $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 1, message => 1 ], [ 1, 1, e => 1, message => 1 ], [ 2, 10, message => 1 ], ], 'tags list initially' ); $str->set_substr( 7, 5, "planet" ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 1, message => 1 ], [ 1, 1, e => 1, message => 1 ], [ 2, 11, message => 1 ], ], 'tags list after first substr' ); $str->apply_tag( 5, 1, comma => 1 ); $str->set_substr( 0, 5, "Goodbye" ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 7, message => 1 ], [ 7, 1, comma => 1, message => 1 ], [ 8, 7, message => 1 ], ], 'tags list after second substr' ); $str->set_substr( 7, 1, "" ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 14, message => 1 ], ], 'tags list after collapsing substr' ); $str->apply_tag( 0, 7, goodbye => 1 ); $str->apply_tag( 8, 6, planet => 1 ); $str->set_substr( 2, 10, "urm" ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 2, goodbye => 1, message => 1 ], [ 2, 3, message => 1 ], [ 5, 2, message => 1, planet => 1 ], ], 'tags list after straddling substr' ); $str->set_substr( 0, 0, "I say, " ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 7, message => 1 ], [ 7, 2, goodbye => 1, message => 1 ], [ 9, 3, message => 1 ], [ 12, 2, message => 1, planet => 1 ], ], 'tags list after prepend substr' ); # ->substr accessor { my $str = String::Tagged->new ->append_tagged( "one", one => 1 ) ->append ( " " ) ->append_tagged( "two", two => 2 ) ->append ( " rest of the string" ); my $sub = $str->substr( 3, 9 ); is( $sub->str, " two rest", '$sub->str' ); my $e = $sub->get_tag_extent( 1, "two" ); is( $e->start, 1, 'two tag starts at 1' ); is( $e->length, 3, 'two tag length is 3' ); } # ->substr can keep both-edge anchored tags { my $str = String::Tagged->new( "one two three" ) ->apply_tag( -1, -1, wholestring => 1 ); my $sub = $str->substr( 4, 3 ); ok( my $e = $sub->get_tag_extent( 1, "wholestring" ), 'sub has wholestring tag' ); if( $e ) { is( $e->start, 0, 'wholestring tag starts at 0' ); is( $e->length, 3, 'wholestring tag is 3 long' ); }; } done_testing; String-Tagged-0.16/t/07tags-range.t000444001750001750 332113454124632 15607 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use Test::Identity; use String::Tagged; my $str = String::Tagged->new( "some BIG words" ); $str->apply_tag( -1, -1, everywhere => 1 ); $str->apply_tag( 5, 3, big => 1 ); my $e = $str->get_tag_extent( 7, 'everywhere' ); ok( defined $e, 'Got an extent' ); identical( $e->string, $str, '$e->str' ); is( $e->start, 0, '$e->start' ); is( $e->end, 14, '$e->end' ); is( $e->length, 14, '$e->length' ); is( $e->plain_substr, "some BIG words", '$e->substr' ); ok( $e->anchor_before, '$e->anchor_before' ); ok( $e->anchor_after, '$e->anchor_after' ); $e = $str->get_tag_extent( 7, 'big' ); is( $e->start, 5, '$e->start' ); is( $e->end, 8, '$e->end' ); is( $e->plain_substr, "BIG", '$e->substr of 7/big' ); ok( !$e->anchor_before, '$e->anchor_before' ); ok( !$e->anchor_after, '$e->anchor_after' ); $e = $str->get_tag_extent( 3, 'big' ); ok( !defined $e, '$e not defined for 3/big' ); $e = $str->get_tag_missing_extent( 3, 'big' ); ok( defined $e, '$e missing defined for 3/big' ); is( $e->start, 0, '$e->start' ); is( $e->end, 5, '$e->end' ); $e = $str->get_tag_missing_extent( 7, 'big' ); ok( !defined $e, '$e missing not defined for 7/big' ); $e = $str->get_tag_missing_extent( 10, 'big' ); ok( defined $e, '$e missing defined for 10/big' ); is( $e->start, 8, '$e->start' ); is( $e->end, 14, '$e->end' ); # RT120691 { my $str = String::Tagged->new; $str->append_tagged( "first", first => 1 ); $str->append_tagged( "second", second => 1 ); my $e = $str->get_tag_extent( 8, "second" ); ok( $e, 'second tag defined' ) and do { is( $e->start, 5, 'second tag start' ); is( $e->length, 6, 'second tag length '); }; } done_testing; String-Tagged-0.16/t/10debugprint.t000444001750001750 142213454124632 15714 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); is( $str->debug_sprintf, " Hello, world\n", 'untagged' ); $str->apply_tag( 0, 5, word => 1 ); is( $str->debug_sprintf, " Hello, world\n" . " [---] word => 1\n", 'one tag' ); $str->apply_tag( 6, 1, space => 1 ); is( $str->debug_sprintf, " Hello, world\n" . " [---] word => 1\n" . " | space => 1\n", 'single-char tag' ); $str->apply_tag( -1, -1, everywhere => 1 ); is( $str->debug_sprintf, " Hello, world\n" . " [---] word => 1\n" . " <[----------]> everywhere => 1\n" . " | space => 1\n", 'everywhere tag' ); done_testing; String-Tagged-0.16/t/11clone.t000444001750001750 225613454124632 14660 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $orig = String::Tagged->new ->append ( "this string has " ) ->append_tagged( "some", some => 1 ) ->append ( " " ) ->append_tagged( "tags", tags => 1 ) ->append ( " applied to it" ); # full clone { my $new = String::Tagged->clone( $orig ); is( $new->str, "this string has some tags applied to it", '->str of clone' ); is_deeply( [ sort $new->tagnames ], [qw( some tags )], '->tagnames of clone' ); } # instance clone { my $new = $orig->clone; is( $new->str, "this string has some tags applied to it", '->str of clone' ); } # subset clone { my $new = $orig->clone( only_tags => [qw( tags )] ); is_deeply( [ $new->tagnames ], [qw( tags )], '->tagnames of partial clone' ); } # clone with converter { my $new = $orig->clone( convert_tags => { some => sub { $_[0], $_[1] + 1 }, tags => "different_tag", } ); is_deeply( [ sort $new->tagnames ], [qw( different_tag some )], '->tagnames of converted clone' ); is( $new->get_tag_at( index( $new, "some" ), "some" ), 2, 'value of sub-converted tag' ); } done_testing; String-Tagged-0.16/t/20merge-tags.t000444001750001750 605413454124632 15613 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 6, message => 1 ); $str->apply_tag( 6, 6, message => 1 ); my @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 6, message => 1 ], [ 6, 6, message => 1 ], ], 'tags list before merge' ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], ], 'tags list after merge' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 6, message => 1 ); $str->apply_tag( 6, 6, message => 2 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 6, message => 1 ], [ 6, 6, message => 2 ], ], 'tags list after merge differing values' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 6, message => 1 ); $str->apply_tag( 6, 6, others => 1 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 6, message => 1 ], [ 6, 6, others => 1 ], ], 'tags list after merge differing names' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 4, message => 1 ); $str->apply_tag( 4, 4, message => 1 ); $str->apply_tag( 8, 4, message => 1 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], ], 'tags list after merge 3' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 4, message => 1 ); $str->apply_tag( 8, 12, message => 1 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 4, message => 1 ], [ 8, 4, message => 1 ], ], 'tags list after merge non-overlap' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 8, message => 1 ); $str->apply_tag( 4, 12, message => 1 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], ], 'tags list after merge with overlap' ); $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( 0, 5, word => 1 ); $str->apply_tag( 0, 1, message => 1 ); $str->apply_tag( 1, 11, message => 1 ); $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 5, word => 1 ], [ 0, 12, message => 1 ], ], 'tags list after merge with overlap' ); done_testing; String-Tagged-0.16/t/21merge-tags-anchors.t000444001750001750 157413454124632 17251 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); $str->apply_tag( -1, 6, message => 1 ); $str->apply_tag( 6, -1, message => 1 ); my @tags; $str->merge_tags( sub { $_[1] == $_[2] } ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 12, message => 1 ], ], 'tags list after merge' ); $str->insert( 0, "<<" ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 14, message => 1 ], ], 'tags list after prepend' ); $str->append( ">>" ); undef @tags; $str->iter_tags( sub { push @tags, [ @_ ] } ); is_deeply( \@tags, [ [ 0, 16, message => 1 ], ], 'tags list after append' ); done_testing; String-Tagged-0.16/t/30appendinsert.t000444001750001750 406013454124632 16250 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $begin = String::Tagged->new( "BEGIN" ); $begin->apply_tag( -1, 5, begin => 1 ); my $middle = String::Tagged->new( " middle " ); $middle->apply_tag( 1, 6, middle => 1 ); my $end = String::Tagged->new( "END" ); $end->apply_tag( 0, -1, end => 1 ); my $str = String::Tagged->new( $begin ); $str->append( $middle ); is( $str->str, "BEGIN middle ", 'str after first append' ); my @tags; sub fetch_tags { my ( $start, $len, %tags ) = @_; push @tags, [ $start, $len, map { $_ => $tags{$_} } sort keys %tags ] } $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, begin => 1 ], [ 5, 1, ], [ 6, 6, middle => 1 ], [ 12, 1, ], ], 'tags list after first append' ); $str->append( $end ); is( $str->str, "BEGIN middle END", 'str after second append' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 5, begin => 1 ], [ 5, 1, ], [ 6, 6, middle => 1 ], [ 12, 1, ], [ 13, 3, end => 1 ], ], 'tags list after secondappend' ); $str = String::Tagged->new( $begin ); $str->insert( 0, $middle ); is( $str->str, " middle BEGIN", 'str after first prepend' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 1, begin => 1 ], [ 1, 6, begin => 1, middle => 1 ], [ 7, 6, begin => 1 ], ], 'tags list after first prepend' ); $str->insert( 0, $end ); is( $str->str, "END middle BEGIN", 'str after second prepend' ); undef @tags; $str->iter_tags_nooverlap( \&fetch_tags ); is_deeply( \@tags, [ [ 0, 3, begin => 1, end => 1 ], [ 3, 1, begin => 1 ], [ 4, 6, begin => 1, middle => 1 ], [ 10, 6, begin => 1 ], ], 'tags list after second prepend' ); done_testing; String-Tagged-0.16/t/31matches.t000444001750001750 70713454124632 15165 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Here is %s with %d" ) ->apply_tag( 3, 7, tag => "value" ); my @subs = map { [ $_->str, $_->get_tags_at( 0 ) ] } $str->matches( qr/\S+/ ); is_deeply( \@subs, [ [ "Here", {} ], [ "is", { tag => "value" } ], [ "%s", { tag => "value" } ], [ "with", {} ], [ "%d", {} ] ], 'Result of ->matches' ); done_testing; String-Tagged-0.16/t/32split.t000444001750001750 277213454124632 14721 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; { my $str = String::Tagged->new( "A message with\nlinefeeds" ); my @lines = $str->split( qr/\n/ ); is( scalar @lines, 2, '->split returns 2 elements' ); isa_ok( $lines[0], "String::Tagged", '->split returns String::Tagged instances' ); is_deeply( [ map { $_->str } @lines ], [ "A message with", "linefeeds" ], '->split returns correct strings' ); } # split preserves tags (RT100409) { my $str = String::Tagged->new ->append ( "one " ) ->append_tagged( "two", tag => 1 ) ->append ( " three\nfour" ); my @lines = $str->split( qr/\n/ ); my $e = $lines[0]->get_tag_extent( index( $str->str, "two" ), "tag" ); is( $e->start, 4, '$e->start of copied tag' ); is( $e->length, 3, '$e->length of copied tag' ); } # split with limit { my $str = String::Tagged->new( "command with some arguments" ); my @parts = $str->split( qr/\s+/, 2 ); is( scalar @parts, 2, '->split with limit returns only that limit' ); is_deeply( [ map { $_->str } @parts ], [ "command", "with some arguments" ], '->split with limit returns correct strings' ); } # split with captures { my $str = String::Tagged->new( "abc12def345" ); my @parts = $str->split( qr/(\d+)/ ); is( scalar @parts, 4, '->split with capture returns captures too' ); is_deeply( [ map { $_->str } @parts ], [qw( abc 12 def 345 )], '->split with capture returns correct strings' ); } done_testing; String-Tagged-0.16/t/33sprintf.t000444001750001750 424213454124632 15246 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; # base case { my $str = String::Tagged->from_sprintf( "%s and %d", "strings", 123 ); is( $str->str, "strings and 123", 'base case' ); $str = String::Tagged->from_sprintf( "Can print literal %% mark" ); is( $str->str, "Can print literal % mark", 'literal %' ); $str = String::Tagged->from_sprintf( "%d and %s", 456, "order" ); is( $str->str, "456 and order", 'base case preserves order' ); } # a tagged %s argument { my $str = String::Tagged->from_sprintf( "A %s here", String::Tagged->new_tagged( "string", tagged => 1 ) ); is( $str->str, "A string here", 'tagged argument value' ); ok( $str->get_tag_extent( 2, "tagged" ), 'tagged argument has tag in result' ); } # %s padding { is( String::Tagged->from_sprintf( "%20s", "value" )->str, ' value', '%s padding right-aligned' ); is( String::Tagged->from_sprintf( "%-20s", "value" )->str, 'value ', '%s padding left-aligned' ); is( String::Tagged->from_sprintf( "%5s", "long value" )->str, 'long value', '%s padding excess' ); is( String::Tagged->from_sprintf( "%-*s", 10, "value" )->str, 'value ', '%s padding dynamic size' ); } # %s truncation { is( String::Tagged->from_sprintf( "%.3s", "value" )->str, 'val', '%s truncation' ); is( String::Tagged->from_sprintf( "%.*s", 2, "value" )->str, 'va', '%s truncation dynamic size' ); } # tagged format { my $str = String::Tagged->from_sprintf( String::Tagged->new_tagged( "A tagged format", tagged => 1 ) ); is( $str->str, "A tagged format", 'tagged format value' ); ok( $str->get_tag_extent( 2, "tagged" ), 'tagged format has tag in result' ); $str = String::Tagged->new_tagged( "Single %s here", span => 1 ) ->sprintf( "tag" ); is( $str->str, "Single tag here", 'tagged format with conversion' ); my $e; ok( $e = $str->get_tag_extent( 0, "span" ), 'tagged format with conversion has tag in result' ) and do { is( $e->end, length $str, 'tag from format covers the entire result' ); }; } done_testing; String-Tagged-0.16/t/40operators.t000444001750001750 67413454124632 15562 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; use String::Tagged; my $str = String::Tagged->new( "Hello, world" ); is( sprintf( "%s", $str ), "Hello, world", 'STRINGify operator' ); my $s = $str . "!"; is( $s->str, "Hello, world!", 'concat after' ); $s = "I say, " . $str; is( $s->str, "I say, Hello, world", 'concat before' ); $str .= "!"; is( $str->str, "Hello, world!", 'str after .= operator' ); done_testing; String-Tagged-0.16/t/50subclass.t000444001750001750 103613454124632 15375 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; package STSubclass; use base qw( String::Tagged ); package main; my $foo = STSubclass->new( "foo" ); isa_ok( $foo, "STSubclass", '$foo' ); isa_ok( $foo, "String::Tagged", '$foo' ); isa_ok( $foo . "bar", "STSubclass", 'concat plain after' ); isa_ok( "bar" . $foo, "STSubclass", 'concat plain before' ); my $bar = String::Tagged->new( "bar" ); isa_ok( $foo . $bar, "STSubclass", 'concat plain after' ); isa_ok( $bar . $foo, "STSubclass", 'concat plain before' ); done_testing; String-Tagged-0.16/t/99pod.t000444001750001750 25713454124632 14341 0ustar00leoleo000000000000#!/usr/bin/perl use strict; use warnings; use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok();