HTML-Tidy5-1.06/000755 000766 000024 00000000000 13555120030 013271 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/perlcriticrc000644 000766 000024 00000003232 13555115664 015722 0ustar00andystaff000000 000000 verbose = %f:%l:%c %m (%p)\n %r\n\n [-BuiltinFunctions::ProhibitBooleanGrep] [-CodeLayout::ProhibitParensWithBuiltins] [CodeLayout::ProhibitHardTabs] allow_leading_tabs = 0 [-CodeLayout::RequireASCII] # t/unicode.t uses UTF8. [-CodeLayout::RequireTidyCode] # Never works for me. [Compatibility::PodMinimumVersion] above_version = 5.008008 [-ControlStructures::ProhibitPostfixControls] [-Documentation::PodSpelling] [-Documentation::RequirePodAtEnd] [-Documentation::RequirePodSections] [-Editor::RequireEmacsFileVariables] # We're a utility. We're OK with die. [-ErrorHandling::RequireCarping] [-ErrorHandling::RequireUseOfExceptions] [-InputOutput::ProhibitJoinedReadline] [-InputOutput::RequireCheckedSyscalls] functions = open opendir read readline readdir close closedir [-Modules::ProhibitAutomaticExportation] [-Modules::RequireExplicitInclusion] [-Modules::RequireVersionVar] [-RegularExpressions::ProhibitComplexRegexes] [-RegularExpressions::ProhibitEscapedMetacharacters] [-RegularExpressions::RequireDotMatchAnything] [-RegularExpressions::RequireExtendedFormatting] [-RegularExpressions::RequireLineBoundaryMatching] [-Subroutines::ProhibitCallsToUndeclaredSubs] [-Subroutines::ProhibitCallsToUnexportedSubs] # Too many subs coming in from Test::More [-Subroutines::RequireArgUnpacking] [-ValuesAndExpressions::ProhibitConstantPragma] [-ValuesAndExpressions::ProhibitEmptyQuotes] [-ValuesAndExpressions::ProhibitMagicNumbers] [-ValuesAndExpressions::ProhibitNoisyQuotes] [-ValuesAndExpressions::RequireConstantOnLeftSideOfEquality] [-ValuesAndExpressions::RequireInterpolationOfMetachars] [-Variables::ProhibitPackageVars] [-Variables::ProhibitPunctuationVars] HTML-Tidy5-1.06/bin/000755 000766 000024 00000000000 13555120030 014041 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/Changes000644 000766 000024 00000002474 13555120023 014575 0ustar00andystaff000000 000000 Revision history for Perl extension HTML::Tidy5. The HTML::Tidy5 module is an HTML5-compatible upgrade from the HTML::Tidy module. 1.06 Sat Oct 26 14:35:44 CDT 2019 ==================================== Fixed a potentially failing test. Thanks, Pino Toscano. Fixed a failing test under macOS. (GH #13) 1.04 Fri Apr 20 16:45:00 CDT 2018 ==================================== [ENHANCEMENTS] Added html_fragment_tidy_ok() for running tidy checks against HTML that is not a full document. 1.02 Wed Apr 4 15:06:44 CDT 2018 ==================================== [FIXES] html_tidy_ok() now clears the messages in a passed-in HTML::Tidy5 object before using it to validate the HTML. 1.01_01 Mon Mar 26 16:37:13 CDT 2018 ==================================== [ENHANCEMENTS] Add Test::HTML::Tidy5 and the html_tidy_ok() function for testing HTML in your .t files. [FIXES] Fixed test failure. 1.00 Tue Mar 20 16:51:52 CDT 2018 ==================================== First production release. HTML::Tidy5 relies on version 5.6.0 of the tidy-html5 library. See http://html-tidy.org/ for information about the tidy-html5 library. [INCOMPATIBILITIES WITH HTML::Tidy] In HTML::Tidy, HTML::Tidy::Message objects had overloaded string context. In HTML::Tidy5, you must explicitly call the ->as_string method for HTML::Tidy5::Message. HTML-Tidy5-1.06/MANIFEST000644 000766 000024 00000001527 13555120030 014427 0ustar00andystaff000000 000000 bin/webtidy5 Changes lib/HTML/Tidy5.pm lib/HTML/Tidy5/Message.pm lib/Test/HTML/Tidy5.pm Makefile.PL MANIFEST perlcriticrc ppport.h README.md Tidy5.xs t/00-load.t t/cfg-for-parse.cfg t/cfg-for-parse.t t/clean.t t/clean-crash.t t/drop-empty-elements.t t/extra-quote.t t/html_fragment_tidy_ok.t t/html_tidy_ok.t t/ignore-text.t t/ignore.t t/illegal-options.t t/levels.t t/message.t t/new-tags.t t/opt-00.t t/parse.t t/parse-errors.t t/perfect.t t/roundtrip.t t/segfault-form.t t/show-info.t t/simple.t t/too-many-titles.t t/unicode.html t/unicode.t t/unicode-nbsp.t t/venus.cfg t/venus.html t/venus.t t/version.t t/wordwrap.cfg t/wordwrap.t t/TidyTestUtils.pm xt/pod.t xt/pod-coverage.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) HTML-Tidy5-1.06/ppport.h000644 000766 000024 00000063506 13555115664 015021 0ustar00andystaff000000 000000 /* ppport.h -- Perl/Pollution/Portability Version 2.007 * * Automatically Created by Devel::PPPort on Mon Feb 16 21:21:31 2004 * * Do NOT edit this file directly! -- Edit PPPort.pm instead. * * Version 2.x, Copyright (C) 2001, Paul Marquess. * Version 1.x, Copyright (C) 1999, Kenneth Albanowski. * This code may be used and distributed under the same license as any * version of Perl. * * This version of ppport.h is designed to support operation with Perl * installations back to 5.004, and has been tested up to 5.8.1. * * If this version of ppport.h is failing during the compilation of this * module, please check if a newer version of Devel::PPPort is available * on CPAN before sending a bug report. * * If you are using the latest version of Devel::PPPort and it is failing * during compilation of this module, please send a report to perlbug@perl.com * * Include all following information: * * 1. The complete output from running "perl -V" * * 2. This file. * * 3. The name & version of the module you were trying to build. * * 4. A full log of the build that failed. * * 5. Any other information that you think could be relevant. * * * For the latest version of this code, please retreive the Devel::PPPort * module from CPAN. * */ /* * In order for a Perl extension module to be as portable as possible * across differing versions of Perl itself, certain steps need to be taken. * Including this header is the first major one, then using dTHR is all the * appropriate places and using a PL_ prefix to refer to global Perl * variables is the second. * */ /* If you use one of a few functions that were not present in earlier * versions of Perl, please add a define before the inclusion of ppport.h * for a static include, or use the GLOBAL request in a single module to * produce a global definition that can be referenced from the other * modules. * * Function: Static define: Extern define: * newCONSTSUB() NEED_newCONSTSUB NEED_newCONSTSUB_GLOBAL * */ /* To verify whether ppport.h is needed for your module, and whether any * special defines should be used, ppport.h can be run through Perl to check * your source code. Simply say: * * perl -x ppport.h *.c *.h *.xs foo/bar*.c [etc] * * The result will be a list of patches suggesting changes that should at * least be acceptable, if not necessarily the most efficient solution, or a * fix for all possible problems. It won't catch where dTHR is needed, and * doesn't attempt to account for global macro or function definitions, * nested includes, typemaps, etc. * * In order to test for the need of dTHR, please try your module under a * recent version of Perl that has threading compiled-in. * */ /* #!/usr/bin/perl @ARGV = ("*.xs") if !@ARGV; %badmacros = %funcs = %macros = (); $replace = 0; foreach () { $funcs{$1} = 1 if /Provide:\s+(\S+)/; $macros{$1} = 1 if /^#\s*define\s+([a-zA-Z0-9_]+)/; $replace = $1 if /Replace:\s+(\d+)/; $badmacros{$2}=$1 if $replace and /^#\s*define\s+([a-zA-Z0-9_]+).*?\s+([a-zA-Z0-9_]+)/; $badmacros{$1}=$2 if /Replace (\S+) with (\S+)/; } foreach $filename (map(glob($_),@ARGV)) { unless (open(IN, "<$filename")) { warn "Unable to read from $file: $!\n"; next; } print "Scanning $filename...\n"; $c = ""; while () { $c .= $_; } close(IN); $need_include = 0; %add_func = (); $changes = 0; $has_include = ($c =~ /#.*include.*ppport/m); foreach $func (keys %funcs) { if ($c =~ /#.*define.*\bNEED_$func(_GLOBAL)?\b/m) { if ($c !~ /\b$func\b/m) { print "If $func isn't needed, you don't need to request it.\n" if $changes += ($c =~ s/^.*#.*define.*\bNEED_$func\b.*\n//m); } else { print "Uses $func\n"; $need_include = 1; } } else { if ($c =~ /\b$func\b/m) { $add_func{$func} =1 ; print "Uses $func\n"; $need_include = 1; } } } if (not $need_include) { foreach $macro (keys %macros) { if ($c =~ /\b$macro\b/m) { print "Uses $macro\n"; $need_include = 1; } } } foreach $badmacro (keys %badmacros) { if ($c =~ /\b$badmacro\b/m) { $changes += ($c =~ s/\b$badmacro\b/$badmacros{$badmacro}/gm); print "Uses $badmacros{$badmacro} (instead of $badmacro)\n"; $need_include = 1; } } if (scalar(keys %add_func) or $need_include != $has_include) { if (!$has_include) { $inc = join('',map("#define NEED_$_\n", sort keys %add_func)). "#include \"ppport.h\"\n"; $c = "$inc$c" unless $c =~ s/#.*include.*XSUB.*\n/$&$inc/m; } elsif (keys %add_func) { $inc = join('',map("#define NEED_$_\n", sort keys %add_func)); $c = "$inc$c" unless $c =~ s/^.*#.*include.*ppport.*$/$inc$&/m; } if (!$need_include) { print "Doesn't seem to need ppport.h.\n"; $c =~ s/^.*#.*include.*ppport.*\n//m; } $changes++; } if ($changes) { open(OUT,">/tmp/ppport.h.$$"); print OUT $c; close(OUT); open(DIFF, "diff -u $filename /tmp/ppport.h.$$|"); while () { s!/tmp/ppport\.h\.$$!$filename.patched!; print STDOUT; } close(DIFF); unlink("/tmp/ppport.h.$$"); } else { print "Looks OK\n"; } } __DATA__ */ #ifndef _P_P_PORTABILITY_H_ #define _P_P_PORTABILITY_H_ #ifndef PERL_REVISION # ifndef __PATCHLEVEL_H_INCLUDED__ # include # endif # if !(defined(PERL_VERSION) || (SUBVERSION > 0 && defined(PATCHLEVEL))) # include # endif # ifndef PERL_REVISION # define PERL_REVISION (5) /* Replace: 1 */ # define PERL_VERSION PATCHLEVEL # define PERL_SUBVERSION SUBVERSION /* Replace PERL_PATCHLEVEL with PERL_VERSION */ /* Replace: 0 */ # endif #endif #define PERL_BCDVERSION ((PERL_REVISION * 0x1000000L) + (PERL_VERSION * 0x1000L) + PERL_SUBVERSION) /* It is very unlikely that anyone will try to use this with Perl 6 (or greater), but who knows. */ #if PERL_REVISION != 5 # error ppport.h only works with Perl version 5 #endif /* PERL_REVISION != 5 */ #ifndef ERRSV # define ERRSV perl_get_sv("@",FALSE) #endif #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5)) /* Replace: 1 */ # define PL_Sv Sv # define PL_compiling compiling # define PL_copline copline # define PL_curcop curcop # define PL_curstash curstash # define PL_defgv defgv # define PL_dirty dirty # define PL_dowarn dowarn # define PL_hints hints # define PL_na na # define PL_perldb perldb # define PL_rsfp_filters rsfp_filters # define PL_rsfpv rsfp # define PL_stdingv stdingv # define PL_sv_no sv_no # define PL_sv_undef sv_undef # define PL_sv_yes sv_yes /* Replace: 0 */ #endif #ifdef HASATTRIBUTE # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER) # define PERL_UNUSED_DECL # else # define PERL_UNUSED_DECL __attribute__((unused)) # endif #else # define PERL_UNUSED_DECL #endif #ifndef dNOOP # define NOOP (void)0 # define dNOOP extern int Perl___notused PERL_UNUSED_DECL #endif #ifndef dTHR # define dTHR dNOOP #endif #ifndef dTHX # define dTHX dNOOP # define dTHXa(x) dNOOP # define dTHXoa(x) dNOOP #endif #ifndef pTHX # define pTHX void # define pTHX_ # define aTHX # define aTHX_ #endif /* IV could also be a quad (say, a long long), but Perls * capable of those should have IVSIZE already. */ #if !defined(IVSIZE) && defined(LONGSIZE) # define IVSIZE LONGSIZE #endif #ifndef IVSIZE # define IVSIZE 4 /* A bold guess, but the best we can make. */ #endif #ifndef UVSIZE # define UVSIZE IVSIZE #endif #ifndef NVTYPE # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) # define NVTYPE long double # else # define NVTYPE double # endif typedef NVTYPE NV; #endif #ifndef INT2PTR #if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE) # define PTRV UV # define INT2PTR(any,d) (any)(d) #else # if PTRSIZE == LONGSIZE # define PTRV unsigned long # else # define PTRV unsigned # endif # define INT2PTR(any,d) (any)(PTRV)(d) #endif #define NUM2PTR(any,d) (any)(PTRV)(d) #define PTR2IV(p) INT2PTR(IV,p) #define PTR2UV(p) INT2PTR(UV,p) #define PTR2NV(p) NUM2PTR(NV,p) #if PTRSIZE == LONGSIZE # define PTR2ul(p) (unsigned long)(p) #else # define PTR2ul(p) INT2PTR(unsigned long,p) #endif #endif /* !INT2PTR */ #ifndef boolSV # define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) #endif #ifndef gv_stashpvn # define gv_stashpvn(str,len,flags) gv_stashpv(str,flags) #endif #ifndef newSVpvn # define newSVpvn(data,len) ((len) ? newSVpv ((data), (len)) : newSVpv ("", 0)) #endif #ifndef newRV_inc /* Replace: 1 */ # define newRV_inc(sv) newRV(sv) /* Replace: 0 */ #endif /* DEFSV appears first in 5.004_56 */ #ifndef DEFSV # define DEFSV GvSV(PL_defgv) #endif #ifndef SAVE_DEFSV # define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv)) #endif #ifndef newRV_noinc # ifdef __GNUC__ # define newRV_noinc(sv) \ ({ \ SV *nsv = (SV*)newRV(sv); \ SvREFCNT_dec(sv); \ nsv; \ }) # else # if defined(USE_THREADS) static SV * newRV_noinc (SV * sv) { SV *nsv = (SV*)newRV(sv); SvREFCNT_dec(sv); return nsv; } # else # define newRV_noinc(sv) \ (PL_Sv=(SV*)newRV(sv), SvREFCNT_dec(sv), (SV*)PL_Sv) # endif # endif #endif /* Provide: newCONSTSUB */ /* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */ #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION < 63)) #if defined(NEED_newCONSTSUB) static #else extern void newCONSTSUB(HV * stash, char * name, SV *sv); #endif #if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL) void newCONSTSUB(stash,name,sv) HV *stash; char *name; SV *sv; { U32 oldhints = PL_hints; HV *old_cop_stash = PL_curcop->cop_stash; HV *old_curstash = PL_curstash; line_t oldline = PL_curcop->cop_line; PL_curcop->cop_line = PL_copline; PL_hints &= ~HINT_BLOCK_SCOPE; if (stash) PL_curstash = PL_curcop->cop_stash = stash; newSUB( #if (PERL_VERSION < 3) || ((PERL_VERSION == 3) && (PERL_SUBVERSION < 22)) /* before 5.003_22 */ start_subparse(), #else # if (PERL_VERSION == 3) && (PERL_SUBVERSION == 22) /* 5.003_22 */ start_subparse(0), # else /* 5.003_23 onwards */ start_subparse(FALSE, 0), # endif #endif newSVOP(OP_CONST, 0, newSVpv(name,0)), newSVOP(OP_CONST, 0, &PL_sv_no), /* SvPV(&PL_sv_no) == "" -- GMB */ newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv)) ); PL_hints = oldhints; PL_curcop->cop_stash = old_cop_stash; PL_curstash = old_curstash; PL_curcop->cop_line = oldline; } #endif #endif /* newCONSTSUB */ #ifndef START_MY_CXT /* * Boilerplate macros for initializing and accessing interpreter-local * data from C. All statics in extensions should be reworked to use * this, if you want to make the extension thread-safe. See ext/re/re.xs * for an example of the use of these macros. * * Code that uses these macros is responsible for the following: * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts" * 2. Declare a typedef named my_cxt_t that is a structure that contains * all the data that needs to be interpreter-local. * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t. * 4. Use the MY_CXT_INIT macro such that it is called exactly once * (typically put in the BOOT: section). * 5. Use the members of the my_cxt_t structure everywhere as * MY_CXT.member. * 6. Use the dMY_CXT macro (a declaration) in all the functions that * access MY_CXT. */ #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \ defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT) /* This must appear in all extensions that define a my_cxt_t structure, * right after the definition (i.e. at file scope). The non-threads * case below uses it to declare the data as static. */ #define START_MY_CXT #if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 )) /* Fetches the SV that keeps the per-interpreter data. */ #define dMY_CXT_SV \ SV *my_cxt_sv = perl_get_sv(MY_CXT_KEY, FALSE) #else /* >= perl5.004_68 */ #define dMY_CXT_SV \ SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \ sizeof(MY_CXT_KEY)-1, TRUE) #endif /* < perl5.004_68 */ /* This declaration should be used within all functions that use the * interpreter-local data. */ #define dMY_CXT \ dMY_CXT_SV; \ my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv)) /* Creates and zeroes the per-interpreter data. * (We allocate my_cxtp in a Perl SV so that it will be released when * the interpreter goes away.) */ #define MY_CXT_INIT \ dMY_CXT_SV; \ /* newSV() allocates one more than needed */ \ my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ Zero(my_cxtp, 1, my_cxt_t); \ sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) /* This macro must be used to access members of the my_cxt_t structure. * e.g. MYCXT.some_data */ #define MY_CXT (*my_cxtp) /* Judicious use of these macros can reduce the number of times dMY_CXT * is used. Use is similar to pTHX, aTHX etc. */ #define pMY_CXT my_cxt_t *my_cxtp #define pMY_CXT_ pMY_CXT, #define _pMY_CXT ,pMY_CXT #define aMY_CXT my_cxtp #define aMY_CXT_ aMY_CXT, #define _aMY_CXT ,aMY_CXT #else /* single interpreter */ #define START_MY_CXT static my_cxt_t my_cxt; #define dMY_CXT_SV dNOOP #define dMY_CXT dNOOP #define MY_CXT_INIT NOOP #define MY_CXT my_cxt #define pMY_CXT void #define pMY_CXT_ #define _pMY_CXT #define aMY_CXT #define aMY_CXT_ #define _aMY_CXT #endif #endif /* START_MY_CXT */ #ifndef IVdf # if IVSIZE == LONGSIZE # define IVdf "ld" # define UVuf "lu" # define UVof "lo" # define UVxf "lx" # define UVXf "lX" # else # if IVSIZE == INTSIZE # define IVdf "d" # define UVuf "u" # define UVof "o" # define UVxf "x" # define UVXf "X" # endif # endif #endif #ifndef NVef # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \ defined(PERL_PRIfldbl) /* Not very likely, but let's try anyway. */ # define NVef PERL_PRIeldbl # define NVff PERL_PRIfldbl # define NVgf PERL_PRIgldbl # else # define NVef "e" # define NVff "f" # define NVgf "g" # endif #endif #ifndef AvFILLp /* Older perls (<=5.003) lack AvFILLp */ # define AvFILLp AvFILL #endif #ifdef SvPVbyte # if PERL_REVISION == 5 && PERL_VERSION < 7 /* SvPVbyte does not work in perl-5.6.1, borrowed version for 5.7.3 */ # undef SvPVbyte # define SvPVbyte(sv, lp) \ ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : my_sv_2pvbyte(aTHX_ sv, &lp)) static char * my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) { sv_utf8_downgrade(sv,0); return SvPV(sv,*lp); } # endif #else # define SvPVbyte SvPV #endif #ifndef SvPV_nolen # define SvPV_nolen(sv) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? SvPVX(sv) : sv_2pv_nolen(sv)) static char * sv_2pv_nolen(pTHX_ register SV *sv) { STRLEN n_a; return sv_2pv(sv, &n_a); } #endif #ifndef get_cv # define get_cv(name,create) perl_get_cv(name,create) #endif #ifndef get_sv # define get_sv(name,create) perl_get_sv(name,create) #endif #ifndef get_av # define get_av(name,create) perl_get_av(name,create) #endif #ifndef get_hv # define get_hv(name,create) perl_get_hv(name,create) #endif #ifndef call_argv # define call_argv perl_call_argv #endif #ifndef call_method # define call_method perl_call_method #endif #ifndef call_pv # define call_pv perl_call_pv #endif #ifndef call_sv # define call_sv perl_call_sv #endif #ifndef PERL_SCAN_GREATER_THAN_UV_MAX # define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 #endif #ifndef PERL_SCAN_SILENT_ILLDIGIT # define PERL_SCAN_SILENT_ILLDIGIT 0x04 #endif #ifndef PERL_SCAN_ALLOW_UNDERSCORES # define PERL_SCAN_ALLOW_UNDERSCORES 0x01 #endif #ifndef PERL_SCAN_DISALLOW_PREFIX # define PERL_SCAN_DISALLOW_PREFIX 0x02 #endif #if (PERL_VERSION >= 6) #define I32_CAST #else #define I32_CAST (I32*) #endif #ifndef grok_hex static UV _grok_hex (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_hex(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = r; return UV_MAX; } return (UV)r; } # define grok_hex(string, len, flags, result) \ _grok_hex((string), (len), (flags), (result)) #endif #ifndef grok_oct static UV _grok_oct (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_oct(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = r; return UV_MAX; } return (UV)r; } # define grok_oct(string, len, flags, result) \ _grok_oct((string), (len), (flags), (result)) #endif #ifndef grok_bin static UV _grok_bin (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_bin(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = r; return UV_MAX; } return (UV)r; } # define grok_bin(string, len, flags, result) \ _grok_bin((string), (len), (flags), (result)) #endif #ifndef IN_LOCALE # define IN_LOCALE \ (PL_curcop == &PL_compiling ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) #endif #ifndef IN_LOCALE_RUNTIME # define IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) #endif #ifndef IN_LOCALE_COMPILETIME # define IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) #endif #ifndef IS_NUMBER_IN_UV # define IS_NUMBER_IN_UV 0x01 # define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 # define IS_NUMBER_NOT_INT 0x04 # define IS_NUMBER_NEG 0x08 # define IS_NUMBER_INFINITY 0x10 # define IS_NUMBER_NAN 0x20 #endif #ifndef grok_numeric_radix # define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) #define grok_numeric_radix Perl_grok_numeric_radix bool Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) { #ifdef USE_LOCALE_NUMERIC #if (PERL_VERSION >= 6) if (PL_numeric_radix_sv && IN_LOCALE) { STRLEN len; char* radix = SvPV(PL_numeric_radix_sv, len); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #else /* pre5.6.0 perls don't have PL_numeric_radix_sv so the radix * must manually be requested from locale.h */ #include struct lconv *lc = localeconv(); char *radix = lc->decimal_point; if (radix && IN_LOCALE) { STRLEN len = strlen(radix); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #endif /* PERL_VERSION */ #endif /* USE_LOCALE_NUMERIC */ /* always try "." if numeric radix didn't match because * we may have data from different locales mixed */ if (*sp < send && **sp == '.') { ++*sp; return TRUE; } return FALSE; } #endif /* grok_numeric_radix */ #ifndef grok_number #define grok_number Perl_grok_number int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) { const char *s = pv; const char *send = pv + len; const UV max_div_10 = UV_MAX / 10; const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; while (s < send && isSPACE(*s)) s++; if (s == send) { return 0; } else if (*s == '-') { s++; numtype = IS_NUMBER_NEG; } else if (*s == '+') s++; if (s == send) return 0; /* next must be digit or the radix separator or beginning of infinity */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ UV value = *s - '0'; /* This construction seems to be more optimiser friendly. (without it gcc does the isDIGIT test and the *s - '0' separately) With it gcc on arm is managing 6 instructions (6 cycles) per digit. In theory the optimiser could deduce how far to unroll the loop before checking for overflow. */ if (++s < send) { int digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { /* Now got 9 digits, so need to check each time for overflow. */ digit = *s - '0'; while (digit >= 0 && digit <= 9 && (value < max_div_10 || (value == max_div_10 && digit <= max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; else break; } if (digit >= 0 && digit <= 9 && (s < send)) { /* value overflowed. skip the remaining digits, don't worry about setting *valuep. */ do { s++; } while (s < send && isDIGIT(*s)); numtype |= IS_NUMBER_GREATER_THAN_UV_MAX; goto skip_value; } } } } } } } } } } } } } } } } } } numtype |= IS_NUMBER_IN_UV; if (valuep) *valuep = value; skip_value: if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT; while (s < send && isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ /* no digits before the radix means we need digits after it */ if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); if (valuep) { /* integer approximation is valid - it's 0. */ *valuep = 0; } } else return 0; } else if (*s == 'I' || *s == 'i') { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; s++; if (s < send && (*s == 'I' || *s == 'i')) { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; s++; if (s == send || (*s != 'T' && *s != 't')) return 0; s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; s++; } sawinf = 1; } else if (*s == 'N' || *s == 'n') { /* XXX TODO: There are signaling NaNs and quiet NaNs. */ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; sawnan = 1; } else return 0; if (sawinf) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { /* The only flag we keep is sign. Blow away any "it's UV" */ numtype &= IS_NUMBER_NEG; numtype |= IS_NUMBER_NOT_INT; s++; if (s < send && (*s == '-' || *s == '+')) s++; if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); } else return 0; } } while (s < send && isSPACE(*s)) s++; if (s >= send) return numtype; if (len == 10 && memEQ(pv, "0 but true", 10)) { if (valuep) *valuep = 0; return IS_NUMBER_IN_UV; } return 0; } #endif /* grok_number */ #endif /* _P_P_PORTABILITY_H_ */ /* End of File ppport.h */ HTML-Tidy5-1.06/t/000755 000766 000024 00000000000 13555120030 013534 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/xt/000755 000766 000024 00000000000 13555120030 013724 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/README.md000644 000766 000024 00000001605 13555115664 014573 0ustar00andystaff000000 000000 HTML::Tidy5 =========== HTML::Tidy5 is an HTML checker in a handy dandy object. PREREQUISITES ============= HTML::Tidy5 does very little work. The real work of HTML::Tidy5 is done by the [html-tidy][1] library which is written in C. To use HTML::Tidy5, you must install html-tidy. Your package manager probably has it, either as "tidy" or "html-tidy" or "libtidy". If there's an option to get a "-devel" package, get that, too, because Perl needs the header files in it. INSTALLATION ============ Once you have libtidy installed via one of the previous methods, install HTML::Tidy5 like any standard Perl module. perl Makefile.PL make make test make install COPYRIGHT AND LICENSE ===================== Copyright (C) 2004-2018 by Andy Lester This library is free software. It may be redistributed and modified under the Artistic License v2.0. [1]: http://html-tidy.org/ HTML-Tidy5-1.06/META.yml000644 000766 000024 00000001666 13555120030 014553 0ustar00andystaff000000 000000 --- abstract: 'HTML validation in a Perl object' author: - 'Andy Lester ' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010' license: artistic_2 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: HTML-Tidy5 no_index: directory: - t - inc requires: Carp: '0' Encode: '0' Exporter: '0' Getopt::Long: '0' Test::Builder: '0' Test::Exception: '0' Test::More: '0.98' constant: '0' perl: '5.010001' resources: bugtracker: http://github.com/petdance/html-tidy5/issues homepage: http://github.com/petdance/html-tidy5 license: http://www.opensource.org/licenses/artistic-license-2.0.php repository: http://github.com/petdance/html-tidy5 version: '1.06' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' HTML-Tidy5-1.06/lib/000755 000766 000024 00000000000 13555120030 014037 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/Makefile.PL000644 000766 000024 00000004125 13555115664 015266 0ustar00andystaff000000 000000 #!/usr/bin/perl package main; use 5.010001; use strict; use warnings; use ExtUtils::MakeMaker 6.48; use ExtUtils::Liblist; use Config; my $libs = $^O eq 'freebsd' ? '-ltidy5' : '-ltidy'; my $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy"; if ( not eval { require LWP::Simple; 1; } ) { print <<'EOF'; NOTE: It seems that you don't have LWP::Simple installed. The webtidy program will not be able to retrieve web pages. EOF } my $parms = { NAME => 'HTML::Tidy5', AUTHOR => 'Andy Lester ', VERSION_FROM => 'lib/HTML/Tidy5.pm', ABSTRACT_FROM => 'lib/HTML/Tidy5.pm', PREREQ_PM => { 'Encode' => 0, # for tests 'Exporter' => 0, 'Getopt::Long' => 0, # in webtidy 'Test::More' => '0.98', # For subtest() 'Test::Builder' => 0, 'Test::Exception' => 0, 'Carp' => 0, 'constant' => 0, }, MIN_PERL_VERSION => 5.010001, LICENSE => 'artistic_2', META_MERGE => { resources => { license => 'http://www.opensource.org/licenses/artistic-license-2.0.php', homepage => 'http://github.com/petdance/html-tidy5', bugtracker => 'http://github.com/petdance/html-tidy5/issues', repository => 'http://github.com/petdance/html-tidy5', }, }, LIBS => [$libs], NEEDS_LINKING => 1, INC => $inc, EXE_FILES => [qw(bin/webtidy5)], dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'HTML-Tidy5-*' }, }; WriteMakefile( %{$parms} ); sub MY::postamble { ## no critic ( Subroutines::ProhibitQualifiedSubDeclarations ) return <<'MAKE_FRAG'; .PHONY: tags critic tags: ctags -f tags --recurse --totals \ --exclude=blib --exclude=t/lib \ --exclude=.svn --exclude='*~' \ --languages=C,Perl --langmap=Perl:+.t \ . critic: perlcritic -1 \ -profile perlcriticrc \ . MAKE_FRAG } HTML-Tidy5-1.06/Tidy5.xs000644 000766 000024 00000012273 13555115664 014671 0ustar00andystaff000000 000000 #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include #include #include #include static void _load_config_hash(TidyDoc tdoc, HV *tidy_options) { HE *entry; (void) hv_iterinit(tidy_options); while ( (entry = hv_iternext(tidy_options)) != NULL ) { I32 key_len; const char * const key = hv_iterkey(entry,&key_len); const TidyOption opt = tidyGetOptionByName(tdoc,key); if (!opt) { warn( "HTML::Tidy: Unrecognized option: \"%s\"\n",key ); } else { const TidyOptionId id = tidyOptGetId(opt); SV * const sv_data = hv_iterval(tidy_options,entry); STRLEN data_len; const char * const data = SvPV(sv_data,data_len); if ( ! tidyOptSetValue(tdoc,id,data) ) { warn( "HTML::Tidy: Can't set option: \"%s\" to \"%s\"\n", key, data ); } } } } MODULE = HTML::Tidy5 PACKAGE = HTML::Tidy5 PROTOTYPES: ENABLE void _tidy_messages(input, configfile, tidy_options) INPUT: const char *input const char *configfile HV *tidy_options PREINIT: TidyBuffer errbuf = {0}; TidyDoc tdoc = tidyCreate(); /* Initialize "document" */ const char* newline; int rc = 0; PPCODE: tidyBufInit(&errbuf); rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 ); if ( (rc >= 0 ) && configfile && *configfile ) { rc = tidyLoadConfig( tdoc, configfile ); } if ( rc >= 0 ) { _load_config_hash(tdoc,tidy_options); } if ( rc >= 0 ) { /* Capture diagnostics */ rc = tidySetErrorBuffer( tdoc, &errbuf ); } if ( rc >= 0 ) { /* Parse the input */ rc = tidyParseString( tdoc, input ); } if ( rc >= 0 && errbuf.bp) { XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) ); /* TODO: Make this a function */ switch ( tidyOptGetInt(tdoc,TidyNewline) ) { case TidyLF: newline = "\n"; break; case TidyCR: newline = "\r"; break; default: newline = "\r\n"; break; } XPUSHs( sv_2mortal(newSVpv(newline, 0)) ); } else { rc = -1; } if ( errbuf.bp ) tidyBufFree( &errbuf ); tidyRelease( tdoc ); if ( rc < 0 ) { XSRETURN_UNDEF; } void _tidy_clean(input, configfile, tidy_options) INPUT: const char *input const char *configfile HV *tidy_options PREINIT: TidyBuffer errbuf = {0}; TidyBuffer output = {0}; TidyDoc tdoc = tidyCreate(); /* Initialize "document" */ const char* newline; int rc = 0; PPCODE: tidyBufInit(&output); tidyBufInit(&errbuf); /* Set our default first. */ /* Don't word-wrap */ rc = ( tidyOptSetInt( tdoc, TidyWrapLen, 0 ) ? rc : -1 ); if ( (rc >= 0 ) && configfile && *configfile ) { rc = tidyLoadConfig( tdoc, configfile ); } /* XXX I think this cascade is a bug waiting to happen */ if ( rc >= 0 ) { rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 ); } if ( rc >= 0 ) { _load_config_hash( tdoc, tidy_options ); } if ( rc >= 0 ) { rc = tidySetErrorBuffer( tdoc, &errbuf ); /* Capture diagnostics */ } if ( rc >= 0 ) { rc = tidyParseString( tdoc, input ); /* Parse the input */ } if ( rc >= 0 ) { rc = tidyCleanAndRepair(tdoc); } if ( rc > 1 ) { rc = ( tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1 ); } if ( rc >= 0) { rc = tidySaveBuffer( tdoc, &output ); } if ( rc >= 0) { rc = tidyRunDiagnostics( tdoc ); } if ( rc >= 0 && output.bp && errbuf.bp ) { XPUSHs( sv_2mortal(newSVpvn((char *)output.bp, output.size)) ); XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) ); /* TODO: Hoist this into a function */ switch ( tidyOptGetInt(tdoc,TidyNewline) ) { case TidyLF: newline = "\n"; break; case TidyCR: newline = "\r"; break; default: newline = "\r\n"; break; } XPUSHs( sv_2mortal(newSVpv(newline, 0)) ); } else { rc = -1; } tidyBufFree( &output ); tidyBufFree( &errbuf ); tidyRelease( tdoc ); if ( rc < 0 ) { XSRETURN_UNDEF; } SV* _tidy_library_version() PREINIT: const char* version; CODE: version = tidyLibraryVersion(); RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */ OUTPUT: RETVAL HTML-Tidy5-1.06/META.json000644 000766 000024 00000003030 13555120030 014706 0ustar00andystaff000000 000000 { "abstract" : "HTML validation in a Perl object", "author" : [ "Andy Lester " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010", "license" : [ "artistic_2" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "HTML-Tidy5", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp" : "0", "Encode" : "0", "Exporter" : "0", "Getopt::Long" : "0", "Test::Builder" : "0", "Test::Exception" : "0", "Test::More" : "0.98", "constant" : "0", "perl" : "5.010001" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "http://github.com/petdance/html-tidy5/issues" }, "homepage" : "http://github.com/petdance/html-tidy5", "license" : [ "http://www.opensource.org/licenses/artistic-license-2.0.php" ], "repository" : { "url" : "http://github.com/petdance/html-tidy5" } }, "version" : "1.06", "x_serialization_backend" : "JSON::PP version 4.02" } HTML-Tidy5-1.06/lib/Test/000755 000766 000024 00000000000 13555120030 014756 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/lib/HTML/000755 000766 000024 00000000000 13555120030 014603 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/lib/HTML/Tidy5.pm000644 000766 000024 00000025165 13555117753 016172 0ustar00andystaff000000 000000 package HTML::Tidy5; use 5.010001; use strict; use warnings; use Carp (); use HTML::Tidy5::Message; =head1 NAME HTML::Tidy5 - HTML validation in a Perl object =head1 VERSION Version 1.06 =cut our $VERSION = '1.06'; =head1 SYNOPSIS use HTML::Tidy5; my $tidy = HTML::Tidy5->new( {config_file => 'path/to/config'} ); $tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO ); $tidy->parse( "foo.html", $contents_of_foo ); for my $message ( $tidy->messages ) { print $message->as_string; } =head1 DESCRIPTION C is an HTML checker in a handy dandy object. It's meant as a replacement for L. If you're currently an L user looking to migrate, see the section L. =head1 EXPORTS Message types C, C and C. Everything else is an object method. =cut use base 'Exporter'; use constant TIDY_ERROR => 3; use constant TIDY_WARNING => 2; use constant TIDY_INFO => 1; our @EXPORT = qw( TIDY_ERROR TIDY_WARNING TIDY_INFO ); =head1 METHODS =head2 new() Create an HTML::Tidy5 object. my $tidy = HTML::Tidy5->new(); Optionally you can give a hashref of configuration parms. my $tidy = HTML::Tidy5->new( {config_file => 'path/to/tidy.cfg'} ); This configuration file will be read and used when you clean or parse an HTML file. You can also pass options directly to tidy. my $tidy = HTML::Tidy5->new( { output_xhtml => 1, tidy_mark => 0, } ); See C for the list of options supported by tidy. The following options are not supported by C: =over 4 =item * quiet =back =cut sub new { my $class = shift; my $args = shift || {}; my @unsupported_options = qw( force-output gnu-emacs-file gnu-emacs keep-time quiet slide-style write-back ); # REVIEW perhaps a list of supported options would be better my $self = bless { messages => [], ignore_type => [], ignore_text => [], config_file => '', tidy_options => {}, }, $class; for my $key (keys %{$args} ) { if ($key eq 'config_file') { $self->{config_file} = $args->{$key}; next; } my $newkey = $key; $newkey =~ tr/_/-/; if ( grep {$newkey eq $_} @unsupported_options ) { Carp::croak( "Unsupported option: $newkey" ); } $self->{tidy_options}->{$newkey} = $args->{$key}; } return $self; } =head2 messages() Returns the messages accumulated. =cut sub messages { my $self = shift; return @{$self->{messages}}; } =head2 clear_messages() Clears the list of messages, in case you want to print and clear, print and clear. If you don't clear the messages, then each time you call L you'll be accumulating more in the list. =cut sub clear_messages { my $self = shift; $self->{messages} = []; return; } =head2 ignore( parm => value [, parm => value ] ) Specify types of messages to ignore. Note that the ignore flags must be set B calling C. You can call C as many times as necessary to set up all your restrictions; the options will stack up. =over 4 =item * type => TIDY_INFO|TIDY_WARNING|TIDY_ERROR Specifies the type of messages you want to ignore, either info or warnings or errors. If you wanted, you could call ignore on all three and get no messages at all. $tidy->ignore( type => TIDY_WARNING ); =item * text => qr/regex/ =item * text => [ qr/regex1/, qr/regex2/, ... ] Checks the text of the message against the specified regex or regexes, and ignores the message if there's a match. The value for the I parm may be either a regex, or a reference to a list of regexes. $tidy->ignore( text => qr/DOCTYPE/ ); $tidy->ignore( text => [ qr/unsupported/, qr/proprietary/i ] ); =back =cut sub ignore { my $self = shift; my @parms = @_; while ( @parms ) { my $parm = shift @parms; my $value = shift @parms; my @values = ref($value) eq 'ARRAY' ? @{$value} : ($value); Carp::croak( qq{Invalid ignore type of "$parm"} ) unless ($parm eq 'text') or ($parm eq 'type'); push( @{$self->{"ignore_$parm"}}, @values ); } # while return; } # ignore =head2 parse( $filename, $str [, $str...] ) Parses a string, or list of strings, that make up a single HTML file. The I<$filename> parm is only used as an identifier for your use. The file is not actually read and opened. Returns true if all went OK, or false if there was some problem calling tidy, or parsing tidy's output. =cut sub parse { my $self = shift; my $filename = shift; if (@_ == 0) { Carp::croak('Usage: parse($filename,$str [, $str...])'); } my $html = join( '', @_ ); utf8::encode($html) if utf8::is_utf8($html); my ($errorblock,$newline) = _tidy_messages( $html, $self->{config_file}, $self->{tidy_options} ); return 1 unless defined $errorblock; utf8::decode($errorblock); return !$self->_parse_errors($filename, $errorblock, $newline); } sub _parse_errors { my $self = shift; my $filename = shift; my $errs = shift; my $newline = shift; my $parse_errors; my @lines = split( /$newline/, $errs ); for my $line ( @lines ) { chomp $line; my $message; if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error|Info): (.+)$/ ) { ## no critic ( ControlStructures::ProhibitCascadingIfElse ) my ($line, $col, $type, $text) = ($1, $2, $3, $4); $type = ($type eq 'Warning') ? TIDY_WARNING : ($type eq 'Info') ? TIDY_INFO : TIDY_ERROR; $message = HTML::Tidy5::Message->new( $filename, $type, $line, $col, $text ); } elsif ( $line =~ m/^Info: (.+)$/ ) { # Info line we don't want my $text = $1; $message = HTML::Tidy5::Message->new( $filename, TIDY_INFO, undef, undef, $text ); } elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) { # Summary line we don't want # We should take these counts from the summary and make sure they match what we parsed. } elsif ( $line eq 'No warnings or errors were found.' ) { # Summary line we don't want } elsif ( $line eq 'This document has errors that must be fixed before' ) { # Summary line we don't want } elsif ( $line eq 'using HTML Tidy to generate a tidied up version.' ) { # Summary line we don't want } elsif ( $line =~ m/^\s*$/ ) { # Blank line we don't want } else { Carp::carp "HTML::Tidy5: Unknown error type: $line"; ++$parse_errors; } push( @{$self->{messages}}, $message ) if $message && $self->_is_keeper( $message ); } # for return $parse_errors; } =head2 clean( $str [, $str...] ) Cleans a string, or list of strings, that make up a single HTML file. Returns the cleaned string as a single string. =cut sub clean { my $self = shift; if (@_ == 0) { Carp::croak('Usage: clean($str [, $str...])'); } my $text = join( '', @_ ); utf8::encode($text) if utf8::is_utf8($text); if ( defined $text ) { $text .= "\n"; } my ($cleaned, $errbuf, $newline) = _tidy_clean( $text, $self->{config_file}, $self->{tidy_options}); utf8::decode($cleaned); utf8::decode($errbuf); $self->_parse_errors('', $errbuf, $newline); return $cleaned; } # Tells whether a given message object is one that we should keep. sub _is_keeper { my $self = shift; my $message = shift; my @ignore_types = @{$self->{ignore_type}}; if ( @ignore_types ) { return 0 if grep { $message->type == $_ } @ignore_types; } my @ignore_texts = @{$self->{ignore_text}}; if ( @ignore_texts ) { return 0 if grep { $message->text =~ $_ } @ignore_texts; } return 1; } =head2 tidy_library_version() Returns the version of the underling tidy library. =cut sub tidy_library_version { my $version_str = _tidy_library_version(); return $version_str; } require XSLoader; XSLoader::load('HTML::Tidy5', $VERSION); 1; __END__ =head1 INSTALLING TIDY C requires that the C be installed on your system. You can probably obtain html-tidy through your distribution's package manager (make sure you install the development package with headers). You can also check the html-tidy home page is L. =head1 CONVERTING FROM C C is different from C in a number of crucial ways. =over 4 =item * It's not pure Perl C is mostly a happy wrapper around the html-tidy library. =item * The real work is done by someone else Changes to tidy may come down the pipe that I don't have control over. That's the price we pay for having it do a darn good job. =item * It's no longer bundled with its C counterpart L came bundled with C, but L is a separate distribution. This saves the people who don't want the C framework from pulling it in, and all its prerequisite modules. =back =head1 BUGS & FEEDBACK Please report any bugs or feature requests at the issue tracker on github L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. Please do NOT use L. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc HTML::Tidy5 You can also look for information at: =over 4 =item * HTML::Tidy5's issue queue at github L =item * CPAN Ratings L =item * search.cpan.org L =item * Git source code repository L =back =head1 ACKNOWLEDGEMENTS Thanks to Rufus Cable, Jonathan Rockway, and Robert Bachmann for contributions. =head1 AUTHOR Andy Lester, C<< >> =head1 COPYRIGHT & LICENSE Copyright (C) 2005-2018 by Andy Lester This library is free software. You may modify or distribute it under the Artistic License v2.0. =cut HTML-Tidy5-1.06/lib/HTML/Tidy5/000755 000766 000024 00000000000 13555120030 015601 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/lib/HTML/Tidy5/Message.pm000644 000766 000024 00000004671 13555115664 017554 0ustar00andystaff000000 000000 package HTML::Tidy5::Message; use 5.010001; use warnings; use strict; =head1 NAME HTML::Tidy5::Message - Message object for the Tidy functionality =head1 EXPORTS None. It's all object-based. =head1 METHODS Almost everything is an accessor. =head2 new( $file, $line, $column, $text ) Create an object. It's not very exciting. C<$file> can be C or an empty string, in which case it will not appear in messages. =cut sub new { my $class = shift; my $file = shift; my $type = shift; my $line = shift || 0; my $column = shift || 0; my $text = shift; # Add an element that says what tag caused the error (B, TR, etc) # so that we can match 'em up down the road. my $self = { _file => $file, _type => $type, _line => $line, _column => $column, _text => $text, }; bless $self, $class; return $self; } =head2 as_string() Returns a nicely-formatted string for printing out to stdout or some similar user thing. =cut sub as_string { my $self = shift; my %strings = ( 1 => 'Info', 2 => 'Warning', 3 => 'Error', ); my $msg = $strings{$self->type} . ': ' . $self->text; if ( $self->line && $self->column ) { $msg = sprintf( '(%d:%d) %s', $self->line, $self->column, $msg ); } my $file = $self->file // ''; if ( $file ne '' ) { $msg = "$file $msg"; } return $msg; } =head2 file() Returns the filename of the error, as set by the caller. =head2 type() Returns the type of the error. This will either be C, or C. =head2 line() Returns the line number of the error, or 0 if there isn't an applicable line number. =head2 column() Returns the column number, or 0 if there isn't an applicable column number. =head2 text() Returns the text of the message. This does not include a type string, like "Info: ". =cut sub file { my $self = shift; return $self->{_file} } sub type { my $self = shift; return $self->{_type} } sub line { my $self = shift; return $self->{_line} } sub column { my $self = shift; return $self->{_column} } sub text { my $self = shift; return $self->{_text} } =head1 COPYRIGHT & LICENSE Copyright 2005-2018 Andy Lester. This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License v2.0. =head1 AUTHOR Andy Lester, C<< >> =cut 1; # happy HTML-Tidy5-1.06/lib/Test/HTML/000755 000766 000024 00000000000 13555120030 015522 5ustar00andystaff000000 000000 HTML-Tidy5-1.06/lib/Test/HTML/Tidy5.pm000644 000766 000024 00000007777 13555117761 017121 0ustar00andystaff000000 000000 package Test::HTML::Tidy5; use 5.010001; use warnings; use strict; use Test::Builder; use Exporter; use HTML::Tidy5; use parent 'Exporter'; our @EXPORT_OK = qw( html_tidy_ok html_fragment_tidy_ok ); our @EXPORT = @EXPORT_OK; =head1 NAME Test::HTML::Tidy5 - Test::More-style wrapper around HTML::Tidy5 =head1 VERSION Version 1.06 =cut our $VERSION = '1.06'; my $TB = Test::Builder->new; =head1 SYNOPSIS use Test::HTML::Tidy5 tests => 4; my $table = build_display_table(); html_tidy_ok( $table, 'Built display table properly' ); =head1 DESCRIPTION This module provides a few convenience methods for testing exception based code. It is built with L and plays happily with L and friends. If you are not already familiar with L now would be the time to go take a look. =head1 EXPORT C =cut sub import { my $self = shift; my $pack = caller; $TB->exported_to($pack); $TB->plan(@_); $self->export_to_level(1, $self, @EXPORT); return; } =head2 html_tidy_ok( [$tidy, ] $html, $name ) Checks to see if C<$html> is a valid HTML document. If you pass an HTML::Tidy5 object, C will use that for its settings. my $tidy = HTML::Tidy5->new( {config_file => 'path/to/config'} ); $tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO ); html_tidy_ok( $tidy, $content, "Web page is OK, ignoring warnings and info' ); Otherwise, it will use the default rules. html_tidy_ok( $content, "Web page passes ALL tests" ); =cut sub html_tidy_ok { my $tidy = (ref($_[0]) eq 'HTML::Tidy5') ? shift : HTML::Tidy5->new; my $html = shift; my $name = shift; my $ok = defined $html; if ( !$ok ) { $TB->ok( 0, $name ); $TB->diag( 'Error: html_tidy_ok() got undef' ); } else { $ok = _parse_and_complain( $tidy, $html, $name, 0 ); } return $ok; } =head2 html_fragment_tidy_ok( [$tidy, ] $html, $name ) Works the same as C, but first wraps it up an HTML document. This is useful for when want to validate self-contained snippets of HTML, such as from templates or an HTML feed from a third party, and check that it is valid. =cut sub html_fragment_tidy_ok { my $tidy = (ref($_[0]) eq 'HTML::Tidy5') ? shift : HTML::Tidy5->new; my $html = shift; my $name = shift; my $ok = defined $html; if ( !$ok ) { $TB->ok( 0, $name ); $TB->diag( 'Error: html_fragment_tidy_ok() got undef' ); } else { $html = <<"HTML"; $html HTML $ok = _parse_and_complain( $tidy, $html, $name, 6 ); } return $ok; } sub _parse_and_complain { local $Test::Builder::Level = $Test::Builder::Level + 1; my $tidy = shift; my $html = shift; my $name = shift; my $offset = shift; $tidy->clear_messages(); $tidy->parse( undef, $html ); my @messages = $tidy->messages; my $nmessages = @messages; my $ok = !$nmessages; $TB->ok( $ok, $name ); if ( !$ok ) { if ( $offset ) { $_->{_line} -= $offset for @messages; } my $msg = 'Errors:'; $msg .= " $name" if $name; $TB->diag( $msg ); $TB->diag( $_->as_string ) for @messages; my $s = $nmessages == 1 ? '' : 's'; $TB->diag( "$nmessages message$s on the page" ); } return $ok; } =head1 BUGS All bugs and requests are now being handled through GitHub. https://github.com/petdance/html-lint/issues DO NOT send bug reports to http://rt.cpan.org/. =head1 COPYRIGHT & LICENSE Copyright 2005-2018 Andy Lester. This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License v2.0. http://www.opensource.org/licenses/Artistic-2.0 Please note that these modules are not products of or supported by the employers of the various contributors to the code. =head1 AUTHOR Andy Lester, C =cut 1; HTML-Tidy5-1.06/xt/pod.t000644 000766 000024 00000000155 13555115664 014715 0ustar00andystaff000000 000000 #!perl -T use warnings; use strict; use 5.010001; use Test::More; use Test::Pod 1.14; all_pod_files_ok(); HTML-Tidy5-1.06/xt/pod-coverage.t000644 000766 000024 00000000203 13555115664 016500 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use strict; use warnings; use Test::More; use Test::Pod::Coverage 1.04; all_pod_coverage_ok(); exit 0; HTML-Tidy5-1.06/t/venus.cfg000644 000766 000024 00000000761 13555115664 015402 0ustar00andystaff000000 000000 // HTML Tidy configuration file bare: yes // // This actually caused a segmentation fault in a MSHTML created doc // //clean: yes drop-proprietary-attributes: yes drop-empty-paras: yes break-before-br: yes word-2000: yes //tidy-mark: yes tidy-mark: no //add-xml-space: yes output-xml: yes enclose-text: yes enclose-block-text: yes char-encoding: utf8 force-output: yes wrap: 0 indent: yes quiet: yes //add-xml-decl: yes //gnu-emacs: yes // make sure we are using "\n", even on Win32 newline: LF HTML-Tidy5-1.06/t/ignore.t000644 000766 000024 00000010772 13555115664 015234 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use strict; use warnings; use Test::More tests => 9; use HTML::Tidy5; my $html = do { local $/ = undef; }; my @expected_warnings = split /\n/, <<'HERE'; - (1:1) Warning: missing declaration - (23:1) Warning: discarding unexpected - (24:XX) Info: value for attribute "height" missing quote marks - (24:XX) Info: value for attribute "width" missing quote marks - (24:XX) Info: value for attribute "align" missing quote marks HERE chomp @expected_warnings; my @expected_errors = split /\n/, <<'HERE'; - (23:1) Error: is not recognized! HERE chomp @expected_errors; WARNINGS_ONLY: { my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->ignore( type => TIDY_ERROR ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; munge_returned( \@returned ); is_deeply( \@returned, \@expected_warnings, 'Matching warnings' ); } ERRORS_ONLY: { my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->ignore( type => TIDY_WARNING ); $tidy->ignore( type => TIDY_INFO ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; is_deeply( \@returned, \@expected_errors, 'Matching errors' ); } DIES_ON_ERROR: { my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); my $rc = eval { $tidy->ignore( blongo => TIDY_WARNING ) }; ok( !$rc, 'eval should fail' ); like( $@, qr/^Invalid ignore type.+blongo/, 'Throws an error' ); } sub munge_returned { # non-1 line numbers are not reliable across libtidies my $returned = shift; my $start_line = shift || '-'; for my $line ( @{$returned} ) { next if $line =~ /$start_line \(\d+:1\)/; $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/; } return; } __DATA__ petdance.com: Andy Lester's Programming & Writing Andy & Amy's Pet Supplies & Dance Instruction

Perl, Programming & Writing

My Technology & publishing blog at oreillynet.com
My Perl-specific and personal blog at use.perl.org
Andy Lester's resume
Andy's Perl Pages

The Lester Family

Andy: The Dad
Amy: The Mom
Quinn: The Girl
Baxter: The Dog
Our family trees

About Andy & Amy

The Page Of Mush
People we're looking for
Article about us and how we met from the Northwest Herald

Useful Stuff

Andy's Magic Start Page: Bunches of your favorite search engines, all in one place
Add-a-page Page: Bunches of website submission forms, all in one place
Cold Fusion stuff: CFX_HTTP and other custom tags

Musical Information, etc

Naked Raygun: Bring your mom and your mom's friends, too
Action Park: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini

Other things that might be fun

When In Doubt, Use Parsley: Amy's journal
Wonder: Andy's old journal
So American It Hurts: Andy & Amy's trip to Graceland
Gallery Of Household Appliances
Media Wants: Stuff I yearn to consume
Buzzword Bingo
HTML-Tidy5-1.06/t/opt-00.t000644 000766 000024 00000002621 13555115664 014762 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use warnings; use strict; use Test::More tests => 1; use HTML::Tidy5; my $tidy = HTML::Tidy5->new({ tidy_mark => 0, add_xml_decl => 1, output_xhtml => 1, doctype => 'strict', clean => 1, css_prefix => 'myprefix', drop_empty_paras => 0, enclose_block_text => 1, escape_cdata => 1, hide_comments => 1, replace_color => 1, repeated_attributes => 'keep-first', break_before_br => 1, vertical_space => 1, newline => 'cr', }); my $input=<<'EOD';

example

Here's some ed and
eakfest MarkUp: ...

EOD my $expected =<<'EOD';

example

Here's some ed and
eakfest MarkUp: ...

EOD my @expected = split(/\n/, $expected); my $result = $tidy->clean( $input ); my @result = split(/\r/, $result); is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected'); HTML-Tidy5-1.06/t/illegal-options.t000644 000766 000024 00000001064 13555115664 017045 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use strict; use warnings; use Test::Exception; use Test::More; use HTML::Tidy5; my @unsupported_options = qw( force-output gnu-emacs-file gnu-emacs keep-time quiet slide-style write-back ); foreach my $option ( @unsupported_options ) { throws_ok { HTML::Tidy5->new( { config_file => 't/cfg-for-parse.cfg', $option => 1, }, ); } qr/\QUnsupported option: $option\E/, "option $option is not supported"; } done_testing(); HTML-Tidy5-1.06/t/drop-empty-elements.t000644 000766 000024 00000002201 13555117644 017647 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use warnings; use strict; use Test::More tests => 3; use HTML::Tidy5; use lib 't'; use TidyTestUtils; my $html = <<'HTML'; HTML subtest 'default constructor warns about empty spans' => sub { plan tests => 2; my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->parse( 'test', $html ); messages_are( $tidy, [ 'test (7:9) Warning: trimming empty ' ], ); }; subtest 'drop_empty_elements => 1 gives message' => sub { plan tests => 2; my $tidy = HTML::Tidy5->new( { drop_empty_elements => 1 } ); isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->parse( 'test', $html ); messages_are( $tidy, [ 'test (7:9) Warning: trimming empty ' ], ); }; subtest 'drop_empty_elements => 0 gives no messages' => sub { plan tests => 2; my $tidy = HTML::Tidy5->new( { drop_empty_elements => 0 } ); isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->parse( 'test', $html ); messages_are( $tidy, [] ); }; exit 0; HTML-Tidy5-1.06/t/segfault-form.t000644 000766 000024 00000000753 13555115664 016522 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use strict; use warnings; use Test::More tests => 3; use HTML::Tidy5; my $html = do { local $/ = undef; ; }; my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->clean( $html ); isa_ok( $tidy, 'HTML::Tidy5' ); pass( 'Cleaned OK' ); exit 0; __DATA__
HTML-Tidy5-1.06/t/version.t000644 000766 000024 00000000346 13555115664 015432 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use warnings; use strict; use Test::More tests => 1; use HTML::Tidy5; my $version_string = HTML::Tidy5->tidy_library_version; like( $version_string, qr/^5.\d+\.\d+$/, 'Valid version string' ); exit 0; HTML-Tidy5-1.06/t/simple.t000644 000766 000024 00000001166 13555115664 015237 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use warnings; use strict; use Test::More tests => 4; use HTML::Tidy5; my $html = join '', ; my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->ignore( type => TIDY_INFO ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @messages = $tidy->messages; is( scalar @messages, 6, 'Right number of initial messages' ); $tidy->clear_messages; is_deeply( [$tidy->messages], [], 'Cleared the messages' ); __DATA__ blah blah Barf

more blah

HTML-Tidy5-1.06/t/wordwrap.cfg000644 000766 000024 00000000042 13555115664 016077 0ustar00andystaff000000 000000 tidy-mark: 0 newline: LF wrap: 12 HTML-Tidy5-1.06/t/parse-errors.t000644 000766 000024 00000002241 13555115664 016365 0ustar00andystaff000000 000000 #!/usr/bin/perl -T use 5.010001; use strict; use warnings; use Test::Exception; use Test::More tests => 4; use HTML::Tidy5; my $tidy = HTML::Tidy5->new; my $errbuf = join( '', ); CATCH_A_WARNING: { my $stashed_warning; my $ncalls = 0; local $SIG{__WARN__} = sub { $stashed_warning = shift; ++$ncalls; }; my $ret = $tidy->_parse_errors('fake_filename.html', $errbuf, "\n"); is( $ret, 1, 'encountered 1 parsing error' ); is( scalar @{$tidy->{messages}}, 7, 'got 7 messages when parsing errors' ); # Check our warnings. is( $ncalls, 1, 'Warning should have been called exactly once' ); like( $stashed_warning, qr/HTML::Tidy5: Unknown error type: FAKE_ERROR_TYPE at/, 'Expected warning' ); } __DATA__ line 1 column 1 - Warning: missing declaration line 1 column 1 - Warning: plain text isn\'t allowed in elements line 1 column 1 - Info: previously mentioned line 1 column 1 - Warning: inserting implicit line 1 column 13 - Warning: missing
line 1 column 1 - Warning: inserting missing \'title\' element Info: Document content looks like HTML 3.2 FAKE_ERROR_TYPE Tidy found 5 warnings and 0 errors! HTML-Tidy5-1.06/t/clean.t000644 000766 000024 00000001374 13555115664 015031 0ustar00andystaff000000 000000 #!/usr/bin/perl -T use 5.010001; use strict; use warnings; use Test::Exception; use Test::More tests => 3; use HTML::Tidy5; use lib 't'; use TidyTestUtils; my $tidy = HTML::Tidy5->new( { wrap => 0 } ); isa_ok( $tidy, 'HTML::Tidy5' ); my $expected_pattern = 'Usage: clean($str [, $str...])'; throws_ok { $tidy->clean(); } qr/\Q$expected_pattern\E/, 'clean() croaks when not given a string or list of strings'; my $actual = $tidy->clean(''); $actual = remove_specificity( $actual ); my $expected_empty_html = <<'HERE'; HERE is( $actual, $expected_empty_html, '$tidy->clean("") returns empty HTML document' ); done_testing(); exit 0; HTML-Tidy5-1.06/t/html_fragment_tidy_ok.t000644 000766 000024 00000005070 13555115664 020315 0ustar00andystaff000000 000000 #!perl use 5.010001; use warnings; use strict; use Test::More tests => 3; use Test::Builder::Tester; use Test::HTML::Tidy5; subtest 'html_fragment_tidy_ok fails on undef' => sub { plan tests => 1; my $msg = 'Fails on undef'; test_out( "not ok 1 - $msg" ); test_fail( +2 ); test_diag( 'Error: html_fragment_tidy_ok() got undef' ); html_fragment_tidy_ok( undef, $msg ); test_test( $msg ); }; subtest 'html_tidy_ok fails on a fragment, but html_fragment_tidy_ok is OK' => sub { plan tests => 2; my $html = <<'HTML';

This is an incomplete document but it's structurally OK. alpha

HTML my $msg = 'Called html_tidy_ok on incomplete document'; test_out( "not ok 1 - $msg" ); test_fail( +6 ); test_diag( "Errors: $msg" ); test_diag( '(1:1) Warning: missing declaration' ); test_diag( '(1:1) Warning: inserting implicit ' ); test_diag( '(1:1) Warning: inserting missing \'title\' element' ); test_diag( '3 messages on the page' ); html_tidy_ok( $html, $msg ); test_test( $msg ); $msg = 'html_fragment_tidy_ok can handle it'; test_out( "ok 1 - $msg" ); html_fragment_tidy_ok( $html, $msg ); test_test( $msg ); }; subtest 'html_fragment_tidy_ok gets the same errors as html_tidy_ok' => sub { plan tests => 2; my $html = <<'HTML';

This is an incomplete document, and it has structural errors.

HTML my $msg = 'html_tidy_ok on sloppy doc'; test_out( "not ok 1 - $msg" ); test_fail( +8 ); test_diag( "Errors: $msg" ); test_diag( '(1:1) Warning: missing declaration' ); test_diag( '(1:1) Warning: inserting implicit ' ); test_diag( '(2:59) Warning: discarding unexpected ' ); test_diag( '(1:1) Warning: inserting missing \'title\' element' ); test_diag( '(3:5) Warning: lacks "alt" attribute' ); test_diag( '5 messages on the page' ); html_tidy_ok( $html, $msg ); test_test( $msg ); # Note that the line numbers are the same between html_tidy_ok and html_fragment_tidy_ok. $msg = 'html_fragment_tidy_ok on sloppy doc'; test_out( "not ok 1 - $msg" ); test_fail( +5 ); test_diag( "Errors: $msg" ); test_diag( '(2:59) Warning: discarding unexpected ' ); test_diag( '(3:5) Warning: lacks "alt" attribute' ); test_diag( '2 messages on the page' ); html_fragment_tidy_ok( $html, $msg ); test_test( $msg ); }; exit 0; HTML-Tidy5-1.06/t/clean-crash.t000644 000766 000024 00000002334 13555115664 016124 0ustar00andystaff000000 000000 #!/usr/bin/perl -T use 5.010001; use warnings; use strict; # From a bug found by Aaron Patterson #Full context and any attached attachments can be found at: # #Here's a snippet of code to repro the bug, it produces an 'Illegal instruction' error use Test::More tests => 2; use HTML::Tidy5; my $html = do { local $/ = undef; }; my $tidy = HTML::Tidy5->new; isa_ok( $tidy, 'HTML::Tidy5' ); $tidy->ignore( type => TIDY_INFO ); $tidy->clean( $html ); my @expected = split( /\n/, <<'HERE' ); (1:1) Warning: missing declaration (1:1) Warning: inserting implicit (1:1) Warning: missing before (2:1) Warning: inserting implicit (2:1) Warning: missing (3:1) Error: discarding unexpected (2:1) Warning: missing
(1:1) Warning: missing (1:1) Warning: inserting missing 'title' element HERE my @mess = map { $_ ? $_->as_string() : undef } $tidy->messages(); is_deeply( \@mess, \@expected, 'Messages match' ); exit 0; __DATA__
HTML-Tidy5-1.06/t/new-tags.t000644 000766 000024 00000004364 13555117644 015476 0ustar00andystaff000000 000000 #!perl -T use 5.010001; use warnings; use strict; use Test::More tests => 4; use HTML::Tidy5; use lib 't'; use TidyTestUtils; my $html = <<'HTML';

This is a nav section

This is an otherblock.

HTML my @all_errors = ( 'test (7:9) Error: is not recognized!', 'test (7:9) Warning: discarding unexpected ', 'test (11:9) Warning: discarding unexpected ', 'test (12:9) Error: is not recognized!', 'test (12:9) Warning: discarding unexpected ', 'test (14:28) Error: is not recognized!', 'test (14:28) Warning: discarding unexpected ', 'test (14:49) Warning: discarding unexpected ', 'test (16:9) Warning: discarding unexpected ', ); subtest 'default constructor warns about