HTML-Tidy5-1.06/ 000755 000766 000024 00000000000 13555120030 013271 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/perlcriticrc 000644 000766 000024 00000003232 13555115664 015722 0 ustar 00andy staff 000000 000000 verbose = %f:%l:%c %m (%p)\n %r\n\n
[-BuiltinFunctions::ProhibitBooleanGrep]
[-CodeLayout::ProhibitParensWithBuiltins]
[CodeLayout::ProhibitHardTabs]
allow_leading_tabs = 0
[-CodeLayout::RequireASCII]
# t/unicode.t uses UTF8.
[-CodeLayout::RequireTidyCode]
# Never works for me.
[Compatibility::PodMinimumVersion]
above_version = 5.008008
[-ControlStructures::ProhibitPostfixControls]
[-Documentation::PodSpelling]
[-Documentation::RequirePodAtEnd]
[-Documentation::RequirePodSections]
[-Editor::RequireEmacsFileVariables]
# We're a utility. We're OK with die.
[-ErrorHandling::RequireCarping]
[-ErrorHandling::RequireUseOfExceptions]
[-InputOutput::ProhibitJoinedReadline]
[-InputOutput::RequireCheckedSyscalls]
functions = open opendir read readline readdir close closedir
[-Modules::ProhibitAutomaticExportation]
[-Modules::RequireExplicitInclusion]
[-Modules::RequireVersionVar]
[-RegularExpressions::ProhibitComplexRegexes]
[-RegularExpressions::ProhibitEscapedMetacharacters]
[-RegularExpressions::RequireDotMatchAnything]
[-RegularExpressions::RequireExtendedFormatting]
[-RegularExpressions::RequireLineBoundaryMatching]
[-Subroutines::ProhibitCallsToUndeclaredSubs]
[-Subroutines::ProhibitCallsToUnexportedSubs]
# Too many subs coming in from Test::More
[-Subroutines::RequireArgUnpacking]
[-ValuesAndExpressions::ProhibitConstantPragma]
[-ValuesAndExpressions::ProhibitEmptyQuotes]
[-ValuesAndExpressions::ProhibitMagicNumbers]
[-ValuesAndExpressions::ProhibitNoisyQuotes]
[-ValuesAndExpressions::RequireConstantOnLeftSideOfEquality]
[-ValuesAndExpressions::RequireInterpolationOfMetachars]
[-Variables::ProhibitPackageVars]
[-Variables::ProhibitPunctuationVars]
HTML-Tidy5-1.06/bin/ 000755 000766 000024 00000000000 13555120030 014041 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/Changes 000644 000766 000024 00000002474 13555120023 014575 0 ustar 00andy staff 000000 000000 Revision history for Perl extension HTML::Tidy5.
The HTML::Tidy5 module is an HTML5-compatible upgrade from the HTML::Tidy
module.
1.06 Sat Oct 26 14:35:44 CDT 2019
====================================
Fixed a potentially failing test. Thanks, Pino Toscano.
Fixed a failing test under macOS. (GH #13)
1.04 Fri Apr 20 16:45:00 CDT 2018
====================================
[ENHANCEMENTS]
Added html_fragment_tidy_ok() for running tidy checks against HTML that
is not a full document.
1.02 Wed Apr 4 15:06:44 CDT 2018
====================================
[FIXES]
html_tidy_ok() now clears the messages in a passed-in HTML::Tidy5 object
before using it to validate the HTML.
1.01_01 Mon Mar 26 16:37:13 CDT 2018
====================================
[ENHANCEMENTS]
Add Test::HTML::Tidy5 and the html_tidy_ok() function for testing HTML
in your .t files.
[FIXES]
Fixed test failure.
1.00 Tue Mar 20 16:51:52 CDT 2018
====================================
First production release. HTML::Tidy5 relies on version 5.6.0 of the
tidy-html5 library. See http://html-tidy.org/ for information about
the tidy-html5 library.
[INCOMPATIBILITIES WITH HTML::Tidy]
In HTML::Tidy, HTML::Tidy::Message objects had overloaded string context.
In HTML::Tidy5, you must explicitly call the ->as_string method for
HTML::Tidy5::Message.
HTML-Tidy5-1.06/MANIFEST 000644 000766 000024 00000001527 13555120030 014427 0 ustar 00andy staff 000000 000000 bin/webtidy5
Changes
lib/HTML/Tidy5.pm
lib/HTML/Tidy5/Message.pm
lib/Test/HTML/Tidy5.pm
Makefile.PL
MANIFEST
perlcriticrc
ppport.h
README.md
Tidy5.xs
t/00-load.t
t/cfg-for-parse.cfg
t/cfg-for-parse.t
t/clean.t
t/clean-crash.t
t/drop-empty-elements.t
t/extra-quote.t
t/html_fragment_tidy_ok.t
t/html_tidy_ok.t
t/ignore-text.t
t/ignore.t
t/illegal-options.t
t/levels.t
t/message.t
t/new-tags.t
t/opt-00.t
t/parse.t
t/parse-errors.t
t/perfect.t
t/roundtrip.t
t/segfault-form.t
t/show-info.t
t/simple.t
t/too-many-titles.t
t/unicode.html
t/unicode.t
t/unicode-nbsp.t
t/venus.cfg
t/venus.html
t/venus.t
t/version.t
t/wordwrap.cfg
t/wordwrap.t
t/TidyTestUtils.pm
xt/pod.t
xt/pod-coverage.t
META.yml Module YAML meta-data (added by MakeMaker)
META.json Module JSON meta-data (added by MakeMaker)
HTML-Tidy5-1.06/ppport.h 000644 000766 000024 00000063506 13555115664 015021 0 ustar 00andy staff 000000 000000
/* ppport.h -- Perl/Pollution/Portability Version 2.007
*
* Automatically Created by Devel::PPPort on Mon Feb 16 21:21:31 2004
*
* Do NOT edit this file directly! -- Edit PPPort.pm instead.
*
* Version 2.x, Copyright (C) 2001, Paul Marquess.
* Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
* This code may be used and distributed under the same license as any
* version of Perl.
*
* This version of ppport.h is designed to support operation with Perl
* installations back to 5.004, and has been tested up to 5.8.1.
*
* If this version of ppport.h is failing during the compilation of this
* module, please check if a newer version of Devel::PPPort is available
* on CPAN before sending a bug report.
*
* If you are using the latest version of Devel::PPPort and it is failing
* during compilation of this module, please send a report to perlbug@perl.com
*
* Include all following information:
*
* 1. The complete output from running "perl -V"
*
* 2. This file.
*
* 3. The name & version of the module you were trying to build.
*
* 4. A full log of the build that failed.
*
* 5. Any other information that you think could be relevant.
*
*
* For the latest version of this code, please retreive the Devel::PPPort
* module from CPAN.
*
*/
/*
* In order for a Perl extension module to be as portable as possible
* across differing versions of Perl itself, certain steps need to be taken.
* Including this header is the first major one, then using dTHR is all the
* appropriate places and using a PL_ prefix to refer to global Perl
* variables is the second.
*
*/
/* If you use one of a few functions that were not present in earlier
* versions of Perl, please add a define before the inclusion of ppport.h
* for a static include, or use the GLOBAL request in a single module to
* produce a global definition that can be referenced from the other
* modules.
*
* Function: Static define: Extern define:
* newCONSTSUB() NEED_newCONSTSUB NEED_newCONSTSUB_GLOBAL
*
*/
/* To verify whether ppport.h is needed for your module, and whether any
* special defines should be used, ppport.h can be run through Perl to check
* your source code. Simply say:
*
* perl -x ppport.h *.c *.h *.xs foo/bar*.c [etc]
*
* The result will be a list of patches suggesting changes that should at
* least be acceptable, if not necessarily the most efficient solution, or a
* fix for all possible problems. It won't catch where dTHR is needed, and
* doesn't attempt to account for global macro or function definitions,
* nested includes, typemaps, etc.
*
* In order to test for the need of dTHR, please try your module under a
* recent version of Perl that has threading compiled-in.
*
*/
/*
#!/usr/bin/perl
@ARGV = ("*.xs") if !@ARGV;
%badmacros = %funcs = %macros = (); $replace = 0;
foreach () {
$funcs{$1} = 1 if /Provide:\s+(\S+)/;
$macros{$1} = 1 if /^#\s*define\s+([a-zA-Z0-9_]+)/;
$replace = $1 if /Replace:\s+(\d+)/;
$badmacros{$2}=$1 if $replace and /^#\s*define\s+([a-zA-Z0-9_]+).*?\s+([a-zA-Z0-9_]+)/;
$badmacros{$1}=$2 if /Replace (\S+) with (\S+)/;
}
foreach $filename (map(glob($_),@ARGV)) {
unless (open(IN, "<$filename")) {
warn "Unable to read from $file: $!\n";
next;
}
print "Scanning $filename...\n";
$c = ""; while () { $c .= $_; } close(IN);
$need_include = 0; %add_func = (); $changes = 0;
$has_include = ($c =~ /#.*include.*ppport/m);
foreach $func (keys %funcs) {
if ($c =~ /#.*define.*\bNEED_$func(_GLOBAL)?\b/m) {
if ($c !~ /\b$func\b/m) {
print "If $func isn't needed, you don't need to request it.\n" if
$changes += ($c =~ s/^.*#.*define.*\bNEED_$func\b.*\n//m);
} else {
print "Uses $func\n";
$need_include = 1;
}
} else {
if ($c =~ /\b$func\b/m) {
$add_func{$func} =1 ;
print "Uses $func\n";
$need_include = 1;
}
}
}
if (not $need_include) {
foreach $macro (keys %macros) {
if ($c =~ /\b$macro\b/m) {
print "Uses $macro\n";
$need_include = 1;
}
}
}
foreach $badmacro (keys %badmacros) {
if ($c =~ /\b$badmacro\b/m) {
$changes += ($c =~ s/\b$badmacro\b/$badmacros{$badmacro}/gm);
print "Uses $badmacros{$badmacro} (instead of $badmacro)\n";
$need_include = 1;
}
}
if (scalar(keys %add_func) or $need_include != $has_include) {
if (!$has_include) {
$inc = join('',map("#define NEED_$_\n", sort keys %add_func)).
"#include \"ppport.h\"\n";
$c = "$inc$c" unless $c =~ s/#.*include.*XSUB.*\n/$&$inc/m;
} elsif (keys %add_func) {
$inc = join('',map("#define NEED_$_\n", sort keys %add_func));
$c = "$inc$c" unless $c =~ s/^.*#.*include.*ppport.*$/$inc$&/m;
}
if (!$need_include) {
print "Doesn't seem to need ppport.h.\n";
$c =~ s/^.*#.*include.*ppport.*\n//m;
}
$changes++;
}
if ($changes) {
open(OUT,">/tmp/ppport.h.$$");
print OUT $c;
close(OUT);
open(DIFF, "diff -u $filename /tmp/ppport.h.$$|");
while () { s!/tmp/ppport\.h\.$$!$filename.patched!; print STDOUT; }
close(DIFF);
unlink("/tmp/ppport.h.$$");
} else {
print "Looks OK\n";
}
}
__DATA__
*/
#ifndef _P_P_PORTABILITY_H_
#define _P_P_PORTABILITY_H_
#ifndef PERL_REVISION
# ifndef __PATCHLEVEL_H_INCLUDED__
# include
# endif
# if !(defined(PERL_VERSION) || (SUBVERSION > 0 && defined(PATCHLEVEL)))
# include
# endif
# ifndef PERL_REVISION
# define PERL_REVISION (5)
/* Replace: 1 */
# define PERL_VERSION PATCHLEVEL
# define PERL_SUBVERSION SUBVERSION
/* Replace PERL_PATCHLEVEL with PERL_VERSION */
/* Replace: 0 */
# endif
#endif
#define PERL_BCDVERSION ((PERL_REVISION * 0x1000000L) + (PERL_VERSION * 0x1000L) + PERL_SUBVERSION)
/* It is very unlikely that anyone will try to use this with Perl 6
(or greater), but who knows.
*/
#if PERL_REVISION != 5
# error ppport.h only works with Perl version 5
#endif /* PERL_REVISION != 5 */
#ifndef ERRSV
# define ERRSV perl_get_sv("@",FALSE)
#endif
#if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5))
/* Replace: 1 */
# define PL_Sv Sv
# define PL_compiling compiling
# define PL_copline copline
# define PL_curcop curcop
# define PL_curstash curstash
# define PL_defgv defgv
# define PL_dirty dirty
# define PL_dowarn dowarn
# define PL_hints hints
# define PL_na na
# define PL_perldb perldb
# define PL_rsfp_filters rsfp_filters
# define PL_rsfpv rsfp
# define PL_stdingv stdingv
# define PL_sv_no sv_no
# define PL_sv_undef sv_undef
# define PL_sv_yes sv_yes
/* Replace: 0 */
#endif
#ifdef HASATTRIBUTE
# if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
# define PERL_UNUSED_DECL
# else
# define PERL_UNUSED_DECL __attribute__((unused))
# endif
#else
# define PERL_UNUSED_DECL
#endif
#ifndef dNOOP
# define NOOP (void)0
# define dNOOP extern int Perl___notused PERL_UNUSED_DECL
#endif
#ifndef dTHR
# define dTHR dNOOP
#endif
#ifndef dTHX
# define dTHX dNOOP
# define dTHXa(x) dNOOP
# define dTHXoa(x) dNOOP
#endif
#ifndef pTHX
# define pTHX void
# define pTHX_
# define aTHX
# define aTHX_
#endif
/* IV could also be a quad (say, a long long), but Perls
* capable of those should have IVSIZE already. */
#if !defined(IVSIZE) && defined(LONGSIZE)
# define IVSIZE LONGSIZE
#endif
#ifndef IVSIZE
# define IVSIZE 4 /* A bold guess, but the best we can make. */
#endif
#ifndef UVSIZE
# define UVSIZE IVSIZE
#endif
#ifndef NVTYPE
# if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
# define NVTYPE long double
# else
# define NVTYPE double
# endif
typedef NVTYPE NV;
#endif
#ifndef INT2PTR
#if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
# define PTRV UV
# define INT2PTR(any,d) (any)(d)
#else
# if PTRSIZE == LONGSIZE
# define PTRV unsigned long
# else
# define PTRV unsigned
# endif
# define INT2PTR(any,d) (any)(PTRV)(d)
#endif
#define NUM2PTR(any,d) (any)(PTRV)(d)
#define PTR2IV(p) INT2PTR(IV,p)
#define PTR2UV(p) INT2PTR(UV,p)
#define PTR2NV(p) NUM2PTR(NV,p)
#if PTRSIZE == LONGSIZE
# define PTR2ul(p) (unsigned long)(p)
#else
# define PTR2ul(p) INT2PTR(unsigned long,p)
#endif
#endif /* !INT2PTR */
#ifndef boolSV
# define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no)
#endif
#ifndef gv_stashpvn
# define gv_stashpvn(str,len,flags) gv_stashpv(str,flags)
#endif
#ifndef newSVpvn
# define newSVpvn(data,len) ((len) ? newSVpv ((data), (len)) : newSVpv ("", 0))
#endif
#ifndef newRV_inc
/* Replace: 1 */
# define newRV_inc(sv) newRV(sv)
/* Replace: 0 */
#endif
/* DEFSV appears first in 5.004_56 */
#ifndef DEFSV
# define DEFSV GvSV(PL_defgv)
#endif
#ifndef SAVE_DEFSV
# define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv))
#endif
#ifndef newRV_noinc
# ifdef __GNUC__
# define newRV_noinc(sv) \
({ \
SV *nsv = (SV*)newRV(sv); \
SvREFCNT_dec(sv); \
nsv; \
})
# else
# if defined(USE_THREADS)
static SV * newRV_noinc (SV * sv)
{
SV *nsv = (SV*)newRV(sv);
SvREFCNT_dec(sv);
return nsv;
}
# else
# define newRV_noinc(sv) \
(PL_Sv=(SV*)newRV(sv), SvREFCNT_dec(sv), (SV*)PL_Sv)
# endif
# endif
#endif
/* Provide: newCONSTSUB */
/* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */
#if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION < 63))
#if defined(NEED_newCONSTSUB)
static
#else
extern void newCONSTSUB(HV * stash, char * name, SV *sv);
#endif
#if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL)
void
newCONSTSUB(stash,name,sv)
HV *stash;
char *name;
SV *sv;
{
U32 oldhints = PL_hints;
HV *old_cop_stash = PL_curcop->cop_stash;
HV *old_curstash = PL_curstash;
line_t oldline = PL_curcop->cop_line;
PL_curcop->cop_line = PL_copline;
PL_hints &= ~HINT_BLOCK_SCOPE;
if (stash)
PL_curstash = PL_curcop->cop_stash = stash;
newSUB(
#if (PERL_VERSION < 3) || ((PERL_VERSION == 3) && (PERL_SUBVERSION < 22))
/* before 5.003_22 */
start_subparse(),
#else
# if (PERL_VERSION == 3) && (PERL_SUBVERSION == 22)
/* 5.003_22 */
start_subparse(0),
# else
/* 5.003_23 onwards */
start_subparse(FALSE, 0),
# endif
#endif
newSVOP(OP_CONST, 0, newSVpv(name,0)),
newSVOP(OP_CONST, 0, &PL_sv_no), /* SvPV(&PL_sv_no) == "" -- GMB */
newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv))
);
PL_hints = oldhints;
PL_curcop->cop_stash = old_cop_stash;
PL_curstash = old_curstash;
PL_curcop->cop_line = oldline;
}
#endif
#endif /* newCONSTSUB */
#ifndef START_MY_CXT
/*
* Boilerplate macros for initializing and accessing interpreter-local
* data from C. All statics in extensions should be reworked to use
* this, if you want to make the extension thread-safe. See ext/re/re.xs
* for an example of the use of these macros.
*
* Code that uses these macros is responsible for the following:
* 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts"
* 2. Declare a typedef named my_cxt_t that is a structure that contains
* all the data that needs to be interpreter-local.
* 3. Use the START_MY_CXT macro after the declaration of my_cxt_t.
* 4. Use the MY_CXT_INIT macro such that it is called exactly once
* (typically put in the BOOT: section).
* 5. Use the members of the my_cxt_t structure everywhere as
* MY_CXT.member.
* 6. Use the dMY_CXT macro (a declaration) in all the functions that
* access MY_CXT.
*/
#if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \
defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT)
/* This must appear in all extensions that define a my_cxt_t structure,
* right after the definition (i.e. at file scope). The non-threads
* case below uses it to declare the data as static. */
#define START_MY_CXT
#if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 ))
/* Fetches the SV that keeps the per-interpreter data. */
#define dMY_CXT_SV \
SV *my_cxt_sv = perl_get_sv(MY_CXT_KEY, FALSE)
#else /* >= perl5.004_68 */
#define dMY_CXT_SV \
SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \
sizeof(MY_CXT_KEY)-1, TRUE)
#endif /* < perl5.004_68 */
/* This declaration should be used within all functions that use the
* interpreter-local data. */
#define dMY_CXT \
dMY_CXT_SV; \
my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv))
/* Creates and zeroes the per-interpreter data.
* (We allocate my_cxtp in a Perl SV so that it will be released when
* the interpreter goes away.) */
#define MY_CXT_INIT \
dMY_CXT_SV; \
/* newSV() allocates one more than needed */ \
my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
Zero(my_cxtp, 1, my_cxt_t); \
sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))
/* This macro must be used to access members of the my_cxt_t structure.
* e.g. MYCXT.some_data */
#define MY_CXT (*my_cxtp)
/* Judicious use of these macros can reduce the number of times dMY_CXT
* is used. Use is similar to pTHX, aTHX etc. */
#define pMY_CXT my_cxt_t *my_cxtp
#define pMY_CXT_ pMY_CXT,
#define _pMY_CXT ,pMY_CXT
#define aMY_CXT my_cxtp
#define aMY_CXT_ aMY_CXT,
#define _aMY_CXT ,aMY_CXT
#else /* single interpreter */
#define START_MY_CXT static my_cxt_t my_cxt;
#define dMY_CXT_SV dNOOP
#define dMY_CXT dNOOP
#define MY_CXT_INIT NOOP
#define MY_CXT my_cxt
#define pMY_CXT void
#define pMY_CXT_
#define _pMY_CXT
#define aMY_CXT
#define aMY_CXT_
#define _aMY_CXT
#endif
#endif /* START_MY_CXT */
#ifndef IVdf
# if IVSIZE == LONGSIZE
# define IVdf "ld"
# define UVuf "lu"
# define UVof "lo"
# define UVxf "lx"
# define UVXf "lX"
# else
# if IVSIZE == INTSIZE
# define IVdf "d"
# define UVuf "u"
# define UVof "o"
# define UVxf "x"
# define UVXf "X"
# endif
# endif
#endif
#ifndef NVef
# if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \
defined(PERL_PRIfldbl) /* Not very likely, but let's try anyway. */
# define NVef PERL_PRIeldbl
# define NVff PERL_PRIfldbl
# define NVgf PERL_PRIgldbl
# else
# define NVef "e"
# define NVff "f"
# define NVgf "g"
# endif
#endif
#ifndef AvFILLp /* Older perls (<=5.003) lack AvFILLp */
# define AvFILLp AvFILL
#endif
#ifdef SvPVbyte
# if PERL_REVISION == 5 && PERL_VERSION < 7
/* SvPVbyte does not work in perl-5.6.1, borrowed version for 5.7.3 */
# undef SvPVbyte
# define SvPVbyte(sv, lp) \
((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \
? ((lp = SvCUR(sv)), SvPVX(sv)) : my_sv_2pvbyte(aTHX_ sv, &lp))
static char *
my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp)
{
sv_utf8_downgrade(sv,0);
return SvPV(sv,*lp);
}
# endif
#else
# define SvPVbyte SvPV
#endif
#ifndef SvPV_nolen
# define SvPV_nolen(sv) \
((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \
? SvPVX(sv) : sv_2pv_nolen(sv))
static char *
sv_2pv_nolen(pTHX_ register SV *sv)
{
STRLEN n_a;
return sv_2pv(sv, &n_a);
}
#endif
#ifndef get_cv
# define get_cv(name,create) perl_get_cv(name,create)
#endif
#ifndef get_sv
# define get_sv(name,create) perl_get_sv(name,create)
#endif
#ifndef get_av
# define get_av(name,create) perl_get_av(name,create)
#endif
#ifndef get_hv
# define get_hv(name,create) perl_get_hv(name,create)
#endif
#ifndef call_argv
# define call_argv perl_call_argv
#endif
#ifndef call_method
# define call_method perl_call_method
#endif
#ifndef call_pv
# define call_pv perl_call_pv
#endif
#ifndef call_sv
# define call_sv perl_call_sv
#endif
#ifndef PERL_SCAN_GREATER_THAN_UV_MAX
# define PERL_SCAN_GREATER_THAN_UV_MAX 0x02
#endif
#ifndef PERL_SCAN_SILENT_ILLDIGIT
# define PERL_SCAN_SILENT_ILLDIGIT 0x04
#endif
#ifndef PERL_SCAN_ALLOW_UNDERSCORES
# define PERL_SCAN_ALLOW_UNDERSCORES 0x01
#endif
#ifndef PERL_SCAN_DISALLOW_PREFIX
# define PERL_SCAN_DISALLOW_PREFIX 0x02
#endif
#if (PERL_VERSION >= 6)
#define I32_CAST
#else
#define I32_CAST (I32*)
#endif
#ifndef grok_hex
static UV _grok_hex (char *string, STRLEN *len, I32 *flags, NV *result) {
NV r = scan_hex(string, *len, I32_CAST len);
if (r > UV_MAX) {
*flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
if (result) *result = r;
return UV_MAX;
}
return (UV)r;
}
# define grok_hex(string, len, flags, result) \
_grok_hex((string), (len), (flags), (result))
#endif
#ifndef grok_oct
static UV _grok_oct (char *string, STRLEN *len, I32 *flags, NV *result) {
NV r = scan_oct(string, *len, I32_CAST len);
if (r > UV_MAX) {
*flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
if (result) *result = r;
return UV_MAX;
}
return (UV)r;
}
# define grok_oct(string, len, flags, result) \
_grok_oct((string), (len), (flags), (result))
#endif
#ifndef grok_bin
static UV _grok_bin (char *string, STRLEN *len, I32 *flags, NV *result) {
NV r = scan_bin(string, *len, I32_CAST len);
if (r > UV_MAX) {
*flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
if (result) *result = r;
return UV_MAX;
}
return (UV)r;
}
# define grok_bin(string, len, flags, result) \
_grok_bin((string), (len), (flags), (result))
#endif
#ifndef IN_LOCALE
# define IN_LOCALE \
(PL_curcop == &PL_compiling ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
#endif
#ifndef IN_LOCALE_RUNTIME
# define IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
#endif
#ifndef IN_LOCALE_COMPILETIME
# define IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
#endif
#ifndef IS_NUMBER_IN_UV
# define IS_NUMBER_IN_UV 0x01
# define IS_NUMBER_GREATER_THAN_UV_MAX 0x02
# define IS_NUMBER_NOT_INT 0x04
# define IS_NUMBER_NEG 0x08
# define IS_NUMBER_INFINITY 0x10
# define IS_NUMBER_NAN 0x20
#endif
#ifndef grok_numeric_radix
# define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
#define grok_numeric_radix Perl_grok_numeric_radix
bool
Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
#ifdef USE_LOCALE_NUMERIC
#if (PERL_VERSION >= 6)
if (PL_numeric_radix_sv && IN_LOCALE) {
STRLEN len;
char* radix = SvPV(PL_numeric_radix_sv, len);
if (*sp + len <= send && memEQ(*sp, radix, len)) {
*sp += len;
return TRUE;
}
}
#else
/* pre5.6.0 perls don't have PL_numeric_radix_sv so the radix
* must manually be requested from locale.h */
#include
struct lconv *lc = localeconv();
char *radix = lc->decimal_point;
if (radix && IN_LOCALE) {
STRLEN len = strlen(radix);
if (*sp + len <= send && memEQ(*sp, radix, len)) {
*sp += len;
return TRUE;
}
}
#endif /* PERL_VERSION */
#endif /* USE_LOCALE_NUMERIC */
/* always try "." if numeric radix didn't match because
* we may have data from different locales mixed */
if (*sp < send && **sp == '.') {
++*sp;
return TRUE;
}
return FALSE;
}
#endif /* grok_numeric_radix */
#ifndef grok_number
#define grok_number Perl_grok_number
int
Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
{
const char *s = pv;
const char *send = pv + len;
const UV max_div_10 = UV_MAX / 10;
const char max_mod_10 = UV_MAX % 10;
int numtype = 0;
int sawinf = 0;
int sawnan = 0;
while (s < send && isSPACE(*s))
s++;
if (s == send) {
return 0;
} else if (*s == '-') {
s++;
numtype = IS_NUMBER_NEG;
}
else if (*s == '+')
s++;
if (s == send)
return 0;
/* next must be digit or the radix separator or beginning of infinity */
if (isDIGIT(*s)) {
/* UVs are at least 32 bits, so the first 9 decimal digits cannot
overflow. */
UV value = *s - '0';
/* This construction seems to be more optimiser friendly.
(without it gcc does the isDIGIT test and the *s - '0' separately)
With it gcc on arm is managing 6 instructions (6 cycles) per digit.
In theory the optimiser could deduce how far to unroll the loop
before checking for overflow. */
if (++s < send) {
int digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
digit = *s - '0';
if (digit >= 0 && digit <= 9) {
value = value * 10 + digit;
if (++s < send) {
/* Now got 9 digits, so need to check
each time for overflow. */
digit = *s - '0';
while (digit >= 0 && digit <= 9
&& (value < max_div_10
|| (value == max_div_10
&& digit <= max_mod_10))) {
value = value * 10 + digit;
if (++s < send)
digit = *s - '0';
else
break;
}
if (digit >= 0 && digit <= 9
&& (s < send)) {
/* value overflowed.
skip the remaining digits, don't
worry about setting *valuep. */
do {
s++;
} while (s < send && isDIGIT(*s));
numtype |=
IS_NUMBER_GREATER_THAN_UV_MAX;
goto skip_value;
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
numtype |= IS_NUMBER_IN_UV;
if (valuep)
*valuep = value;
skip_value:
if (GROK_NUMERIC_RADIX(&s, send)) {
numtype |= IS_NUMBER_NOT_INT;
while (s < send && isDIGIT(*s)) /* optional digits after the radix */
s++;
}
}
else if (GROK_NUMERIC_RADIX(&s, send)) {
numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
/* no digits before the radix means we need digits after it */
if (s < send && isDIGIT(*s)) {
do {
s++;
} while (s < send && isDIGIT(*s));
if (valuep) {
/* integer approximation is valid - it's 0. */
*valuep = 0;
}
}
else
return 0;
} else if (*s == 'I' || *s == 'i') {
s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
s++; if (s < send && (*s == 'I' || *s == 'i')) {
s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
s++;
}
sawinf = 1;
} else if (*s == 'N' || *s == 'n') {
/* XXX TODO: There are signaling NaNs and quiet NaNs. */
s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
s++;
sawnan = 1;
} else
return 0;
if (sawinf) {
numtype &= IS_NUMBER_NEG; /* Keep track of sign */
numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
} else if (sawnan) {
numtype &= IS_NUMBER_NEG; /* Keep track of sign */
numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
} else if (s < send) {
/* we can have an optional exponent part */
if (*s == 'e' || *s == 'E') {
/* The only flag we keep is sign. Blow away any "it's UV" */
numtype &= IS_NUMBER_NEG;
numtype |= IS_NUMBER_NOT_INT;
s++;
if (s < send && (*s == '-' || *s == '+'))
s++;
if (s < send && isDIGIT(*s)) {
do {
s++;
} while (s < send && isDIGIT(*s));
}
else
return 0;
}
}
while (s < send && isSPACE(*s))
s++;
if (s >= send)
return numtype;
if (len == 10 && memEQ(pv, "0 but true", 10)) {
if (valuep)
*valuep = 0;
return IS_NUMBER_IN_UV;
}
return 0;
}
#endif /* grok_number */
#endif /* _P_P_PORTABILITY_H_ */
/* End of File ppport.h */
HTML-Tidy5-1.06/t/ 000755 000766 000024 00000000000 13555120030 013534 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/xt/ 000755 000766 000024 00000000000 13555120030 013724 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/README.md 000644 000766 000024 00000001605 13555115664 014573 0 ustar 00andy staff 000000 000000 HTML::Tidy5
===========
HTML::Tidy5 is an HTML checker in a handy dandy object.
PREREQUISITES
=============
HTML::Tidy5 does very little work. The real work of HTML::Tidy5 is done
by the [html-tidy][1] library which is written in C. To use HTML::Tidy5,
you must install html-tidy. Your package manager probably has it, either
as "tidy" or "html-tidy" or "libtidy". If there's an option to get a
"-devel" package, get that, too, because Perl needs the header files
in it.
INSTALLATION
============
Once you have libtidy installed via one of the previous methods,
install HTML::Tidy5 like any standard Perl module.
perl Makefile.PL
make
make test
make install
COPYRIGHT AND LICENSE
=====================
Copyright (C) 2004-2018 by Andy Lester
This library is free software. It may be redistributed and modified
under the Artistic License v2.0.
[1]: http://html-tidy.org/
HTML-Tidy5-1.06/META.yml 000644 000766 000024 00000001666 13555120030 014553 0 ustar 00andy staff 000000 000000 ---
abstract: 'HTML validation in a Perl object'
author:
- 'Andy Lester '
build_requires:
ExtUtils::MakeMaker: '0'
configure_requires:
ExtUtils::MakeMaker: '0'
dynamic_config: 1
generated_by: 'ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010'
license: artistic_2
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: HTML-Tidy5
no_index:
directory:
- t
- inc
requires:
Carp: '0'
Encode: '0'
Exporter: '0'
Getopt::Long: '0'
Test::Builder: '0'
Test::Exception: '0'
Test::More: '0.98'
constant: '0'
perl: '5.010001'
resources:
bugtracker: http://github.com/petdance/html-tidy5/issues
homepage: http://github.com/petdance/html-tidy5
license: http://www.opensource.org/licenses/artistic-license-2.0.php
repository: http://github.com/petdance/html-tidy5
version: '1.06'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
HTML-Tidy5-1.06/lib/ 000755 000766 000024 00000000000 13555120030 014037 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/Makefile.PL 000644 000766 000024 00000004125 13555115664 015266 0 ustar 00andy staff 000000 000000 #!/usr/bin/perl
package main;
use 5.010001;
use strict;
use warnings;
use ExtUtils::MakeMaker 6.48;
use ExtUtils::Liblist;
use Config;
my $libs = $^O eq 'freebsd' ? '-ltidy5' : '-ltidy';
my $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy";
if ( not eval { require LWP::Simple; 1; } ) {
print <<'EOF';
NOTE: It seems that you don't have LWP::Simple installed.
The webtidy program will not be able to retrieve web pages.
EOF
}
my $parms = {
NAME => 'HTML::Tidy5',
AUTHOR => 'Andy Lester ',
VERSION_FROM => 'lib/HTML/Tidy5.pm',
ABSTRACT_FROM => 'lib/HTML/Tidy5.pm',
PREREQ_PM => {
'Encode' => 0, # for tests
'Exporter' => 0,
'Getopt::Long' => 0, # in webtidy
'Test::More' => '0.98', # For subtest()
'Test::Builder' => 0,
'Test::Exception' => 0,
'Carp' => 0,
'constant' => 0,
},
MIN_PERL_VERSION => 5.010001,
LICENSE => 'artistic_2',
META_MERGE => {
resources => {
license => 'http://www.opensource.org/licenses/artistic-license-2.0.php',
homepage => 'http://github.com/petdance/html-tidy5',
bugtracker => 'http://github.com/petdance/html-tidy5/issues',
repository => 'http://github.com/petdance/html-tidy5',
},
},
LIBS => [$libs],
NEEDS_LINKING => 1,
INC => $inc,
EXE_FILES => [qw(bin/webtidy5)],
dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
clean => { FILES => 'HTML-Tidy5-*' },
};
WriteMakefile( %{$parms} );
sub MY::postamble { ## no critic ( Subroutines::ProhibitQualifiedSubDeclarations )
return <<'MAKE_FRAG';
.PHONY: tags critic
tags:
ctags -f tags --recurse --totals \
--exclude=blib --exclude=t/lib \
--exclude=.svn --exclude='*~' \
--languages=C,Perl --langmap=Perl:+.t \
.
critic:
perlcritic -1 \
-profile perlcriticrc \
.
MAKE_FRAG
}
HTML-Tidy5-1.06/Tidy5.xs 000644 000766 000024 00000012273 13555115664 014671 0 ustar 00andy staff 000000 000000 #include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include
#include
#include
#include
static void
_load_config_hash(TidyDoc tdoc, HV *tidy_options)
{
HE *entry;
(void) hv_iterinit(tidy_options);
while ( (entry = hv_iternext(tidy_options)) != NULL ) {
I32 key_len;
const char * const key = hv_iterkey(entry,&key_len);
const TidyOption opt = tidyGetOptionByName(tdoc,key);
if (!opt) {
warn( "HTML::Tidy: Unrecognized option: \"%s\"\n",key );
}
else {
const TidyOptionId id = tidyOptGetId(opt);
SV * const sv_data = hv_iterval(tidy_options,entry);
STRLEN data_len;
const char * const data = SvPV(sv_data,data_len);
if ( ! tidyOptSetValue(tdoc,id,data) ) {
warn( "HTML::Tidy: Can't set option: \"%s\" to \"%s\"\n", key, data );
}
}
}
}
MODULE = HTML::Tidy5 PACKAGE = HTML::Tidy5
PROTOTYPES: ENABLE
void
_tidy_messages(input, configfile, tidy_options)
INPUT:
const char *input
const char *configfile
HV *tidy_options
PREINIT:
TidyBuffer errbuf = {0};
TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
const char* newline;
int rc = 0;
PPCODE:
tidyBufInit(&errbuf);
rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );
if ( (rc >= 0 ) && configfile && *configfile ) {
rc = tidyLoadConfig( tdoc, configfile );
}
if ( rc >= 0 ) {
_load_config_hash(tdoc,tidy_options);
}
if ( rc >= 0 ) {
/* Capture diagnostics */
rc = tidySetErrorBuffer( tdoc, &errbuf );
}
if ( rc >= 0 ) {
/* Parse the input */
rc = tidyParseString( tdoc, input );
}
if ( rc >= 0 && errbuf.bp) {
XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );
/* TODO: Make this a function */
switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
case TidyLF:
newline = "\n";
break;
case TidyCR:
newline = "\r";
break;
default:
newline = "\r\n";
break;
}
XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
}
else {
rc = -1;
}
if ( errbuf.bp )
tidyBufFree( &errbuf );
tidyRelease( tdoc );
if ( rc < 0 ) {
XSRETURN_UNDEF;
}
void
_tidy_clean(input, configfile, tidy_options)
INPUT:
const char *input
const char *configfile
HV *tidy_options
PREINIT:
TidyBuffer errbuf = {0};
TidyBuffer output = {0};
TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
const char* newline;
int rc = 0;
PPCODE:
tidyBufInit(&output);
tidyBufInit(&errbuf);
/* Set our default first. */
/* Don't word-wrap */
rc = ( tidyOptSetInt( tdoc, TidyWrapLen, 0 ) ? rc : -1 );
if ( (rc >= 0 ) && configfile && *configfile ) {
rc = tidyLoadConfig( tdoc, configfile );
}
/* XXX I think this cascade is a bug waiting to happen */
if ( rc >= 0 ) {
rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );
}
if ( rc >= 0 ) {
_load_config_hash( tdoc, tidy_options );
}
if ( rc >= 0 ) {
rc = tidySetErrorBuffer( tdoc, &errbuf ); /* Capture diagnostics */
}
if ( rc >= 0 ) {
rc = tidyParseString( tdoc, input ); /* Parse the input */
}
if ( rc >= 0 ) {
rc = tidyCleanAndRepair(tdoc);
}
if ( rc > 1 ) {
rc = ( tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1 );
}
if ( rc >= 0) {
rc = tidySaveBuffer( tdoc, &output );
}
if ( rc >= 0) {
rc = tidyRunDiagnostics( tdoc );
}
if ( rc >= 0 && output.bp && errbuf.bp ) {
XPUSHs( sv_2mortal(newSVpvn((char *)output.bp, output.size)) );
XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );
/* TODO: Hoist this into a function */
switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
case TidyLF:
newline = "\n";
break;
case TidyCR:
newline = "\r";
break;
default:
newline = "\r\n";
break;
}
XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
}
else {
rc = -1;
}
tidyBufFree( &output );
tidyBufFree( &errbuf );
tidyRelease( tdoc );
if ( rc < 0 ) {
XSRETURN_UNDEF;
}
SV*
_tidy_library_version()
PREINIT:
const char* version;
CODE:
version = tidyLibraryVersion();
RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
OUTPUT:
RETVAL
HTML-Tidy5-1.06/META.json 000644 000766 000024 00000003030 13555120030 014706 0 ustar 00andy staff 000000 000000 {
"abstract" : "HTML validation in a Perl object",
"author" : [
"Andy Lester "
],
"dynamic_config" : 1,
"generated_by" : "ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010",
"license" : [
"artistic_2"
],
"meta-spec" : {
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : 2
},
"name" : "HTML-Tidy5",
"no_index" : {
"directory" : [
"t",
"inc"
]
},
"prereqs" : {
"build" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"configure" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"runtime" : {
"requires" : {
"Carp" : "0",
"Encode" : "0",
"Exporter" : "0",
"Getopt::Long" : "0",
"Test::Builder" : "0",
"Test::Exception" : "0",
"Test::More" : "0.98",
"constant" : "0",
"perl" : "5.010001"
}
}
},
"release_status" : "stable",
"resources" : {
"bugtracker" : {
"web" : "http://github.com/petdance/html-tidy5/issues"
},
"homepage" : "http://github.com/petdance/html-tidy5",
"license" : [
"http://www.opensource.org/licenses/artistic-license-2.0.php"
],
"repository" : {
"url" : "http://github.com/petdance/html-tidy5"
}
},
"version" : "1.06",
"x_serialization_backend" : "JSON::PP version 4.02"
}
HTML-Tidy5-1.06/lib/Test/ 000755 000766 000024 00000000000 13555120030 014756 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/lib/HTML/ 000755 000766 000024 00000000000 13555120030 014603 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/lib/HTML/Tidy5.pm 000644 000766 000024 00000025165 13555117753 016172 0 ustar 00andy staff 000000 000000 package HTML::Tidy5;
use 5.010001;
use strict;
use warnings;
use Carp ();
use HTML::Tidy5::Message;
=head1 NAME
HTML::Tidy5 - HTML validation in a Perl object
=head1 VERSION
Version 1.06
=cut
our $VERSION = '1.06';
=head1 SYNOPSIS
use HTML::Tidy5;
my $tidy = HTML::Tidy5->new( {config_file => 'path/to/config'} );
$tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO );
$tidy->parse( "foo.html", $contents_of_foo );
for my $message ( $tidy->messages ) {
print $message->as_string;
}
=head1 DESCRIPTION
C is an HTML checker in a handy dandy object. It's meant
as a replacement for L. If you're currently
an L user looking to migrate, see the section
L.
=head1 EXPORTS
Message types C, C and C.
Everything else is an object method.
=cut
use base 'Exporter';
use constant TIDY_ERROR => 3;
use constant TIDY_WARNING => 2;
use constant TIDY_INFO => 1;
our @EXPORT = qw( TIDY_ERROR TIDY_WARNING TIDY_INFO );
=head1 METHODS
=head2 new()
Create an HTML::Tidy5 object.
my $tidy = HTML::Tidy5->new();
Optionally you can give a hashref of configuration parms.
my $tidy = HTML::Tidy5->new( {config_file => 'path/to/tidy.cfg'} );
This configuration file will be read and used when you clean or parse an HTML file.
You can also pass options directly to tidy.
my $tidy = HTML::Tidy5->new( {
output_xhtml => 1,
tidy_mark => 0,
} );
See C for the list of options supported by tidy.
The following options are not supported by C:
=over 4
=item * quiet
=back
=cut
sub new {
my $class = shift;
my $args = shift || {};
my @unsupported_options = qw(
force-output
gnu-emacs-file
gnu-emacs
keep-time
quiet
slide-style
write-back
); # REVIEW perhaps a list of supported options would be better
my $self = bless {
messages => [],
ignore_type => [],
ignore_text => [],
config_file => '',
tidy_options => {},
}, $class;
for my $key (keys %{$args} ) {
if ($key eq 'config_file') {
$self->{config_file} = $args->{$key};
next;
}
my $newkey = $key;
$newkey =~ tr/_/-/;
if ( grep {$newkey eq $_} @unsupported_options ) {
Carp::croak( "Unsupported option: $newkey" );
}
$self->{tidy_options}->{$newkey} = $args->{$key};
}
return $self;
}
=head2 messages()
Returns the messages accumulated.
=cut
sub messages {
my $self = shift;
return @{$self->{messages}};
}
=head2 clear_messages()
Clears the list of messages, in case you want to print and clear, print
and clear. If you don't clear the messages, then each time you call
L you'll be accumulating more in the list.
=cut
sub clear_messages {
my $self = shift;
$self->{messages} = [];
return;
}
=head2 ignore( parm => value [, parm => value ] )
Specify types of messages to ignore. Note that the ignore flags must be
set B calling C. You can call C as many times
as necessary to set up all your restrictions; the options will stack up.
=over 4
=item * type => TIDY_INFO|TIDY_WARNING|TIDY_ERROR
Specifies the type of messages you want to ignore, either info or warnings
or errors. If you wanted, you could call ignore on all three and get
no messages at all.
$tidy->ignore( type => TIDY_WARNING );
=item * text => qr/regex/
=item * text => [ qr/regex1/, qr/regex2/, ... ]
Checks the text of the message against the specified regex or regexes,
and ignores the message if there's a match. The value for the I
parm may be either a regex, or a reference to a list of regexes.
$tidy->ignore( text => qr/DOCTYPE/ );
$tidy->ignore( text => [ qr/unsupported/, qr/proprietary/i ] );
=back
=cut
sub ignore {
my $self = shift;
my @parms = @_;
while ( @parms ) {
my $parm = shift @parms;
my $value = shift @parms;
my @values = ref($value) eq 'ARRAY' ? @{$value} : ($value);
Carp::croak( qq{Invalid ignore type of "$parm"} )
unless ($parm eq 'text') or ($parm eq 'type');
push( @{$self->{"ignore_$parm"}}, @values );
} # while
return;
} # ignore
=head2 parse( $filename, $str [, $str...] )
Parses a string, or list of strings, that make up a single HTML file.
The I<$filename> parm is only used as an identifier for your use.
The file is not actually read and opened.
Returns true if all went OK, or false if there was some problem calling
tidy, or parsing tidy's output.
=cut
sub parse {
my $self = shift;
my $filename = shift;
if (@_ == 0) {
Carp::croak('Usage: parse($filename,$str [, $str...])');
}
my $html = join( '', @_ );
utf8::encode($html) if utf8::is_utf8($html);
my ($errorblock,$newline) = _tidy_messages( $html, $self->{config_file}, $self->{tidy_options} );
return 1 unless defined $errorblock;
utf8::decode($errorblock);
return !$self->_parse_errors($filename, $errorblock, $newline);
}
sub _parse_errors {
my $self = shift;
my $filename = shift;
my $errs = shift;
my $newline = shift;
my $parse_errors;
my @lines = split( /$newline/, $errs );
for my $line ( @lines ) {
chomp $line;
my $message;
if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error|Info): (.+)$/ ) { ## no critic ( ControlStructures::ProhibitCascadingIfElse )
my ($line, $col, $type, $text) = ($1, $2, $3, $4);
$type =
($type eq 'Warning') ? TIDY_WARNING :
($type eq 'Info') ? TIDY_INFO :
TIDY_ERROR;
$message = HTML::Tidy5::Message->new( $filename, $type, $line, $col, $text );
}
elsif ( $line =~ m/^Info: (.+)$/ ) {
# Info line we don't want
my $text = $1;
$message = HTML::Tidy5::Message->new( $filename, TIDY_INFO, undef, undef, $text );
}
elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) {
# Summary line we don't want
# We should take these counts from the summary and make sure they match what we parsed.
}
elsif ( $line eq 'No warnings or errors were found.' ) {
# Summary line we don't want
}
elsif ( $line eq 'This document has errors that must be fixed before' ) {
# Summary line we don't want
}
elsif ( $line eq 'using HTML Tidy to generate a tidied up version.' ) {
# Summary line we don't want
}
elsif ( $line =~ m/^\s*$/ ) {
# Blank line we don't want
}
else {
Carp::carp "HTML::Tidy5: Unknown error type: $line";
++$parse_errors;
}
push( @{$self->{messages}}, $message )
if $message && $self->_is_keeper( $message );
} # for
return $parse_errors;
}
=head2 clean( $str [, $str...] )
Cleans a string, or list of strings, that make up a single HTML file.
Returns the cleaned string as a single string.
=cut
sub clean {
my $self = shift;
if (@_ == 0) {
Carp::croak('Usage: clean($str [, $str...])');
}
my $text = join( '', @_ );
utf8::encode($text) if utf8::is_utf8($text);
if ( defined $text ) {
$text .= "\n";
}
my ($cleaned, $errbuf, $newline) = _tidy_clean( $text,
$self->{config_file},
$self->{tidy_options});
utf8::decode($cleaned);
utf8::decode($errbuf);
$self->_parse_errors('', $errbuf, $newline);
return $cleaned;
}
# Tells whether a given message object is one that we should keep.
sub _is_keeper {
my $self = shift;
my $message = shift;
my @ignore_types = @{$self->{ignore_type}};
if ( @ignore_types ) {
return 0 if grep { $message->type == $_ } @ignore_types;
}
my @ignore_texts = @{$self->{ignore_text}};
if ( @ignore_texts ) {
return 0 if grep { $message->text =~ $_ } @ignore_texts;
}
return 1;
}
=head2 tidy_library_version()
Returns the version of the underling tidy library.
=cut
sub tidy_library_version {
my $version_str = _tidy_library_version();
return $version_str;
}
require XSLoader;
XSLoader::load('HTML::Tidy5', $VERSION);
1;
__END__
=head1 INSTALLING TIDY
C requires that the C be installed on your system.
You can probably obtain html-tidy through your distribution's package
manager (make sure you install the development package with headers). You
can also check the html-tidy home page is L.
=head1 CONVERTING FROM C
C is different from C in a number of crucial ways.
=over 4
=item * It's not pure Perl
C is mostly a happy wrapper around the html-tidy library.
=item * The real work is done by someone else
Changes to tidy may come down the pipe that I don't have control over.
That's the price we pay for having it do a darn good job.
=item * It's no longer bundled with its C counterpart
L came bundled with C, but
L is a separate distribution. This saves the people
who don't want the C framework from pulling it in, and all its
prerequisite modules.
=back
=head1 BUGS & FEEDBACK
Please report any bugs or feature requests at the issue tracker on github
L. I will be notified,
and then you'll automatically be notified of progress on your bug as I
make changes.
Please do NOT use L.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc HTML::Tidy5
You can also look for information at:
=over 4
=item * HTML::Tidy5's issue queue at github
L
=item * CPAN Ratings
L
=item * search.cpan.org
L
=item * Git source code repository
L
=back
=head1 ACKNOWLEDGEMENTS
Thanks to
Rufus Cable,
Jonathan Rockway,
and Robert Bachmann for contributions.
=head1 AUTHOR
Andy Lester, C<< >>
=head1 COPYRIGHT & LICENSE
Copyright (C) 2005-2018 by Andy Lester
This library is free software. You may modify or distribute it under
the Artistic License v2.0.
=cut
HTML-Tidy5-1.06/lib/HTML/Tidy5/ 000755 000766 000024 00000000000 13555120030 015601 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/lib/HTML/Tidy5/Message.pm 000644 000766 000024 00000004671 13555115664 017554 0 ustar 00andy staff 000000 000000 package HTML::Tidy5::Message;
use 5.010001;
use warnings;
use strict;
=head1 NAME
HTML::Tidy5::Message - Message object for the Tidy functionality
=head1 EXPORTS
None. It's all object-based.
=head1 METHODS
Almost everything is an accessor.
=head2 new( $file, $line, $column, $text )
Create an object. It's not very exciting.
C<$file> can be C or an empty string, in which case it will not appear in messages.
=cut
sub new {
my $class = shift;
my $file = shift;
my $type = shift;
my $line = shift || 0;
my $column = shift || 0;
my $text = shift;
# Add an element that says what tag caused the error (B, TR, etc)
# so that we can match 'em up down the road.
my $self = {
_file => $file,
_type => $type,
_line => $line,
_column => $column,
_text => $text,
};
bless $self, $class;
return $self;
}
=head2 as_string()
Returns a nicely-formatted string for printing out to stdout or some similar user thing.
=cut
sub as_string {
my $self = shift;
my %strings = (
1 => 'Info',
2 => 'Warning',
3 => 'Error',
);
my $msg = $strings{$self->type} . ': ' . $self->text;
if ( $self->line && $self->column ) {
$msg = sprintf( '(%d:%d) %s', $self->line, $self->column, $msg );
}
my $file = $self->file // '';
if ( $file ne '' ) {
$msg = "$file $msg";
}
return $msg;
}
=head2 file()
Returns the filename of the error, as set by the caller.
=head2 type()
Returns the type of the error. This will either be C,
or C.
=head2 line()
Returns the line number of the error, or 0 if there isn't an applicable
line number.
=head2 column()
Returns the column number, or 0 if there isn't an applicable column
number.
=head2 text()
Returns the text of the message. This does not include a type string,
like "Info: ".
=cut
sub file { my $self = shift; return $self->{_file} }
sub type { my $self = shift; return $self->{_type} }
sub line { my $self = shift; return $self->{_line} }
sub column { my $self = shift; return $self->{_column} }
sub text { my $self = shift; return $self->{_text} }
=head1 COPYRIGHT & LICENSE
Copyright 2005-2018 Andy Lester.
This program is free software; you can redistribute it and/or modify
it under the terms of the Artistic License v2.0.
=head1 AUTHOR
Andy Lester, C<< >>
=cut
1; # happy
HTML-Tidy5-1.06/lib/Test/HTML/ 000755 000766 000024 00000000000 13555120030 015522 5 ustar 00andy staff 000000 000000 HTML-Tidy5-1.06/lib/Test/HTML/Tidy5.pm 000644 000766 000024 00000007777 13555117761 017121 0 ustar 00andy staff 000000 000000 package Test::HTML::Tidy5;
use 5.010001;
use warnings;
use strict;
use Test::Builder;
use Exporter;
use HTML::Tidy5;
use parent 'Exporter';
our @EXPORT_OK = qw(
html_tidy_ok
html_fragment_tidy_ok
);
our @EXPORT = @EXPORT_OK;
=head1 NAME
Test::HTML::Tidy5 - Test::More-style wrapper around HTML::Tidy5
=head1 VERSION
Version 1.06
=cut
our $VERSION = '1.06';
my $TB = Test::Builder->new;
=head1 SYNOPSIS
use Test::HTML::Tidy5 tests => 4;
my $table = build_display_table();
html_tidy_ok( $table, 'Built display table properly' );
=head1 DESCRIPTION
This module provides a few convenience methods for testing exception
based code. It is built with L and plays happily with
L and friends.
If you are not already familiar with L now would be the time
to go take a look.
=head1 EXPORT
C
=cut
sub import {
my $self = shift;
my $pack = caller;
$TB->exported_to($pack);
$TB->plan(@_);
$self->export_to_level(1, $self, @EXPORT);
return;
}
=head2 html_tidy_ok( [$tidy, ] $html, $name )
Checks to see if C<$html> is a valid HTML document.
If you pass an HTML::Tidy5 object, C will use that for its
settings.
my $tidy = HTML::Tidy5->new( {config_file => 'path/to/config'} );
$tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO );
html_tidy_ok( $tidy, $content, "Web page is OK, ignoring warnings and info' );
Otherwise, it will use the default rules.
html_tidy_ok( $content, "Web page passes ALL tests" );
=cut
sub html_tidy_ok {
my $tidy = (ref($_[0]) eq 'HTML::Tidy5') ? shift : HTML::Tidy5->new;
my $html = shift;
my $name = shift;
my $ok = defined $html;
if ( !$ok ) {
$TB->ok( 0, $name );
$TB->diag( 'Error: html_tidy_ok() got undef' );
}
else {
$ok = _parse_and_complain( $tidy, $html, $name, 0 );
}
return $ok;
}
=head2 html_fragment_tidy_ok( [$tidy, ] $html, $name )
Works the same as C, but first wraps it up an HTML document.
This is useful for when want to validate self-contained snippets of HTML,
such as from templates or an HTML feed from a third party, and check
that it is valid.
=cut
sub html_fragment_tidy_ok {
my $tidy = (ref($_[0]) eq 'HTML::Tidy5') ? shift : HTML::Tidy5->new;
my $html = shift;
my $name = shift;
my $ok = defined $html;
if ( !$ok ) {
$TB->ok( 0, $name );
$TB->diag( 'Error: html_fragment_tidy_ok() got undef' );
}
else {
$html = <<"HTML";
$html
HTML
$ok = _parse_and_complain( $tidy, $html, $name, 6 );
}
return $ok;
}
sub _parse_and_complain {
local $Test::Builder::Level = $Test::Builder::Level + 1;
my $tidy = shift;
my $html = shift;
my $name = shift;
my $offset = shift;
$tidy->clear_messages();
$tidy->parse( undef, $html );
my @messages = $tidy->messages;
my $nmessages = @messages;
my $ok = !$nmessages;
$TB->ok( $ok, $name );
if ( !$ok ) {
if ( $offset ) {
$_->{_line} -= $offset for @messages;
}
my $msg = 'Errors:';
$msg .= " $name" if $name;
$TB->diag( $msg );
$TB->diag( $_->as_string ) for @messages;
my $s = $nmessages == 1 ? '' : 's';
$TB->diag( "$nmessages message$s on the page" );
}
return $ok;
}
=head1 BUGS
All bugs and requests are now being handled through GitHub.
https://github.com/petdance/html-lint/issues
DO NOT send bug reports to http://rt.cpan.org/.
=head1 COPYRIGHT & LICENSE
Copyright 2005-2018 Andy Lester.
This program is free software; you can redistribute it and/or modify
it under the terms of the Artistic License v2.0.
http://www.opensource.org/licenses/Artistic-2.0
Please note that these modules are not products of or supported by the
employers of the various contributors to the code.
=head1 AUTHOR
Andy Lester, C
=cut
1;
HTML-Tidy5-1.06/xt/pod.t 000644 000766 000024 00000000155 13555115664 014715 0 ustar 00andy staff 000000 000000 #!perl -T
use warnings;
use strict;
use 5.010001;
use Test::More;
use Test::Pod 1.14;
all_pod_files_ok();
HTML-Tidy5-1.06/xt/pod-coverage.t 000644 000766 000024 00000000203 13555115664 016500 0 ustar 00andy staff 000000 000000 #!perl -T
use 5.010001;
use strict;
use warnings;
use Test::More;
use Test::Pod::Coverage 1.04;
all_pod_coverage_ok();
exit 0;
HTML-Tidy5-1.06/t/venus.cfg 000644 000766 000024 00000000761 13555115664 015402 0 ustar 00andy staff 000000 000000 // HTML Tidy configuration file
bare: yes
//
// This actually caused a segmentation fault in a MSHTML created doc
//
//clean: yes
drop-proprietary-attributes: yes
drop-empty-paras: yes
break-before-br: yes
word-2000: yes
//tidy-mark: yes
tidy-mark: no
//add-xml-space: yes
output-xml: yes
enclose-text: yes
enclose-block-text: yes
char-encoding: utf8
force-output: yes
wrap: 0
indent: yes
quiet: yes
//add-xml-decl: yes
//gnu-emacs: yes
// make sure we are using "\n", even on Win32
newline: LF
HTML-Tidy5-1.06/t/ignore.t 000644 000766 000024 00000010772 13555115664 015234 0 ustar 00andy staff 000000 000000 #!perl -T
use 5.010001;
use strict;
use warnings;
use Test::More tests => 9;
use HTML::Tidy5;
my $html = do { local $/ = undef; };
my @expected_warnings = split /\n/, <<'HERE';
- (1:1) Warning: missing declaration
- (23:1) Warning: discarding unexpected
- (24:XX) Info: value for attribute "height" missing quote marks
- (24:XX) Info: value for attribute "width" missing quote marks
- (24:XX) Info: value for attribute "align" missing quote marks
HERE
chomp @expected_warnings;
my @expected_errors = split /\n/, <<'HERE';
- (23:1) Error: is not recognized!
HERE
chomp @expected_errors;
WARNINGS_ONLY: {
my $tidy = HTML::Tidy5->new;
isa_ok( $tidy, 'HTML::Tidy5' );
$tidy->ignore( type => TIDY_ERROR );
my $rc = $tidy->parse( '-', $html );
ok( $rc, 'Parsed OK' );
my @returned = map { $_->as_string } $tidy->messages;
s/[\r\n]+\z// for @returned;
munge_returned( \@returned );
is_deeply( \@returned, \@expected_warnings, 'Matching warnings' );
}
ERRORS_ONLY: {
my $tidy = HTML::Tidy5->new;
isa_ok( $tidy, 'HTML::Tidy5' );
$tidy->ignore( type => TIDY_WARNING );
$tidy->ignore( type => TIDY_INFO );
my $rc = $tidy->parse( '-', $html );
ok( $rc, 'Parsed OK' );
my @returned = map { $_->as_string } $tidy->messages;
s/[\r\n]+\z// for @returned;
is_deeply( \@returned, \@expected_errors, 'Matching errors' );
}
DIES_ON_ERROR: {
my $tidy = HTML::Tidy5->new;
isa_ok( $tidy, 'HTML::Tidy5' );
my $rc = eval { $tidy->ignore( blongo => TIDY_WARNING ) };
ok( !$rc, 'eval should fail' );
like( $@, qr/^Invalid ignore type.+blongo/, 'Throws an error' );
}
sub munge_returned {
# non-1 line numbers are not reliable across libtidies
my $returned = shift;
my $start_line = shift || '-';
for my $line ( @{$returned} ) {
next if $line =~ /$start_line \(\d+:1\)/;
$line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/;
}
return;
}
__DATA__
petdance.com: Andy Lester's Programming & Writing