* Users of this class are encouraged to use a {@link TreeSet} with the {@link String#CASE_INSENSITIVE_ORDER}
* comparator if case-insensitive comparison is needed (like when dealing with HTML tags).
*/
public static class SetEmptyElementHandler
implements EmptyElementHandler
{
final protected Set
* Note that element name comparison is case-insensitive as required
* by HTML specification.
*/
public static class HtmlEmptyElementHandler
extends SetEmptyElementHandler
{
private final static HtmlEmptyElementHandler sInstance = new HtmlEmptyElementHandler();
public static HtmlEmptyElementHandler getInstance() { return sInstance; }
protected HtmlEmptyElementHandler()
{
super(new TreeSet
* Note about exceptions: choice of only allowing throwing of
* {@link IOException}s is due to the way Woodstox stream writer
* backend works;
* In addition to its main task as a configuration container, this class
* also acts as a wrapper around simple buffer recycling functionality.
* The reason is that while conceptually this is a separate concern,
* there are enough commonalities with the life-cycle of this object to
* make this a very convenient place to add that functionality...
* (that is: conceptually this is not right, but from pragmatic viewpoint
* it just makes sense)
*/
public final class ReaderConfig
extends CommonConfig
implements InputConfigFlags
{
// Default limit values
public final static int DEFAULT_MAX_ATTRIBUTES_PER_ELEMENT = 1000;
public final static int DEFAULT_MAX_ATTRIBUTE_LENGTH = 65536 * 8;
public final static int DEFAULT_MAX_ELEMENT_DEPTH = 1000;
public final static int DEFAULT_MAX_ENTITY_DEPTH = 500;
public final static int DEFAULT_MAX_ENTITY_COUNT = 100 * 1000;
/*
///////////////////////////////////////////////////////////////////////
// Constants for reader properties:
///////////////////////////////////////////////////////////////////////
*/
// // First, standard StAX properties:
// Simple flags:
final static int PROP_COALESCE_TEXT = 1;
final static int PROP_NAMESPACE_AWARE = 2;
final static int PROP_REPLACE_ENTITY_REFS = 3;
final static int PROP_SUPPORT_EXTERNAL_ENTITIES = 4;
final static int PROP_VALIDATE_AGAINST_DTD = 5;
final static int PROP_SUPPORT_DTD = 6;
// Object type properties
public final static int PROP_EVENT_ALLOCATOR = 7;
final static int PROP_WARNING_REPORTER = 8;
final static int PROP_XML_RESOLVER = 9;
// // Then StAX2 standard properties:
// Simple flags:
final static int PROP_INTERN_NS_URIS = 20;
final static int PROP_INTERN_NAMES = 21;
final static int PROP_REPORT_CDATA = 22;
final static int PROP_REPORT_PROLOG_WS = 23;
final static int PROP_PRESERVE_LOCATION = 24;
final static int PROP_AUTO_CLOSE_INPUT = 25;
// Enum / Object type properties:
final static int PROP_SUPPORT_XMLID = 26; // shared with WriterConfig
final static int PROP_DTD_OVERRIDE = 27;
// // // Constants for additional Wstx properties:
// Simple flags:
/**
* Note: this entry was deprecated for 4.0 versions up until
* and including 4.0.7; was brought back for 4.0.8 (and will
* be retained for 4.1)
*/
final static int PROP_NORMALIZE_LFS = 40;
/* This entry was deprecated for 3.2 and removed in 4.0
* version. There are no plans to bring it back.
*/
//final static int PROP_NORMALIZE_ATTR_VALUES = 41;
final static int PROP_CACHE_DTDS = 42;
final static int PROP_CACHE_DTDS_BY_PUBLIC_ID = 43;
final static int PROP_LAZY_PARSING = 44;
final static int PROP_SUPPORT_DTDPP = 45;
final static int PROP_TREAT_CHAR_REFS_AS_ENTS = 46;
// Object type properties:
final static int PROP_INPUT_BUFFER_LENGTH = 50;
//final static int PROP_TEXT_BUFFER_LENGTH = 51;
final static int PROP_MIN_TEXT_SEGMENT = 52;
final static int PROP_CUSTOM_INTERNAL_ENTITIES = 53;
final static int PROP_DTD_RESOLVER = 54;
final static int PROP_ENTITY_RESOLVER = 55;
final static int PROP_UNDECLARED_ENTITY_RESOLVER = 56;
final static int PROP_BASE_URL = 57;
final static int PROP_INPUT_PARSING_MODE = 58;
// Size limitation to prevent various DOS attacks
final static int PROP_MAX_ATTRIBUTES_PER_ELEMENT = 60;
final static int PROP_MAX_CHILDREN_PER_ELEMENT = 61;
final static int PROP_MAX_ELEMENT_COUNT = 62;
final static int PROP_MAX_ELEMENT_DEPTH = 63;
final static int PROP_MAX_CHARACTERS = 64;
final static int PROP_MAX_ATTRIBUTE_SIZE = 65;
final static int PROP_MAX_TEXT_LENGTH = 66;
final static int PROP_MAX_ENTITY_COUNT = 67;
final static int PROP_MAX_ENTITY_DEPTH = 68;
/*
////////////////////////////////////////////////
// Limits for numeric properties
////////////////////////////////////////////////
*/
/**
* Need to set a minimum size, since there are some limitations to
* smallest consecutive block that can be used.
*/
final static int MIN_INPUT_BUFFER_LENGTH = 8; // 16 bytes
/**
* Let's allow caching of just a dozen DTDs... shouldn't really
* matter, how many DTDs does one really use?
*/
final static int DTD_CACHE_SIZE_J2SE = 12;
final static int DTD_CACHE_SIZE_J2ME = 5;
/*
///////////////////////////////////////////////////////////////////////
// Default values for custom properties:
///////////////////////////////////////////////////////////////////////
*/
/**
* By default, let's require minimum of 64 chars to be delivered
* as shortest partial (piece of) text (CDATA, text) segment;
* same for both J2ME subset and full readers. Prevents tiniest
* runts from getting passed
*/
final static int DEFAULT_SHORTEST_TEXT_SEGMENT = 64;
/**
* Default config flags are converted from individual settings,
* to conform to StAX 1.0 specifications.
*/
final static int DEFAULT_FLAGS_FULL =
0
// First, default settings StAX specs dictate:
| CFG_NAMESPACE_AWARE
// Coalescing to be disabled
//| CFG_COALESCE_TEXT
| CFG_REPLACE_ENTITY_REFS
| CFG_SUPPORT_EXTERNAL_ENTITIES
| CFG_SUPPORT_DTD
// and then custom setting defaults:
// and namespace URI interning
| CFG_INTERN_NAMES
| CFG_INTERN_NS_URIS
// we will also accurately report CDATA, by default
| CFG_REPORT_CDATA
/* 20-Jan-2006, TSa: As per discussions on stax-builders list
* (and input from xml experts), 4.0 will revert to "do not
* report SPACE events outside root element by default"
* settings. Conceptually this is what xml specification
* implies should be done: there is no content outside of
* the element tree, including any ignorable content, just
* processing instructions and comments.
*/
//| CFG_REPORT_PROLOG_WS
/* but enable DTD caching (if they are handled):
* (... maybe J2ME subset shouldn't do it?)
*/
| CFG_CACHE_DTDS
/* 29-Mar-2006, TSa: But note, no caching by public-id, due
* to problems with cases where public-id/system-id were
* inconsistently used, leading to problems.
*/
/* by default, let's also allow lazy parsing, since it tends
* to improve performance
*/
| CFG_LAZY_PARSING
/* and also make Event objects preserve location info...
* can be turned off for maximum performance
*/
| CFG_PRESERVE_LOCATION
// As per Stax 1.0 specs, we can not enable this by default:
//| CFG_AUTO_CLOSE_INPUT);
/* Also, let's enable dtd++ support (shouldn't hurt with non-dtd++
* dtds)
*/
| CFG_SUPPORT_DTDPP
/*
* Set this as a default, as this is required in xml;
*/
| CFG_NORMALIZE_LFS
/* Regarding Xml:id, let's enable typing by default, but not
* uniqueness validity checks: the latter will be taken care of
* by DTD validation if enabled, otherwise they need to be explicitly
* enabled
*/
| CFG_XMLID_TYPING
// | CFG_XMLID_UNIQ_CHECKS
;
/**
* For now defaults for J2ME flags can be identical to 'full' set;
* differences are in buffer sizes.
*/
final static int DEFAULT_FLAGS_J2ME = DEFAULT_FLAGS_FULL;
// // //
/**
* Map to use for converting from String property ids to ints
* described above; useful to allow use of switch later on.
*/
final static HashMap
* In addition to the standard settings, following Woodstox-specific
* settings are also done:
*
* Notes: Does NOT change 'performance' settings (buffer sizes,
* DTD caching, coalescing, interning, accurate location info).
*/
public void configureForXmlConformance()
{
/* Every setter below is invoked with 'true'. None of the calls made
 * here touch the 'performance' settings (buffer sizes, DTD caching,
 * coalescing, interning, location info).
 */
// // StAX 1.0 settings: namespaces, DTDs, external entities and
// // entity reference replacement
doSupportNamespaces(true);
doSupportDTDs(true);
doSupportExternalEntities(true);
doReplaceEntityRefs(true);
// // Stax2 additional settings
// Better enable full xml:id checks: both typing and uniqueness checks
doXmlIdTyping(true);
doXmlIdUniqChecks(true);
// Woodstox-specific ones: (nothing is changed here at the moment)
}
/**
* Method to call to make Reader created be as "convenient" to use
* as possible; ie try to avoid having to deal with some of things
* like segmented text chunks. This may incur some slight performance
* penalties, but should not affect XML conformance.
* See {@link XMLInputFactory2#configureForConvenience} for
* required settings for standard StAX/StAX properties.
*
* In addition to the standard settings, following Woodstox-specific
* settings are also done:
*
* In addition to the standard settings, following Woodstox-specific
* settings are also done:
*
* See {@link XMLInputFactory2#configureForLowMemUsage} for
* required settings for standard StAX/StAX properties.
*
* In addition to the standard settings, following Woodstox-specific
* settings are also done:
*
* See {@link XMLInputFactory2#configureForLowMemUsage} for
* required settings for standard StAX/StAX properties.
*
* In addition to the standard settings, following Woodstox-specific
* settings are also done:
*
* TODO:
*
* - CHECK_CHAR_VALIDITY (separate for white spaces?)
* - CATALOG_RESOLVER? (or at least, ENABLE_CATALOGS)
*/
public final class WstxInputProperties
{
    /**
     * Attribute type to report when no DTD handling is done, and the
     * 'real' type of an attribute is therefore not known. CDATA seems
     * like the safe choice.
     */
    public static final String UNKNOWN_ATTR_TYPE = "CDATA";

    /*
    ///////////////////////////////////////////////////////////////////////
    // Simple on/off settings:
    ///////////////////////////////////////////////////////////////////////
     */

    // // // Normalization:

    /**
     * Feature that controls whether linefeeds are normalized into the
     * canonical linefeed, as mandated by the xml specification.
     *<p>
     * Note that disabling this property (it is enabled by default)
     * results in non-conforming XML processing. Doing so may be useful
     * for use cases where changes to input content should be minimized.
     *<p>
     * Note: this property was initially removed from Woodstox 4.0,
     * but was reintroduced in 4.0.8 due to user request.
     */
    public static final String P_NORMALIZE_LFS = "com.ctc.wstx.normalizeLFs";

    //public final static String P_NORMALIZE_ATTR_VALUES = "com.ctc.wstx.normalizeAttrValues";

    // // // XML character validation:

    /**
     * Whether readers will verify that characters in text content are
     * fully valid XML characters (not just Unicode). If true, validity is
     * checked (including white space); if false, no check is made.
     *<p>
     * Note that this property does NOT have an effect on all encoding
     * problems.
     *<p>
     * Turning this option off may improve parsing performance; leaving
     * it on guarantees compatibility with XML 1.0 specs regarding
     * character validity rules.
     */
    public static final String P_VALIDATE_TEXT_CHARS = "com.ctc.wstx.validateTextChars";

    // // // Caching:

    /**
     * Whether readers will try to cache parsed external DTD subsets or not.
     */
    public static final String P_CACHE_DTDS = "com.ctc.wstx.cacheDTDs";

    /**
     * Whether the reader is to cache DTDs (when caching is enabled) based
     * on public id or not: if not, system id will be primarily used.
     * Although theoretically public IDs should be unique, and should be
     * good caching keys, sometimes broken documents use 'wrong' public
     * IDs; as such, by default caching keys are based on system id only.
     */
    public static final String P_CACHE_DTDS_BY_PUBLIC_ID = "com.ctc.wstx.cacheDTDsByPublicId";

    // // // Enabling/disabling lazy/incomplete parsing

    /**
     * Whether stream readers are allowed to do lazy parsing, meaning
     * to parse a minimal part of the event when
     * {@link javax.xml.stream.XMLStreamReader#next} is called, and only
     * parse the rest as needed (or skip the remainder if no extra
     * information is needed). The alternative to lazy parsing is called
     * "eager parsing", and is what most xml parsers use by default.
     *<p>
     * Enabling lazy parsing can improve performance for tasks where
     * a number of textual events are skipped. The downside is that
     * not all well-formedness problems are reported when
     * {@link javax.xml.stream.XMLStreamReader#next} is called, but only
     * when the rest of the event is read or skipped.
     *<p>
     * Default value for Woodstox is such that lazy parsing is
     * enabled.
     *
     * @deprecated As of Woodstox 4.0 use
     *  {@link XMLInputFactory2#P_LAZY_PARSING} instead (from
     *  Stax2 extension API, v3.0)
     */
    @Deprecated
    public static final String P_LAZY_PARSING = XMLInputFactory2.P_LAZY_PARSING;

    // // // API behavior (for backwards compatibility)

    /**
     * Read-only property that indicates whether null is returned for the
     * default name space prefix; Boolean.TRUE indicates it is,
     * Boolean.FALSE that it is not.
     *<p>
     * Default value for 4.1 is 'false'; this will most likely change for
     * 5.0 since Stax API actually specifies null to be used.
     *
     * @since 4.1.2
     */
    public static final String P_RETURN_NULL_FOR_DEFAULT_NAMESPACE = "com.ctc.wstx.returnNullForDefaultNamespace";

    // // // Enabling/disabling support for dtd++

    /**
     * Whether the Reader will recognize DTD++ extensions when parsing
     * DTD subsets.
     *<p>
     * Note: not implemented by Woodstox.
     *
     * @deprecated Never implemented, let's phase this out (deprecated in 4.2)
     */
    @Deprecated
    public static final String P_SUPPORT_DTDPP = "com.ctc.wstx.supportDTDPP";

    /**
     * Whether the Reader will treat character references as entities
     * while parsing XML documents.
     */
    public static final String P_TREAT_CHAR_REFS_AS_ENTS = "com.ctc.wstx.treatCharRefsAsEnts";

    // // // Enabling alternate mode for parsing XML fragments instead
    // // // of full documents

    // Automatic W3C Schema support?
    /*
     * Whether W3C Schema hint attributes are recognized within document,
     * and used to locate Schema to use for validation.
     */
    //public final static String P_AUTOMATIC_W3C_SCHEMA = 0x00100000;

    /*
    ///////////////////////////////////////////////////////////////////////
    // More complex settings
    ///////////////////////////////////////////////////////////////////////
     */

    // // // Buffer sizes;

    /**
     * Size of the input buffer (in chars) to use for reading XML content
     * from input stream/reader.
     */
    public static final String P_INPUT_BUFFER_LENGTH = "com.ctc.wstx.inputBufferLength";

    // // // Constraints on sizes of text segments parsed:

    /**
     * Property to specify the shortest non-complete text segment (part of
     * a CDATA section or text content) that the parser is allowed to
     * return, if not required to coalesce text.
     */
    public static final String P_MIN_TEXT_SEGMENT = "com.ctc.wstx.minTextSegment";

    // // // Other size constraints (4.2+)

    /**
     * Maximum number of attributes allowed for a single XML element.
     * @since 4.2
     */
    public static final String P_MAX_ATTRIBUTES_PER_ELEMENT = "com.ctc.wstx.maxAttributesPerElement";

    /**
     * Maximum length of individual attribute values (in characters).
     * @since 4.2
     */
    public static final String P_MAX_ATTRIBUTE_SIZE = "com.ctc.wstx.maxAttributeSize";

    /**
     * Maximum number of child elements for any given element.
     * @since 4.2
     */
    public static final String P_MAX_CHILDREN_PER_ELEMENT = "com.ctc.wstx.maxChildrenPerElement";

    /**
     * Maximum number of all elements in a single document.
     * @since 4.2
     */
    public static final String P_MAX_ELEMENT_COUNT = "com.ctc.wstx.maxElementCount";

    /**
     * Maximum level of nesting of XML elements, starting with the root
     * element.
     * @since 4.2
     */
    public static final String P_MAX_ELEMENT_DEPTH = "com.ctc.wstx.maxElementDepth";

    /**
     * Maximum length of the input document, in characters.
     * @since 4.2
     */
    public static final String P_MAX_CHARACTERS = "com.ctc.wstx.maxCharacters";

    /**
     * Maximum length of individual text (cdata) segments in input, in
     * characters.
     * @since 4.2
     */
    public static final String P_MAX_TEXT_LENGTH = "com.ctc.wstx.maxTextLength";

    // and more size constraints (4.3+)

    /**
     * Maximum number of total (general parsed) entity expansions within
     * input.
     *
     * @since 4.3
     */
    public static final String P_MAX_ENTITY_COUNT = "com.ctc.wstx.maxEntityCount";

    /**
     * Maximum depth of nested (general parsed) entity expansions.
     *
     * @since 4.3
     */
    public static final String P_MAX_ENTITY_DEPTH = "com.ctc.wstx.maxEntityDepth";

    // // // Entity handling

    /**
     * Property of type {@link java.util.Map}, that defines an explicit set
     * of internal (generic) entities that will define or override any
     * entities defined in internal or external subsets; except for the 5
     * pre-defined entities (lt, gt, amp, apos, quot). Can be used to
     * explicitly define entities that would normally come from a DTD.
     *
     * @deprecated This feature may be removed from future versions of
     *   Woodstox, since the same functionality can be achieved by using
     *   custom entity resolvers.
     */
    @Deprecated
    public static final String P_CUSTOM_INTERNAL_ENTITIES = "com.ctc.wstx.customInternalEntities";

    /**
     * Property of type {@link XMLResolver}, that
     * will allow overriding of default DTD and external parameter entity
     * resolution.
     */
    public static final String P_DTD_RESOLVER = "com.ctc.wstx.dtdResolver";

    /**
     * Property of type {@link XMLResolver}, that
     * will allow overriding of default external general entity
     * resolution. Note that using this property overrides settings done
     * using {@link javax.xml.stream.XMLInputFactory#RESOLVER} (and vice
     * versa).
     */
    public static final String P_ENTITY_RESOLVER = "com.ctc.wstx.entityResolver";

    /**
     * Property of type {@link XMLResolver}, that
     * will allow graceful handling of references to undeclared (general)
     * entities.
     */
    public static final String P_UNDECLARED_ENTITY_RESOLVER = "com.ctc.wstx.undeclaredEntityResolver";

    /**
     * Property of type {@link java.net.URL}, that will allow specifying
     * the context URL to use when resolving relative references, for the
     * main-level entities (external DTD subset, references from the
     * internal DTD subset).
     */
    public static final String P_BASE_URL = "com.ctc.wstx.baseURL";

    // // // Alternate parsing modes

    /**
     * Three-valued property (one of
     * {@link #PARSING_MODE_DOCUMENT},
     * {@link #PARSING_MODE_FRAGMENT} or
     * {@link #PARSING_MODE_DOCUMENTS}) that can be used to handle
     * "non-standard" XML content. The default is the document mode.
     *<p>
     * The main difference from the API perspective is that in the first
     * two modes START_DOCUMENT and END_DOCUMENT are used as usual (as the
     * first and last events returned), whereas the multi-document mode can
     * return multiple pairs of these events: the statement about the first
     * event (the one the cursor points to when the reader is instantiated,
     * or the first one returned by the event reader) still holds, but
     * there may be intervening pairs that signal the boundary between two
     * adjacent enclosed documents.
     */
    public static final String P_INPUT_PARSING_MODE = "com.ctc.wstx.fragmentMode";

    // // // DTD defaulting, overriding

    /*
    ///////////////////////////////////////////////////////////////////////
    // Helper classes, values enumerations
    ///////////////////////////////////////////////////////////////////////
     */

    public static final ParsingMode PARSING_MODE_DOCUMENT = new ParsingMode();
    public static final ParsingMode PARSING_MODE_FRAGMENT = new ParsingMode();
    public static final ParsingMode PARSING_MODE_DOCUMENTS = new ParsingMode();

    /**
     * Inner class used for creating type-safe enumerations (prior to
     * JDK 1.5). Instances carry no state; the three constants above are
     * told apart by identity.
     */
    public static final class ParsingMode
    {
        // Not public: declared without an access modifier
        ParsingMode() {
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/api/WstxOutputProperties.java 0000664 0000000 0000000 00000017431 13257562550 0031104 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.api;
/**
 * Holder for the constants (property names and defaults) used to configure
 * cursor and event writers produced by the Wstx implementation of
 * {@link javax.xml.stream.XMLOutputFactory}.
 */
public final class WstxOutputProperties
{
    /**
     * Default xml version number to output, if none was specified by
     * the application. Version 1.0 is used to try to maximize
     * compatibility (some older parsers may barf on 1.1 and later...)
     */
    public static final String DEFAULT_XML_VERSION = "1.0";

    /**
     * If no encoding is passed, we should just default to what xml
     * in general expects (and can determine), UTF-8.
     *<p>
     * Note: you can check out bug entry [WSTX-18] for more details
     */
    public static final String DEFAULT_OUTPUT_ENCODING = "UTF-8";

    // // // Output options, simple on/off settings:

    /**
     * Whether the writer should use double quotes in the XML declaration.
     * The default is to use single quotes.
     *
     * @since 4.2.2
     */
    public static final String P_USE_DOUBLE_QUOTES_IN_XML_DECL = "com.ctc.wstx.useDoubleQuotesInXmlDecl";

    /**
     * Whether the writer should just automatically convert all calls that
     * would normally produce CDATA to produce (quoted) text.
     */
    public static final String P_OUTPUT_CDATA_AS_TEXT = "com.ctc.wstx.outputCDataAsText";

    /**
     * Whether the writer should copy attributes that were initially
     * expanded using default settings ("implicit" attributes) or not.
     */
    public static final String P_COPY_DEFAULT_ATTRS = "com.ctc.wstx.copyDefaultAttrs";

    /**
     * Whether the writer is to add a single white space before the
     * closing "/>" of an empty element or not. It is sometimes useful to
     * add, to increase compatibility with HTML browsers, or to increase
     * readability.
     *<p>
     * The default value is 'false', up to Woodstox 4.x.
     *<p>
     * NOTE: JavaDocs for versions 4.0.0 - 4.0.7 incorrectly state that
     * default is 'true': this is NOT the case.
     *<p>
     * Note: added to resolve Jira entry WSTX-125.
     */
    public static final String P_ADD_SPACE_AFTER_EMPTY_ELEM = "com.ctc.wstx.addSpaceAfterEmptyElem";

    /**
     * Whether the stream writer is to automatically add end elements that
     * are needed to properly close the output tree, when the stream is
     * closed (by closing it explicitly, or by writing an end-document
     * event).
     *<p>
     * The default value is 'true' as of Woodstox 4.x.
     * Prior to 4.0, this feature was always enabled and there was no
     * way to disable it.
     *
     * @since 3.2.8
     */
    public static final String P_AUTOMATIC_END_ELEMENTS = "com.ctc.wstx.automaticEndElements";

    // // // Validation options:

    /**
     * Whether output classes should do basic verification that the output
     * structure is well-formed (start and end elements match); that
     * there is one and only one root, and that there is no textual content
     * in prolog/epilog. If false, no checking regarding structure is done.
     */
    public static final String P_OUTPUT_VALIDATE_STRUCTURE = "com.ctc.wstx.outputValidateStructure";

    /**
     * Whether output classes should check the textual content output as
     * part of nodes for validity, if there's a possibility of invalid
     * content. Nodes that include such constraints are: comment/'--',
     * cdata/']]>', proc. instr/'?>'.
     */
    public static final String P_OUTPUT_VALIDATE_CONTENT = "com.ctc.wstx.outputValidateContent";

    /**
     * Whether output classes should check uniqueness of attribute names,
     * to prevent accidental output of duplicate attributes.
     */
    public static final String P_OUTPUT_VALIDATE_ATTR = "com.ctc.wstx.outputValidateAttr";

    /**
     * Whether output classes should check validity of names, ie that they
     * only contain legal XML identifier characters.
     */
    public static final String P_OUTPUT_VALIDATE_NAMES = "com.ctc.wstx.outputValidateNames";

    /**
     * Property that further modifies handling of invalid content so
     * that if {@link #P_OUTPUT_VALIDATE_CONTENT} is enabled, instead of
     * reporting an error, the writer will try to fix the problem.
     * Invalid content in this context refers to comment content with
     * "--", CDATA with "]]>" and proc. instr data with "?>".
     * This can be done for some content (CDATA, possibly comment), by
     * splitting content into separate segments; but not for others
     * (proc. instr, since that might change the semantics in unintended
     * ways).
     */
    public static final String P_OUTPUT_FIX_CONTENT = "com.ctc.wstx.outputFixContent";

    /**
     * Property that determines whether Carriage Return (\r) characters are
     * to be escaped when output or not. If enabled, all instances of
     * character \r are escaped using a character entity (where possible,
     * that is, within CHARACTERS events, and attribute values). Otherwise
     * they are output as is. The main reason to enable this property is
     * to ensure that carriage returns are preserved as is through parsing,
     * since otherwise they will be converted to canonical xml linefeeds
     * (\n), when occurring alone or as part of a \r\n pair.
     */
    public static final String P_OUTPUT_ESCAPE_CR = "com.ctc.wstx.outputEscapeCr";

    /**
     * Property that defines an {@link InvalidCharHandler} used to determine
     * what to do with a Java character that the app tries to output but
     * which is not a valid xml character. Alternatives are converting it
     * to another character or throwing an exception: default
     * implementations exist for both behaviors.
     */
    public static final String P_OUTPUT_INVALID_CHAR_HANDLER = "com.ctc.wstx.outputInvalidCharHandler";

    /**
     * Property that defines an {@link EmptyElementHandler} used to determine
     * if the end tag for an empty element should be written or not.
     *<p>
     * If specified, {@link org.codehaus.stax2.XMLOutputFactory2#P_AUTOMATIC_EMPTY_ELEMENTS} is ignored.
     */
    public static final String P_OUTPUT_EMPTY_ELEMENT_HANDLER = "com.ctc.wstx.outputEmptyElementHandler";

    // // // Per-instance access to underlying output objects

    /**
     * Property that can be used to find out the underlying
     * {@link java.io.OutputStream} that an
     * {@link javax.xml.stream.XMLStreamWriter} instance is using,
     * if known (not known if constructed with a {@link java.io.Writer},
     * or other non-stream destination). Null is returned, if not
     * known.
     *<p>
     * Note: in general it is dangerous to operate on the returned stream
     * (if any), due to the buffering the stream writer can do. As such,
     * the caller has to take care to know what they are doing, including
     * properly flushing output.
     */
    public static final String P_OUTPUT_UNDERLYING_STREAM = "com.ctc.wstx.outputUnderlyingStream";

    /**
     * Property that can be used to find out the underlying
     * {@link java.io.Writer} that an
     * {@link javax.xml.stream.XMLStreamWriter} instance is using,
     * if known (may not be known if constructed with a
     * {@link java.io.OutputStream}, or other non-Writer destination).
     * Null is returned, if not known. Note that the Writer may be an
     * internal wrapper over an output stream.
     *<p>
     * Note: in general it is dangerous to operate on the returned Writer
     * (if any), due to the buffering the stream writer can do. As such,
     * the caller has to take care to know what they are doing, including
     * properly flushing output.
     */
    public static final String P_OUTPUT_UNDERLYING_WRITER = "com.ctc.wstx.outputUnderlyingWriter";
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/api/package.html 0000664 0000000 0000000 00000000512 13257562550 0026247 0 ustar 00root root 0000000 0000000
* Note, however, that the underlying parser will still keep track
* of location information for error reporting purposes; it's only
* Event objects that are affected.
*/
final static int CFG_PRESERVE_LOCATION = 0x1000;
// // // Input source handling
/**
* Property that enables/disables stream reader to close the underlying
* input source, either when it is asked to (.close() is called), or
* when it doesn't need it any more (reaching EOF, hitting an
* unrecoverable exception).
* As per Stax 1.0 specification, automatic closing is NOT enabled by
* default; except if the caller has no access to the target (i.e.
* when factory created it)
*/
final static int CFG_AUTO_CLOSE_INPUT = 0x2000;
/*
//////////////////////////////////////////////////////
// Flags for Woodstox-specific features
//////////////////////////////////////////////////////
*/
// // // Content normalization
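// 20-Jan-2007, TSa: These properties were deprecated and removed from 4.0
// (NOTE: CFG_NORMALIZE_LFS below is declared and active; only
// CFG_NORMALIZE_ATTR_VALUES is commented out)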
final static int CFG_NORMALIZE_LFS = 0x4000;
//final static int CFG_NORMALIZE_ATTR_VALUES = 0x8000;
// // // Caching
/**
* If true, input factory is allowed to cache parsed external DTD subsets,
* potentially speeding up things that DTDs are needed for: entity
* substitution, attribute defaulting, and of course DTD-based validation.
*/
final static int CFG_CACHE_DTDS = 0x00010000;
/**
* If true, key used for matching DTD subsets can be the public id,
* if false, only system id can be used.
*/
final static int CFG_CACHE_DTDS_BY_PUBLIC_ID = 0x00020000;
// // // Lazy/incomplete parsing
/**
* If true, input factory can defer parsing of nodes until data is
* actually needed; if false, it has to read all the data in right
* away when next type is requested. Setting it to true is good for
* performance, in the cases where some of the nodes (like comments,
* processing instructions, or whole subtrees) are ignored. Otherwise
* setting will not make much of a difference. Downside is that error
* reporting is also done 'lazily'; not right away when getting the next
* event type but when either accessing data, or skipping it.
*/
final static int CFG_LAZY_PARSING = 0x00040000;
// // // Validation support
// DTD++ support
/**
* If true, DTD-parser will recognize DTD++ features, and the validator
* will also use any such information found from DTD when DTD validation
* is enabled.
*/
final static int CFG_SUPPORT_DTDPP = 0x00080000;
// Automatic W3C Schema support?
//final static int CFG_AUTOMATIC_W3C_SCHEMA = 0x00100000;
// // // Xml:id support
/**
* If true, xml:id attribute type assignment and matching checks will
* be done as per Xml:id specification. Needs to be enabled for xml:id
* uniqueness checks to function properly
*/
final static int CFG_XMLID_TYPING = 0x00200000;
/**
* If true, xml:id attribute uniqueness constraints are enforced, even
* if not validating against DTD otherwise.
*/
final static int CFG_XMLID_UNIQ_CHECKS = 0x00400000;
/**
* If true, the XML parser will treat character references as entities.
*
*/
final static int CFG_TREAT_CHAR_REFS_AS_ENTS = 0x00800000;
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/cfg/OutputConfigFlags.java 0000664 0000000 0000000 00000010156 13257562550 0030227 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.cfg;
/**
 * Constant interface that collects the bit flags output classes use
 * internally to represent on/off configuration options. Each constant
 * occupies a distinct bit so that flags can be combined in one int.
 */
public interface OutputConfigFlags
{
    /**
     * Set when the writer is namespace-aware; when not set, only the
     * local part of names is ever used.
     */
    int CFG_ENABLE_NS = 0x0001;

    /**
     * Set when the output class should auto-generate namespace prefixes
     * as necessary.
     */
    int CFG_AUTOMATIC_NS = 0x0002;

    /**
     * Set when 'automatic' empty elements may be output.
     */
    int CFG_AUTOMATIC_EMPTY_ELEMENTS = 0x0004;

    /**
     * Set when the writer should automatically convert all calls that
     * would normally produce CDATA so that they produce (quoted) text
     * instead.
     */
    int CFG_OUTPUT_CDATA_AS_TEXT = 0x0008;

    /**
     * Set when attributes expanded from default attribute values should
     * be copied to output, when using copy methods.
     */
    int CFG_COPY_DEFAULT_ATTRS = 0x0010;

    /**
     * Set when CR (\r, ascii 13) characters occurring in text (CHARACTERS)
     * and attribute values should be escaped using character entities.
     * Escaping is needed to enable seamless round-tripping (preserving
     * CR characters).
     */
    int CFG_ESCAPE_CR = 0x0020;

    /**
     * Set when the writer is to add a single white space before the
     * closing "/>" of an empty element. This is sometimes useful for
     * increased compatibility with HTML browsers, or for readability.
     */
    int CFG_ADD_SPACE_AFTER_EMPTY_ELEM = 0x0040;

    /**
     * Set when 'automatic' end elements may be output; that is, the
     * end elements needed to close the logical output tree when the
     * stream writer is closed (by closing it explicitly, or by writing
     * end-document event).
     *
     * @since 3.2.8
     */
    int CFG_AUTOMATIC_END_ELEMENTS = 0x0080;

    /**
     * Set when validity of the output XML structure should be checked.
     */
    int CFG_VALIDATE_STRUCTURE = 0x0100;

    /**
     * Set when validity of textual content of nodes that have
     * constraints should be checked.
     *
     * Specifically: comments can not have '--', CDATA sections can not
     * have ']]&gt;' and processing instructions can not have '?&gt;'
     * character combinations in content passed in.
     */
    int CFG_VALIDATE_CONTENT = 0x0200;

    /**
     * Set when names (element and attribute names and prefixes;
     * processing instruction names) should be checked to contain only
     * legal identifier characters.
     */
    int CFG_VALIDATE_NAMES = 0x0400;

    /**
     * Set when uniqueness of attribute names should be checked, to
     * prevent accidental output of duplicate attributes.
     */
    int CFG_VALIDATE_ATTR = 0x0800;

    /**
     * Set to let a writer that checks validity of content try to fix
     * the problem, by splitting output segments as necessary. If not set,
     * validation will throw an exception; and without validation no
     * problem is noticed by the writer (but invalid output is created).
     */
    int CFG_FIX_CONTENT = 0x1000;

    /**
     * Set to make the stream writer close the underlying output target,
     * either when it is asked to (.close() is called), or when it doesn't
     * need it any more (reaching EOF, hitting an unrecoverable exception).
     * As per Stax 1.0 specification, automatic closing is NOT enabled by
     * default; except if the caller has no access to the target (i.e.
     * when factory created it).
     */
    int CFG_AUTO_CLOSE_OUTPUT = 0x2000;

    /**
     * Set when double quotes (rather than single quotes) should be used
     * in the XML declaration. The default is to use single quotes.
     */
    int CFG_USE_DOUBLE_QUOTES_IN_XML_DECL = 0x4000;
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/cfg/ParsingErrorMsgs.java 0000664 0000000 0000000 00000002443 13257562550 0030073 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.cfg;
/**
 * Constant interface holding message fragments used when composing
 * parsing error messages. Each <code>SUFFIX_IN_xxx</code> value names
 * the context in which a problem was encountered.
 */
public interface ParsingErrorMsgs
{
    // // // EOF problems:

    String SUFFIX_IN_ATTR_VALUE = " in attribute value";
    String SUFFIX_IN_DEF_ATTR_VALUE = " in attribute default value";
    String SUFFIX_IN_CDATA = " in CDATA section";
    String SUFFIX_IN_CLOSE_ELEMENT = " in end tag";
    String SUFFIX_IN_COMMENT = " in comment";
    String SUFFIX_IN_DTD = " in DOCTYPE declaration";
    String SUFFIX_IN_DTD_EXTERNAL = " in external DTD subset";
    String SUFFIX_IN_DTD_INTERNAL = " in internal DTD subset";
    String SUFFIX_IN_DOC = " in main document content";
    String SUFFIX_IN_ELEMENT = " in start tag";
    String SUFFIX_IN_ENTITY_REF = " in entity reference";
    String SUFFIX_IN_EPILOG = " in epilog";
    String SUFFIX_IN_NAME = " in name token";
    String SUFFIX_IN_PROC_INSTR = " in processing instruction";
    String SUFFIX_IN_PROLOG = " in prolog";
    String SUFFIX_IN_TEXT = " in document text content";
    String SUFFIX_IN_XML_DECL = " in xml declaration";

    /** Suffix noting that an identifier was expected. */
    String SUFFIX_EOF_EXP_NAME = "; expected an identifier";
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/cfg/XmlConsts.java 0000664 0000000 0000000 00000004102 13257562550 0026550 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.cfg;
/**
 * Simple container interface for constants that are shared by the
 * input and output sides.
 */
public interface XmlConsts
{
    // // // Constants for XML declaration

    /** Keyword of the "encoding" pseudo-attribute. */
    String XML_DECL_KW_ENCODING = "encoding";
    /** Keyword of the "version" pseudo-attribute. */
    String XML_DECL_KW_VERSION = "version";
    /** Keyword of the "standalone" pseudo-attribute. */
    String XML_DECL_KW_STANDALONE = "standalone";

    String XML_V_10_STR = "1.0";
    String XML_V_11_STR = "1.1";

    /**
     * This constant refers to cases where the version has not been
     * declared explicitly; and needs to be considered to be 1.0.
     */
    int XML_V_UNKNOWN = 0x0000;
    int XML_V_10 = 0x0100;
    int XML_V_11 = 0x0110;

    String XML_SA_YES = "yes";
    String XML_SA_NO = "no";

    // // // Stax specs mandates some settings: but since exact
    // // // definitions have been re-interpreted a few times,
    // // // let's isolate them in a single place

    /* 13-Mar-2008, TSa: As per latest reading of Stax specs,
     *   all of these are expected to be "", not null.
     */
    String ELEM_NO_NS_URI = "";
    String ATTR_NO_NS_URI = "";
    String ELEM_NO_PREFIX = "";
    String ATTR_NO_PREFIX = "";

    /**
     * Top-most namespace URI assigned for root element, if not specifically
     * defined (default namespace unbound).
     *
     * As per Stax specs, related clarifying discussion on
     * the mailing list, and especially JDK 1.6 definitions
     * in {@link javax.xml.XMLConstants} constants, empty String
     * should be used instead of null.
     */
    String DEFAULT_NAMESPACE_URI = ELEM_NO_NS_URI;

    // // // Well, these are not strictly xml constants, but for
    // // // now can live here

    /**
     * Highest Unicode character (code point) allowed in XML content.
     */
    int MAX_UNICODE_CHAR = 0x10FFFF;
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/cfg/package.html 0000664 0000000 0000000 00000000123 13257562550 0026233 0 ustar 00root root 0000000 0000000
* Note: choice of java.util.logging logging is only based on the
* fact that it is guaranteed to be present (we have JDK 1.4 baseline
* requirement) so that we do not add external dependencies.
* It is not a recommendation for using JUL per se; most users would
* do well to just use slf4j or log4j directly instead.
*
* @author Tatu Saloranta
*
* @since 3.2.8
*/
public final class QNameCreator
{
    /**
     * Creator object that creates QNames using the proper 3-arg constructor.
     * Left as null if the probe in the static initializer fails, in which
     * case {@link #create} falls back to the 2-arg constructor.
     */
    private final static Helper _helper;
    static {
        Helper h = null;
        try {
            // Not sure where it'll fail, constructor or create...
            Helper h0 = new Helper();
            // Probe call only; the result is intentionally discarded
            h0.create("http://dummy", "elem", "ns");
            h = h0;
        } catch (Throwable t) {
            // Deliberately broad: a missing constructor surfaces as a linkage Error
            String msg = "Could not construct QNameCreator.Helper; assume 3-arg QName constructor not available and use 2-arg method instead. Problem: "+t.getMessage();
            try {
                Logger.getLogger("com.ctc.wstx.compat.QNameCreator").warning(msg);
            } catch (Throwable t2) { // just in case JUL craps out...
                // Report the logging failure (t2) here; the original problem is already part of msg
                System.err.println("ERROR: failed to log error using Logger (problem "+t2.getMessage()+"), original problem: "+msg);
            }
        }
        _helper = h;
    }

    /**
     * Constructs a QName, retaining the prefix when the 3-arg constructor
     * is usable.
     *
     * @param uri Namespace URI of the name
     * @param localName Local part of the name
     * @param prefix Prefix of the name; dropped if only the 2-arg
     *    constructor is available
     *
     * @return Newly constructed QName
     */
    public static QName create(String uri, String localName, String prefix)
    {
        if (_helper == null) { // can't use 3-arg constructor; but 2-arg will be there
            return new QName(uri, localName);
        }
        return _helper.create(uri, localName, prefix);
    }

    /**
     * Helper class used to encapsulate calls to the (possibly) missing
     * 3-arg constructor, so that a failure is confined to this class.
     */
    private final static class Helper
    {
        public Helper() { }

        /**
         * Parameter order mirrors that of the 3-arg QName constructor:
         * namespace URI, local part, prefix.
         */
        public QName create(String nsURI, String localName, String prefix)
        {
            return new QName(nsURI, localName, prefix);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/compat/package.html 0000664 0000000 0000000 00000000612 13257562550 0026762 0 ustar 00root root 0000000 0000000
* Note that the implementation is only to be used for use with
*
* Some notes regarding missing/incomplete functionality:
*
* Note: explicit empty element (written using
*
* Note: while this is often the same as {@link #mCurrElem},
* it's not always. Specifically, an empty element (written
* explicitly using
* Note:
*/
/**
 * Reports the value type of this attribute as one of the
 * <code>TYPE_xxx</code> constants. This implementation always
 * answers <code>TYPE_CDATA</code>.
 *
 * @return Type constant for this attribute
 */
public int getValueType() {
return TYPE_CDATA;
}
/**
 * Looks up the textual name of this attribute's value type, using the
 * constant returned by {@link #getValueType} as an index to the
 * <code>sTypes</code> table.
 *
 * @return Entry of <code>sTypes</code> that matches the value type
 */
public String getValueTypeString()
{
    final int typeIndex = getValueType();
    return sTypes[typeIndex];
}
/**
 * Tells whether this attribute is of ID type; this implementation
 * always answers false.
 *
 * @return False
 */
public boolean typeIsId() {
return false;
}
/**
 * Tells whether this attribute is of NOTATION type; this implementation
 * always answers false.
 *
 * @return False
 */
public boolean typeIsNotation() {
return false;
}
/*
///////////////////////////////////////////////////
// Public API, validation
///////////////////////////////////////////////////
*/
/**
 * Validates (and possibly normalizes) an attribute value that is
 * passed in as a section of a character buffer.
 *
 * @param v Validator on whose behalf the value is checked
 * @param cbuf Buffer that contains the value
 * @param start Offset of the first character of the value
 * @param end Offset that marks the end of the value; the String-based
 *    overload passes the value length here, so presumably exclusive
 * @param normalize Whether the value should also be normalized
 *
 * @return NOTE(review): presumably the normalized value if one was
 *    produced, and null otherwise -- confirm against implementations
 */
public abstract String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
throws XMLStreamException;
/**
 * String-based alternative to the char array based validation method:
 * copies the value into a temporary buffer and delegates.
 *
 * Note: this default implementation is not optimized, since it does
 * a potentially unnecessary copy of the contents. It is expected that
 * this method is seldom called (Woodstox never directly calls it; it
 * only gets called for chained validators when one validator normalizes
 * the value, and then following validators are passed a String, not
 * char array)
 *
 * @param v Validator on whose behalf the value is checked; also the
 *    provider of the temporary buffer
 * @param value Attribute value to validate
 * @param normalize Whether the value should also be normalized
 *
 * @return Whatever the char array based method returns for the value
 */
public String validate(DTDValidatorBase v, String value, boolean normalize)
    throws XMLStreamException
{
    final int valueLen = value.length();
    /* The scratch buffer has to be borrowed from the validator, since
     * attribute objects are stateless and shared...
     */
    final char[] scratch = v.getTempAttrValueBuffer(valueLen);
    if (valueLen > 0) {
        value.getChars(0, valueLen, scratch, 0);
    }
    return validate(v, scratch, 0, valueLen, normalize);
}
/**
 * Method called by the {@link DTDValidator}
 * to ask attribute to verify that the default it has (if any) is
 * valid for such type.
 *
 * @param rep Reporter to which validation problems are reported
 * @param normalize Whether the default value should also be normalized
 *
 * @throws XMLStreamException NOTE(review): presumably when a reported
 *    problem is to be treated as fatal -- confirm against the reporter
 */
public abstract void validateDefault(InputProblemReporter rep, boolean normalize)
throws XMLStreamException;
/**
 * Method called when no validation is to be done, but the value is
 * still to be normalized as much as it can be. What this usually means
 * is normalizing all regular spaces (parser earlier on converts other
 * white space to spaces, except for specific character entities; and
 * these special cases are NOT to be normalized).
 *
 * The only exception is that CDATA will not do any normalization. But
 * for now, let's implement basic functionality that CDATA instance will
 * override.
 *
 * @param v Validator that invoked normalization (not used by this
 *    implementation)
 * @param cbuf Buffer that contains the value
 * @param start Offset of the first character of the value
 * @param end Offset that marks the end of the value
 *
 * @return Normalized value as a String, if any changes were done;
 *   null if input was already normalized (no changes needed)
 */
public String normalize(DTDValidatorBase v, char[] cbuf, int start, int end)
{
    final String normalized = StringUtil.normalizeSpaces(cbuf, start, end);
    return normalized;
}
/**
 * Does the initial space normalization of the default attribute value,
 * without trying to verify its validity. It is therefore called
 * regardless of whether the document is being fully validated.
 * The stored default is only replaced if normalization changed it.
 */
public void normalizeDefault()
{
    final String current = mDefValue.getValue();
    if (current.length() == 0) { // nothing to normalize
        return;
    }
    final char[] chars = current.toCharArray();
    final String normalized = StringUtil.normalizeSpaces(chars, 0, chars.length);
    // null is taken to mean "no changes were needed"
    if (normalized != null) {
        mDefValue.setValue(normalized);
    }
}
/*
///////////////////////////////////////////////////
// Package methods, validation helper methods
///////////////////////////////////////////////////
*/
/**
 * Checks that the (trimmed) default value is a single valid XML name,
 * reporting a validation problem through the given reporter if it is
 * empty or contains a character that is not legal in a name.
 *
 * @param rep Reporter to which validation problems are reported
 * @param normalize Whether the caller wants the normalized (trimmed)
 *   value back
 *
 * @return Trimmed default value if <code>normalize</code> is true;
 *   the default value as originally stored otherwise
 */
protected String validateDefaultName(InputProblemReporter rep, boolean normalize)
    throws XMLStreamException
{
    String origDefValue = mDefValue.getValue();
    String defValue = origDefValue.trim();

    if (defValue.length() == 0) {
        reportValidationProblem(rep, "Invalid default value '"+defValue
                +"'; empty String is not a valid name");
    }

    // Ok, needs to be a valid XML name:
    int illegalIx = WstxInputData.findIllegalNameChar(defValue, mCfgNsAware, mCfgXml11);
    if (illegalIx >= 0) {
        if (illegalIx == 0) {
            // opening parenthesis added so that it pairs with the closing one
            reportValidationProblem(rep, "Invalid default value '"+defValue+"'; character ("
                    +WstxInputData.getCharDesc(defValue.charAt(0))
                    +") not valid first character of a name");
        } else {
            reportValidationProblem(rep, "Invalid default value '"+defValue+"'; character #"+illegalIx+" ("
                    +WstxInputData.getCharDesc(defValue.charAt(illegalIx))
                    +") not valid name character");
        }
    }

    // Ok, cool it's ok...
    return normalize ? defValue : origDefValue;
}
/**
 * Checks that the (trimmed) default value consists of one or more
 * valid XML names separated by white space, reporting a validation
 * problem through the given reporter for each invalid token, and if
 * there are no tokens at all.
 *
 * @param rep Reporter to which validation problems are reported
 * @param normalize Whether the caller wants a normalized value back
 *
 * @return If <code>normalize</code> is true, the tokens joined by single
 *   spaces; otherwise the trimmed default value
 */
protected String validateDefaultNames(InputProblemReporter rep, boolean normalize)
    throws XMLStreamException
{
    String defValue = mDefValue.getValue().trim();
    int len = defValue.length();

    // Then code similar to actual value validation:
    StringBuilder sb = null;
    int count = 0;
    int start = 0;

    while (start < len) {
        // Ok, any white space to skip?
        if (WstxInputData.isSpaceChar(defValue.charAt(start))) {
            ++start;
            continue;
        }
        // Then need to find the token itself:
        int i = start+1;
        while (i < len && !WstxInputData.isSpaceChar(defValue.charAt(i))) {
            ++i;
        }
        String token = defValue.substring(start, i);
        int illegalIx = WstxInputData.findIllegalNameChar(token, mCfgNsAware, mCfgXml11);
        if (illegalIx >= 0) {
            // illegalIx is relative to the token, so describe the token's character
            String charDesc = WstxInputData.getCharDesc(token.charAt(illegalIx));
            if (illegalIx == 0) {
                reportValidationProblem(rep, "Invalid default value '"+defValue
                        +"'; character ("+charDesc
                        +") not valid first character of a name token");
            } else {
                reportValidationProblem(rep, "Invalid default value '"+defValue
                        +"'; character ("+charDesc
                        +") not a valid name character");
            }
        }
        ++count;
        if (normalize) {
            if (sb == null) {
                sb = new StringBuilder(i - start + 32);
            } else {
                sb.append(' ');
            }
            sb.append(token);
        }
        start = i+1;
    }
    if (count == 0) {
        reportValidationProblem(rep, "Invalid default value '"+defValue
                +"'; empty String is not a valid name value");
    }
    /* No builder exists if no tokens were found; if the reporter did
     * not throw, need to avoid dereferencing null.
     */
    if (!normalize || sb == null) {
        return defValue;
    }
    return sb.toString();
}
/**
 * Checks that the (trimmed) default value is a single valid NMTOKEN,
 * reporting a validation problem through the given reporter if it is
 * empty or contains a character that is not legal in an NMTOKEN.
 *
 * @param rep Reporter to which validation problems are reported
 * @param normalize Whether the caller wants the normalized (trimmed)
 *   value back
 *
 * @return Trimmed default value if <code>normalize</code> is true;
 *   the default value as originally stored otherwise
 */
protected String validateDefaultNmToken(InputProblemReporter rep, boolean normalize)
    throws XMLStreamException
{
    final String rawValue = mDefValue.getValue();
    final String trimmed = rawValue.trim();

    if (trimmed.length() == 0) {
        reportValidationProblem(rep, "Invalid default value '"+trimmed+"'; empty String is not a valid NMTOKEN");
    }
    final int badIndex = WstxInputData.findIllegalNmtokenChar(trimmed, mCfgNsAware, mCfgXml11);
    if (badIndex >= 0) {
        final String charDesc = WstxInputData.getCharDesc(trimmed.charAt(badIndex));
        reportValidationProblem(rep, "Invalid default value '"+trimmed
                +"'; character #"+badIndex+" ("
                +charDesc
                +") not valid NMTOKEN character");
    }
    // Value is fine (or problems were just reported); choose what to hand back
    if (normalize) {
        return trimmed;
    }
    return rawValue;
}
/**
 * Method called by validation/normalization code for enumeration-valued
 * attributes. When normalizing, trims leading and trailing white space
 * of the value (full normalization is not needed, since values that
 * have spaces inside can not be legal); then checks whether the value
 * is included in the set of words (tokens) of the given resolver.
 *
 * @param cbuf Buffer that contains the value
 * @param start Offset of the first character of the value
 * @param end Offset right after the last character of the value
 * @param normalize Whether surrounding white space is to be trimmed
 *   before matching
 * @param res Resolver that contains the legal words
 *
 * @return Result of the resolver lookup for the (possibly trimmed)
 *   value; null if there is nothing left to look up
 */
public String validateEnumValue(char[] cbuf, int start, int end,
        boolean normalize,
        WordResolver res)
{
    /* Better NOT to build temporary Strings quite yet; can resolve
     * matches via resolver more efficiently.
     */
    // Note: at this point, should only have real spaces...
    if (normalize) {
        // First leading white space:
        for (; start < end; ++start) {
            if (cbuf[start] > CHAR_SPACE) {
                break;
            }
        }
        // then trailing; 'end' is kept pointing right after the last char
        while (end > start && cbuf[end-1] <= CHAR_SPACE) {
            --end;
        }
    }

    // Empty String is never legal for enums:
    return (start < end) ? res.find(cbuf, start, end) : null;
}
protected EntityDecl findEntityDecl(DTDValidatorBase v,
char[] ch, int start, int len /*, int hash*/)
throws XMLStreamException
{
Map
* Notes about thread-safety: this class is not thread-safe, since it does
* not have to be, in general case. That is, the only instances that can
* be shared are external subset instances, and those are used in read-only
* manner (with the exception of temporary arrays constructed on-demand).
*/
public final class DTDElement
{
/*
///////////////////////////////////////////////////
// Information about the element itself
///////////////////////////////////////////////////
*/
final PrefixedName mName;
/**
* Location of the (real) definition of the element; may be null for
* placeholder elements created to hold ATTLIST definitions
*/
final Location mLocation;
/**
* Base validator object for validating content model of this element;
* may be null for some simple content models (ANY, EMPTY).
*/
StructValidator mValidator;
int mAllowedContent;
/**
* True if the DTD was parsed (and is to be used) in namespace-aware
* mode.
* Affects (name) validation amongst other things.
*/
final boolean mNsAware;
/**
* True if the DTD was parsed in xml1.1 compliant mode (referenced to
* from an xml 1.1 document).
* Affects (name) validation amongst other things.
*/
final boolean mXml11;
/*
///////////////////////////////////////////////////
// Attribute info
///////////////////////////////////////////////////
*/
HashMap
* note: although ID attributes are not to have default value,
* this is 'only' a validity constraint, and in dtd-aware-but-
* not-validating mode it is apparently 'legal' to add default
* values.
*/
public DTDIdAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
boolean nsAware, boolean xml11)
{
// Nothing type-specific to set up: all arguments are passed as-is to the superclass constructor
super(name, defValue, specIndex, nsAware, xml11);
}
/**
 * Creates a new ID attribute instance that has the same name, default
 * value and configuration settings as this one, but uses the given
 * index.
 *
 * @param specIndex Index the new instance is to be constructed with
 */
@Override
public DTDAttribute cloneWith(int specIndex) {
return new DTDIdAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
}
/*
///////////////////////////////////////////////////
// Public API
///////////////////////////////////////////////////
*/
/**
 * @return Always <code>TYPE_ID</code>
 */
@Override
public int getValueType() {
return TYPE_ID;
}
/**
 * @return Always true
 */
@Override
public boolean typeIsId() {
return true;
}
/*
///////////////////////////////////////////////////
// Public API, validation
///////////////////////////////////////////////////
*/
/**
 * Method called by the validator to let the attribute do necessary
 * normalization and/or validation for the value. The value is trimmed,
 * verified to consist of legal name characters, and then added to the
 * id map of the validator as a defined id; a problem is reported if
 * the id had already been defined.
 *
 * @return Id String from the id map entry if <code>normalize</code>
 *   is true; null if not (for problems, whatever the problem reporting
 *   method returns)
 */
@SuppressWarnings("cast")
@Override
public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
    throws XMLStreamException
{
    // Leading white space is skipped first
    int first = start;
    while (first < end && WstxInputData.isSpaceChar(cbuf[first])) {
        ++first;
    }
    if (first >= end) { // nothing but white space (if even that)
        return reportValidationProblem(v, "Empty ID value");
    }
    // then trailing; 'last' is the index of the last character (inclusive)
    int last = end - 1;
    while (last > first && WstxInputData.isSpaceChar(cbuf[last])) {
        --last;
    }

    // Characters need to be verified, and hash code calculated on the side
    char ch = cbuf[first];
    if (!WstxInputData.isNameStartChar(ch, mCfgNsAware, mCfgXml11)) {
        return reportInvalidChar(v, ch, "not valid as the first ID character");
    }
    int hash = (int) ch;
    int pos = first;
    while (++pos <= last) {
        ch = cbuf[pos];
        if (!WstxInputData.isNameChar(ch, mCfgNsAware, mCfgXml11)) {
            return reportInvalidChar(v, ch, "not valid as an ID character");
        }
        hash = (hash * 31) + (int) ch;
    }

    final ElementIdMap idMap = v.getIdMap();
    final PrefixedName elemName = v.getElemName();
    final Location here = v.getLocation();
    final ElementId entry = idMap.addDefined(cbuf, first, (last - first + 1), hash,
            here, elemName, mName);

    // Duplicates are detected by checking if Location is the very instance passed in
    if (entry.getLocation() != here) {
        return reportValidationProblem(v, "Duplicate id '"+entry.getId()+"', first declared at "
                +entry.getLocation());
    }
    return normalize ? entry.getId() : null;
}
/**
 * Method called by the validator
 * to ask attribute to verify that the default it has (if any) is
 * valid for such type.
 *
 * This implementation does no verification: it is not expected to be
 * called at all, and unconditionally fails.
 *
 * @throws IllegalStateException Always, with
 *   <code>ErrorConsts.ERR_INTERNAL</code> as the message
 */
@Override
public void validateDefault(InputProblemReporter rep, boolean normalize)
{
// Should never get called
throw new IllegalStateException(ErrorConsts.ERR_INTERNAL);
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDIdRefAttr.java 0000664 0000000 0000000 00000007273 13257562550 0027026 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.ElementId;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.PrefixedName;
/**
 * Attribute class for IDREF typed attributes; ones that contain a single
 * reference to an element that has the matching identifier specified.
 */
public final class DTDIdRefAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor; passes all arguments to the base class.
     */
    public DTDIdRefAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
            boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * Creates a copy of this attribute that differs only by the index
     * it is constructed with.
     */
    @Override
    public DTDAttribute cloneWith(int specIndex) {
        return new DTDIdRefAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * @return Always <code>TYPE_IDREF</code>
     */
    @Override
    public int getValueType() {
        return TYPE_IDREF;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator to let the attribute do necessary
     * normalization and/or validation for the value. The value is
     * trimmed, verified to consist of legal name characters, and then
     * added to the id map of the validator as a referenced id.
     *
     * @return Id String from the id map entry if <code>normalize</code>
     *   is true; null if not (for problems, whatever the problem
     *   reporting method returns)
     */
    @SuppressWarnings("cast")
    @Override
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        /* Leading/trailing white space is skipped even when the visible
         * attribute value is not to be normalized. This allows for better
         * round-trip handling, while still allowing validation.
         */
        int first = start;
        while (first < end && WstxInputData.isSpaceChar(cbuf[first])) {
            ++first;
        }
        if (first >= end) { // empty (all white space) value?
            return reportValidationProblem(v, "Empty IDREF value");
        }
        int last = end - 1; // index of the last character (inclusive)
        while (last > first && WstxInputData.isSpaceChar(cbuf[last])) {
            --last;
        }

        // Characters need to be verified, and hash code calculated on the side
        char ch = cbuf[first];
        if (!WstxInputData.isNameStartChar(ch, mCfgNsAware, mCfgXml11)) {
            return reportInvalidChar(v, ch, "not valid as the first IDREF character");
        }
        int hash = (int) ch;
        int pos = first;
        while (++pos <= last) {
            ch = cbuf[pos];
            if (!WstxInputData.isNameChar(ch, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, ch, "not valid as an IDREF character");
            }
            hash = (hash * 31) + (int) ch;
        }

        // Then the reference just needs to be recorded
        final ElementIdMap idMap = v.getIdMap();
        final Location here = v.getLocation();
        final ElementId entry = idMap.addReferenced(cbuf, first, (last - first + 1), hash,
                here, v.getElemName(), mName);

        // and that's all; no more checks needed here
        if (normalize) {
            return entry.getId();
        }
        return null;
    }

    /**
     * Method called by the validator to ask attribute to verify that
     * the default it has (if any) is valid for such type: here, that
     * it is a valid name. When normalizing, the stored default is
     * replaced with the value that validation returns.
     */
    @Override
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        final String checked = validateDefaultName(rep, normalize);
        if (normalize) {
            mDefValue.setValue(checked);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDIdRefsAttr.java 0000664 0000000 0000000 00000011743 13257562550 0027206 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.ElementId;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.PrefixedName;
/**
 * Attribute class for IDREFS typed attributes; ones that contain one or
 * more white space separated references to elements that have matching
 * identifiers specified.
 */
public final class DTDIdRefsAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor; passes all arguments to the base class.
     */
    public DTDIdRefsAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
            boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * Creates a copy of this attribute that differs only by the index
     * it is constructed with.
     */
    @Override
    public DTDAttribute cloneWith(int specIndex) {
        return new DTDIdRefsAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * @return Always <code>TYPE_IDREFS</code>
     */
    @Override
    public int getValueType() {
        return TYPE_IDREFS;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator to let the attribute do necessary
     * normalization and/or validation for the value. Each white space
     * separated token is verified to consist of legal name characters,
     * and is added to the id map of the validator as a referenced id.
     *
     * @return If <code>normalize</code> is true, id Strings of the id
     *   map entries, joined with single spaces; null if not (for
     *   problems, whatever the problem reporting method returns)
     */
    @SuppressWarnings("cast")
    @Override
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        /* Leading/trailing white space is skipped even when the visible
         * attribute value is not to be normalized. This allows for better
         * round-trip handling (no changes for physical value caller
         * gets), but still allows successful validation.
         */
        int pos = start;
        while (pos < end && WstxInputData.isSpaceChar(cbuf[pos])) {
            ++pos;
        }
        if (pos >= end) { // no ids at all?
            return reportValidationProblem(v, "Empty IDREFS value");
        }
        // We know the first char is not a space by now...
        int last = end - 1; // index of the last character (inclusive)
        while (last > pos && WstxInputData.isSpaceChar(cbuf[last])) {
            --last;
        }

        final ElementIdMap idMap = v.getIdMap();
        final Location here = v.getLocation();

        String latestId = null;
        StringBuilder joined = null;

        while (pos <= last) {
            // Characters need to be verified, and hash code calculated on the side
            char ch = cbuf[pos];
            if (!WstxInputData.isNameStartChar(ch, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, ch, "not valid as the first IDREFS character");
            }
            int hash = (int) ch;
            int tokenEnd = pos + 1;
            while (tokenEnd <= last) {
                ch = cbuf[tokenEnd];
                if (WstxInputData.isSpaceChar(ch)) {
                    break;
                }
                if (!WstxInputData.isNameChar(ch, mCfgNsAware, mCfgXml11)) {
                    return reportInvalidChar(v, ch, "not valid as an IDREFS character");
                }
                hash = (hash * 31) + (int) ch;
                ++tokenEnd;
            }

            // Ok, got the next id ref...
            final ElementId entry = idMap.addReferenced(cbuf, pos, tokenEnd - pos, hash,
                    here, v.getElemName(), mName);

            /* When normalizing, the id String can be shared as is if it
             * is the only one; otherwise normalized String needs to be
             * composed from the pieces.
             */
            if (normalize) {
                if (latestId == null) { // first idref
                    latestId = entry.getId();
                } else {
                    if (joined == null) {
                        joined = new StringBuilder(latestId);
                    }
                    latestId = entry.getId();
                    joined.append(' ');
                    joined.append(latestId);
                }
            }

            // Can skip the trailing space char (if there was one), and any others
            pos = tokenEnd + 1;
            while (pos <= last && WstxInputData.isSpaceChar(cbuf[pos])) {
                ++pos;
            }
        }

        if (!normalize) {
            return null;
        }
        return (joined == null) ? latestId : joined.toString();
    }

    /**
     * Method called by the validator to ask attribute to verify that
     * the default it has (if any) is valid for such type: here, that
     * it consists of valid names. When normalizing, the stored default
     * is replaced with the value that validation returns.
     *
     * It's unlikely there will be default values... but just in case,
     * let's implement it properly.
     */
    @Override
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        final String checked = validateDefaultNames(rep, normalize);
        if (normalize) {
            mDefValue.setValue(checked);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDNmTokenAttr.java 0000664 0000000 0000000 00000005774 13257562550 0027414 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Specific attribute class for NMTOKEN typed attributes; ones that
 * contain a single name token.
 */
public final class DTDNmTokenAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor; passes all arguments to the base class.
     */
    public DTDNmTokenAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
            boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * Creates a copy of this attribute that differs only by the index
     * it is constructed with.
     */
    @Override
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDNmTokenAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * @return Always <code>TYPE_NMTOKEN</code>
     */
    @Override
    public int getValueType() {
        return TYPE_NMTOKEN;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator to let the attribute do necessary
     * normalization and/or validation for the value. The value is
     * trimmed, and verified to consist of legal name characters.
     *
     * @return Trimmed value, if <code>normalize</code> is true and
     *   trimming removed something; null otherwise (for problems,
     *   whatever the problem reporting method returns)
     */
    @Override
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        final int origLen = end - start;

        // Leading white space is skipped first
        int first = start;
        while (first < end && WstxInputData.isSpaceChar(cbuf[first])) {
            ++first;
        }
        if (first >= end) { // empty value?
            return reportValidationProblem(v, "Empty NMTOKEN value");
        }
        int last = end - 1; // index of the last character (inclusive)
        while (last > first && WstxInputData.isSpaceChar(cbuf[last])) {
            --last;
        }

        // Then all characters need to be verified
        int pos = first;
        while (pos <= last) {
            final char ch = cbuf[pos++];
            if (!WstxInputData.isNameChar(ch, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, ch, "not valid NMTOKEN character");
            }
        }

        if (!normalize) {
            return null;
        }
        // String is only constructed if something was trimmed
        final int trimmedLen = (last - first) + 1;
        return (trimmedLen == origLen) ? null : new String(cbuf, first, trimmedLen);
    }

    /**
     * Method called by the validator to ask attribute to verify that
     * the default it has (if any) is valid for such type: here, that
     * it is a valid NMTOKEN. When normalizing, the stored default is
     * replaced with the value that validation returns.
     */
    @Override
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        final String checked = validateDefaultNmToken(rep, normalize);
        if (normalize) {
            mDefValue.setValue(checked);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDNmTokensAttr.java 0000664 0000000 0000000 00000014436 13257562550 0027572 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
* Specific attribute class for attributes that contain (unique)
* identifiers.
*/
public final class DTDNmTokensAttr
extends DTDAttribute
{
/*
///////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////
*/
/**
* Main constructor.
*/
public DTDNmTokensAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
boolean nsAware, boolean xml11)
{
super(name, defValue, specIndex, nsAware, xml11);
}
@Override
public DTDAttribute cloneWith(int specIndex) {
return new DTDNmTokensAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
}
/*
///////////////////////////////////////////////////
// Public API
///////////////////////////////////////////////////
*/
@Override
public int getValueType() {
return TYPE_NMTOKENS;
}
/*
///////////////////////////////////////////////////
// Public API, validation
///////////////////////////////////////////////////
*/
/**
 * Method called by the validator
 * to let the attribute do necessary normalization and/or validation
 * for the value.
 *
 * @param v Validator that is passed along to the problem-reporting helpers
 * @param cbuf Buffer that contains the attribute value characters
 * @param start Index of the first value character in {@code cbuf}
 * @param end Index one past the last value character in {@code cbuf}
 * @param normalize Whether the value should be normalized (leading and
 *   trailing white space trimmed, white space between tokens collapsed
 *   into a single space character)
 *
 * @return When {@code normalize} is false and the value is valid, null;
 *   when {@code normalize} is true and the value is valid, the normalized
 *   value; in problem cases, whatever the problem-reporting helper
 *   ({@code reportValidationProblem} or {@code reportInvalidChar}) returns
 */
@Override
public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
throws XMLStreamException
{
//int origStart = start;
/* First things first; let's ensure value is not empty (all
* white space)...
*/
while (start < end && WstxInputData.isSpaceChar(cbuf[start])) {
++start;
}
// Empty value?
if (start >= end) {
return reportValidationProblem(v, "Empty NMTOKENS value");
}
/* Then, let's have separate handling for normalizing and
* non-normalizing case, since latter is trivially easy case:
* every remaining character just has to be either white space or
* a valid name character.
*/
if (!normalize) {
for (; start < end; ++start) {
char c = cbuf[start];
if (!WstxInputData.isSpaceChar(c)
&& !WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
return reportInvalidChar(v, c, "not valid as NMTOKENS character");
}
}
return null; // ok, all good
}
//boolean trimmed = (origStart != start);
//origStart = start;
--end; // so that it now points to the last char
// Wouldn't absolutely have to trim trailing... but is easy to do
// (loop stops at 'start', which is known to be a non-space character)
while (end > start && WstxInputData.isSpaceChar(cbuf[end])) {
--end;
//trimmed = true;
}
/* Ok, now, need to check we only have valid chars, and maybe
* also coalesce multiple spaces, if any.
* From here on 'end' is inclusive; each round of the outer loop
* consumes one token plus the white space that follows it.
*/
StringBuilder sb = null;
while (start <= end) {
// Scan one token: [start, i) with 'i' ending up at the first white
// space character after it (or at end+1 if this was the last token)
int i = start;
for (; i <= end; ++i) {
char c = cbuf[i];
if (WstxInputData.isSpaceChar(c)) {
break;
}
if (!WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
return reportInvalidChar(v, c, "not valid as an NMTOKENS character");
}
}
// Tokens are joined with exactly one space character
if (sb == null) {
sb = new StringBuilder(end - start + 1);
} else {
sb.append(' ');
}
sb.append(cbuf, start, (i - start));
start = i + 1;
// Ok, any white space to skip?
while (start <= end && WstxInputData.isSpaceChar(cbuf[start])) {
++start;
}
}
/* 27-Nov-2005, TSa: Could actually optimize trimming, and often
* avoid using StringBuilder... but let's only do it if it turns
* out dealing with NMTOKENS normalization shows up on profiling...
*/
// Non-null here: value was verified to have at least one non-space char
return sb.toString();
}
/**
 * Method called by the validator
 * to ask attribute to verify that the default it has (if any) is
 * valid for such type.
 *<p>
 * The default value is split into white space separated tokens; each
 * token is checked with {@code WstxInputData.findIllegalNmtokenChar},
 * and a value without any tokens is reported as invalid. If
 * normalization is requested (and at least one token was found), the
 * default value is replaced with the tokens joined by single spaces.
 *
 * @param rep Reporter to which validation problems are passed
 * @param normalize Whether the default value should be replaced with
 *   its normalized form
 *
 * @throws XMLStreamException as declared by the problem-reporting helper
 */
@Override
public void validateDefault(InputProblemReporter rep, boolean normalize)
    throws XMLStreamException
{
    String defValue = mDefValue.getValue();
    int len = defValue.length();

    StringBuilder sb = null;
    int count = 0;
    int start = 0;

    while (start < len) {
        // Ok, any white space to skip?
        while (start < len && WstxInputData.isSpaceChar(defValue.charAt(start))) {
            ++start;
        }
        if (start >= len) { // nothing but trailing white space left
            break;
        }

        /* Token starts at 'start' (a non-space char); find its end. The
         * scan must begin at the very next character: skipping it would
         * merge a one-character token with its separator, or run past
         * the end of the value if such token is the last one.
         */
        int i = start + 1;
        while (i < len && !WstxInputData.isSpaceChar(defValue.charAt(i))) {
            ++i;
        }
        ++count;
        String token = defValue.substring(start, i);
        int illegalIx = WstxInputData.findIllegalNmtokenChar(token, mCfgNsAware, mCfgXml11);
        if (illegalIx >= 0) {
            // Index returned is relative to the token, not the whole value
            int valueIx = start + illegalIx;
            reportValidationProblem(rep, "Invalid default value '"+defValue
                                    +"'; character #"+valueIx+" ("
                                    +WstxInputData.getCharDesc(defValue.charAt(valueIx))
                                    +") not a valid NMTOKENS character");
        }

        if (normalize) {
            if (sb == null) {
                sb = new StringBuilder(i - start + 32);
            } else {
                sb.append(' ');
            }
            sb.append(token);
        }
        // 'i' is either at a white space char or at the end; safe to skip
        start = i + 1;
    }

    if (count == 0) {
        reportValidationProblem(rep, "Invalid default value '"+defValue
                                +"'; empty String is not a valid NMTOKENS value");
        return;
    }

    if (normalize) {
        // count > 0 guarantees the builder was created
        mDefValue.setValue(sb.toString());
    }
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDNotationAttr.java 0000664 0000000 0000000 00000006043 13257562550 0027622 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.WordResolver;
/**
 * Specific attribute class for attributes that are of NOTATION type,
 * and also contain enumerated set of legal values.
 */
public final class DTDNotationAttr
    extends DTDAttribute
{
    /**
     * Set of notation names that are legal values for this attribute.
     */
    final WordResolver mEnumValues;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public DTDNotationAttr(PrefixedName name, DefaultAttrValue defValue,
                           int specIndex, boolean nsAware, boolean xml11,
                           WordResolver enumValues)
    {
        super(name, defValue, specIndex, nsAware, xml11);
        mEnumValues = enumValues;
    }

    /**
     * Creates a copy of this attribute that differs only by the
     * given index; the legal value set is shared with this instance.
     */
    @Override
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDNotationAttr(mName, mDefValue, specIndex,
                                   mCfgNsAware, mCfgXml11, mEnumValues);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    @Override
    public int getValueType() {
        return TYPE_NOTATION;
    }

    @Override
    public boolean typeIsNotation() {
        return true;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *<p>
     * Note: identical to the implementation in {@link DTDEnumAttr}
     *
     * @return Value returned by {@code validateEnumValue} if it is
     *   non-null; otherwise whatever the problem-reporting helper returns
     */
    @Override
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        String ok = validateEnumValue(cbuf, start, end, normalize, mEnumValues);
        if (ok == null) {
            String val = new String(cbuf, start, (end-start));
            return reportValidationProblem(v, "Invalid notation value '"+val+"': has to be one of ("
                                           +mEnumValues+")");
        }
        return ok;
    }

    /**
     * Method called by the validator
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     *<p>
     * If the default is not one of the listed values, the problem is
     * reported and the stored default value is left as is (it is never
     * replaced with null).
     */
    @Override
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        // First, basic checks that it's a valid non-empty name:
        String def = validateDefaultName(rep, normalize);

        // And then that it's one of listed values:
        String shared = mEnumValues.find(def);
        if (shared == null) {
            reportValidationProblem(rep, "Invalid default value '"+def+"': has to be one of ("
                                    +mEnumValues+")");
            /* Reporting does not necessarily throw; must not go on to
             * overwrite the default value with null.
             */
            return;
        }

        // Ok, cool it's ok...
        if (normalize) {
            mDefValue.setValue(shared);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDSchemaFactory.java 0000664 0000000 0000000 00000015476 13257562550 0027736 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.io.*;
import java.net.URL;
import javax.xml.stream.*;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.api.ValidatorConfig;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.io.*;
import com.ctc.wstx.util.DefaultXmlSymbolTable;
import com.ctc.wstx.util.SymbolTable;
import com.ctc.wstx.util.URLUtil;
/**
 * Factory for creating DTD validator schema objects (shareable stateless
 * "blueprints" for creating actual validators).
 *<p>
 * Due to close coupling of XML and DTD, some of the functionality
 * implemented (like that of reading internal subsets embedded in XML
 * documents) is only accessible by core Woodstox. The externally
 * accessible factory methods of this class construct schemas from
 * stand-alone DTD input, which is read as an external subset.
 */
public class DTDSchemaFactory
    extends XMLValidationSchemaFactory
{
    /*
    /////////////////////////////////////////////////////
    // Objects shared by actual parsers
    /////////////////////////////////////////////////////
     */

    /**
     * 'Root' symbol table, used for creating actual symbol table instances,
     * but never as is.
     */
    final static SymbolTable mRootSymbols = DefaultXmlSymbolTable.getInstance();
    static {
        mRootSymbols.setInternStrings(true);
    }

    /**
     * Current configurations for this factory
     */
    protected final ValidatorConfig mSchemaConfig;

    /**
     * This configuration object is used (instead of a more specific one)
     * since the actual DTD reader uses such configuration object.
     */
    protected final ReaderConfig mReaderConfig;

    public DTDSchemaFactory()
    {
        super(XMLValidationSchema.SCHEMA_ID_DTD);
        mReaderConfig = ReaderConfig.createFullDefaults();
        mSchemaConfig = ValidatorConfig.createDefaults();
    }

    /*
    ////////////////////////////////////////////////////////////
    // Stax2, Configuration methods
    ////////////////////////////////////////////////////////////
     */

    @Override
    public boolean isPropertySupported(String propName) {
        return mSchemaConfig.isPropertySupported(propName);
    }

    @Override
    public boolean setProperty(String propName, Object value) {
        return mSchemaConfig.setProperty(propName, value);
    }

    @Override
    public Object getProperty(String propName) {
        return mSchemaConfig.getProperty(propName);
    }

    /*
    ////////////////////////////////////////////////////////////
    // Stax2, Factory methods
    ////////////////////////////////////////////////////////////
     */

    /**
     * Constructs a schema from caller-provided stream. The stream is
     * owned by the caller: this method does not add any explicit close
     * call of its own.
     */
    @Override
    public XMLValidationSchema createSchema(InputStream in, String encoding,
                                            String publicId, String systemId)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        return doCreateSchema(rcfg, StreamBootstrapper.getInstance
                              (publicId, SystemId.construct(systemId), in), publicId, systemId, null);
    }

    /**
     * Constructs a schema from caller-provided reader. The reader is
     * owned by the caller: this method does not add any explicit close
     * call of its own.
     */
    @Override
    public XMLValidationSchema createSchema(Reader r,
                                            String publicId, String systemId)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        return doCreateSchema(rcfg, ReaderBootstrapper.getInstance
                              (publicId, SystemId.construct(systemId), r, null), publicId, systemId, null);
    }

    /**
     * Constructs a schema by reading DTD from given URL. Stream opened
     * for reading is always closed before this method returns, whether
     * construction succeeds or fails.
     *
     * @throws XMLStreamException for parsing problems; I/O problems are
     *   wrapped as {@link WstxIOException}
     */
    @SuppressWarnings("resource")
    @Override
    public XMLValidationSchema createSchema(URL url)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        try {
            InputStream in = URLUtil.inputStreamFromURL(url);
            try {
                return doCreateSchema(rcfg, StreamBootstrapper.getInstance
                                      (null, null, in),
                                      null, url.toExternalForm(), url);
            } finally {
                // we opened it, we are responsible for releasing it
                closeQuietly(in);
            }
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    /**
     * Constructs a schema by reading DTD from given file. Stream opened
     * for reading is always closed before this method returns, whether
     * construction succeeds or fails.
     *
     * @throws XMLStreamException for parsing problems; I/O problems are
     *   wrapped as {@link WstxIOException}
     */
    @SuppressWarnings("resource")
    @Override
    public XMLValidationSchema createSchema(File f)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        try {
            URL url = URLUtil.toURL(f);
            InputStream in = new FileInputStream(f);
            try {
                return doCreateSchema(rcfg, StreamBootstrapper.getInstance
                                      (null, null, in),
                                      null, url.toExternalForm(), url);
            } finally {
                // we opened it, we are responsible for releasing it
                closeQuietly(in);
            }
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    /*
    ////////////////////////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////////////////////////
     */

    /**
     * The main validator construction method, called by all externally
     * visible methods.
     *
     * @param rcfg Private (non-shared) reader configuration to use
     * @param bs Bootstrapper that wraps the actual input
     * @param publicId Public identifier of the DTD, if known
     * @param systemIdStr System identifier of the DTD, if known
     * @param ctxt Context for resolving references; if null, URL of
     *   the current directory is used
     */
    @SuppressWarnings("resource")
    protected XMLValidationSchema doCreateSchema
        (ReaderConfig rcfg, InputBootstrapper bs, String publicId, String systemIdStr, URL ctxt)
        throws XMLStreamException
    {
        try {
            Reader r = bs.bootstrapInput(rcfg, false, XmlConsts.XML_V_UNKNOWN);
            if (bs.declaredXml11()) {
                rcfg.enableXml11(true);
            }
            if (ctxt == null) { // this is just needed as context for param entity expansion
                ctxt = URLUtil.urlFromCurrentDir();
            }
            /* Note: need to pass unknown for 'xmlVersion' here (as well as
             * above for bootstrapping), since this is assumed to be the main
             * level parsed document and no xml version compatibility checks
             * should be done.
             */
            SystemId systemId = SystemId.construct(systemIdStr, ctxt);
            WstxInputSource src = InputSourceFactory.constructEntitySource
                (rcfg, null, null, bs, publicId, systemId, XmlConsts.XML_V_UNKNOWN, r);

            /* true -> yes, fully construct for validation
             * (does not mean it has to be used for validation, but required
             * if it is to be used for that purpose)
             */
            return FullDTDReader.readExternalSubset(src, rcfg, /*int.subset*/null, true, bs.getDeclaredVersion());
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    private ReaderConfig createPrivateReaderConfig()
    {
        return mReaderConfig.createNonShared(mRootSymbols.makeChild());
    }

    /**
     * Helper method for closing a stream this factory opened itself.
     * Failure to close is intentionally ignored: at this point the schema
     * has either been fully read, or a more relevant exception is already
     * being thrown, and neither outcome should be replaced by a close
     * failure.
     */
    private static void closeQuietly(InputStream in)
    {
        try {
            in.close();
        } catch (IOException ignored) {
            // see method comment for why this is not propagated
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DTDSubset.java 0000664 0000000 0000000 00000007670 13257562550 0026450 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.NotationDeclaration;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
* This is the abstract base class that implements the standard Stax2
* validation schema base class ({@link XMLValidationSchema}, as well
* as specifies extended Woodstox-specific interface for accessing
* DTD-specific things like entity expansions and notation properties.
*
* API is separated from its implementation to reduce coupling; for example,
* it is possible to have DTD subset implementations that do not implement
* validation logics, just entity expansion.
*/
public abstract class DTDSubset
implements DTDValidationSchema
{
/*
//////////////////////////////////////////////////////
// Life-cycle
//////////////////////////////////////////////////////
*/
// No state to initialize at this level; only sub-classes can instantiate
protected DTDSubset() { }
/**
 * Method that will combine definitions from this internal subset with
 * definitions from passed-in external subset, producing a new combined
 * DTDSubset instance.
 *
 * @param rep Reporter passed in for use by the implementation
 *   (presumably for reporting problems found when combining -- confirm
 *   against the implementing class)
 * @param extSubset External subset to combine with this subset
 *
 * @return Newly constructed subset that contains combined definitions
 */
public abstract DTDSubset combineWithExternalSubset(InputProblemReporter rep,
DTDSubset extSubset)
throws XMLStreamException;
/*
//////////////////////////////////////////////////////
// XMLValidationSchema implementation
//////////////////////////////////////////////////////
*/
// Validator construction is left to concrete subset implementations
@Override
public abstract XMLValidator createValidator(ValidationContext ctxt)
throws XMLStreamException;
/**
 * @return {@link XMLValidationSchema#SCHEMA_ID_DTD}, always
 */
@Override
public String getSchemaType() {
return XMLValidationSchema.SCHEMA_ID_DTD;
}
/*
//////////////////////////////////////////////////////
// And extended DTDValidationSchema
//////////////////////////////////////////////////////
*/
// Part of the extended DTDValidationSchema API; left to concrete subset implementations
@Override
public abstract int getEntityCount();
// Part of the extended DTDValidationSchema API; left to concrete subset implementations
@Override
public abstract int getNotationCount();
/*
//////////////////////////////////////////////////////
// Woodstox-specific API, caching support
//////////////////////////////////////////////////////
*/
// Caching support; presumably tells whether this instance may be cached
// for reuse -- confirm against the implementing class
public abstract boolean isCachable();
/**
 * Method used in determining whether cached external subset instance
 * can be used with specified internal subset. If the external subset
 * references any parameter entities that the internal subset
 * (re-)defines, it can not; otherwise it can be used.
 *<p>
 * NOTE(review): this comment used to describe the return value as
 * "True if this (external) subset refers to a parameter entity
 * defined in passed-in internal subset", which contradicts both the
 * method name and the description above. Presumably true means the
 * cached instance CAN be reused -- confirm against the implementing
 * class.
 *
 * @param intSubset Internal subset this (external) subset would be
 *   used with
 *
 * @return Presumably true if this (external) subset can be reused with
 *   the passed-in internal subset; see the note above
 */
public abstract boolean isReusableWith(DTDSubset intSubset);
/*
//////////////////////////////////////////////////////
// Woodstox-specific API, entity/notation handling
//////////////////////////////////////////////////////
*/
public abstract HashMap
* Needed
* for determining if external subset materially depends on definitions
* from internal subset; if so, such subset is not cachable.
* This also
* means that information is not stored for non-cachable instance.
*/
final Set
* Needed
* for determining if external subset materially depends on definitions
* from internal subset; if so, such subset is not cachable.
* This also
* means that information is not stored for non-cachable instance.
*/
final Set
* Note: The first Map argument WILL be modified; second one
* not. Caller needs to ensure this is acceptable.
*/
private static
* Note, too, that underlying {@link IOException}s are generally wrapped
* as {@link XMLStreamException}s. This is needed to reduce amount of
* work caller has to do for wrapping. It will still be possible to
* unwrap these exceptions further up the call stack if need be.
*/
final class DTDWriter
{
    /*
    //////////////////////////////////////////////////
    // Configuration
    //////////////////////////////////////////////////
     */

    /** Destination for the flattened output. */
    final Writer mWriter;

    /** Whether comments are to be included in the output. */
    final boolean mIncludeComments;

    /** Whether conditional sections are to be included in the output. */
    final boolean mIncludeConditionals;

    /** Whether parameter entity related content is to be included in the output. */
    final boolean mIncludePEs;

    /*
    //////////////////////////////////////////////////
    // Output status
    //////////////////////////////////////////////////
     */

    /**
     * Nesting counter for output state: output is written only while the
     * value is above zero. Each {@link #disableOutput} call decrements,
     * each {@link #enableOutput} call increments it, so that content which
     * is not to be included can temporarily suppress output.
     */
    int mIsFlattening;

    /**
     * Index of the first character of the current input buffer that
     * has not yet been handed to the writer.
     */
    int mFlattenStart;

    /*
    //////////////////////////////////////////////////
    // Life-cycle
    //////////////////////////////////////////////////
     */

    public DTDWriter(Writer out, boolean inclComments, boolean inclCond,
                     boolean inclPEs)
    {
        mWriter = out;
        mIncludeComments = inclComments;
        mIncludeConditionals = inclCond;
        mIncludePEs = inclPEs;

        // Output starts out enabled
        mIsFlattening = 1;
    }

    /*
    //////////////////////////////////////////////////
    // Public API, accessors, state change
    //////////////////////////////////////////////////
     */

    public boolean includeComments() { return mIncludeComments; }

    public boolean includeConditionals() { return mIncludeConditionals; }

    public boolean includeParamEntities() { return mIncludePEs; }

    /**
     * Suppresses output until a matching {@link #enableOutput} call.
     */
    public void disableOutput() {
        mIsFlattening -= 1;
    }

    /**
     * Reverts one earlier {@link #disableOutput} call, and marks
     * {@code newStart} as the first not-yet-written input position.
     */
    public void enableOutput(int newStart) {
        mIsFlattening += 1;
        mFlattenStart = newStart;
    }

    public void setFlattenStart(int ptr) { mFlattenStart = ptr; }

    public int getFlattenStart() { return mFlattenStart; }

    /*
    //////////////////////////////////////////////////
    // Public API, output methods:
    //////////////////////////////////////////////////
     */

    /**
     * Writes out (if output is enabled) buffered input from the current
     * flatten start position up to, but not including, {@code upUntil};
     * then advances the start position. Does nothing if there is no
     * pending content.
     */
    public void flush(char[] buf, int upUntil)
        throws XMLStreamException
    {
        final int pending = upUntil - mFlattenStart;
        if (pending <= 0) {
            return;
        }
        if (mIsFlattening > 0) {
            try {
                mWriter.write(buf, mFlattenStart, pending);
            } catch (IOException ioe) {
                throw new WstxIOException(ioe);
            }
        }
        // Position moves along even when the content was suppressed
        mFlattenStart = upUntil;
    }

    /**
     * Method called when content has to be explicitly added to the
     * flattened output; typically after speculative checks made it
     * impossible to output characters straight from the input buffer
     * (as is the case when suppressing comments or conditional sections).
     * Nothing is written if output is disabled.
     */
    public void output(String output)
        throws XMLStreamException
    {
        if (mIsFlattening <= 0) {
            return;
        }
        try {
            mWriter.write(output);
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    /**
     * Single-character variant of {@link #output(String)}.
     */
    public void output(char c)
        throws XMLStreamException
    {
        if (mIsFlattening <= 0) {
            return;
        }
        try {
            mWriter.write(c);
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/DefaultAttrValue.java 0000664 0000000 0000000 00000014770 13257562550 0030062 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.text.MessageFormat;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.ValidationContext;
import org.codehaus.stax2.validation.XMLValidationProblem;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.cfg.ErrorConsts;
/**
* Simple container class used to contain information about the default
* value for an attribute. Although for most use cases a simple String
* would suffice, there are cases where additional information is needed
* (especially status of 'broken' default values, which only need to be
* reported should the default value be needed).
*/
public final class DefaultAttrValue
{
/*
////////////////////////////////////////////////////
// Constants
////////////////////////////////////////////////////
*/
// // // Default value types
public final static int DEF_DEFAULT = 1;
public final static int DEF_IMPLIED = 2;
public final static int DEF_REQUIRED = 3;
public final static int DEF_FIXED = 4;
/*
////////////////////////////////////////////////////
// Singleton instances
////////////////////////////////////////////////////
*/
final static DefaultAttrValue sImplied = new DefaultAttrValue(DEF_IMPLIED);
final static DefaultAttrValue sRequired = new DefaultAttrValue(DEF_REQUIRED);
/*
////////////////////////////////////////////////////
// State
////////////////////////////////////////////////////
*/
final int mDefValueType;
/**
* Actual expanded textual content of the default attribute value;
* normalized if appropriate in this mode.
* Note that all entities have been expanded: if a GE/PE was undefined,
* and no fatal errors were reported (non-validating mode), the
* references were just silently removed, and matching entries added
* to
* There are 2 main modes for DTDReader, depending on whether it is parsing
* internal or external subset. Parsing of internal subset is somewhat
* simpler, since no dependency checking is needed. For external subset,
* handling of parameter entities is bit more complicated, as care has to
* be taken to distinguish between using PEs defined in int. subset, and
* ones defined in ext. subset itself. This determines cachability of
* external subsets.
*
* Reader also implements simple stand-alone functionality for flattening
* DTD files (expanding all references to their eventual textual form);
* this is sometimes useful when optimizing modularized DTDs
* (which are more maintainable) into single monolithic DTDs (which in
* general can be more performant).
*
* @author Tatu Saloranta
*/
public class FullDTDReader
extends MinimalDTDReader
{
/**
* Flag that can be changed to enable or disable interning of shared
* names; shared names are used for enumerated values to reduce
* memory usage.
*/
final static boolean INTERN_SHARED_NAMES = false;
// // // Entity expansion types:
final static Boolean ENTITY_EXP_GE = Boolean.FALSE;
final static Boolean ENTITY_EXP_PE = Boolean.TRUE;
/*
///////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////
*/
final int mConfigFlags;
// Extracted wstx-specific settings:
final boolean mCfgSupportDTDPP;
/**
* This flag indicates whether we should build a validating 'real'
* validator (true, the usual case),
* or a simpler pseudo-validator that can do all non-validation tasks
* that are based on DTD info (entity expansion, notation references,
* default attribute values). Latter is used in non-validating mode.
*
*/
final boolean mCfgFullyValidating;
/*
///////////////////////////////////////////////////////////
// Entity handling, parameter entities (PEs)
///////////////////////////////////////////////////////////
*/
/**
* Set of parameter entities defined so far in the currently parsed
* subset. Note: the first definition sticks, entities can not be
* redefined.
*
* Keys are entity name Strings; values are instances of EntityDecl
*/
HashMap
* Keys are entity name Strings; values are instances of EntityDecl
*
* Note: this Map only contains entities declared and defined in the
* subset being parsed; no previously defined values are passed.
*/
HashMap
* Keys are entity name Strings; values are instances of
* NotationDecl objects
*/
HashMap
* Note: see base class for some additional remarks about this
* method.
*/
@Override
public EntityDecl findEntity(String entName)
{
    // Pre-defined general entities (when such a map exists) are consulted
    // first; entities declared in this subset only act as the fallback
    final EntityDecl predefined = (mPredefdGEs == null) ? null : mPredefdGEs.get(entName);
    return (predefined != null) ? predefined : mGeneralEntities.get(entName);
}
/*
///////////////////////////////////////////////////////////
// Main-level parsing methods
///////////////////////////////////////////////////////////
*/
/**
 * Main-level parsing loop: reads the subset one main-level construct
 * at a time (parameter entity reference, directive, or end marker of
 * a conditional section / the internal subset), and once the end is
 * reached, constructs the {@link DTDSubset} that contains definitions
 * collected.
 *
 * @return Subset instance constructed via
 *   {@code DTDSubsetImpl.constructInstance}
 */
protected DTDSubset parseDTD()
throws XMLStreamException
{
while (true) {
mCheckForbiddenPEs = false; // PEs are ok at this point
int i = getNextAfterWS();
// Negative value is handled as end-of-input
if (i < 0) {
if (mIsExternal) { // ok for external DTDs
break;
}
// Error for internal subset
throwUnexpectedEOF(SUFFIX_IN_DTD_INTERNAL);
}
if (i == '%') { // parameter entity
expandPE();
continue;
}
/* First, let's keep track of start of the directive; needed for
* entity and notation declaration events.
*/
mTokenInputTotal = mCurrInputProcessed + mInputPtr;
mTokenInputRow = mCurrInputRow;
mTokenInputCol = mInputPtr - mCurrInputRowStart;
if (i == '<') {
// PEs not allowed within declarations, in the internal subset proper
mCheckForbiddenPEs = !mIsExternal && (mInput == mRootInput);
// Flattening (if a flatten writer exists) has its own directive handler
if (mFlattenWriter == null) {
parseDirective();
} else {
parseDirectiveFlattened();
}
continue;
}
if (i == ']') {
if (mIncludeCount == 0 && !mIsExternal) { // End of internal subset
break;
}
if (mIncludeCount > 0) { // active INCLUDE block(s) open?
// When flattening without conditional sections, the closing
// marker itself must be kept out of the output as well
boolean suppress = (mFlattenWriter != null) && !mFlattenWriter.includeConditionals();
if (suppress) {
mFlattenWriter.flush(mInputBuffer, mInputPtr-1);
mFlattenWriter.disableOutput();
}
try {
// ]]> needs to be a token, can not come from PE:
char c = dtdNextFromCurr();
if (c == ']') {
c = dtdNextFromCurr();
if (c == '>') {
// Ok, fine, conditional include section ended.
--mIncludeCount;
continue;
}
}
throwDTDUnexpectedChar(c, "; expected ']]>' to close conditional include section");
} finally {
// Output is re-enabled on all paths, including 'continue' above
if (suppress) {
mFlattenWriter.enableOutput(mInputPtr);
}
}
}
// otherwise will fall through, and give an error
}
if (mIsExternal) {
throwDTDUnexpectedChar(i, "; expected a '<' to start a directive");
}
throwDTDUnexpectedChar(i, "; expected a '<' to start a directive, or \"]>\" to end internal subset");
}
/* 05-Feb-2006, TSa: Not allowed to have unclosed INCLUDE/IGNORE
* blocks...
*/
if (mIncludeCount > 0) { // active INCLUDE block(s) open?
String suffix = (mIncludeCount == 1) ? "an INCLUDE block" : (""+mIncludeCount+" INCLUDE blocks");
throwUnexpectedEOF(getErrorMsg()+"; expected closing marker for "+suffix);
}
/* First check: have all notation references been resolved?
* (related to [WSTX-121])
*/
if (mNotationForwardRefs != null && mNotationForwardRefs.size() > 0) {
_reportUndefinedNotationRefs();
}
// Ok; time to construct and return DTD data object.
DTDSubset ss;
// There are more settings for ext. subsets:
if (mIsExternal) {
/* External subsets are cachable if they did not refer to any
* PEs or GEs defined in internal subset passed in (if any),
* nor to any notations.
* We don't care about PEs it defined itself, but need to pass
* in Set of PEs it refers to, to check if cached copy can be
* used with different int. subsets.
* We need not worry about notations referred, since they are
* not allowed to be re-defined.
*/
boolean cachable = !mUsesPredefdEntities && !mUsesPredefdNotations;
ss = DTDSubsetImpl.constructInstance(cachable,
mGeneralEntities, mRefdGEs,
null, mRefdPEs,
mNotations, mElements,
mCfgFullyValidating);
} else {
/* Internal subsets are not cachable (no unique way to refer
* to unique internal subsets), and there can be no references
* to pre-defined PEs, as none were passed.
*/
ss = DTDSubsetImpl.constructInstance(false, mGeneralEntities, null,
mParamEntities, null,
mNotations, mElements,
mCfgFullyValidating);
}
return ss;
}
protected void parseDirective()
    throws XMLStreamException
{
    /* Character that follows the opening '<' has to come from the
     * current input source (no splits): it is not expected that a PE
     * could start with '!' or '?'.
     */
    char marker = dtdNextFromCurr();

    // Processing instruction (or xml declaration)?
    if (marker == '?') {
        readPI();
        return;
    }
    if (marker != '!') { // nothing valid
        throwDTDUnexpectedChar(marker, "; expected '!' to start a directive");
    }

    /* Conditional section, declaration or comment; token still
     * comes from the same input source
     */
    marker = dtdNextFromCurr();

    if (marker == '[') { // INCLUDE/IGNORE
        checkInclusion();
        return;
    }
    if (marker >= 'A' && marker <= 'Z') { // keyword of a declaration
        handleDeclaration(marker);
        return;
    }
    if (marker != '-') {
        throwDTDUnexpectedChar(marker, ErrorConsts.ERR_DTD_MAINLEVEL_KEYWORD);
        return;
    }

    // Only a plain comment remains; needs the second hyphen too
    marker = dtdNextFromCurr();
    if (marker != '-') {
        throwDTDUnexpectedChar(marker, "; expected '-' for a comment");
    }
    final boolean reportComment = (mEventListener != null)
        && mEventListener.dtdReportComments();
    if (reportComment) {
        readComment(mEventListener);
    } else {
        skipComment();
    }
}
/**
* Method similar to {@link #parseDirective}, but one that takes care
* to properly output dtd contents via {@link com.ctc.wstx.dtd.DTDWriter}
* as necessary.
* Separated to simplify both methods; otherwise would end up with
* 'if (... flatten...) ... else ...' spaghetti code.
*/
protected void parseDirectiveFlattened()
throws XMLStreamException
{
/* First, need to flush any flattened output there may be, at
* this point (except for opening lt char): and then need to
* temporarily disable more output until we know the type and
* whether it should be output or not:
*/
mFlattenWriter.flush(mInputBuffer, mInputPtr-1);
mFlattenWriter.disableOutput();
/* Let's determine type here, and call appropriate skip/parse
* methods.
*/
char c = dtdNextFromCurr();
if (c == '?') { // xml decl?
mFlattenWriter.enableOutput(mInputPtr);
mFlattenWriter.output("");
readPI();
//throwDTDUnexpectedChar(c, " expected '!' to start a directive");
return;
}
if (c != '!') { // nothing valid
throwDTDUnexpectedChar(c, ErrorConsts.ERR_DTD_MAINLEVEL_KEYWORD);
}
// ignore/include, comment, or directive
c = dtdNextFromCurr();
if (c == '-') { // plain comment
c = dtdNextFromCurr();
if (c != '-') {
throwDTDUnexpectedChar(c, "; expected '-' for a comment");
}
boolean comm = mFlattenWriter.includeComments();
if (comm) {
mFlattenWriter.enableOutput(mInputPtr);
mFlattenWriter.output("");
}
/**
 * Writes the beginning of a processing instruction: the two-character
 * start marker ({@code '<'} followed by {@code '?'}), then the target
 * as is, and optionally a single space character.
 *
 * @param target Target of the processing instruction; passed to
 *   {@code fastWriteRaw} without any checks in this method
 * @param addSpace Whether a space is to be written after the target
 */
@Override
public final void writePIStart(String target, boolean addSpace) throws IOException
{
fastWriteRaw('<', '?');
fastWriteRaw(target);
if (addSpace) {
fastWriteRaw(' ');
}
}
/**
 * Writes the two-character end marker of a processing instruction
 * ({@code '?'} followed by {@code '>'}).
 */
@Override
public final void writePIEnd() throws IOException {
fastWriteRaw('?', '>');
}
/*
////////////////////////////////////////////////
// Higher-level output methods, text output
////////////////////////////////////////////////
*/
/**
 * Writes given content as a CDATA section.
 *<p>
 * If content checking is enabled and the content is found to be
 * problematic by {@code verifyCDataContent}, it is either written out
 * in segments (if content fixing is enabled), or nothing is written and
 * the offset of the problem is returned.
 *
 * @param data Content of the CDATA section
 *
 * @return -1 if the content was written; otherwise the offset
 *   returned by {@code verifyCDataContent}
 */
@Override
public int writeCData(String data) throws IOException
{
    if (mCheckContent) {
        int ix = verifyCDataContent(data);
        if (ix >= 0) {
            if (!mFixContent) { // Can we fix it?
                return ix;
            }
            // Yes we can! (...Bob the Builder...)
            writeSegmentedCData(data, ix);
            return -1;
        }
    }
    // Content is fine (or not checked): start marker, data as is, end marker
    fastWriteRaw("<![CDATA[");
    writeRaw(data);
    fastWriteRaw("]]>");
    return -1;
}
/**
 * Writes given content as a CDATA section.
 *<p>
 * If content checking is enabled and the content is found to be
 * problematic by {@code verifyCDataContent}, it is either written out
 * in segments (if content fixing is enabled), or nothing is written and
 * the offset of the problem is returned.
 *
 * @param cbuf Buffer that contains the content
 * @param offset Index of the first content character in the buffer
 * @param len Number of content characters
 *
 * @return -1 if the content was written; otherwise the offset
 *   returned by {@code verifyCDataContent}
 */
@Override
public int writeCData(char[] cbuf, int offset, int len) throws IOException
{
    if (mCheckContent) {
        int ix = verifyCDataContent(cbuf, offset, len);
        if (ix >= 0) {
            if (!mFixContent) { // Can we fix it?
                return ix;
            }
            // Yes we can! (...Bob the Builder...)
            writeSegmentedCData(cbuf, offset, len, ix);
            return -1;
        }
    }
    // Content is fine (or not checked): start marker, data as is, end marker
    fastWriteRaw("<![CDATA[");
    writeRaw(cbuf, offset, len);
    fastWriteRaw("]]>");
    return -1;
}
/**
 * Writes given text as character data, escaping characters as needed.
 *<p>
 * Does nothing if there is no underlying output; and if a custom text
 * writer has been configured, all handling is delegated to it.
 * Otherwise {@code '<'} and {@code '&'} are written as the matching
 * pre-defined entities, as is {@code '>'} when it is the first
 * character or follows {@code ']'} (it could be part of "]]&gt;");
 * other characters that can not be output as is are written via
 * {@code writeAsEntity}.
 *
 * @param text Text to write
 */
@Override
public void writeCharacters(String text) throws IOException
{
    if (mOut == null) {
        return;
    }
    if (mTextWriter != null) { // custom escaping?
        mTextWriter.write(text);
        return;
    }
    int inPtr = 0;
    final int len = text.length();

    // nope, default:
    final int[] QC = QUOTABLE_TEXT_CHARS;
    final int highChar = mEncHighChar;
    final int MAXQC = Math.min(QC.length, highChar);

    main_loop:
    while (true) {
        /* Replacement to write for the character that ended the inner
         * loop; null means that the character itself is to be written
         * as an entity.
         */
        String ent = null;

        inner_loop:
        while (true) {
            if (inPtr >= len) {
                break main_loop;
            }
            char c = text.charAt(inPtr++);
            if (c < MAXQC) {
                if (QC[c] != 0) {
                    if (c < 0x0020) {
                        if (c != '\n' && c != '\t') { // those two are fine as is
                            if (c == '\r') {
                                if (mEscapeCR) {
                                    break inner_loop;
                                }
                            } else {
                                if (!mXml11 || c == 0) {
                                    c = handleInvalidChar(c); // throws an error usually
                                    ent = String.valueOf((char) c);
                                }
                                /* Either way, have to leave the loop: to
                                 * write the replacement right away (so that
                                 * it can not linger and replace a later
                                 * escape), or to quote the character.
                                 */
                                break inner_loop;
                            }
                        }
                    } else if (c == '<') {
                        ent = "&lt;";
                        break inner_loop;
                    } else if (c == '&') {
                        ent = "&amp;";
                        break inner_loop;
                    } else if (c == '>') {
                        // Let's be conservative; and if there's any
                        // chance it might be part of "]]>" quote it
                        if (inPtr < 2 || text.charAt(inPtr-2) == ']') {
                            ent = "&gt;";
                            break inner_loop;
                        }
                    } else if (c >= 0x7F) {
                        break inner_loop;
                    }
                }
            } else if (c >= highChar) {
                break inner_loop;
            }
            // Character is fine as is
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        if (ent != null) {
            writeRaw(ent);
        } else {
            writeAsEntity(text.charAt(inPtr-1));
        }
    }
}
/**
 * Writes given characters as character data, escaping characters as
 * needed.
 *<p>
 * Does nothing if there is no underlying output; and if a custom text
 * writer has been configured, all handling is delegated to it.
 * Otherwise {@code '<'} and {@code '&'} are written as the matching
 * pre-defined entities, as is {@code '>'} when it starts a segment or
 * follows {@code ']'} (it could be part of "]]&gt;"); other characters
 * that can not be output as is are written via {@code writeAsEntity}.
 *
 * @param cbuf Buffer that contains the characters to write
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 */
@Override
public void writeCharacters(char[] cbuf, int offset, int len) throws IOException
{
    if (mOut == null) {
        return;
    }
    if (mTextWriter != null) { // custom escaping?
        mTextWriter.write(cbuf, offset, len);
        return;
    }
    // nope, default:
    final int[] QC = QUOTABLE_TEXT_CHARS;
    final int highChar = mEncHighChar;
    final int MAXQC = Math.min(QC.length, highChar);
    len += offset; // from here on, 'len' is the end offset

    do {
        int c = 0;
        int start = offset;
        // Replacement for the character that ended the scan, if any
        String ent = null;

        // Scan for the longest run that can be written as is
        for (; offset < len; ++offset) {
            c = cbuf[offset];
            if (c < MAXQC) {
                if (QC[c] != 0) {
                    // Ok, possibly needs quoting... further checks needed
                    if (c == '<') {
                        ent = "&lt;";
                        break;
                    } else if (c == '&') {
                        ent = "&amp;";
                        break;
                    } else if (c == '>') {
                        /* Let's be conservative; and if there's any
                         * chance it might be part of "]]>" quote it
                         */
                        if ((offset == start) || cbuf[offset-1] == ']') {
                            ent = "&gt;";
                            break;
                        }
                    } else if (c < 0x0020) {
                        if (c != '\n' && c != '\t') { // those two are fine as is
                            if (c == '\r') {
                                if (mEscapeCR) {
                                    break;
                                }
                            } else {
                                if (!mXml11 || c == 0) {
                                    c = handleInvalidChar(c);
                                    // Hmmh. This is very inefficient, but...
                                    ent = String.valueOf((char) c);
                                }
                                break; // need quoting
                            }
                        }
                    } else if (c >= 0x7F) {
                        break;
                    }
                }
            } else if (c >= highChar) {
                break;
            }
            // otherwise fine
        }
        int outLen = offset - start;
        if (outLen > 0) {
            writeRaw(cbuf, start, outLen);
        }
        if (ent != null) {
            writeRaw(ent);
        } else if (offset < len) {
            // Scan ended at a character that has to be written as an entity
            writeAsEntity(c);
        }
    } while (++offset < len);
}
/**
 * Method that will try to output the content as specified. If
 * the content passed in has embedded "--" in it, it will either
 * add an intervening space between consecutive hyphens (if content
 * fixing is enabled), or return the offset of the first hyphen in
 * multi-hyphen sequence.
 *
 * @param data Contents of the comment (without start and end markers)
 *
 * @return -1 if the comment was written; otherwise index of the
 *   problematic hyphen, in which case nothing was written
 */
@Override
public int writeComment(String data) throws IOException
{
    if (mCheckContent) {
        int ix = verifyCommentContent(data);
        if (ix >= 0) {
            if (!mFixContent) { // Can we fix it?
                return ix;
            }
            // Yes we can! (...Bob the Builder...)
            writeSegmentedComment(data, ix);
            return -1;
        }
    }
    fastWriteRaw("<!--");
    // Data may be long, so use the regular writeRaw method
    writeRaw(data);
    fastWriteRaw("-->");
    return -1;
}
/**
 * Writes a complete, caller-constructed DOCTYPE declaration as is;
 * no checks or escaping are done here.
 *
 * @param data Full declaration to output
 */
@Override
public void writeDTD(String data) throws IOException
{
    writeRaw(data);
}
@Override
public void writeDTD(String rootName, String systemId, String publicId,
String internalSubset)
throws IOException, XMLStreamException
{
fastWriteRaw(" 0) {
fastWriteRaw(' ', '[');
fastWriteRaw(internalSubset);
fastWriteRaw(']');
}
fastWriteRaw('>');
}
/**
 * Writes a general entity reference ("&amp;name;"); the name is
 * verified first if name checking is enabled.
 *
 * @param name Name of the entity to reference
 */
@Override
public void writeEntityReference(String name)
    throws IOException, XMLStreamException
{
    final boolean checkName = mCheckNames;
    if (checkName) {
        verifyNameValidity(name, mNsAware);
    }
    // Reference is simply the name enclosed in '&' and ';'
    fastWriteRaw('&');
    fastWriteRaw(name);
    fastWriteRaw(';');
}
@Override
public void writeXmlDeclaration(String version, String encoding, String standalone)
throws IOException
{
final char chQuote = (mUseDoubleQuotesInXmlDecl ? '"' : '\'');
fastWriteRaw(" 0) {
fastWriteRaw(" encoding=");
fastWriteRaw(chQuote);
fastWriteRaw(encoding);
fastWriteRaw(chQuote);
}
if (standalone != null) {
fastWriteRaw(" standalone=");
fastWriteRaw(chQuote);
fastWriteRaw(standalone);
fastWriteRaw(chQuote);
}
fastWriteRaw('?', '>');
}
/**
 * Writes a processing instruction.
 *<p>
 * Content is checked before anything is output, so that a rejected
 * processing instruction does not leave a partial "&lt;?target" in the
 * output.
 *
 * @param target Target of the processing instruction
 * @param data Data of the processing instruction; may be null or empty,
 *   in which case only the target is written
 *
 * @return -1 if the processing instruction was written; otherwise
 *   (content checking enabled and data contains "?&gt;") index of the
 *   offending "?&gt;", in which case nothing was written
 */
@Override
public int writePI(String target, String data)
    throws IOException, XMLStreamException
{
    if (mCheckNames) {
        // As per namespace specs, can not have colon(s)
        verifyNameValidity(target, mNsAware);
    }
    final boolean hasData = (data != null) && (data.length() > 0);
    if (hasData && mCheckContent) {
        // Data must not contain the end marker
        int ix = data.indexOf("?>");
        if (ix >= 0) {
            return ix;
        }
    }
    fastWriteRaw('<', '?');
    fastWriteRaw(target);
    if (hasData) {
        fastWriteRaw(' ');
        // Data may be longer, let's call regular writeRaw method
        writeRaw(data);
    }
    fastWriteRaw('?', '>');
    return -1;
}
/*
////////////////////////////////////////////////////
// Write methods, elements
////////////////////////////////////////////////////
*/
/**
 * Writes the beginning of a start tag ("&lt;" followed by the name) for
 * an element without prefix.
 *
 * @param localName Name of the element
 */
@Override
public void writeStartTagStart(String localName)
    throws IOException, XMLStreamException
{
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    int pos = mOutputPtr;
    final int room = (mOutputBufLen - pos) - (1 + localName.length());
    if (room >= 0) {
        // Everything fits in the buffer: copy directly
        final char[] out = mOutputBuffer;
        out[pos++] = '<';
        final int nameLen = localName.length();
        localName.getChars(0, nameLen, out, pos);
        mOutputPtr = pos + nameLen;
        return;
    }
    // Would cross buffer boundary: use the slower piecewise writes
    fastWriteRaw('<');
    fastWriteRaw(localName);
}
/**
 * Writes the beginning of a start tag ("&lt;prefix:localName") for a
 * prefixed element; delegates to the single-argument variant if prefix
 * is null or empty.
 *
 * @param prefix Prefix of the element
 * @param localName Local name of the element
 */
@Override
public void writeStartTagStart(String prefix, String localName)
    throws IOException, XMLStreamException
{
    final boolean noPrefix = (prefix == null) || (prefix.length() == 0);
    if (noPrefix) { // shouldn't happen
        writeStartTagStart(localName);
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    int pos = mOutputPtr;
    final int prefixLen = prefix.length();
    final int room = (mOutputBufLen - pos) - (2 + localName.length() + prefixLen);
    if (room >= 0) { // fast case, all inlined
        final char[] out = mOutputBuffer;
        out[pos++] = '<';
        prefix.getChars(0, prefixLen, out, pos);
        pos += prefixLen;
        out[pos++] = ':';
        final int localLen = localName.length();
        localName.getChars(0, localLen, out, pos);
        mOutputPtr = pos + localLen;
        return;
    }
    // across buffer boundary, slow case
    fastWriteRaw('<');
    fastWriteRaw(prefix);
    fastWriteRaw(':');
    fastWriteRaw(localName);
}
/**
 * Writes the '&gt;' that closes a (non-empty) start tag.
 */
@Override
public void writeStartTagEnd() throws IOException
{
    fastWriteRaw('>');
}
/**
 * Writes the end of an empty element's start tag: "/&gt;", preceded by
 * a space if {@code mAddSpaceAfterEmptyElem} is set.
 */
@Override
public void writeStartTagEmptyEnd() throws IOException
{
    int pos = mOutputPtr;
    // Need room for up to 3 chars; flush first if not enough
    if ((pos + 3) >= mOutputBufLen) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
        pos = mOutputPtr;
    }
    final char[] out = mOutputBuffer;
    if (mAddSpaceAfterEmptyElem) {
        out[pos++] = ' ';
    }
    out[pos] = '/';
    out[pos + 1] = '>';
    mOutputPtr = pos + 2;
}
/**
 * Writes an end tag ("&lt;/localName&gt;") for an element without prefix.
 *
 * @param localName Name of the element to close
 */
@Override
public void writeEndTag(String localName) throws IOException
{
    int pos = mOutputPtr;
    final int room = (mOutputBufLen - pos) - (3 + localName.length());
    if (room >= 0) {
        // Fits as is: copy straight into the output buffer
        final char[] out = mOutputBuffer;
        out[pos++] = '<';
        out[pos++] = '/';
        final int nameLen = localName.length();
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '>';
        mOutputPtr = pos;
        return;
    }
    // Not enough room: let helper methods deal with flushing
    fastWriteRaw('<', '/');
    fastWriteRaw(localName);
    fastWriteRaw('>');
}
/**
 * Writes an end tag ("&lt;/prefix:localName&gt;") for a prefixed element;
 * delegates to the single-argument variant if prefix is null or empty.
 * Names are not verified here.
 *
 * @param prefix Prefix of the element to close
 * @param localName Local name of the element to close
 */
@Override
public void writeEndTag(String prefix, String localName) throws IOException
{
    final boolean noPrefix = (prefix == null) || (prefix.length() == 0);
    if (noPrefix) {
        writeEndTag(localName);
        return;
    }
    int pos = mOutputPtr;
    final int prefixLen = prefix.length();
    final int room = (mOutputBufLen - pos) - (4 + localName.length() + prefixLen);
    if (room >= 0) {
        final char[] out = mOutputBuffer;
        out[pos++] = '<';
        out[pos++] = '/';
        prefix.getChars(0, prefixLen, out, pos);
        pos += prefixLen;
        out[pos++] = ':';
        final int localLen = localName.length();
        localName.getChars(0, localLen, out, pos);
        pos += localLen;
        out[pos++] = '>';
        mOutputPtr = pos;
        return;
    }
    /* Slow path. At this point it is assumed the caller knows that the
     * end tag matches the start tag, and that the name has (by
     * extension) been validated if and as necessary.
     */
    fastWriteRaw('<', '/');
    fastWriteRaw(prefix);
    fastWriteRaw(':');
    fastWriteRaw(localName);
    fastWriteRaw('>');
}
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
 * Writes an attribute without prefix, as {@code  localName="value"}
 * (with a leading space). Value is escaped by the custom attribute
 * value writer if one is configured, otherwise by the default escaping.
 *
 * @param localName Name of the attribute
 * @param value Value of the attribute; null is written as empty value
 */
@Override
public void writeAttribute(String localName, String value)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    final int room = (mOutputBufLen - mOutputPtr) - (3 + nameLen);
    if (room >= 0) {
        // Name, '=' and opening quote all fit in the buffer
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    } else {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    }
    final int valueLen = (value == null) ? 0 : value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, valueLen);
        } else { // custom escaping
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes an attribute without prefix, with the value given as a
 * section of a character array. Value is escaped by the custom
 * attribute value writer if one is configured, otherwise by the
 * default escaping.
 *
 * @param localName Name of the attribute
 * @param value Buffer that contains the value
 * @param offset Offset of the first value character
 * @param vlen Number of value characters
 */
@Override
public void writeAttribute(String localName, char[] value, int offset, int vlen)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    final int room = (mOutputBufLen - mOutputPtr) - (3 + nameLen);
    if (room >= 0) {
        // Name, '=' and opening quote all fit in the buffer
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    } else {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    }
    if (vlen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, offset, vlen);
        } else { // custom escaping
            mAttrValueWriter.write(value, offset, vlen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes a prefixed attribute, as {@code  prefix:localName="value"}
 * (with a leading space).
 *<p>
 * If prefix is null or empty, the attribute is written without prefix
 * by the two-argument variant (same convention as with
 * {@link #writeStartTagStart(String,String)}), so that a stray ':' is
 * never output regardless of how much room the output buffer has.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param value Value of the attribute; null is written as empty value
 */
@Override
public void writeAttribute(String prefix, String localName, String value)
    throws IOException, XMLStreamException
{
    if (prefix == null || prefix.length() == 0) {
        writeAttribute(localName, value);
        return;
    }
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    int len = prefix.length();
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) {
        // Not enough room in the buffer: piecewise writes
        fastWriteRaw(' ');
        fastWriteRaw(prefix);
        fastWriteRaw(':');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        prefix.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = ':';
        len = localName.length();
        localName.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    len = (value == null) ? 0 : value.length();
    if (len > 0) {
        if (mAttrValueWriter != null) { // custom escaping?
            mAttrValueWriter.write(value, 0, len);
        } else { // nope, default
            writeAttrValue(value, len);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes a prefixed attribute, with the value given as a section of a
 * character array.
 *<p>
 * If prefix is null or empty, the attribute is written without prefix
 * by the variant that takes no prefix (same convention as with
 * {@link #writeStartTagStart(String,String)}), so that a stray ':' is
 * never output regardless of how much room the output buffer has.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param value Buffer that contains the value
 * @param offset Offset of the first value character
 * @param vlen Number of value characters
 */
@Override
public void writeAttribute(String prefix, String localName, char[] value, int offset, int vlen)
    throws IOException, XMLStreamException
{
    if (prefix == null || prefix.length() == 0) {
        writeAttribute(localName, value, offset, vlen);
        return;
    }
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    int len = prefix.length();
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) {
        // Not enough room in the buffer: piecewise writes
        fastWriteRaw(' ');
        fastWriteRaw(prefix);
        fastWriteRaw(':');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        prefix.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = ':';
        len = localName.length();
        localName.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    if (vlen > 0) {
        if (mAttrValueWriter != null) { // custom escaping?
            mAttrValueWriter.write(value, offset, vlen);
        } else { // nope, default
            writeAttrValue(value, offset, vlen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes an attribute value using default escaping: the quote
 * character, '&lt;' and '&amp;' are written as predefined entities;
 * tab and linefeed, carriage return (if {@code mEscapeCR} is set),
 * other control characters that are allowed, and characters the
 * encoding can not represent are written as character entities.
 *
 * @param value Value to write
 * @param len Number of characters of value to write
 */
private final void writeAttrValue(String value, int len)
    throws IOException
{
    int inPtr = 0;
    final char qchar = mEncQuoteChar;
    int highChar = mEncHighChar;

    main_loop:
    while (true) { // main_loop
        // Non-null only when the char that ended inner loop has a predefined entity
        String ent = null;

        inner_loop:
        while (true) {
            if (inPtr >= len) {
                break main_loop;
            }
            char c = value.charAt(inPtr++);
            if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char?
                if (c < 0x0020) { // tab, cr/lf need encoding too
                    if (c == '\r') {
                        if (mEscapeCR) {
                            break inner_loop; // quoting
                        }
                    } else if (c != '\n' && c != '\t'
                               && (!mXml11 || c == 0)) {
                        // Invalid: replacement (if any) is buffered below
                        c = handleInvalidChar(c);
                    } else {
                        break inner_loop; // need quoting
                    }
                } else if (c == qchar) {
                    ent = mEncQuoteEntity;
                    break inner_loop;
                } else if (c == '<') {
                    ent = "&lt;";
                    break inner_loop;
                } else if (c == '&') {
                    ent = "&amp;";
                    break inner_loop;
                }
            } else if (c >= highChar) { // out of range, have to escape
                break inner_loop;
            }
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        if (ent != null) {
            writeRaw(ent);
        } else {
            // No predefined entity: escape the character that stopped the loop
            writeAsEntity(value.charAt(inPtr-1));
        }
    }
}
/**
 * Writes an attribute value, given as a section of a character array,
 * using default escaping: the quote character, '&lt;' and '&amp;' are
 * written as predefined entities; tab and linefeed, carriage return
 * (if {@code mEscapeCR} is set), other control characters that are
 * allowed, and characters the encoding can not represent are written
 * as character entities.
 *
 * @param value Buffer that contains the value
 * @param offset Offset of the first character to write
 * @param len Number of characters to write
 */
private final void writeAttrValue(char[] value, int offset, int len)
    throws IOException
{
    len += offset; // from here on, 'len' is the end offset (exclusive)
    final char qchar = mEncQuoteChar;
    int highChar = mEncHighChar;

    main_loop:
    while (true) { // main_loop
        // Non-null only when the char that ended inner loop has a predefined entity
        String ent = null;

        inner_loop:
        while (true) {
            if (offset >= len) {
                break main_loop;
            }
            char c = value[offset++];
            if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char?
                if (c < 0x0020) { // tab, cr/lf need encoding too
                    if (c == '\r') {
                        if (mEscapeCR) {
                            break inner_loop; // quoting
                        }
                    } else if (c != '\n' && c != '\t'
                               && (!mXml11 || c == 0)) {
                        // Invalid: replacement (if any) is buffered below
                        c = handleInvalidChar(c);
                    } else {
                        break inner_loop; // need quoting
                    }
                } else if (c == qchar) {
                    ent = mEncQuoteEntity;
                    break inner_loop;
                } else if (c == '<') {
                    ent = "&lt;";
                    break inner_loop;
                } else if (c == '&') {
                    ent = "&amp;";
                    break inner_loop;
                }
            } else if (c >= highChar) { // out of range, have to escape
                break inner_loop;
            }
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        if (ent != null) {
            writeRaw(ent);
        } else {
            // No predefined entity: escape the character that stopped the loop
            writeAsEntity(value[offset-1]);
        }
    }
}
/*
////////////////////////////////////////////////
// Methods used by Typed Access API
////////////////////////////////////////////////
*/
/**
 * Writes typed element content by letting the encoder append to the
 * output buffer, flushing between rounds until the encoder reports it
 * is done.
 *
 * @param enc Encoder that produces the content
 */
@Override
public final void writeTypedElement(AsciiValueEncoder enc)
    throws IOException
{
    if (mOut == null) {
        return;
    }
    // Encoder may want more room than is currently available
    if (enc.bufferNeedsFlush(mOutputBufLen - mOutputPtr)) {
        flush();
    }
    for (;;) {
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        if (enc.isCompleted()) { // all data encoded
            return;
        }
        flush();
    }
}
/**
 * Writes typed element content like the single-argument variant, and
 * additionally passes each encoded segment to the validator.
 *
 * @param enc Encoder that produces the content
 * @param validator Validator to call with each encoded segment
 * @param copyBuffer Not used by this implementation
 */
@Override
public final void writeTypedElement(AsciiValueEncoder enc,
        XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (enc.bufferNeedsFlush(mOutputBufLen - mOutputPtr)) {
        flush();
    }
    for (;;) {
        final int segmentStart = mOutputPtr;
        mOutputPtr = enc.encodeMore(mOutputBuffer, segmentStart, mOutputBufLen);
        // False -> can't be sure it's the whole remaining text
        validator.validateText(mOutputBuffer, segmentStart, mOutputPtr, false);
        if (enc.isCompleted()) {
            return;
        }
        flush();
    }
}
/**
 * Writes an attribute without prefix, with the value produced by the
 * given encoder; the encoded value is written without escaping.
 *
 * @param localName Name of the attribute
 * @param enc Encoder that produces the value
 */
@Override
public void writeTypedAttribute(String localName, AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    if ((mOutputPtr + 3 + nameLen) <= mOutputBufLen) {
        // Name, '=' and opening quote all fit in the buffer
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    } else {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    }
    if (enc.bufferNeedsFlush(mOutputBufLen - mOutputPtr)) {
        flush();
    }
    // Encode in as many rounds as needed, flushing in between
    for (;;) {
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        if (enc.isCompleted()) {
            break;
        }
        flush();
    }
    fastWriteRaw('"');
}
/**
 * Writes a (possibly prefixed) attribute, with the value produced by
 * the given encoder; the encoded value is written without escaping.
 * Prefix and separating colon are only output if prefix is not empty.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param enc Encoder that produces the value
 */
@Override
public void writeTypedAttribute(String prefix, String localName,
        AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    final int prefixLen = prefix.length();
    final int localLen = localName.length();
    if ((mOutputPtr + 4 + prefixLen + localLen) <= mOutputBufLen) {
        // Whole name part fits in the buffer
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        if (prefixLen > 0) {
            prefix.getChars(0, prefixLen, out, pos);
            pos += prefixLen;
            out[pos++] = ':';
        }
        localName.getChars(0, localLen, out, pos);
        pos += localLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    } else {
        writePrefixedName(prefix, localName);
        fastWriteRaw('=', '"');
    }
    if (enc.bufferNeedsFlush(mOutputBufLen - mOutputPtr)) {
        flush();
    }
    // Encode in as many rounds as needed, flushing in between
    for (;;) {
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        if (enc.isCompleted()) {
            break;
        }
        flush();
    }
    fastWriteRaw('"');
}
/**
 * Writes a (possibly prefixed) attribute with the value produced by
 * the given encoder, and passes the complete value to the validator.
 *<p>
 * The closing quote is written on both paths: when the value is encoded
 * with a single call, and when it has to be aggregated over multiple
 * rounds.
 *
 * @param prefix Prefix of the attribute; null is handled as empty
 * @param localName Local name of the attribute
 * @param nsURI Namespace URI of the attribute; null is handled as empty
 * @param enc Encoder that produces the value
 * @param validator Validator to call with the complete value
 * @param copyBuffer Not used by this implementation
 */
@Override
public void writeTypedAttribute(String prefix, String localName, String nsURI,
        AsciiValueEncoder enc,
        XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (prefix == null) {
        prefix = "";
    }
    if (nsURI == null) {
        nsURI = "";
    }
    int plen = prefix.length();
    if (mCheckNames) {
        if (plen > 0) {
            verifyNameValidity(prefix, mNsAware);
        }
        verifyNameValidity(localName, mNsAware);
    }
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + plen)) < 0) {
        writePrefixedName(prefix, localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        if (plen > 0) {
            prefix.getChars(0, plen, buf, ptr);
            ptr += plen;
            buf[ptr++] = ':';
        }
        int llen = localName.length();
        localName.getChars(0, llen, buf, ptr);
        ptr += llen;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    /* Tricky here is this: attributes to validate can not be
     * split (validators expect complete values). So, if value
     * won't fit as is, may need to aggregate using StringBuilder
     */
    int free = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(free)) {
        flush();
    }
    int start = mOutputPtr;
    // First, let's see if one call is enough
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    if (enc.isCompleted()) { // yup
        /* Validate straight from the buffer, before writing the quote
         * (which could trigger a flush and reset the pointer)
         */
        validator.validateAttribute(localName, nsURI, prefix, mOutputBuffer, start, mOutputPtr);
        fastWriteRaw('"');
        return;
    }
    // If not, must combine first
    StringBuilder sb = new StringBuilder(mOutputBuffer.length << 1);
    sb.append(mOutputBuffer, start, mOutputPtr-start);
    while (true) {
        flush();
        start = mOutputPtr;
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        sb.append(mOutputBuffer, start, mOutputPtr-start);
        // All done?
        if (enc.isCompleted()) {
            break;
        }
    }
    fastWriteRaw('"');
    // Then validate
    String valueStr = sb.toString();
    validator.validateAttribute(localName, nsURI, prefix, valueStr);
}
/**
 * Writes a space followed by the (possibly prefixed) name; prefix and
 * colon are omitted if prefix is empty.
 *
 * @param prefix Prefix of the name; must not be null
 * @param localName Local part of the name
 */
protected final void writePrefixedName(String prefix, String localName)
    throws IOException
{
    fastWriteRaw(' ');
    final boolean hasPrefix = (prefix.length() > 0);
    if (hasPrefix) {
        fastWriteRaw(prefix);
        fastWriteRaw(':');
    }
    fastWriteRaw(localName);
}
/*
////////////////////////////////////////////////////
// Internal methods, buffering
////////////////////////////////////////////////////
*/
/**
 * Writes buffered content (if any) to the underlying writer, and
 * updates location information accordingly. Does not flush the
 * underlying writer itself.
 */
private final void flushBuffer()
    throws IOException
{
    final int pending = mOutputPtr;
    if (pending <= 0 || mOutputBuffer == null) {
        return; // nothing to write
    }
    // Need to update location info, to keep it in sync
    mLocPastChars += pending;
    mLocRowStartOffset -= pending;
    // Pointer is reset before the write, so it is cleared even if write fails
    mOutputPtr = 0;
    mOut.write(mOutputBuffer, 0, pending);
}
/**
 * Appends a single character to the output buffer without escaping,
 * flushing the buffer first if it is full. If the buffer is full and
 * there is no underlying writer, the character is dropped.
 */
private final void fastWriteRaw(char c)
    throws IOException
{
    final boolean bufferFull = (mOutputPtr >= mOutputBufLen);
    if (bufferFull) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = c;
}
/**
 * Appends two characters to the output buffer without escaping,
 * flushing the buffer first if there is not enough room. If a flush
 * is needed and there is no underlying writer, nothing is written.
 */
private final void fastWriteRaw(char c1, char c2)
    throws IOException
{
    final boolean needFlush = ((mOutputPtr + 1) >= mOutputBufLen);
    if (needFlush) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = c1;
    mOutputBuffer[mOutputPtr++] = c2;
}
/**
 * Appends a String to the output buffer without escaping; meant for
 * short Strings. Buffer is flushed first if needed, and Strings longer
 * than the whole buffer are handed to the regular {@code writeRaw}.
 * If a flush is needed and there is no underlying writer, nothing is
 * written.
 */
private final void fastWriteRaw(String str)
    throws IOException
{
    final int strLen = str.length();
    int pos = mOutputPtr;
    if ((pos + strLen) >= mOutputBufLen) {
        if (mOut == null) {
            return;
        }
        /* It's even possible that String is longer than the buffer
         * (not likely, but possible). If so, the full method handles it.
         */
        if (strLen > mOutputBufLen) {
            writeRaw(str);
            return;
        }
        flushBuffer();
        pos = mOutputPtr;
    }
    str.getChars(0, strLen, mOutputBuffer, pos);
    mOutputPtr = pos + strLen;
}
/*
////////////////////////////////////////////////////
// Internal methods, content verification/fixing
////////////////////////////////////////////////////
*/
/**
 * Checks whether given content contains the CDATA end marker "]]&gt;".
 *
 * @param content Content to check; may be null
 *
 * @return Index at which the end marker was found, if any; -1 if
 *   there's no problem.
 */
protected int verifyCDataContent(String content)
{
    // Marker is 3 chars long, so anything shorter can not contain it
    if (content == null || content.length() < 3) {
        return -1;
    }
    final int firstBracket = content.indexOf(']');
    return (firstBracket < 0) ? -1 : content.indexOf("]]>", firstBracket);
}
/**
 * Checks whether given section of a character array contains the
 * CDATA end marker "]]&gt;".
 *
 * @param c Buffer to check; may be null
 * @param start Offset of the first character to check
 * @param end Offset after the last character to check
 *
 * @return Offset (within the array) of the first end marker found;
 *   -1 if there is none
 */
protected int verifyCDataContent(char[] c, int start, int end)
{
    if (c == null) {
        return -1;
    }
    /* Simple skip-ahead search: we look at the position where the
     * closing '>' of a marker could be. If that char is neither ']'
     * nor '>', no marker can end at this or the next position, so
     * we can advance by two.
     */
    int pos = start + 2;
    while (pos < end) {
        final char ch = c[pos];
        if (ch == ']') {
            ++pos; // may be part of a marker, move by one only
        } else {
            if (ch == '>' && c[pos-1] == ']' && c[pos-2] == ']') {
                return pos - 2;
            }
            pos += 2;
        }
    }
    return -1;
}
/**
 * Checks whether given content can be output as comment content as
 * is: it must neither contain "--", nor end with '-' (which would
 * result in the invalid end marker "---&gt;").
 *
 * @param content Content to check; must not be null
 *
 * @return Index of the first "--" if there is one; otherwise index of
 *   the last character if it is '-'; -1 if there's no problem
 */
protected int verifyCommentContent(String content)
{
    int ix = content.indexOf("--");
    /* Trailing hyphen has to be checked separately: it is a problem
     * even if content has earlier single hyphens (like "a-b-").
     */
    if (ix < 0 && content.endsWith("-")) {
        ix = content.length() - 1;
    }
    return ix;
}
/**
 * Writes content that contains one or more "]]&gt;" sequences as
 * multiple CDATA sections, so that no section contains the end marker.
 *
 * @param content Content to write
 * @param index Index of the first "]]&gt;" within content
 */
protected void writeSegmentedCData(String content, int index)
    throws IOException
{
    /* It's actually fairly easy, just split "]]>" into 2 pieces;
     * for each ']]>'; first one containing "]]", second one ">"
     * (as long as necessary)
     */
    int start = 0;
    while (index >= 0) {
        fastWriteRaw("<![CDATA[");
        // Segment ends right after the "]]" of the marker
        writeRaw(content.substring(start, index + 2));
        fastWriteRaw("]]>");
        start = index+2;
        index = content.indexOf("]]>", start);
    }
    // Ok, then the last segment
    fastWriteRaw("<![CDATA[");
    writeRaw(content.substring(start));
    fastWriteRaw("]]>");
}
/**
 * Writes content that contains one or more "]]&gt;" sequences as
 * multiple CDATA sections, so that no section contains the end marker:
 * each section ends right after the "]]" of a marker, and the next
 * one starts with its "&gt;".
 *
 * @param c Buffer that contains the content
 * @param start Offset of the first character of content
 * @param len Length of content
 * @param index Offset (within the array) of the first "]]&gt;"
 */
protected void writeSegmentedCData(char[] c, int start, int len, int index)
    throws IOException
{
    int end = start + len;
    while (index >= 0) {
        fastWriteRaw("<![CDATA[");
        writeRaw(c, start, (index+2) - start);
        fastWriteRaw("]]>");
        start = index+2;
        index = verifyCDataContent(c, start, end);
    }
    // Ok, then the last segment
    fastWriteRaw("<![CDATA[");
    writeRaw(c, start, end-start);
    fastWriteRaw("]]>");
}
/**
 * Writes a comment whose content is not valid as is, by adding a
 * space after each hyphen that is followed by another hyphen, and
 * after a trailing hyphen.
 *
 * @param content Content of the comment
 * @param index Index of the first problematic hyphen, as returned by
 *   {@code verifyCommentContent}
 */
protected void writeSegmentedComment(String content, int index)
    throws IOException
{
    int len = content.length();
    // First the special case (last char is hyphen):
    if (index == (len-1)) {
        fastWriteRaw("<!--");
        writeRaw(content);
        // we just need to inject one space in there
        fastWriteRaw(" -->");
        return;
    }
    /* Fixing comments is more difficult than that of CDATA segments';
     * this because CDATA can still contain embedded ']]'s, but
     * comment neither allows '--' nor ending with '-->'; which means
     * that it's impossible to just split segments. Instead we'll do
     * something more intrusive, and embed single spaces between all
     * '--' character pairs... it's intrusive, but comments are not
     * supposed to contain any data, so that should be fine (plus
     * at least result is valid, unlike contents as is)
     */
    fastWriteRaw("<!--");
    int start = 0;
    while (index >= 0) {
        // first, content prior to '--' and the first hyphen
        writeRaw(content.substring(start, index + 1));
        // and an obligatory trailing space to split double-hyphen
        fastWriteRaw(' ');
        // still need to handle rest of consecutive hyphens, if any
        start = index+1;
        index = content.indexOf("--", start);
    }
    // Ok, then the last segment
    writeRaw(content.substring(start));
    // ends with a hyphen? that needs to be fixed, too
    if (content.charAt(len-1) == '-') {
        fastWriteRaw(' ');
    }
    fastWriteRaw("-->");
}
/**
 * Method used to figure out which part of the Unicode char set the
 * encoding can natively support. Values returned are 7, 8 and 16,
 * to indicate (respectively) "ascii", "ISO-Latin" and "native Unicode".
 * These are just best guesses, but should work ok for the most common
 * encodings.
 *
 * @param enc Name of the encoding; null or empty is taken to mean UTF-8
 *
 * @return 7, 8 or 16, as explained above
 */
public static int guessEncodingBitSize(String enc)
{
    if (enc == null || enc.length() == 0) { // let's assume default is UTF-8...
        return 16;
    }
    // Let's see if we can find a normalized name, first:
    enc = CharsetNames.normalize(enc);

    /* Ok, first, do we have known ones; starting with most common.
     * Names are compared by value, so that result does not depend
     * on normalization returning the constant instances themselves.
     */
    if (CharsetNames.CS_UTF8.equals(enc)) {
        return 16; // meaning up to 2^16 can be represented natively
    } else if (CharsetNames.CS_ISO_LATIN1.equals(enc)) {
        return 8;
    } else if (CharsetNames.CS_US_ASCII.equals(enc)) {
        return 7;
    } else if (CharsetNames.CS_UTF16.equals(enc)
               || CharsetNames.CS_UTF16BE.equals(enc)
               || CharsetNames.CS_UTF16LE.equals(enc)
               || CharsetNames.CS_UTF32BE.equals(enc)
               || CharsetNames.CS_UTF32LE.equals(enc)) {
        return 16;
    }

    /* Above and beyond well-recognized names, it might still be
     * good to have more heuristics for as-of-yet unhandled cases...
     * But, it's probably easier to only assume 8-bit clean (could
     * even make it just 7, let's see how this works out)
     */
    return 8;
}
/**
 * Writes given character as an entity: one of the five predefined
 * entities if applicable, otherwise a hexadecimal character entity
 * with the shortest possible sequence of (lower-case) digits.
 *
 * @param c Character (code) to write
 */
protected final void writeAsEntity(int c)
    throws IOException
{
    final char[] out = mOutputBuffer;
    int pos = mOutputPtr;
    // Longest entity: "&#x" + [up to 6 hex digits] + ";"
    if ((pos + 10) >= out.length) {
        flushBuffer();
        pos = mOutputPtr;
    }
    out[pos++] = '&';

    /* Although not really mandatory, let's use pre-defined
     * entities where possible.
     */
    switch (c) {
    case '&':
        out[pos++] = 'a';
        out[pos++] = 'm';
        out[pos++] = 'p';
        break;
    case '<':
        out[pos++] = 'l';
        out[pos++] = 't';
        break;
    case '>':
        out[pos++] = 'g';
        out[pos++] = 't';
        break;
    case '\'':
        out[pos++] = 'a';
        out[pos++] = 'p';
        out[pos++] = 'o';
        out[pos++] = 's';
        break;
    case '"':
        out[pos++] = 'q';
        out[pos++] = 'u';
        out[pos++] = 'o';
        out[pos++] = 't';
        break;
    default:
        out[pos++] = '#';
        out[pos++] = 'x';
        if (c < 256) {
            // At most 2 digits; just one for tab, cr, lf
            int low = c;
            if (c >= 16) {
                final int high = (c >> 4);
                out[pos++] = (char) ((high < 10) ? ('0' + high) : (('a' - 10) + high));
                low = c & 0xF;
            }
            out[pos++] = (char) ((low < 10) ? ('0' + low) : (('a' - 10) + low));
        } else {
            // Up to 6 digits; leading zeroes are skipped
            boolean digitWritten = false;
            for (int shift = 20; shift > 0; shift -= 4) {
                final int digit = (c >> shift) & 0xF;
                if (digit > 0 || digitWritten) {
                    out[pos++] = (char) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                    digitWritten = true;
                }
            }
            final int last = c & 0xF;
            out[pos++] = (char) ((last < 10) ? ('0' + last) : (('a' - 10) + last));
        }
    }
    out[pos++] = ';';
    mOutputPtr = pos;
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/EncodingXmlWriter.java 0000664 0000000 0000000 00000071352 13257562550 0030127 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.XmlConsts;
//import com.ctc.wstx.io.CompletelyCloseable;
/**
* Intermediate base class used when outputting to streams that use
* an encoding that is compatible with 7-bit single-byte Ascii encoding.
* That means it can be used for UTF-8, ISO-Latin1 and pure Ascii.
*
* Implementation notes:
*
* Parts of surrogate handling are implemented here in the base class:
* storage for the first part of a split surrogate (only possible when
* character content is output split in multiple calls) is within
* base class. Also, simple checks for unmatched surrogate pairs are
* in
* Regarding surrogate pair handling: most of the checks are in the base
* class, and here we only need to worry about
* TreeSet is used mostly because clearing it up is faster than
* clearing up HashSet, and the only access is done by
* adding entries and see if an value was already set.
*/
TreeSet
* Note: Caller has to do actual removal of the element from element
* stack, before calling this method.
*
* @param expName Name that the closing element should have; null
* if whatever is in stack should be used
* @param allowEmpty If true, is allowed to create the empty element
* if the closing element was truly empty; if false, has to write
* the full empty element no matter what
*/
private void doWriteEndTag(String expName, boolean allowEmpty)
throws XMLStreamException
{
/* First of all, do we need to close up an earlier empty element?
* (open start element that was not created via call to
* writeEmptyElement gets handled later on)
*/
if (mStartElementOpen && mEmptyElement) {
mEmptyElement = false;
// note: this method guarantees proper updates to validation
closeStartElement(true);
}
// Better have something to close... (to figure out what to close)
if (mState != STATE_TREE) {
// Have to throw an exception always, don't know elem name
reportNwfStructure("No open start element, when trying to write end element");
}
/* Now, do we have an unfinished start element (created via
* writeStartElement() earlier)?
*/
// Name of the innermost open element is taken (and removed) from the stack
String localName = mElements.removeLast();
if (mCheckStructure) {
if (expName != null && !localName.equals(expName)) {
/* Only gets called when trying to output an XMLEvent... in
* which case names can actually be compared
*/
reportNwfStructure("Mismatching close element name, '"+localName+"'; expected '"+expName+"'.");
}
}
/* Can't yet validate, since we have two paths; one for empty
* elements, another for non-empty...
*/
// Got a half output start element to close?
if (mStartElementOpen) {
/* Can't/shouldn't call closeStartElement, but need to do same
* processing. Thus, this is almost identical to closeStartElement:
*/
if (mValidator != null) {
/* Note: return value is not of much use, since the
* element will be closed right away...
*/
mVldContent = mValidator.validateElementAndAttributes();
}
mStartElementOpen = false;
// Attribute names were only tracked for the start element just closed
if (mAttrNames != null) {
mAttrNames.clear();
}
try {
// We could write an empty element, implicitly?
if (allowEmpty) {
// Start tag is still open: finish it as an empty element instead
mWriter.writeStartTagEmptyEnd();
// No more open elements: root was just closed
if (mElements.isEmpty()) {
mState = STATE_EPILOG;
}
if (mValidator != null) {
mVldContent = mValidator.validateElementEnd(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
}
// Empty element needs no separate end tag
return;
}
// Nah, need to close open elem, and then output close elem
mWriter.writeStartTagEnd();
} catch (IOException ioe) {
throwFromIOE(ioe);
}
}
// Regular case: explicit end tag
try {
mWriter.writeEndTag(localName);
} catch (IOException ioe) {
throwFromIOE(ioe);
}
// No more open elements: root was just closed
if (mElements.isEmpty()) {
mState = STATE_EPILOG;
}
// Ok, time to validate...
if (mValidator != null) {
mVldContent = mValidator.validateElementEnd(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
}
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/OutputElementBase.java 0000664 0000000 0000000 00000027646 13257562550 0030137 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2005 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.util.BijectiveNsMap;
import com.ctc.wstx.util.DataUtil;
/**
* Class that encapsulates information about a specific element in virtual
* output stack for namespace-aware writers.
* It provides support for URI-to-prefix mappings as well as namespace
* mapping generation.
*
* One noteworthy feature of the class is that it is designed to allow
* "short-term recycling", ie. instances can be reused within context
* of a simple document output. While reuse/recycling of such lightweight
* object is often useless or even counter productive, here it may
* be worth using, due to simplicity of the scheme (basically using
* a very simple free-elements linked list).
*/
public abstract class OutputElementBase
implements NamespaceContext
{
public final static int PREFIX_UNBOUND = 0;
public final static int PREFIX_OK = 1;
public final static int PREFIX_MISBOUND = 2;
final static String sXmlNsPrefix = XMLConstants.XML_NS_PREFIX;
final static String sXmlNsURI = XMLConstants.XML_NS_URI;
/*
////////////////////////////////////////////
// Namespace binding/mapping information
////////////////////////////////////////////
*/
/**
* Namespace context end application may have supplied, and that
* (if given) should be used to augment explicitly defined bindings.
*/
protected NamespaceContext mRootNsContext;
protected String mDefaultNsURI;
/**
* Mapping of namespace prefixes to URIs and back.
*/
protected BijectiveNsMap mNsMapping;
/**
* True, if {@link #mNsMapping} is a shared copy from the parent;
* false if a local copy was created (which happens when namespaces
* get bound etc).
*/
protected boolean mNsMapShared;
/*
////////////////////////////////////////////
// Life-cycle
////////////////////////////////////////////
*/
/**
 * Constructor for the virtual root element: it has no namespace
 * mappings or root namespace context, and its default namespace
 * URI is the empty String.
 */
protected OutputElementBase()
{
    mRootNsContext = null;
    mDefaultNsURI = "";
    mNsMapping = null;
    mNsMapShared = false;
}
/**
 * Constructor for a non-root element: default namespace URI and root
 * namespace context are taken from the parent; the given mapping
 * (if any) is marked as shared.
 *
 * @param parent Element this element is a child of
 * @param ns Namespace mapping to use; may be null
 */
protected OutputElementBase(OutputElementBase parent, BijectiveNsMap ns)
{
    mRootNsContext = parent.mRootNsContext;
    mDefaultNsURI = parent.mDefaultNsURI;
    mNsMapping = ns;
    mNsMapShared = (ns != null);
}
/**
 * Method called to reuse a pooled instance: all namespace state is
 * re-initialized from the given parent, with the parent's mapping
 * (if any) marked as shared.
 *
 * @param parent Element this instance becomes a child of
 */
protected void relink(OutputElementBase parent)
{
    final BijectiveNsMap parentMapping = parent.mNsMapping;
    mNsMapping = parentMapping;
    mNsMapShared = (parentMapping != null);
    mDefaultNsURI = parent.mDefaultNsURI;
    mRootNsContext = parent.mRootNsContext;
}
protected abstract void setRootNsContext(NamespaceContext ctxt);
/*
////////////////////////////////////////////
// Public API, accessors
////////////////////////////////////////////
*/
public abstract boolean isRoot();
/**
* @return String presentation of the fully-qualified name, in
* "prefix:localName" format (no URI). Useful for error and
* debugging messages.
*/
public abstract String getNameDesc();
/**
 * @return URI of the default namespace for this element; empty
 *   String for the virtual root element
 */
public final String getDefaultNsUri()
{
    return mDefaultNsURI;
}
/*
////////////////////////////////////////////
// Public API, ns binding, checking
////////////////////////////////////////////
*/
/**
 * Method similar to {@link #getPrefix}, but one that will not accept
 * the default namespace, only an explicit one. Usually used when
 * trying to find a prefix for attributes.
 *
 * @param uri Namespace URI to find a prefix for
 *
 * @return Prefix found from local mappings, or a non-empty prefix
 *   found from the root namespace context; null if neither has one
 */
public final String getExplicitPrefix(String uri)
{
    // Local mappings have precedence
    if (mNsMapping != null) {
        final String local = mNsMapping.findPrefixByUri(uri);
        if (local != null) {
            return local;
        }
    }
    if (mRootNsContext != null) {
        final String inherited = mRootNsContext.getPrefix(uri);
        // Empty prefix means the default namespace, which can't be used here
        if (inherited != null && inherited.length() > 0) {
            return inherited;
        }
    }
    return null;
}
/**
 * Method that verifies that the passed-in prefix indeed maps to the
 * specified namespace URI in the current scope, and reports the
 * outcome as a status code.
 *
 * @param prefix Prefix to check; null or empty denotes "no prefix"
 * @param nsURI Namespace URI the prefix is expected to be bound to;
 *   null is treated as the empty String
 * @param isElement If true, rules for the default NS are those of elements
 *   (ie. empty prefix can map to non-default namespace); if false,
 *   rules are those of attributes (only non-default prefix can map to
 *   a non-default namespace).
 *
 * @return PREFIX_OK, if the prefix is bound to the given namespace URI
 *   in current scope; PREFIX_UNBOUND if it's not bound to anything,
 *   and PREFIX_MISBOUND if it's bound to another URI.
 *
 * @throws XMLStreamException If prefix 'xml' is paired with a URI other
 *   than its pre-defined one (reported through
 *   <code>throwOutputError</code>)
 */
public final int isPrefixValid(String prefix, String nsURI,
        boolean isElement)
    throws XMLStreamException
{
    // Callers should not really pass null, but let's be lenient
    final String uri = (nsURI == null) ? "" : nsURI;

    // Case 1: no prefix, i.e. checking against the default namespace
    if (prefix == null || prefix.length() == 0) {
        final boolean matches;
        if (isElement) {
            // Elements: fine only if the URI is the current default one
            matches = (uri == mDefaultNsURI || uri.equals(mDefaultNsURI));
        } else {
            // Attributes never use the default namespace: "no prefix"
            // can only mean "no namespace"
            matches = (uri.length() == 0);
        }
        return matches ? PREFIX_OK : PREFIX_MISBOUND;
    }

    // Case 2: 'xml' prefix, which is always implicitly declared
    if (prefix.equals(sXmlNsPrefix)) {
        if (!uri.equals(sXmlNsURI)) {
            throwOutputError("Namespace prefix '"+sXmlNsPrefix
                +"' can not be bound to non-default namespace ('"+uri+"'); has to be the default '"
                +sXmlNsURI+"'");
        }
        return PREFIX_OK;
    }

    // Case 3: any other prefix; local mapping first, then root context
    String bound = (mNsMapping == null) ? null : mNsMapping.findUriByPrefix(prefix);
    if (bound == null && mRootNsContext != null) {
        bound = mRootNsContext.getNamespaceURI(prefix);
    }
    if (bound == null) { // not (yet) bound
        return PREFIX_UNBOUND;
    }
    if (bound == uri || bound.equals(uri)) {
        return PREFIX_OK;
    }
    return PREFIX_MISBOUND;
}
/*
////////////////////////////////////////////
// Public API, mutators
////////////////////////////////////////////
*/
/**
* Mutator for the default namespace URI; implemented by sub-classes.
* NOTE(review): presumably updates {@link #mDefaultNsURI} -- confirm
* against the concrete implementations.
*
* @param uri New default namespace URI
*/
public abstract void setDefaultNsUri(String uri);
/**
 * Adds an automatically generated prefix mapping for the given
 * namespace URI to the local mapping, making sure first that the
 * mapping exists and is not shared with the parent. The actual work
 * is delegated to <code>BijectiveNsMap.addGeneratedMapping</code>,
 * which also receives the root namespace context.
 *
 * @param prefixBase Base passed on for constructing the prefix
 * @param uri Namespace URI to bind
 * @param seqArr Passed through as is to the mapping
 *
 * @return Whatever <code>addGeneratedMapping</code> returns
 */
public final String generateMapping(String prefixBase, String uri, int[] seqArr)
{
    // Same preparation as in addPrefix()
    BijectiveNsMap target = mNsMapping;
    if (target == null) {
        // No mapping so far: start a fresh one
        target = BijectiveNsMap.createEmpty();
        mNsMapping = target;
    } else if (mNsMapShared) {
        // Shared with parent(s): need a derived map, so that the new
        // binding is scoped to this element only
        target = target.createChild();
        mNsMapping = target;
        mNsMapShared = false;
    }
    return target.addGeneratedMapping(prefixBase, mRootNsContext,
            uri, seqArr);
}
/**
 * Binds given prefix to given namespace URI within the scope of this
 * element. The local mapping is created if it does not exist yet, and
 * replaced with a child map if it is still shared with the parent.
 *
 * @param prefix Prefix to bind
 * @param uri Namespace URI to bind the prefix to
 */
public final void addPrefix(String prefix, String uri)
{
    BijectiveNsMap target = mNsMapping;
    if (target == null) {
        // No mapping so far: start a fresh one
        target = BijectiveNsMap.createEmpty();
        mNsMapping = target;
    } else if (mNsMapShared) {
        // Shared with parent(s): need a derived map, so that the new
        // binding is scoped to this element only
        target = target.createChild();
        mNsMapping = target;
        mNsMapShared = false;
    }
    target.addMapping(prefix, uri);
}
/*
//////////////////////////////////////////////////
// NamespaceContext implementation
//////////////////////////////////////////////////
*/
/**
 * Resolves given prefix to a namespace URI: empty prefix resolves to
 * the current default namespace URI; other prefixes are looked up
 * first from the local mapping (if any) and then from the root
 * namespace context (if any).
 *
 * @param prefix Prefix to resolve; must not be null
 *
 * @return Namespace URI the prefix is bound to; null if neither the
 *   local mapping nor the root context knows the prefix
 *
 * @throws IllegalArgumentException If prefix is null, as required by
 *   the {@link javax.xml.namespace.NamespaceContext} contract (this
 *   used to surface as a NullPointerException)
 */
@Override
public final String getNamespaceURI(String prefix)
{
    if (prefix == null) {
        throw new IllegalArgumentException("Illegal to pass null as argument.");
    }
    if (prefix.length() == 0) { //default NS
        return mDefaultNsURI;
    }
    if (mNsMapping != null) {
        String uri = mNsMapping.findUriByPrefix(prefix);
        if (uri != null) {
            return uri;
        }
    }
    return (mRootNsContext != null) ?
        mRootNsContext.getNamespaceURI(prefix) : null;
}
/**
 * Finds a prefix bound to given namespace URI: the empty prefix if
 * the URI is the current default namespace; otherwise a prefix from
 * the local mapping (if any), and failing that, whatever the root
 * namespace context (if any) returns.
 *
 * @param uri Namespace URI to find a prefix for
 *
 * @return Prefix found; null if none
 */
@Override
public final String getPrefix(String uri)
{
    if (mDefaultNsURI.equals(uri)) {
        return "";
    }
    String found = (mNsMapping == null) ? null : mNsMapping.findPrefixByUri(uri);
    if (found == null && mRootNsContext != null) {
        found = mRootNsContext.getPrefix(uri);
    }
    return found;
}
@Override
public final Iterator
* One noteworthy feature of the class is that it is designed to allow
* "short-term recycling", ie. instances can be reused within context
* of a simple document output. While reuse/recycling of such lightweight
* object is often useless or even counter productive, here it may
* be worth using, due to simplicity of the scheme (basically using
* a very simple free-elements linked list).
*/
public final class SimpleOutputElement
extends OutputElementBase
{
/*
///////////////////////////////////////////////////////////////////////
// Information about element itself:
///////////////////////////////////////////////////////////////////////
*/
/**
* Reference to the parent element, element enclosing this element.
* Null for root element.
* Non-final only to allow temporary pooling
* (on per-writer basis, to keep these short-lived).
*/
protected SimpleOutputElement mParent;
/**
* Prefix that is used for the element. Can not be final, since sometimes
* it needs to be dynamically generated and bound after creating the
* element instance.
*/
protected String mPrefix;
/**
* Local name of the element.
* Non-final only to allow reuse.
*/
protected String mLocalName;
/**
* Namespace of the element, whatever {@link #mPrefix} maps to.
* Non-final only to allow reuse.
*/
protected String mURI;
/*
///////////////////////////////////////////////////////////////////////
// Attribute information
///////////////////////////////////////////////////////////////////////
*/
/**
* Map used to check for duplicate attribute declarations, if
* feature is enabled.
*/
protected HashSet
* Note that implementations can have different operating modes:
* specifically, when dealing with illegal content (such as "--"
* in a comment, "?>" in processing instruction, or "]]>" within
* CDATA section), implementations can do one of 3 things:
*
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*/
public abstract void writeStartTagStart(String localName)
throws IOException, XMLStreamException;
/**
* Variant of start tag output that takes both the prefix and the local
* name of the element.
* NOTE(review): presumably outputs the beginning of a start tag, up to
* and including the element name -- confirm against implementations.
*<p>
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param prefix Prefix of the element name
* @param localName Local name of the element
*/
public abstract void writeStartTagStart(String prefix, String localName)
throws IOException, XMLStreamException;
/**
* NOTE(review): presumably closes a start tag opened with one of the
* <code>writeStartTagStart</code> methods -- confirm against
* implementations.
*/
public abstract void writeStartTagEnd()
throws IOException;
/**
* NOTE(review): presumably closes a start tag as an empty element
* -- confirm against implementations.
*/
public abstract void writeStartTagEmptyEnd()
throws IOException;
/**
* End tag output for an element name without a prefix.
*
* @param localName Local name of the element
*/
public abstract void writeEndTag(String localName)
throws IOException;
/**
* End tag output for an element name that has a prefix.
*
* @param prefix Prefix of the element name
* @param localName Local name of the element
*/
public abstract void writeEndTag(String prefix, String localName)
throws IOException;
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
* Attribute output for an attribute name without a prefix, with the
* value given as a String.
*<p>
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param localName Local name of the attribute
* @param value Value of the attribute
*/
public abstract void writeAttribute(String localName, String value)
throws IOException, XMLStreamException;
/**
* Attribute output for an attribute name without a prefix, with the
* value given as part of a character array.
* NOTE(review): presumably <code>len</code> characters starting at
* <code>offset</code> make up the value -- confirm.
*
* @param localName Local name of the attribute
* @param value Buffer that contains the value
* @param offset Offset of the value within the buffer
* @param len Length of the value
*/
public abstract void writeAttribute(String localName, char[] value, int offset, int len)
throws IOException, XMLStreamException;
/**
* Attribute output for an attribute name with a prefix, with the
* value given as a String.
*<p>
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param prefix Prefix of the attribute name
* @param localName Local name of the attribute
* @param value Value of the attribute
*/
public abstract void writeAttribute(String prefix, String localName, String value)
throws IOException, XMLStreamException;
/**
* Attribute output for an attribute name with a prefix, with the
* value given as part of a character array.
* NOTE(review): presumably <code>len</code> characters starting at
* <code>offset</code> make up the value -- confirm.
*
* @param prefix Prefix of the attribute name
* @param localName Local name of the attribute
* @param value Buffer that contains the value
* @param offset Offset of the value within the buffer
* @param len Length of the value
*/
public abstract void writeAttribute(String prefix, String localName, char[] value, int offset, int len)
throws IOException, XMLStreamException;
/*
////////////////////////////////////////////////////
// Write methods, Typed Access API support
////////////////////////////////////////////////////
*/
/**
* Like {@link #writeRaw}, but caller guarantees that the contents
* additionally are known to be in 7-bit ascii range, and also
* passes an encoder object that will encode values only when
* being handed a buffer to append to.
* This variant takes no validator.
*
* @param enc Encoder that will produce content
*/
public abstract void writeTypedElement(AsciiValueEncoder enc)
throws IOException;
/**
* Like {@link #writeRaw}, but caller guarantees that the contents
* additionally are known to be in 7-bit ascii range, and also
* passes an encoder object that will encode values only when
* being handed a buffer to append to.
* This variant additionally takes a validator for the content.
*
* @param enc Encoder that will produce content
* @param validator Validator to use for validating serialized textual
* content (can not be null)
* @param copyBuffer Temporary buffer that writer can use for temporary
* copies as necessary
*/
public abstract void writeTypedElement(AsciiValueEncoder enc,
XMLValidator validator, char[] copyBuffer)
throws IOException, XMLStreamException;
/**
* Method similar to {@link #writeAttribute(String,String,char[],int,int)}
* but where the value is known not to require escaping.
* No validation needs to be performed.
* This variant takes an attribute name without a prefix.
*
* @param localName Local name of the attribute
* @param enc Encoder that will produce the attribute value
*/
public abstract void writeTypedAttribute(String localName, AsciiValueEncoder enc)
throws IOException, XMLStreamException;
/**
* Method similar to {@link #writeAttribute(String,String,char[],int,int)}
* but where the value is known not to require escaping.
* No validation needs to be performed.
*
* @param prefix Prefix of the attribute name
* @param localName Local name of the attribute
* @param enc Encoder that will produce the attribute value
*/
public abstract void writeTypedAttribute(String prefix, String localName, AsciiValueEncoder enc)
throws IOException, XMLStreamException;
/**
* Method similar to {@link #writeAttribute(String,String,char[],int,int)}
* but where the value is known not to require escaping.
* Validation of the attribute value must be done by calling given
* validator appropriately.
*
* @param prefix Prefix of the attribute name
* @param localName Local name of the attribute
* @param nsURI Namespace URI of the attribute
* @param enc Encoder that will produce the attribute value
* @param validator Validator to call for the attribute value
* @param copyBuffer Temporary buffer handed to the writer;
* NOTE(review): presumably for intermediate copies, as with
* <code>writeTypedElement</code> -- confirm
*/
public abstract void writeTypedAttribute(String prefix, String localName, String nsURI,
AsciiValueEncoder enc,
XMLValidator validator, char[] copyBuffer)
throws IOException, XMLStreamException;
/*
////////////////////////////////////////////////////
// Location information
////////////////////////////////////////////////////
*/
/**
* Accessor for the current output pointer; used by {@link #getColumn}
* and {@link #getAbsOffset} for calculating location information.
* NOTE(review): presumably the offset within the current output
* buffer -- confirm against implementations.
*/
protected abstract int getOutputPtr();
/**
 * @return Current row number, as tracked in <code>mLocRowNr</code>
 */
public int getRow()
{
    final int row = mLocRowNr;
    return row;
}
/**
 * @return Column calculated as the distance between the current
 *   output pointer and the start offset of the current row, plus one
 */
public int getColumn()
{
    final int ptr = getOutputPtr();
    final int distanceFromRowStart = ptr - mLocRowStartOffset;
    return distanceFromRowStart + 1;
}
/**
 * @return Sum of <code>mLocPastChars</code> and the current output
 *   pointer
 */
public int getAbsOffset()
{
    final int past = mLocPastChars;
    return past + getOutputPtr();
}
/*
////////////////////////////////////////////////////
// Wrapper methods, semi-public
////////////////////////////////////////////////////
*/
/**
* Method that can be called to get a wrapper instance that
* can be used to essentially call the
* Currently only used by stream writers, but could be more generally useful
* too.
*/
public final class BijectiveNsMap
{
/*
///////////////////////////////////////////////
// Constants
///////////////////////////////////////////////
*/
/**
* Let's plan for having up to 14 explicit namespace declarations (2
* defaults, for 'xml' and 'xmlns', are pre-populated)
*/
final static int DEFAULT_ARRAY_SIZE = 2 * 16;
/*
///////////////////////////////////////////////
// Member vars
///////////////////////////////////////////////
*/
/**
* Index within {@link #mNsStrings} at which the scope of this instance
* starts. Entries below this index were not added within this scope;
* {@link #findPrefixByUri} checks such entries for masking by later
* bindings of the same prefix.
*/
final int mScopeStart;
/**
* Array that contains { prefix, ns-uri } pairs, up to (but not including)
* index {@link #mScopeEnd}.
*/
String[] mNsStrings;
/**
* Index of the first unused entry of {@link #mNsStrings}; initially
* equal to {@link #mScopeStart}. Lookups scan backwards from here.
*/
int mScopeEnd;
/*
///////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////
*/
/**
 * @param scopeStart Index at which the scope of the new instance both
 *   starts and (initially) ends
 * @param strs Array of { prefix, ns-uri } pairs to use (not copied)
 */
private BijectiveNsMap(int scopeStart, String[] strs)
{
    mNsStrings = strs;
    mScopeStart = scopeStart;
    mScopeEnd = scopeStart;
}
/**
 * Factory method for a map that only contains the two pre-defined
 * bindings (prefixes 'xml' and 'xmlns').
 *
 * @return New map whose scope starts right after the pre-defined
 *   bindings
 */
public static BijectiveNsMap createEmpty()
{
    final String[] pairs = new String[DEFAULT_ARRAY_SIZE];
    int used = 0;
    pairs[used++] = XMLConstants.XML_NS_PREFIX;
    pairs[used++] = XMLConstants.XML_NS_URI;
    pairs[used++] = XMLConstants.XMLNS_ATTRIBUTE;
    pairs[used++] = XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
    // Pre-defined bindings are considered 'out of scope', as if they
    // were part of the mappings of a (missing) parent
    return new BijectiveNsMap(used, pairs);
}
/**
 * @return New map that uses the same underlying array as this one,
 *   and whose scope starts where the scope of this map currently ends
 */
public BijectiveNsMap createChild()
{
    final int childScopeStart = mScopeEnd;
    return new BijectiveNsMap(childScopeStart, mNsStrings);
}
/*
///////////////////////////////////////////////
// Public API, accessors
///////////////////////////////////////////////
*/
/**
 * Finds the namespace URI the given prefix is currently bound to;
 * that is, the most recently added binding for the prefix.
 *
 * @param prefix Prefix to look for (must not be null)
 *
 * @return URI of the latest binding; null if prefix is not bound
 */
public String findUriByPrefix(String prefix)
{
    final String[] pairs = mNsStrings;
    final int wantedHash = prefix.hashCode();
    // Prefixes are at even indexes; scan from the newest to the oldest
    int ix = mScopeEnd;
    while ((ix -= 2) >= 0) {
        final String candidate = pairs[ix];
        if (candidate == prefix) {
            return pairs[ix+1];
        }
        if (candidate.hashCode() == wantedHash && candidate.equals(prefix)) {
            return pairs[ix+1];
        }
    }
    return null;
}
/**
 * Finds a prefix that is currently bound to the given namespace URI.
 * Bindings are checked from the most recent to the oldest; a binding
 * from outside of the current scope only qualifies if its prefix has
 * not been re-bound (masked) by a later entry.
 *
 * @param uri Namespace URI to look for (must not be null)
 *
 * @return Prefix of the first usable binding; null if there is none
 */
public String findPrefixByUri(String uri)
{
    final String[] pairs = mNsStrings;
    final int wantedHash = uri.hashCode();
    final int scopeEnd = mScopeEnd;

    // URIs are at odd indexes
    for (int uriIx = scopeEnd - 1; uriIx > 0; uriIx -= 2) {
        final String candidate = pairs[uriIx];
        if (candidate != uri
                && (candidate.hashCode() != wantedHash || !candidate.equals(uri))) {
            continue;
        }
        // URI matches; but is the prefix still bound to it?
        final String prefix = pairs[uriIx-1];
        boolean masked = false;
        // No masking is possible within the current scope, so only
        // bindings from outside of it need to be checked
        if (uriIx < mScopeStart) {
            final int prefixHash = prefix.hashCode();
            for (int laterIx = uriIx+1; laterIx < scopeEnd; laterIx += 2) {
                final String later = pairs[laterIx];
                if (later == prefix
                        || (later.hashCode() == prefixHash && later.equals(prefix))) {
                    masked = true;
                    break;
                }
            }
        }
        if (!masked) {
            return prefix;
        }
        // Masked: keep looking at older bindings
    }
    return null;
}
public List
* Instances can be in one of 2 modes: either in fully defined mode,
* in which case information refers to location where value was defined
* (ie. we had id as a value of ID type attribute); or in undefined mode,
* in which case information refers to the first reference.
*
* Note: this class is designed to be used with {@link ElementIdMap},
* and as a result has some information specifically needed by the
* map implementation (such as collision links).
*/
public final class ElementId
{
    /**
     * Flag that indicates whether this Object presents a defined id
     * value (value of an ID attribute) or just a reference to one.
     */
    private boolean mDefined;

    /*
    /////////////////////////////////////////////////
    // Information about id value or value reference,
    // depending on mDefined flag
    /////////////////////////////////////////////////
     */

    /**
     * Actual id value
     */
    private final String mIdValue;

    /**
     * Location of either definition (if {@link #mDefined} is true; or
     * first reference (otherwise). Used when reporting errors; either
     * a referenced id has not been defined, or there are multiple
     * definitions of same id.
     */
    private Location mLocation;

    /**
     * Name of element for which this id refers.
     */
    private PrefixedName mElemName;

    /**
     * Name of the attribute that contains this id value (often "id",
     * but need not be)
     */
    private PrefixedName mAttrName;

    /*
    ////////////////////////////////////////////////////
    // Linking information, needed by the map to keep
    // track of collided ids, as well as undefined ids
    ////////////////////////////////////////////////////
     */

    /**
     * Link to the next undefined id; can only be set once (see
     * {@link #linkUndefined}).
     */
    private ElementId mNextUndefined;

    /**
     * Pointer to the next element within collision chain.
     */
    private ElementId mNextColl;

    /*
    /////////////////////////////////////////////////
    // Life cycle
    /////////////////////////////////////////////////
     */

    /**
     * @param id Id value
     * @param loc Location of the definition or of the first reference,
     *   depending on <code>defined</code>
     * @param defined Whether this instance represents a definition
     *   (true) or just a reference (false)
     * @param elemName Name of the element the id refers to
     * @param attrName Name of the attribute that contains the id value
     */
    ElementId(String id, Location loc, boolean defined,
              PrefixedName elemName, PrefixedName attrName)
    {
        mIdValue = id;
        mLocation = loc;
        mDefined = defined;
        mElemName = elemName;
        mAttrName = attrName;
    }

    /**
     * Sets the link to the next undefined id.
     *
     * @throws IllegalStateException If the link has already been set
     */
    protected void linkUndefined(ElementId undefined)
    {
        if (mNextUndefined != null) {
            throw new IllegalStateException("ElementId '"+this+"' already had next undefined set ('"+mNextUndefined+"')");
        }
        mNextUndefined = undefined;
    }

    /**
     * Sets (or clears, if passed null) the link to the next entry of
     * the collision chain.
     */
    protected void setNextColliding(ElementId nextColl)
    {
        // May add/remove link, no point in checking
        mNextColl = nextColl;
    }

    /*
    /////////////////////////////////////////////////
    // Public API
    /////////////////////////////////////////////////
     */

    public String getId() { return mIdValue; }
    public Location getLocation() { return mLocation; }
    public PrefixedName getElemName() { return mElemName; }
    public PrefixedName getAttrName() { return mAttrName; }

    public boolean isDefined() { return mDefined; }

    /**
     * Checks whether the id value equals the given character sequence.
     * A zero-length sequence is handled as well (it matches only an
     * empty id value), instead of failing on the access of the first
     * character.
     *
     * @param buf Buffer that contains the characters to compare
     * @param start Offset of the first character to compare
     * @param len Number of characters to compare
     *
     * @return True if the id value consists of exactly the given
     *   characters
     */
    public boolean idMatches(char[] buf, int start, int len)
    {
        if (mIdValue.length() != len) {
            return false;
        }
        for (int i = 0; i < len; ++i) {
            if (buf[start + i] != mIdValue.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return True if the id value equals given String
     */
    public boolean idMatches(String idStr)
    {
        return mIdValue.equals(idStr);
    }

    public ElementId nextUndefined() { return mNextUndefined; }
    public ElementId nextColliding() { return mNextColl; }

    /**
     * Marks this (so far only referenced) id as defined, and replaces
     * the location with that of the definition.
     *
     * @param defLoc Location of the definition
     *
     * @throws IllegalStateException If this id was already defined
     */
    public void markDefined(Location defLoc) {
        if (mDefined) { // sanity check
            throw new IllegalStateException(ErrorConsts.ERR_INTERNAL);
        }
        mDefined = true;
        mLocation = defLoc;
    }

    /*
    /////////////////////////////////////////////////
    // Other methods
    /////////////////////////////////////////////////
     */

    /**
     * @return The id value itself
     */
    @Override
    public String toString() {
        return mIdValue;
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/ElementIdMap.java 0000664 0000000 0000000 00000032230 13257562550 0027343 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.util;
import javax.xml.stream.Location;
/**
* This class is a specialized type-safe linked hash map used for
* storing {@link ElementId} instances. {@link ElementId} instances
* represent both id definitions (values of element attributes that
* have type ID in DTD), and references (values of element attributes
* of type IDREF and IDREFS). These definitions and references are
* stored for the purpose of verifying
* that all referenced id values are defined, and that none are defined
* more than once.
*
* Note: there are 2 somewhat distinct usage modes, by DTDValidator and
* by MSV-based validators.
* DTDs pass raw character arrays, whereas
* MSV-based validators operate on Strings. This is the main reason
* for 2 distinct sets of methods.
*/
public final class ElementIdMap
{
/**
* Default initial table size; set so that usually it need not
* be expanded.
*/
protected static final int DEFAULT_SIZE = 128;
/**
* NOTE(review): presumably the smallest table size that is allowed;
* its use is not visible in this part of the file -- confirm.
*/
protected static final int MIN_SIZE = 16;
/**
* Let's use 80% fill factor...
*/
protected static final int FILL_PCT = 80;
/*
////////////////////////////////////////
// Actual hash table structure
////////////////////////////////////////
*/
/**
* Actual hash table area
*/
protected ElementId[] mTable;
/**
* Current size (number of entries); needed to know if and when
* rehash.
*/
protected int mSize;
/**
* Limit that indicates maximum size this instance can hold before
* it needs to be expanded and rehashed. Calculated using fill
* factor passed in to constructor.
*/
protected int mSizeThreshold;
/**
* Mask used to get index from hash values; equal to
*
* Note: identical to {@link com.ctc.wstx.util.SymbolTable#calcHash},
* although not required to be.
*
* @param len Length of String; has to be at least 1 (caller guarantees
* this pre-condition)
*/
@SuppressWarnings("cast")
public static int calcHash(char[] buffer, int start, int len)
{
    // First character seeds the hash (so there has to be at least one);
    // the rest are folded in with the usual multiply-by-31 scheme
    int result = buffer[start];
    int ptr = start;
    for (int left = len; left > 1; --left) {
        result = (31 * result) + buffer[++ptr];
    }
    return result;
}
/**
 * String-based counterpart of the hash calculation done for character
 * arrays: the first character seeds the hash, and the remaining ones
 * are folded in by multiplying with 31 and adding the character.
 *
 * @param key Key to calculate the hash for; has to be at least one
 *   character long
 */
@SuppressWarnings("cast")
public static int calcHash(String key)
{
    int result = key.charAt(0);
    final int end = key.length();
    int ix = 1;
    while (ix < end) {
        result = (31 * result) + key.charAt(ix);
        ++ix;
    }
    return result;
}
/*
//////////////////////////////////////////////////////////
// Internal methods
//////////////////////////////////////////////////////////
*/
/**
 * Method called when size (number of entries) of the map grows
 * so big that load factor is exceeded. The table is replaced with one
 * that is four times as big (which keeps the size a power of two, as
 * needed for the index mask), the size threshold is scaled up
 * accordingly, and all old entries are redistributed into the new
 * table.
 */
private void rehash()
{
    final ElementId[] oldTable = mTable;
    final int oldLen = oldTable.length;
    /* Growth is aggressive on purpose: it minimizes the number of
     * resizes at the cost of memory usage; and since these maps are
     * never long-lived (only used during parsing and validation of a
     * single document), that should not matter much.
     */
    final int newLen = (oldLen << 2);
    mTable = new ElementId[newLen];
    // Index mask and threshold are needed below, so update them first
    mIndexMask = newLen - 1;
    mSizeThreshold <<= 2;

    int moved = 0; // for the sanity check
    for (int bucket = 0; bucket < oldLen; ++bucket) {
        ElementId curr = oldTable[bucket];
        while (curr != null) {
            ++moved;
            final int slot = calcHash(curr.getId()) & mIndexMask;
            final ElementId rest = curr.nextColliding();
            // Entry becomes the new head of the chain of its slot
            curr.setNextColliding(mTable[slot]);
            mTable[slot] = curr;
            curr = rest;
        }
    }

    if (moved != mSize) {
        ExceptionUtil.throwInternal("on rehash(): had "+mSize+" entries; now have "+moved+".");
    }
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/EmptyNamespaceContext.java 0000664 0000000 0000000 00000003615 13257562550 0031324 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
import java.io.Writer;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Namespace;
/**
* Dummy {@link NamespaceContext} (and {@link BaseNsContext})
* implementation that is usually used in
* non-namespace-aware mode.
*
* Note: differs from Stax2 reference implementation's version
* slightly, since it needs to support Woodstox specific extensions
* for efficient namespace declaration serialization.
*/
public final class EmptyNamespaceContext
extends BaseNsContext
{
final static EmptyNamespaceContext sInstance = new EmptyNamespaceContext();
private EmptyNamespaceContext() { }
public static EmptyNamespaceContext getInstance() { return sInstance; }
/*
/////////////////////////////////////////////
// Extended API
/////////////////////////////////////////////
*/
@Override
public Iterator
* Note: that this class extends {@link LinkedHashMap} is an implementation
* detail -- no code should ever directly call Map methods.
*/
@SuppressWarnings("serial")
public final class InternCache extends LinkedHashMap
* One important note about usage is that two of the name components (prefix
* and local name) HAVE to have been interned some way, as all comparisons
* are done using identity comparison; whereas URI is NOT necessarily
* interned.
*
* Note that the main reason this class is mutable -- unlike most key classes
* -- is that this allows reusing key objects for access, as long as the code
* using it knows ramifications of trying to modify a key that's used
* in a data structure.
*
* Note, too, that the hash code is cached as this class is mostly used as
* a Map key, and hash code is used a lot.
*/
public final class PrefixedName
implements Comparable
* Note: it is assumed that the passed-in localName is also
* interned.
*/
public boolean isXmlReservedAttr(boolean nsAware, String localName)
{
    if (nsAware) {
        // Name components are interned, so identity comparison is enough
        return ("xml" == mPrefix) && (mLocalName == localName);
    }
    // Non-namespace mode: the whole "xml:name" is in the local name
    return (mLocalName.length() == (4 + localName.length()))
        && mLocalName.startsWith("xml:")
        && mLocalName.endsWith(localName);
}
/*
///////////////////////////////////////////////////
// Overridden standard methods:
///////////////////////////////////////////////////
*/
/**
 * @return Local name as is, if there is no (non-empty) prefix;
 *   otherwise name in "prefix:localName" format
 */
@Override
public String toString()
{
    final String prefix = mPrefix;
    final boolean hasPrefix = (prefix != null) && (prefix.length() > 0);
    if (!hasPrefix) {
        return mLocalName;
    }
    final int capacity = prefix.length() + 1 + mLocalName.length();
    return new StringBuilder(capacity)
        .append(prefix)
        .append(':')
        .append(mLocalName)
        .toString();
}
/**
 * Two names are equal if both their local names and their prefixes
 * are the very same String instances: identity comparison is used, so
 * both components are assumed to be interned.
 */
@Override
public boolean equals(Object o)
{
    if (o == this) {
        return true;
    }
    if (o instanceof PrefixedName) { // null fails this check too
        final PrefixedName that = (PrefixedName) o;
        return (mLocalName == that.mLocalName) && (mPrefix == that.mPrefix);
    }
    return false;
}
/**
 * Hash code is calculated lazily from the local name and the prefix
 * (if any), and cached in <code>mHash</code>; value 0 of the cache
 * field means "not yet calculated".
 */
@Override
public int hashCode()
{
    final int cached = mHash;
    if (cached != 0) {
        return cached;
    }
    int computed = mLocalName.hashCode();
    if (mPrefix != null) {
        computed ^= mPrefix.hashCode();
    }
    mHash = computed;
    return computed;
}
/**
 * Orders names first by prefix, then by local name. A missing (null or
 * empty) prefix is ordered before any existing prefix.
 */
@Override
public int compareTo(PrefixedName other)
{
    final String theirs = other.mPrefix;
    final boolean theyLack = (theirs == null) || (theirs.length() == 0);
    final boolean weLack = (mPrefix == null) || (mPrefix.length() == 0);

    if (theyLack != weLack) {
        // Exactly one has a prefix: the one without it comes first
        return weLack ? -1 : 1;
    }
    if (!weLack) { // both have a prefix
        final int byPrefix = mPrefix.compareTo(theirs);
        if (byPrefix != 0) {
            return byPrefix;
        }
    }
    return mLocalName.compareTo(other.mLocalName);
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/SimpleCache.java 0000664 0000000 0000000 00000002576 13257562550 0027226 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
import java.util.*;
/**
* Simple Map implementation usable for caches where contents do not
* expire, but where size needs to remain bounded.
*
* Note: we probably should use weak references, or something similar
* to limit maximum memory usage. This could be implemented in many
* ways, perhaps by using two areas: first, smaller one, with strong
* refs, and secondary bigger one that uses soft references.
*/
public final class SimpleCache
* NOTE: we only remove explicit space characters (char code 0x0020);
* the reason being that other white space must have come from
* non-normalizable sources, ie. via entity expansion, and is thus
* not to be normalized
*
* @param buf Buffer that contains the String to check
* @param origStart Offset of the first character of the text to check
* in the buffer
* @param origEnd Offset of the character following the last character
* of the text (as per usual Java API convention)
*
* @return Normalized String, if any white space was removed or
* normalized; null if no changes were necessary.
*/
public static String normalizeSpaces(char[] buf, int origStart, int origEnd)
{
// From here on, 'origEnd' (and 'end') point to the last character, inclusive
--origEnd;
int start = origStart;
int end = origEnd;
// First let's trim start...
while (start <= end && buf[start] == CHAR_SPACE) {
++start;
}
// Was it all empty? (an empty range ends up here as well)
if (start > end) {
return "";
}
/* Nope, need to trim from the end then (note: it's known that char
* at index 'start' is not a space, at this point)
*/
while (end > start && buf[end] == CHAR_SPACE) {
--end;
}
/* Ok, may have changes or not: now need to normalize
* intermediate duplicate spaces. We also know that the
* first and last characters can not be spaces.
*/
int i = start+1;
while (i < end) {
if (buf[i] == CHAR_SPACE) {
// Safe to look ahead: i < end, so i+1 is at most 'end'
if (buf[i+1] == CHAR_SPACE) {
break;
}
// Nah; no hole for these 2 chars!
i += 2;
} else {
++i;
}
}
// Hit the end? If so, there were no duplicate spaces in the middle
if (i >= end) {
// Any changes?
if (start == origStart && end == origEnd) {
return null; // none
}
// Only leading and/or trailing spaces were removed
return new String(buf, start, (end-start)+1);
}
/* Nope, got a hole, need to construct the result piece by piece.
* Shouldn't happen too often... so let's just use StringBuilder()
*/
StringBuilder sb = new StringBuilder(end-start); // can't be longer
sb.append(buf, start, i-start); // won't add the starting space
while (i <= end) {
char c = buf[i++];
if (c == CHAR_SPACE) {
sb.append(CHAR_SPACE);
// Need to skip dups; always terminates, since the character at
// 'end' is known not to be a space
while (true) {
c = buf[i++];
if (c != CHAR_SPACE) {
sb.append(c);
break;
}
}
} else {
sb.append(c);
}
}
return sb.toString();
}
/**
 * @return True if the String contains no character with a code above
 *   that of the space character (which is also the case for an empty
 *   String); false otherwise
 */
public static boolean isAllWhitespace(String str)
{
    int ix = str.length();
    while (--ix >= 0) {
        if (str.charAt(ix) > CHAR_SPACE) {
            return false;
        }
    }
    return true;
}
/**
 * @param ch Buffer that contains the characters to check
 * @param start Offset of the first character to check
 * @param len Number of characters to check
 *
 * @return True if the given range contains no character with a code
 *   above that of the space character (also true for an empty range);
 *   false otherwise
 */
public static boolean isAllWhitespace(char[] ch, int start, int len)
{
    final int end = start + len;
    int ix = start;
    while (ix < end) {
        if (ch[ix] > CHAR_SPACE) {
            return false;
        }
        ++ix;
    }
    return true;
}
/**
* Internal constant used to denote END-OF-STRING. The value is one
* past the biggest value a Java char can have (0xFFFF), so it can
* never be equal to an actual character.
*/
private final static int EOS = 0x10000;
/**
 * Method that implements a loose String comparison for encoding
 * Strings. It will work like {@link String#equalsIgnoreCase},
 * except that it will also ignore all hyphen, underscore and
 * space characters.
 *
 * @return True if the two encoding names are equal, when ignoring
 *   case and the separator characters mentioned above
 */
public static boolean equalEncodings(String str1, String str2)
{
    final int len1 = str1.length();
    final int len2 = str2.length();
    int pos1 = 0;
    int pos2 = 0;

    // Both Strings need to be fully consumed
    while (pos1 < len1 || pos2 < len2) {
        int ch1 = (pos1 < len1) ? str1.charAt(pos1++) : EOS;
        int ch2 = (pos2 < len2) ? str2.charAt(pos2++) : EOS;

        // Quick check first (usually characters are equal)
        if (ch1 == ch2) {
            continue;
        }
        // Otherwise there may be WS/hyphens/underscores to skip
        while (ch1 <= INT_SPACE || ch1 == '_' || ch1 == '-') {
            ch1 = (pos1 < len1) ? str1.charAt(pos1++) : EOS;
        }
        while (ch2 <= INT_SPACE || ch2 == '_' || ch2 == '-') {
            ch2 = (pos2 < len2) ? str2.charAt(pos2++) : EOS;
        }
        if (ch1 == ch2) {
            continue;
        }
        // If only one ended, one is a substring of the other: no match
        if (ch1 == EOS || ch2 == EOS) {
            return false;
        }
        // Last chance: difference may be just in case
        if (ch1 >= 127) {
            ch1 = Character.toLowerCase((char) ch1);
        } else if (ch1 >= 'A' && ch1 <= 'Z') { // ascii is easy...
            ch1 = ch1 + ('a' - 'A');
        }
        if (ch2 >= 127) {
            ch2 = Character.toLowerCase((char) ch2);
        } else if (ch2 >= 'A' && ch2 <= 'Z') { // ascii is easy...
            ch2 = ch2 + ('a' - 'A');
        }
        if (ch1 != ch2) {
            return false;
        }
    }
    // Got through both without finding a difference
    return true;
}
/**
 * Loose "starts with" check for encoding names: comparison ignores
 * case, as well as hyphen, underscore and space (or lower) characters.
 *
 * @param enc Encoding name to check
 * @param prefix Prefix the encoding name is expected to start with
 *
 * @return True if the encoding name starts with (or is equal to) the
 *   prefix, using the loose comparison; false otherwise
 */
public static boolean encodingStartsWith(String enc, String prefix)
{
    final int encLen = enc.length();
    final int prefixLen = prefix.length();
    int encPos = 0;
    int prefixPos = 0;

    while (encPos < encLen || prefixPos < prefixLen) {
        int encCh = (encPos < encLen) ? enc.charAt(encPos++) : EOS;
        int prefixCh = (prefixPos < prefixLen) ? prefix.charAt(prefixPos++) : EOS;

        // Quick check first (usually characters are equal)
        if (encCh == prefixCh) {
            continue;
        }
        // Otherwise there may be WS/hyphens/underscores to skip
        while (encCh <= CHAR_SPACE || encCh == '_' || encCh == '-') {
            encCh = (encPos < encLen) ? enc.charAt(encPos++) : EOS;
        }
        while (prefixCh <= CHAR_SPACE || prefixCh == '_' || prefixCh == '-') {
            prefixCh = (prefixPos < prefixLen) ? prefix.charAt(prefixPos++) : EOS;
        }
        if (encCh == prefixCh) {
            continue;
        }
        if (prefixCh == EOS) { // whole prefix matched: good
            return true;
        }
        if (encCh == EOS) { // encoding ended before prefix did: not good
            return false;
        }
        // Difference may be just in case
        if (Character.toLowerCase((char) encCh) != Character.toLowerCase((char) prefixCh)) {
            return false;
        }
    }
    // Prefix was exactly the same as the encoding; that is fine too
    return true;
}
/**
 * Method that will remove all non-alphanumeric characters, and optionally
 * upper-case included letters, from the given String.
 *<p>
 * Upper-casing (when requested) is applied to every letter that is
 * retained. Earlier the String was returned as is if it only
 * contained alphanumeric characters, and the leading alphanumeric
 * run was never upper-cased, which did not match the contract.
 *
 * @param str String to trim
 * @param upperCase Whether retained letters are to be upper-cased
 *
 * @return Passed-in String itself, if nothing needs to be changed;
 *   otherwise a new String with only (possibly upper-cased)
 *   alphanumeric characters
 */
public static String trimEncoding(String str, boolean upperCase)
{
    final int len = str.length();
    int i = 0;

    /* Let's first check if String is fine as is. Note: no separate
     * check is needed for space and control characters, since none of
     * them is a letter or a digit.
     */
    for (; i < len; ++i) {
        char c = str.charAt(i);
        if (!Character.isLetterOrDigit(c)) {
            break;
        }
        if (upperCase && c != Character.toUpperCase(c)) {
            break;
        }
    }
    if (i == len) {
        return str;
    }

    // Nope: have to build a new one; part before 'i' can be used as is
    StringBuilder sb = new StringBuilder(len);
    sb.append(str, 0, i);
    for (; i < len; ++i) {
        char c = str.charAt(i);
        if (Character.isLetterOrDigit(c)) {
            sb.append(upperCase ? Character.toUpperCase(c) : c);
        }
    }
    return sb.toString();
}
/**
 * Checks whether the {@code len} characters found in {@code cbuf},
 * starting at {@code offset}, are exactly the characters of
 * {@code str}.
 *
 * @return True if {@code str} has length {@code len} and every one of
 *   its characters equals the matching buffer character; false otherwise
 */
public static boolean matches(String str, char[] cbuf, int offset, int len)
{
    // Different lengths can never match
    if (len != str.length()) {
        return false;
    }
    int matched = 0;
    while (matched < len && cbuf[offset + matched] == str.charAt(matched)) {
        ++matched;
    }
    return matched == len;
}
/**
 * Tells whether given character is considered white space, which is
 * the case for every character code up to and including 0x0020
 * (regular space).
 *
 * Note that it is assumed that any "weird" white space
 * (xml 1.1 LSEP and NEL) have been replaced by canonical
 * alternatives (linefeed for element content, regular space
 * for attributes)
 */
public final static boolean isSpace(char c)
{
    return c <= 0x0020;
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/StringVector.java 0000664 0000000 0000000 00000015621 13257562550 0027475 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
/**
 * Data container similar to {@link java.util.List} (from storage perspective),
 * but that can be used in multiple ways. For some uses it acts more like
 * type-safe String list/vector; for others as ordered associative list of
 * String-to-String mappings (entries stored as consecutive key/value pairs).
 */
public final class StringVector
{
    private String[] mStrings;

    private int mSize;

    /*
    ///////////////////////////////////////////////////////
    // Life-cycle:
    ///////////////////////////////////////////////////////
     */

    /**
     * @param initialCount Initial capacity of the backing array; may be
     *   zero, in which case the array is allocated on first addition
     */
    public StringVector(int initialCount) {
        mStrings = new String[initialCount];
    }

    /*
    ///////////////////////////////////////////////////////
    // Basic accessors
    ///////////////////////////////////////////////////////
     */

    public int size() { return mSize; }

    public boolean isEmpty() { return mSize == 0; }

    /**
     * @throws IllegalArgumentException if index is not within
     *   [0, size()[
     */
    public String getString(int index) {
        if (index < 0 || index >= mSize) {
            throw new IllegalArgumentException("Index "+index+" out of valid range; current size: "+mSize+".");
        }
        return mStrings[index];
    }

    /**
     * @throws IllegalStateException if the vector is empty
     */
    public String getLastString() {
        if (mSize < 1) {
            throw new IllegalStateException("getLastString() called on empty StringVector.");
        }
        return mStrings[mSize-1];
    }

    /**
     * Exposes the backing array itself (not a copy); only the first
     * {@link #size} entries are part of the logical contents.
     */
    public String[] getInternalArray() {
        return mStrings;
    }

    /**
     * @return Newly allocated array with exactly {@link #size} entries
     */
    public String[] asArray() {
        String[] strs = new String[mSize];
        System.arraycopy(mStrings, 0, strs, 0, mSize);
        return strs;
    }

    /**
     * Identity-based lookup: only finds the value if the very same String
     * instance is contained (hence suitable for intern()ed Strings).
     */
    public boolean containsInterned(String value) {
        String[] str = mStrings;
        for (int i = 0, len = mSize; i < len; ++i) {
            if (str[i] == value) {
                return true;
            }
        }
        return false;
    }

    /*
    ///////////////////////////////////////////////////////
    // Mutators:
    ///////////////////////////////////////////////////////
     */

    public void addString(String str) {
        ensureCapacity(mSize + 1);
        mStrings[mSize++] = str;
    }

    public void addStrings(String str1, String str2) {
        ensureCapacity(mSize + 2);
        mStrings[mSize] = str1;
        mStrings[mSize+1] = str2;
        mSize += 2;
    }

    /**
     * Overwrites the entry at given index of the backing array; index is
     * not checked against {@link #size}.
     */
    public void setString(int index, String str) {
        mStrings[index] = str;
    }

    /**
     * @param removeRefs If true, entries are also nulled out so that the
     *   Strings can be garbage collected; if false only size is reset
     */
    public void clear(boolean removeRefs) {
        if (removeRefs) {
            for (int i = 0, len = mSize; i < len; ++i) {
                mStrings[i] = null;
            }
        }
        mSize = 0;
    }

    /**
     * Removes and returns the last entry.
     *
     * @throws IllegalStateException if the vector is empty
     */
    public String removeLast() {
        // Check first, so that a failed call can not leave size negative
        if (mSize < 1) {
            throw new IllegalStateException("removeLast() called on empty StringVector.");
        }
        String result = mStrings[--mSize];
        mStrings[mSize] = null;
        return result;
    }

    /**
     * Removes given number of entries from the end; zero or negative
     * count is a no-op.
     *
     * @throws IllegalArgumentException if count exceeds current size
     */
    public void removeLast(int count) {
        if (count > mSize) {
            throw new IllegalArgumentException("Can not remove "+count+" entries; current size: "+mSize+".");
        }
        while (--count >= 0) {
            mStrings[--mSize] = null;
        }
    }

    /**
     * Makes sure the backing array can hold at least given number of
     * entries. Array is tripled in size when it has to grow, but never
     * ends up smaller than what is needed (tripling alone would leave
     * an initially empty array empty).
     */
    private void ensureCapacity(int minCapacity) {
        String[] old = mStrings;
        int oldSize = old.length;
        if (minCapacity > oldSize) {
            mStrings = new String[Math.max(minCapacity, oldSize + (oldSize << 1))];
            System.arraycopy(old, 0, mStrings, 0, oldSize);
        }
    }

    /*
    ///////////////////////////////////////////////////////
    // Specialized "map accessors":
    ///////////////////////////////////////////////////////
     */

    /**
     * Specialized access method; treats vector as a Map, with 2 Strings
     * per entry; first one being key, second value. Further, keys are
     * assumed to be canonicalized with passed in key (ie. either intern()ed,
     * or resolved from symbol table), since comparison is by identity.
     * Starting from the
     * end (assuming even number of entries), tries to find an entry with
     * matching key, and if so, returns value.
     *
     * @return Value of the last entry with given key; null if none
     */
    public String findLastFromMap(String key) {
        int index = mSize;
        while ((index -= 2) >= 0) {
            if (mStrings[index] == key) {
                return mStrings[index+1];
            }
        }
        return null;
    }

    /**
     * Like {@link #findLastFromMap}, but compares keys using equals()
     * so that key need not be canonicalized.
     */
    public String findLastNonInterned(String key)
    {
        int index = mSize;
        while ((index -= 2) >= 0) {
            String curr = mStrings[index];
            if (curr == key || (curr != null && curr.equals(key))) {
                return mStrings[index+1];
            }
        }
        return null;
    }

    /**
     * @return Index of the key of the last entry with given key (compared
     *   using equals()); -1 if none
     */
    public int findLastIndexNonInterned(String key) {
        int index = mSize;
        while ((index -= 2) >= 0) {
            String curr = mStrings[index];
            if (curr == key || (curr != null && curr.equals(key))) {
                return index;
            }
        }
        return -1;
    }

    /**
     * @return Key of the last entry that has given value (compared using
     *   equals()); null if none
     */
    public String findLastByValueNonInterned(String value) {
        for (int index = mSize-1; index > 0; index -= 2) {
            String currVal = mStrings[index];
            if (currVal == value || (currVal != null && currVal.equals(value))) {
                return mStrings[index-1];
            }
        }
        return null;
    }

    /**
     * @return Index of the key of the last entry that has given value
     *   (compared using equals()); -1 if none
     */
    public int findLastIndexByValueNonInterned(String value) {
        for (int index = mSize-1; index > 0; index -= 2) {
            String currVal = mStrings[index];
            if (currVal == value || (currVal != null && currVal.equals(value))) {
                return index-1;
            }
        }
        return -1;
    }

    /*
    ///////////////////////////////////////////////////////
    // Other methods
    ///////////////////////////////////////////////////////
     */

    /**
     * Debug representation: lists each entry along with its identity
     * hash code (useful for checking whether Strings are shared).
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(mSize * 16);
        sb.append("[(size = ");
        sb.append(mSize);
        sb.append(" ) ");
        for (int i = 0; i < mSize; ++i) {
            if (i > 0) {
                sb.append(", ");
            }
            sb.append('"');
            sb.append(mStrings[i]);
            sb.append('"');

            sb.append(" == ");
            sb.append(Integer.toHexString(System.identityHashCode(mStrings[i])));
        }
        sb.append(']');
        return sb.toString();
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/SymbolTable.java 0000664 0000000 0000000 00000060504 13257562550 0027261 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.util;
/**
* This class is a kind of specialized type-safe Map, from char array to
* String value. Specialization means that in addition to type-safety
* and specific access patterns (key char array, Value optionally interned
* String; values added on access if necessary), and that instances are
* meant to be used concurrently, but by using well-defined mechanisms
* to obtain such concurrently usable instances. Main use for the class
* is to store symbol table information for things like compilers and
* parsers; especially when number of symbols (keywords) is limited.
*
* For optimal performance, usage pattern should be one where matches
* should be very common (esp. after "warm-up"), and as with most hash-based
* maps/sets, that hash codes are uniformly distributed. Also, collisions
* are slightly more expensive than with HashMap or HashSet, since hash codes
* are not used in resolving collisions; that is, equals() comparison is
* done with all symbols in same bucket index.
* Usual usage pattern is to create a single "master" instance, and either
* use that instance in sequential fashion, or to create derived "child"
* instances, which after use, are asked to return possible symbol additions
* to master instance. In either case benefit is that symbol table gets
* initialized so that further uses are more efficient, as eventually all
* symbols needed will already be in symbol table. At that point no more
* Symbol String allocations are needed, nor changes to symbol table itself.
*
* Note that while individual SymbolTable instances are NOT thread-safe
* (much like generic collection classes), concurrently used "child"
* instances can be freely used without synchronization. However, using
* master table concurrently with child instances can only be done if
* access to master instance is read-only (ie. no modifications done).
*/
public class SymbolTable {
/**
* Default initial table size; no need to make it minuscule, due
* to couple of things: first, overhead of array reallocation
* is significant,
* and second, overhead of rehashing is also non-negligible.
*
* Let's use 128 as the default; it allows for up to 96 symbols,
* and uses about 512 bytes on 32-bit machines.
*/
protected static final int DEFAULT_TABLE_SIZE = 128;
// Default fill factor; presumably what size threshold gets calculated
// from when caller does not specify one (constructors not shown here)
protected static final float DEFAULT_FILL_FACTOR = 0.75f;
// Shared instance returned by lookups when symbol length is less than 1
protected static final String EMPTY_STRING = "";
/*
////////////////////////////////////////
// Configuration:
////////////////////////////////////////
*/
/**
* Flag that determines whether Strings to be added need to be
* interned before being added or not. Forcing intern()ing will add
* some overhead when adding new Strings, but may be beneficial if such
* Strings are generally used by other parts of system. Note that even
* without interning, all returned String instances are guaranteed
* to be comparable with equality (==) operator; it's just that such
* guarantees are not made for Strings other classes return.
*/
protected boolean mInternStrings;
/*
////////////////////////////////////////
// Actual symbol table data:
////////////////////////////////////////
*/
/**
* Primary matching symbols; it's expected most matches occur from
* here.
*/
protected String[] mSymbols;
/**
* Overflow buckets; if primary doesn't match, lookup is done
* from here.
*
* Note: Number of buckets is half of number of symbol entries, on
* assumption there's less need for buckets.
*/
protected Bucket[] mBuckets;
/**
* Current size (number of entries); needed to know if and when
* to rehash.
*/
protected int mSize;
/**
* Limit that indicates maximum size this instance can hold before
* it needs to be expanded and rehashed. Calculated using fill
* factor passed in to constructor.
*/
protected int mSizeThreshold;
/**
* Mask used to get index from hash values; equal to
*
* Note: while data access part of this method is synchronized, it is
* generally not safe to both use makeChild/mergeChild, AND to use instance
* actively. Instead, a separate 'root' instance should be used
* on which only makeChild/mergeChild are called, but instance itself
* is not used as a symbol table.
*/
public SymbolTable makeChild()
{
    // Locals that hold a consistent snapshot of this table's state
    final boolean intern;
    final String[] sharedSymbols;
    final Bucket[] sharedBuckets;
    final int entryCount;
    final int threshold;
    final int mask;
    final int childVersion;

    // Only the snapshot is taken under lock; child is built outside of it
    synchronized (this) {
        intern = mInternStrings;
        sharedSymbols = mSymbols;
        sharedBuckets = mBuckets;
        entryCount = mSize;
        threshold = mSizeThreshold;
        mask = mIndexMask;
        // Child is one version ahead of the table it was made from
        childVersion = mThisVersion + 1;
    }
    return new SymbolTable(intern, sharedSymbols, sharedBuckets,
            entryCount, threshold, mask, childVersion);
}
/**
 * Method that allows contents of child table to potentially be
 * "merged in" with contents of this symbol table: if the child holds
 * more symbols than this table, this table takes over the child's
 * symbol and bucket arrays (and related size information).
 *
 * Note that caller has to make sure symbol table passed in is
 * really a child or sibling of this symbol table.
 */
public synchronized void mergeChild(SymbolTable child)
{
    // Basic sanity check: only proceed if child has something to add
    if (child.size() > size()) {
        // Take over the child's data as is
        mSymbols = child.mSymbols;
        mBuckets = child.mBuckets;
        mSize = child.mSize;
        mSizeThreshold = child.mSizeThreshold;
        mIndexMask = child.mIndexMask;
        mThisVersion++; // to prevent other children from overriding

        /* Arrays are now shared with the child, so neither side may
         * modify them in place. Clearing the dirty flag is what forces
         * a table to copy the arrays before its next addition. For
         * this (master) table it shouldn't really matter, but let's
         * clear it just in case.
         */
        mDirty = false;

        /* And the child has to be marked the same way, so that it
         * will not be modifying arrays we "took over" (since child may
         * have returned an updated table before it stopped fully using
         * the SymbolTable: for example, it may still use it for
         * parsing PI targets in epilog)
         */
        child.mDirty = false;
    }
}
/*
////////////////////////////////////////////////////
// Public API, configuration
////////////////////////////////////////////////////
*/
/**
* Changes whether Strings that get added to this table are intern()ed
* first; only sets the flag, and does not touch symbols already in
* the table.
*/
public void setInternStrings(boolean state) {
mInternStrings = state;
}
/*
////////////////////////////////////////////////////
// Public API, generic accessors:
////////////////////////////////////////////////////
*/
/**
* @return Number of symbols currently contained
*/
public int size() { return mSize; }
/**
* @return Version number of this instance; a child table gets version
*   one higher than the table it was created from
*/
public int version() { return mThisVersion; }
/**
* @return Current state of the dirty flag; symbol lookups set it after
*   copying arrays for modification, and copy arrays (before adding a
*   symbol) when it is not set
*/
public boolean isDirty() { return mDirty; }
public boolean isDirectChildOf(SymbolTable t)
{
    /* Version comparison alone does not really prove this table is a
     * child of the given one (a sequence number, or identity hash,
     * would be needed for that), but it is good enough when the
     * relationship is known to exist.
     */
    /* (for a real check, one would need to track child/descendant
     * information; or at least an identity hash... or maybe even just
     * a _static_ global counter for instances... maybe that would
     * actually be worth doing?)
     */
    return mThisVersion == (t.mThisVersion + 1);
}
/*
////////////////////////////////////////////////////
// Public API, accessing symbols:
////////////////////////////////////////////////////
*/
/**
 * Main access method; will check if actual symbol String exists;
 * if so, returns it; if not, will create, add and return it.
 *
 * @param buffer Buffer that contains the symbol characters
 * @param start Offset of the first symbol character in buffer
 * @param len Number of symbol characters
 * @param hash Hash code of the symbol characters; has to be the value
 *   {@link #calcHash(char[],int,int)} gives for them, as that is what
 *   gets used when entries are redistributed on rehash
 *
 * @return The symbol matching String in input array; shared empty
 *   String if length is less than 1
 */
public String findSymbol(char[] buffer, int start, int len, int hash)
{
    // Sanity check:
    if (len < 1) {
        return EMPTY_STRING;
    }

    hash &= mIndexMask;

    String sym = mSymbols[hash];

    // Optimal case; checking existing primary symbol for hash index:
    if (sym != null) {
        // Let's inline primary String equality checking:
        if (sym.length() == len) {
            int i = 0;
            do {
                if (sym.charAt(i) != buffer[start+i]) {
                    break;
                }
            } while (++i < len);
            // Optimal case; primary match found
            if (i == len) {
                return sym;
            }
        }
        // How about collision bucket?
        Bucket b = mBuckets[hash >> 1];
        if (b != null) {
            sym = b.find(buffer, start, len);
            if (sym != null) {
                return sym;
            }
        }
    }

    // Not found, have to add. Need to expand?
    if (mSize >= mSizeThreshold) {
        rehash();
        /* Need to recalc hash; rare occurence (index mask has been
         * recalculated as part of rehash)
         */
        hash = calcHash(buffer, start, len) & mIndexMask;
        /* Rehashing allocated new arrays, so they are not shared with
         * any other instance: no copy-on-write is needed after this,
         * and the table does now differ from what it was created from.
         */
        mDirty = true;
    } else if (!mDirty) {
        // Or perhaps we need to do copy-on-write?
        copyArrays();
        mDirty = true;
    }

    ++mSize;

    String newSymbol = new String(buffer, start, len);
    if (mInternStrings) {
        newSymbol = newSymbol.intern();
    }
    // Ok; do we need to add primary entry, or a bucket?
    if (mSymbols[hash] == null) {
        mSymbols[hash] = newSymbol;
    } else {
        // Bucket array is half the size of primary one, hence the shift
        int bix = hash >> 1;
        mBuckets[bix] = new Bucket(newSymbol, mBuckets[bix]);
    }

    return newSymbol;
}
/**
 * Similar to {@link #findSymbol(char[],int,int,int)}, but will not add
 * passed in symbol if it is not in symbol table yet.
 *
 * @return Matching symbol if the table contains one; shared empty
 *   String if length is less than 1; null if there is no match
 */
public String findSymbolIfExists(char[] buffer, int start, int len, int hash)
{
    // Sanity check:
    if (len < 1) {
        return EMPTY_STRING;
    }

    final int slot = hash & mIndexMask;
    final String primary = mSymbols[slot];

    // Overflow bucket is only consulted when the primary slot is taken
    if (primary == null) {
        return null;
    }

    // Optimal case is that primary symbol is the match; inlined comparison
    if (primary.length() == len) {
        int matched = 0;
        while (matched < len && primary.charAt(matched) == buffer[start + matched]) {
            ++matched;
        }
        if (matched == len) {
            return primary;
        }
    }

    // If not, the collision bucket (if any) is the last resort
    final Bucket overflow = mBuckets[slot >> 1];
    if (overflow == null) {
        return null;
    }
    return overflow.find(buffer, start, len);
}
/**
 * Similar to {@link #findSymbol(char[],int,int,int)}; used to either
 * do potentially cheap intern() (if table already has intern()ed version),
 * or to pre-populate symbol table with known values.
 *
 * @param str Symbol to find or add
 *
 * @return Symbol contained in the table that is equal to the argument
 *   (which is the argument itself, or its intern()ed version, if it
 *   had to be added); shared empty String if argument is empty
 */
public String findSymbol(String str)
{
    int len = str.length();
    // Sanity check:
    if (len < 1) {
        return EMPTY_STRING;
    }

    int index = calcHash(str) & mIndexMask;
    String sym = mSymbols[index];

    // Optimal case; checking existing primary symbol for hash index:
    if (sym != null) {
        // Optimal case; primary match found
        if (sym.equals(str)) {
            return sym;
        }
        // How about collision bucket?
        Bucket b = mBuckets[index >> 1];
        if (b != null) {
            sym = b.find(str);
            if (sym != null) {
                return sym;
            }
        }
    }

    // Not found, have to add. Need to expand?
    if (mSize >= mSizeThreshold) {
        rehash();
        /* Need to recalc hash; rare occurence (index mask has been
         * recalculated as part of rehash)
         */
        index = calcHash(str) & mIndexMask;
        /* Rehashing allocated new arrays, so they are not shared with
         * any other instance: no copy-on-write is needed after this,
         * and the table does now differ from what it was created from.
         */
        mDirty = true;
    } else if (!mDirty) {
        // Or perhaps we need to do copy-on-write?
        copyArrays();
        mDirty = true;
    }

    ++mSize;

    if (mInternStrings) {
        str = str.intern();
    }
    // Ok; do we need to add primary entry, or a bucket?
    if (mSymbols[index] == null) {
        mSymbols[index] = str;
    } else {
        // Bucket array is half the size of primary one, hence the shift
        int bix = index >> 1;
        mBuckets[bix] = new Bucket(str, mBuckets[bix]);
    }

    return str;
}
/**
 * Implementation of a hashing method for variable length
 * Strings. Most of the time intention is that this calculation
 * is done by caller during parsing, not here; however, sometimes
 * it needs to be done for parsed "String" too.
 *
 * Hash starts out as the first character, and for each further
 * character is multiplied by 31 before the character is added.
 *
 * @param buffer Buffer that contains the characters to hash
 * @param start Offset of the first character to hash
 * @param len Length of String; has to be at least 1 (caller guarantees
 *   this pre-condition)
 */
public static int calcHash(char[] buffer, int start, int len) {
    int hash = buffer[start];
    final int end = start + len;
    for (int ix = start + 1; ix < end; ++ix) {
        hash = (31 * hash) + buffer[ix];
    }
    return hash;
}
/**
 * Calculates hash code for given symbol: hash starts out as the first
 * character, and for each further character is multiplied by 31
 * before the character is added.
 *
 * @param key Symbol to calculate hash for; has to have at least
 *   one character
 */
public static int calcHash(String key) {
    final int end = key.length();
    int hash = key.charAt(0);
    int ix = 1;
    while (ix < end) {
        hash = (31 * hash) + key.charAt(ix++);
    }
    return hash;
}
/*
//////////////////////////////////////////////////////////
// Internal methods
//////////////////////////////////////////////////////////
*/
/**
 * Method called when copy-on-write is needed; generally when first
 * change is made to a derived symbol table. Replaces both the symbol
 * and the bucket array with same-sized copies of themselves; Bucket
 * instances (which can not be changed once constructed) stay shared.
 */
private void copyArrays() {
    mSymbols = mSymbols.clone();
    mBuckets = mBuckets.clone();
}
/**
 * Method called when size (number of entries) of symbol table grows
 * so big that load factor is exceeded. Since size has to remain
 * power of two, arrays will then always be doubled. Main work
 * is really redistributing old entries into new String/Bucket
 * entries.
 *
 * @throws IllegalStateException if number of entries redistributed
 *   differs from the recorded size
 */
private void rehash()
{
    final String[] prevSymbols = mSymbols;
    final Bucket[] prevBuckets = mBuckets;
    final int prevCapacity = prevSymbols.length;
    final int capacity = prevCapacity << 1;

    mSymbols = new String[capacity];
    mBuckets = new Bucket[capacity >> 1];
    // Index mask and threshold are updated right away; mask is needed below
    mIndexMask = capacity - 1;
    mSizeThreshold <<= 1;

    int moved = 0; // for the sanity check at the end

    /* Two passes are needed, since spill-over area is only half the
     * size of the primary area. First, the primary symbols:
     */
    for (int i = 0; i < prevCapacity; ++i) {
        final String symbol = prevSymbols[i];
        if (symbol == null) {
            continue;
        }
        ++moved;
        final int index = calcHash(symbol) & mIndexMask;
        if (mSymbols[index] != null) {
            final int bix = index >> 1;
            mBuckets[bix] = new Bucket(symbol, mBuckets[bix]);
        } else {
            mSymbols[index] = symbol;
        }
    }

    // And then everything that was chained in the overflow buckets
    final int prevBucketCount = prevCapacity >> 1;
    for (int i = 0; i < prevBucketCount; ++i) {
        for (Bucket b = prevBuckets[i]; b != null; b = b.getNext()) {
            ++moved;
            final String symbol = b.getSymbol();
            final int index = calcHash(symbol) & mIndexMask;
            if (mSymbols[index] != null) {
                final int bix = index >> 1;
                mBuckets[bix] = new Bucket(symbol, mBuckets[bix]);
            } else {
                mSymbols[index] = symbol;
            }
        }
    }

    if (moved != mSize) {
        throw new IllegalStateException("Internal error on SymbolTable.rehash(): had "+mSize+" entries; now have "+moved+".");
    }
}
/*
//////////////////////////////////////////////////////////
// Test/debug support:
//////////////////////////////////////////////////////////
*/
/**
 * Debug method that calculates average number of comparisons needed
 * to find a contained symbol: a primary entry counts as one, first
 * entry of an overflow bucket as two, the next one as three and so on.
 *
 * @return Sum of the per-symbol costs divided by number of entries
 */
public double calcAvgSeek() {
    int total = 0;

    for (String primary : mSymbols) {
        if (primary != null) {
            ++total;
        }
    }
    for (Bucket head : mBuckets) {
        int cost = 2;
        for (Bucket b = head; b != null; b = b.getNext()) {
            total += cost;
            ++cost;
        }
    }
    return ((double) total) / ((double) mSize);
}
/*
//////////////////////////////////////////////////////////
// Bucket class
//////////////////////////////////////////////////////////
*/
/**
 * Overflow entry of the symbol table: holds one symbol, and a
 * reference to the next entry of the same singly-linked list (null
 * for the last one). Instances can not be modified once constructed.
 */
static final class Bucket {
    private final String mSymbol;
    private final Bucket mNext;

    public Bucket(String symbol, Bucket next) {
        mSymbol = symbol;
        mNext = next;
    }

    public String getSymbol() { return mSymbol; }
    public Bucket getNext() { return mNext; }

    /**
     * Looks for a symbol that consists of the given characters,
     * starting from this entry and following the list.
     *
     * @return Matching symbol, if one is found; null if not
     */
    public String find(char[] buf, int start, int len) {
        for (Bucket node = this; node != null; node = node.mNext) {
            final String candidate = node.mSymbol;
            if (candidate.length() == len && sameChars(candidate, buf, start, len)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Looks for a symbol that is equal to the given String, starting
     * from this entry and following the list.
     *
     * @return Contained symbol instance, if one is found; null if not
     */
    public String find(String str) {
        Bucket node = this;
        do {
            final String candidate = node.mSymbol;
            if (candidate.equals(str)) {
                return candidate;
            }
            node = node.mNext;
        } while (node != null);
        return null;
    }

    /**
     * Compares characters of a symbol, already known to have length
     * len, to the buffer contents.
     */
    private static boolean sameChars(String sym, char[] buf, int start, int len) {
        int i = 0;
        do {
            if (sym.charAt(i) != buf[start + i]) {
                return false;
            }
        } while (++i < len);
        return true;
    }
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/TextAccumulator.java 0000664 0000000 0000000 00000003601 13257562550 0030163 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
/**
 * Simple utility class used to efficiently accumulate and concatenate
 * text passed in various forms. A single String segment is retained as
 * is; a builder gets used only when more than one segment is added, or
 * when text is passed as a character array.
 */
public final class TextAccumulator
{
    /** First segment, if it was a String and nothing else has been added */
    private String mText = null;

    /** Accumulated text in all other cases; never set together with mText */
    private StringBuilder mBuilder = null;

    public TextAccumulator() { }

    /**
     * @return True if any non-empty text has been added since construction
     *   or the last call to {@link #getAndClear}
     */
    public boolean hasText() {
        return (mText != null) || (mBuilder != null);
    }

    /**
     * Appends given text; empty String is ignored.
     */
    public void addText(String text)
    {
        final int len = text.length();
        if (len <= 0) {
            return;
        }
        if (mBuilder == null) {
            if (mText == null) {
                // Very first segment: can just hold on to it
                mText = text;
                return;
            }
            // Second segment: time to switch over to using a builder
            mBuilder = new StringBuilder(mText.length() + len).append(mText);
            mText = null;
        }
        mBuilder.append(text);
    }

    /**
     * Appends characters from buffer, from offset start (inclusive) to
     * offset end (exclusive); empty range is ignored.
     */
    public void addText(char[] buf, int start, int end)
    {
        final int len = end - start;
        if (len <= 0) {
            return;
        }
        if (mBuilder == null) {
            if (mText == null) {
                /* More efficient to use a builder than to construct
                 * a String for the segment.
                 */
                mBuilder = new StringBuilder(len);
            } else {
                mBuilder = new StringBuilder(mText.length() + len).append(mText);
                mText = null;
            }
        }
        mBuilder.append(buf, start, len);
    }

    /**
     * @return Text accumulated so far (empty String if none); accumulator
     *   is empty after the call
     */
    public String getAndClear()
    {
        final String result;
        if (mText != null) {
            result = mText;
            mText = null;
        } else if (mBuilder != null) {
            result = mBuilder.toString();
            mBuilder = null;
        } else {
            result = "";
        }
        return result;
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/TextBuffer.java 0000664 0000000 0000000 00000132151 13257562550 0027120 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
import java.io.*;
import java.util.ArrayList;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.validation.XMLValidator;
import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.dtd.DTDEventListener;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.StringUtil;
/**
* TextBuffer is a class similar to {@link StringBuilder}, with
* following differences:
*
* Over time more and more cruft has accumulated here, mostly to
* support efficient access to collected text. Since access is
* easiest to do efficiently using callbacks, this class now needs
* to know the interfaces of SAX classes and validators.
*
* Notes about usage: for debugging purposes, it's suggested to use
* {@link #toString} method, as opposed to
* {@link #contentsAsArray} or {@link #contentsAsString}. Internally
* resulting code paths may or may not be different, WRT caching.
*
* @author Tatu Saloranta
*/
public final class TextBuffer
{
/* 23-Mar-2006, TSa: Memory buffer clearing is a significant overhead
* for small documents, no need to use huge buffer -- it will expand
* as necessary for larger docs, but commonly text segments just
* aren't that long.
*/
/**
* Size of the first text segment buffer to allocate; need not contain
* the biggest segment, since new ones will get allocated as needed.
* However, it's sensible to use something that often is big enough
* to contain segments.
*/
final static int DEF_INITIAL_BUFFER_SIZE = 500; // 1k
/**
* We will also restrict maximum length of individual segments
* to allocate (not including cases where we must return a single
* segment). Value is somewhat arbitrary, let's use it so that
* memory used is no more than 1/2 megabytes.
*/
final static int MAX_SEGMENT_LENGTH = 256 * 1024;
final static int INT_SPACE = 0x0020;
// // // Configuration:
private final ReaderConfig mConfig;
// // // Shared read-only input buffer:
/**
* Shared input buffer; stored here in case some input can be returned
* as is, without being copied to collector's own buffers. Note that
* this is read-only for this Object.
*/
private char[] mInputBuffer;
/**
* Character offset of first char in input buffer; -1 to indicate
* that input buffer currently does not contain any useful char data
*/
private int mInputStart;
/**
* When using shared buffer, offset after the last character in
* shared buffer
*/
private int mInputLen;
// // // Internal non-shared collector buffers:
private boolean mHasSegments = false;
/**
* List of segments prior to currently active segment.
*/
private ArrayList
* Although this is an efficient data struct for specific set of usage
* patterns, one restriction is that the full set of words to include has to
* be known before constructing the instance. Also, the size of the set is
* limited to total word content of about 20k characters.
*
* TODO: Should document the internal data structure...
*/
public final class WordResolver
{
/**
* Maximum number of words (Strings) an instance can contain
*/
public final static int MAX_WORDS = 0x2000;
final static char CHAR_NULL = (char) 0;
/**
* Offset added to numbers to mark 'negative' numbers. Asymmetric,
* since range of negative markers needed is smaller than positive
* numbers...
*/
final static int NEGATIVE_OFFSET = 0x10000 - MAX_WORDS;
/**
* This is actually just a guess; but in general linear search should
* be faster for short sequences (definitely for 4 or less; maybe up
* to 8 or less?)
*/
final static int MIN_BINARY_SEARCH = 7;
/**
* Compressed presentation of the word set.
*/
final char[] mData;
/**
* Array of actual words returned resolved for matches.
*/
final String[] mWords;
/*
////////////////////////////////////////////////
// Life-cycle
////////////////////////////////////////////////
*/
private WordResolver(String[] words, char[] index) {
mWords = words;
mData = index;
}
/**
* Tries to construct an instance given ordered set of words.
*
* Note: currently maximum number of words that can be contained
* is limited to {@link #MAX_WORDS}; additionally, maximum length
* of all such words can not exceed roughly 28000 characters.
*
* @return WordResolver constructed for given set of words, if
* the word set size is not too big; null to indicate "too big"
* instance.
*/
public static WordResolver constructInstance(TreeSet
* Performance of the set is comparable to that of {@link java.util.TreeSet}
* for Strings, ie. 2-3x slower than {@link java.util.HashSet} when
* using pre-constructed Strings. This is generally result of algorithmic
* complexity of structures; Word and Tree sets are roughly logarithmic
* to the whole data, whereas Hash set is linear to the length of key.
* However:
*
* Although this is an efficient set for specific set of usage patterns,
* one restriction is that the full set of words to include has to be
* known before constructing the set. Also, the size of the set is
* limited to total word content of about 20k characters; factory method
* does verify the limit and indicates if an instance can not be created.
*/
public final class WordSet
{
final static char CHAR_NULL = (char) 0;
/**
* Offset added to numbers to mark 'negative' numbers. Asymmetric,
* since range of negative markers needed is smaller than positive
* numbers...
*/
final static int NEGATIVE_OFFSET = 0xC000;
/**
* This is actually just a guess; but in general linear search should
* be faster for short sequences (definitely for 4 or less; maybe up
* to 8 or less?)
*/
final static int MIN_BINARY_SEARCH = 7;
/**
* Compressed presentation of the word set.
*/
final char[] mData;
/*
////////////////////////////////////////////////
// Life-cycle
////////////////////////////////////////////////
*/
private WordSet(char[] data) {
mData = data;
}
public static WordSet constructSet(TreeSet
Note that some of more generic classes may eventually be moved to more
generic packages under XmlWriter
can only throw IOExceptions.
*/
public interface InvalidCharHandler
{
    /**
     * Method called when an invalid character is encountered in
     * content to output.
     *
     * @param invalidChar Character code of the invalid character
     *
     * @return Character to use in place of the invalid one
     *
     * @throws IOException If handler chooses to fail instead
     */
    public char convertInvalidChar(int invalidChar) throws IOException;

    /**
     * Handler that never returns a replacement: every call fails with
     * an {@link IOException} that describes what is wrong with the
     * character. It is the default handler
     * used if nothing else has been specified.
     */
    public static class FailingHandler
        implements InvalidCharHandler
    {
        public final static int SURR1_FIRST = 0xD800;
        public final static int SURR1_LAST = 0xDBFF;
        public final static int SURR2_FIRST = 0xDC00;
        public final static int SURR2_LAST = 0xDFFF;

        private final static FailingHandler sInstance = new FailingHandler();

        protected FailingHandler() { }

        public static FailingHandler getInstance() { return sInstance; }

        @Override
        public char convertInvalidChar(int c) throws IOException
        {
            /* 17-May-2006, TSa: Would really be useful if we could throw
             *   XMLStreamExceptions; esp. to indicate actual output location.
             *   However, this causes problem with methods that call us and
             *   can only throw IOExceptions (when invoked via Writer proxy).
             *   Need to figure out how to resolve this.
             */
            final String hex = Integer.toHexString(c);
            final String msg;

            if (c == 0) {
                msg = "Invalid null character in text to output";
            } else if (c < ' ' || (c >= 0x7F && c <= 0x9F)) {
                msg = "Invalid white space character (0x"+hex+") in text to output (in xml 1.1, could output as a character entity)";
            } else if (c > 0x10FFFF) {
                msg = "Illegal unicode character point (0x"+hex+") to output; max is 0x10FFFF as per RFC 3629";
            } else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
                /* Surrogate pair in non-quotable (not text or attribute value)
                 * content, and non-unicode encoding (ISO-8859-x, Ascii)?
                 */
                msg = "Illegal surrogate pair -- can only be output via character entities, which are not allowed in this content";
            } else {
                msg = "Invalid XML character (0x"+hex+") in text to output";
            }
            throw new IOException(msg);
        }
    }

    /**
     * Alternative to the default handler: this handler maps every
     * invalid character to the output character given at construction.
     * That character will
     * not be further verified or modified by the stream writer.
     */
    public static class ReplacingHandler
        implements InvalidCharHandler
    {
        final char mReplacementChar;

        public ReplacingHandler(char c) {
            mReplacementChar = c;
        }

        @Override
        public char convertInvalidChar(int c) throws IOException {
            return mReplacementChar;
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/api/ReaderConfig.java 0000664 0000000 0000000 00000161557 13257562550 0027202 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.api;
import java.lang.ref.SoftReference;
import java.net.URL;
import java.util.*;
import javax.xml.stream.*;
import org.codehaus.stax2.XMLInputFactory2; // for property consts
import org.codehaus.stax2.XMLStreamProperties; // for property consts
import org.codehaus.stax2.validation.DTDValidationSchema;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.cfg.InputConfigFlags;
import com.ctc.wstx.dtd.DTDEventListener;
import com.ctc.wstx.ent.IntEntity;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.io.BufferRecycler;
import com.ctc.wstx.util.ArgUtil;
import com.ctc.wstx.util.DataUtil;
import com.ctc.wstx.util.SymbolTable;
/**
* Simple configuration container class; passed by reader factory to reader
* instance created.
* This {@code ThreadLocal} contains a {@link SoftReference}
* to a {@link BufferRecycler} used to provide a low-cost
* buffer recycling between Reader instances.
*/
final static ThreadLocal
* None.
*
*
*
*/
public void configureForConvenience()
{
// Purpose: applies a preset of reader settings by calling the individual
// do*() setters below; nothing else is changed here.
// StAX (1.0) settings:
doCoalesceText(true);
doReplaceEntityRefs(true);
// StAX2:
doReportCData(false);
doReportPrologWhitespace(false);
/* Also, knowing exact locations is nice esp. for error
* reporting purposes
*/
doPreserveLocation(true);
// Woodstox-specific:
/* Also, we can force errors to be reported in timely manner:
* (once again, at potential expense of performance)
*/
doParseLazily(false);
}
/**
* Method to call to make the Reader created be as fast as possible reading
* documents, especially for long-running processes where caching is
* likely to help.
*
* See {@link XMLInputFactory2#configureForSpeed} for
* required settings for standard StAX/StAX properties.
*
* Settings applied by this method:
*  - text coalescing, location preservation, prolog white space
*    reporting and xml:id uniqueness checks are turned off
*  - namespace URI interning, DTD caching and lazy parsing are turned on
*  - shortest reported text segment is set to 16
*  - input buffer length is set to 8000
*/
public void configureForSpeed()
{
// StAX (1.0):
doCoalesceText(false);
// StAX2:
doPreserveLocation(false);
doReportPrologWhitespace(false);
//doInternNames(true); // this is a NOP
doInternNsURIs(true);
doXmlIdUniqChecks(false);
// Woodstox-specific:
doCacheDTDs(true);
doParseLazily(true);
/* If we let Reader decide sizes of text segments, it should be
* able to optimize it better, thus low min value. This value
* is only used in cases where text is at buffer boundary, or
* where entity prevents using consecutive chars from input buffer:
*/
setShortestReportedTextSegment(16);
setInputBufferLength(8000); // 16k input buffer (presumably counted as 8000 two-byte chars -- TODO confirm)
}
/**
* Method to call to minimize the memory usage of the stream/event reader;
* both regarding Objects created, and the temporary memory usage during
* parsing.
* This generally incurs some performance penalties, due to using
* smaller input buffers.
*
* Settings applied by this method:
*  - text coalescing, location preservation, DTD caching and xml:id
*    uniqueness checks are turned off
*  - lazy parsing is turned on
*  - shortest reported text segment is reset to
*    {@code DEFAULT_SHORTEST_TEXT_SEGMENT}
*  - input buffer length is set to 512
*/
public void configureForLowMemUsage()
{
// StAX (1.0)
doCoalesceText(false);
// StAX2:
doPreserveLocation(false); // can reduce temporary mem usage
// Woodstox-specific:
doCacheDTDs(false);
doParseLazily(true); // can reduce temporary mem usage
doXmlIdUniqChecks(false); // enabling would increase mem usage
setShortestReportedTextSegment(ReaderConfig.DEFAULT_SHORTEST_TEXT_SEGMENT);
setInputBufferLength(512); // 1k input buffer (presumably counted as 512 two-byte chars -- TODO confirm)
// Text buffer need not be huge, as we do not coalesce
}
/**
* Method to call to make Reader try to preserve as much of input
* formatting as possible, so that round-tripping would be as lossless
* as possible.
*
* Settings applied by this method:
*  - text coalescing, entity reference replacement and linefeed
*    normalization are turned off
*  - CDATA and prolog white space reporting are turned on
*  - character references are treated as entities
*  - shortest reported text segment is set to {@code Integer.MAX_VALUE}
*/
public void configureForRoundTripping()
{
// StAX (1.0)
doCoalesceText(false);
doReplaceEntityRefs(false);
// StAX2:
doReportCData(true);
doReportPrologWhitespace(true);
// Woodstox specific settings
doTreatCharRefsAsEnts(true);
doNormalizeLFs(false);
// effectively prevents from reporting partial segments:
setShortestReportedTextSegment(Integer.MAX_VALUE);
}
/*
///////////////////////////////////////////////////////////////////////
// Buffer recycling:
///////////////////////////////////////////////////////////////////////
*/
/**
 * Returns a small char buffer, taken from the current recycler when
 * one exists and can supply a buffer, freshly allocated otherwise.
 *
 * @param minSize Size passed to the recycler, and used as the exact
 *   length of a newly allocated buffer
 */
public char[] allocSmallCBuffer(int minSize)
{
    final char[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getSmallCBuffer(minSize);
    // No recycler, or it had nothing suitable: allocate a new one
    return (recycled == null) ? new char[minSize] : recycled;
}
/**
* Hands a small char buffer to the current recycler, creating the
* recycler first (via createRecycler()) if there is none yet.
*
* @param buffer Buffer passed on to the recycler
*/
public void freeSmallCBuffer(char[] buffer)
{
// Need to create (and assign) the recycler?
if (mCurrRecycler == null) {
mCurrRecycler = createRecycler();
}
mCurrRecycler.returnSmallCBuffer(buffer);
}
/**
 * Returns a medium char buffer, taken from the current recycler when
 * one exists and can supply a buffer, freshly allocated otherwise.
 *
 * @param minSize Size passed to the recycler, and used as the exact
 *   length of a newly allocated buffer
 */
public char[] allocMediumCBuffer(int minSize)
{
    final char[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getMediumCBuffer(minSize);
    return (recycled == null) ? new char[minSize] : recycled;
}
/**
* Hands a medium char buffer to the current recycler, creating the
* recycler first (via createRecycler()) if there is none yet.
*
* @param buffer Buffer passed on to the recycler
*/
public void freeMediumCBuffer(char[] buffer)
{
// Recycler is created lazily, on first returned buffer
if (mCurrRecycler == null) {
mCurrRecycler = createRecycler();
}
mCurrRecycler.returnMediumCBuffer(buffer);
}
/**
 * Returns a full-size char buffer, taken from the current recycler when
 * one exists and can supply a buffer, freshly allocated otherwise.
 *
 * @param minSize Size passed to the recycler, and used as the exact
 *   length of a newly allocated buffer
 */
public char[] allocFullCBuffer(int minSize)
{
    final char[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getFullCBuffer(minSize);
    return (recycled == null) ? new char[minSize] : recycled;
}
/**
* Hands a full-size char buffer to the current recycler, creating the
* recycler first (via createRecycler()) if there is none yet.
*
* @param buffer Buffer passed on to the recycler
*/
public void freeFullCBuffer(char[] buffer)
{
// Need to create (and assign) the recycler?
if (mCurrRecycler == null) {
mCurrRecycler = createRecycler();
}
mCurrRecycler.returnFullCBuffer(buffer);
}
/**
 * Returns a full-size byte buffer, taken from the current recycler when
 * one exists and can supply a buffer, freshly allocated otherwise.
 *
 * @param minSize Size passed to the recycler, and used as the exact
 *   length of a newly allocated buffer
 */
public byte[] allocFullBBuffer(int minSize)
{
    final byte[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getFullBBuffer(minSize);
    return (recycled == null) ? new byte[minSize] : recycled;
}
/**
* Hands a full-size byte buffer to the current recycler, creating the
* recycler first (via createRecycler()) if there is none yet.
*
* @param buffer Buffer passed on to the recycler
*/
public void freeFullBBuffer(byte[] buffer)
{
// Need to create (and assign) the recycler?
if (mCurrRecycler == null) {
mCurrRecycler = createRecycler();
}
mCurrRecycler.returnFullBBuffer(buffer);
}
private BufferRecycler createRecycler()
{
BufferRecycler recycler = new BufferRecycler();
// No way to reuse/reset SoftReference, have to create new always:
mRecyclerRef.set(new SoftReferenceP_MIN_TEXT_SEGMENT
to the maximum value so
* that all original text segment chunks are reported without
* segmentation (but without coalescing with adjacent CDATA segments)
* P_TREAT_CHAR_REFS_AS_ENTS
to true, so the all the
* original character references are reported with their position,
* original text, and the replacement text.
* This {@code ThreadLocal} contains a {@link SoftReference}
* to a {@link BufferRecycler} used to provide a low-cost
* buffer recycling between Reader instances.
*/
final static ThreadLocal
*
*PARSING_MODE_DOCUMENT
) allows parsing of only
* well-formed XML documents, but the other two modes allow more lenient
* parsing. Fragment mode allows parsing of XML content that does not
* have a single root element (can have zero or more), nor can have
* XML or DOCTYPE declarations: this may be useful if parsing a subset
* of a full XML document. Multi-document
* (PARSING_MODE_DOCUMENTS
) mode on the other hand allows
* parsing of a stream that contains multiple consequtive well-formed
* documents, with possibly multiple XML and DOCTYPE declarations.
*close
or
* closeCompletely
, or implicitly by a call
* to writeEndDocument
.
*javax.xml.transform.dom.DOMResult
.
*
*
*
* @author Tatu Saloranta
* @author Dan Diephouse
*/
public class WstxDOMWrappingWriter
extends DOMWrappingWriter
{
/*
///////////////////////////////////////////////////////////
// Constants
///////////////////////////////////////////////////////////
*/
final protected static String ERR_NSDECL_WRONG_STATE =
"Trying to write a namespace declaration when there is no open start element.";
/*
///////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////
*/
protected final WriterConfig mConfig;
/*
///////////////////////////////////////////////////////////
// State
///////////////////////////////////////////////////////////
*/
/**
* This element is the current context element, under which
* all other nodes are added, until matching end element
* is output. Null outside of the main element tree.
*
* Note: explicit empty element (written using
* {@code writeEmptyElement}) will never become
* current element.
*/
protected DOMOutputElement mCurrElem;
/**
* This element is non-null right after a call to
* either {@code writeStartElement} or
* {@code writeEmptyElement}, and can be used to
* add attributes and namespace declarations.
*
* Note: an explicit empty element (written using
* {@code writeEmptyElement}) will
* become open element but NOT current element. Conversely,
* regular elements will remain current element when
* non elements are written (text, comments, PI), but
* not the open element.
*/
protected DOMOutputElement mOpenElement;
/**
* for NsRepairing mode
*/
protected int[] mAutoNsSeq;
protected String mSuggestedDefNs = null;
protected String mAutomaticNsPrefix;
/**
* Map that contains URI-to-prefix entries that point out suggested
* prefixes for URIs. These are populated by calls to
* {@link #setPrefix}, and they are only used as hints for binding;
* if there are conflicts, repairing writer can just use some other
* prefix.
*/
HashMapvalidateElementStart
,
* validateAttribute
,
* validateElementAndAttributes
calls.
*/
protected DTDElement mCurrElem = null;
/**
* Stack of element definitions matching the current active element stack.
* Instances are elements definitions read from DTD.
*/
protected DTDElement[] mElems = null;
/**
* Number of elements in {@link #mElems}.
*/
protected int mElemCount = 0;
/**
* Attribute definitions for attributes the current element may have
*/
protected HashMapmUndeclaredEntity
*/
private String mValue = null;
/**
* For now, let's only keep track of the first undeclared entity:
* can be extended if necessary.
*/
private UndeclaredEntity mUndeclaredEntity = null;
/*
////////////////////////////////////////////////////
// Life-cycle (creation, configuration)
////////////////////////////////////////////////////
*/
/**
* Private constructor; only records the given default value type
* in {@code mDefValueType}.
*
* @param defValueType Type code to store
*/
private DefaultAttrValue(int defValueType)
{
mDefValueType = defValueType;
}
/** @return The shared instance held in {@code sImplied}; no new object is created */
public static DefaultAttrValue constructImplied() { return sImplied; }
/** @return The shared instance held in {@code sRequired}; no new object is created */
public static DefaultAttrValue constructRequired() { return sRequired; }
/** @return A new instance whose default value type is {@code DEF_FIXED} */
public static DefaultAttrValue constructFixed() {
return new DefaultAttrValue(DEF_FIXED);
}
/** @return A new instance whose default value type is {@code DEF_DEFAULT} */
public static DefaultAttrValue constructOptional() {
return new DefaultAttrValue(DEF_DEFAULT);
}
/**
* Replaces the stored value.
*
* @param v Value to store; stored as is, without any checks
*/
public void setValue(String v) {
mValue = v;
}
/**
* Records an undeclared entity, flagged as a PE
* (passes {@code true} as the {@code isPe} argument).
*
* @param name Name of the entity
* @param loc Location to associate with the entity
*/
public void addUndeclaredPE(String name, Location loc)
{
addUndeclaredEntity(name, loc, true);
}
/**
* Records an undeclared entity, flagged as a GE
* (passes {@code false} as the {@code isPe} argument).
*
* @param name Name of the entity
* @param loc Location to associate with the entity
*/
public void addUndeclaredGE(String name, Location loc)
{
addUndeclaredEntity(name, loc, false);
}
/**
* Reports the recorded undeclared entity by delegating to it.
*
* NOTE(review): {@code mUndeclaredEntity} is dereferenced without a null
* check, so this fails with a NullPointerException when nothing has been
* recorded; presumably callers check {@code hasUndeclaredEntities()}
* first -- confirm.
*
* @param ctxt Context passed on to the entity's report method
* @param dtd Validator passed on to the entity's report method
*/
public void reportUndeclared(ValidationContext ctxt, XMLValidator dtd)
throws XMLStreamException
{
mUndeclaredEntity.reportUndeclared(ctxt, dtd);
}
/*
////////////////////////////////////////////////////
// Accessors:
////////////////////////////////////////////////////
*/
/** @return True if an undeclared entity has been recorded for this value */
public boolean hasUndeclaredEntities() {
return (mUndeclaredEntity != null);
}
/**
* @return The stored value, regardless of whether an undeclared entity
*   has been recorded
*/
public String getValue() {
return mValue;
}
/**
 * Accessor that only hands out the value when no undeclared entity
 * has been recorded.
 *
 * @return The stored value if no undeclared entity was recorded;
 *   null otherwise, in which case caller is to figure out the exact
 *   problem and report it appropriately to the application.
 */
public String getValueIfOk()
{
    if (mUndeclaredEntity != null) {
        return null;
    }
    return mValue;
}
/** @return True only for the shared {@code sRequired} instance (identity comparison) */
public boolean isRequired() {
return (this == sRequired);
}
/** @return True if the default value type is {@code DEF_FIXED} */
public boolean isFixed() {
return (mDefValueType == DEF_FIXED);
}
/**
 * @return True if the default value type is either
 *   {@code DEF_DEFAULT} or {@code DEF_FIXED}
 */
public boolean hasDefaultValue() {
    if (mDefValueType == DEF_DEFAULT) {
        return true;
    }
    return mDefValueType == DEF_FIXED;
}
/**
* Method used by the element to figure out if attribute needs "special"
* checking; basically if it's required, and/or has a default value.
* In both cases missing the attribute has specific consequences, either
* exception or addition of a default value.
*
* @return False only for the shared {@code sImplied} instance
*   (identity comparison); true for every other instance
*/
public boolean isSpecial() {
// Only non-special if #IMPLIED
return (this != sImplied);
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
* Records an undeclared entity, but only if none has been recorded
* yet: later calls are silently ignored, so just the first one is kept.
*
* @param name Name of the entity
* @param loc Location to associate with the entity
* @param isPe Flag passed on to the {@code UndeclaredEntity} constructor
*/
private void addUndeclaredEntity(String name, Location loc, boolean isPe)
{
if (mUndeclaredEntity == null) {
mUndeclaredEntity = new UndeclaredEntity(name, loc, isPe);
}
}
/*
////////////////////////////////////////////////////
// Helper class(es):
////////////////////////////////////////////////////
*/
/**
 * Immutable record of a reference to an entity that had not been
 * declared: its name, where it was seen, and whether it was flagged
 * as a parameter entity (PE) rather than a general entity (GE).
 */
final static class UndeclaredEntity
{
    final String mName;
    final boolean mIsPe;
    final Location mLocation;

    UndeclaredEntity(String name, Location loc, boolean isPe)
    {
        mName = name;
        mIsPe = isPe;
        mLocation = loc;
    }

    /**
     * Builds a fatal validation problem describing this entity and
     * reports it through the given context.
     *
     * @param ctxt Context used for reporting the problem
     * @param dtd Validator set as the reporter of the problem
     *
     * @throws XMLStreamException If thrown by the context's report method
     */
    public void reportUndeclared(ValidationContext ctxt, XMLValidator dtd)
        throws XMLStreamException
    {
        /* The counterpart of a "general" entity is a "parameter" entity
         * (both kinds are parsed entities), so that is the term the
         * message needs to use for PEs.
         */
        final String kind = mIsPe ? "parameter" : "general";
        String msg = MessageFormat.format(ErrorConsts.ERR_DTD_UNDECLARED_ENTITY, kind, mName);
        XMLValidationProblem prob = new XMLValidationProblem
            (mLocation, msg, XMLValidationProblem.SEVERITY_FATAL);
        prob.setReporter(dtd);
        ctxt.reportProblem(prob);
    }
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/EmptyValidator.java 0000664 0000000 0000000 00000002533 13257562550 0027604 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.dtd;
import com.ctc.wstx.util.PrefixedName;
/**
 * Simple content model validator that accepts no elements, ever; this
 * is true for pure #PCDATA content model as well as EMPTY content model.
 * Can be used as a singleton, since all info needed for diagnostics
 * is passed via methods.
 *<p>
 * Two shared instances exist; they differ only in the error message
 * returned when an element is encountered.
 */
public class EmptyValidator
    extends StructValidator
{
    final static EmptyValidator sPcdataInstance = new EmptyValidator("No elements allowed in pure #PCDATA content model");

    final static EmptyValidator sEmptyInstance = new EmptyValidator("No elements allowed in EMPTY content model");

    /** Message returned for every element passed to {@link #tryToValidate}. */
    final String mErrorMsg;

    private EmptyValidator(String errorMsg) {
        mErrorMsg = errorMsg;
    }

    /**
     * @return Shared instance that uses the pure #PCDATA error message
     */
    public static EmptyValidator getPcdataInstance() { return sPcdataInstance; }

    /**
     * @return Shared instance that uses the EMPTY content model error
     *   message
     */
    public static EmptyValidator getEmptyInstance() {
        // Must hand out the EMPTY-specific instance, not the #PCDATA one
        return sEmptyInstance;
    }

    /**
     * Simple; can always (re)use instance itself; no state information
     * is kept.
     */
    @Override
    public StructValidator newInstance() {
        return this;
    }

    /**
     * @param elemName Name of the element encountered (not used)
     *
     * @return Always the error message of this instance, since no
     *   element is ever acceptable
     */
    @Override
    public String tryToValidate(PrefixedName elemName) {
        return mErrorMsg;
    }

    /**
     * If we ever get as far as element closing, things are all good;
     * can just return null.
     */
    @Override
    public String fullyValid() {
        return null;
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java 0000664 0000000 0000000 00000374323 13257562550 0027232 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.text.MessageFormat;
import java.util.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.NotationDeclaration;
import org.codehaus.stax2.validation.XMLValidationProblem;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.ent.*;
import com.ctc.wstx.evt.WNotationDeclaration;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.io.WstxInputSource;
import com.ctc.wstx.util.*;
/**
* Reader that reads in DTD information from internal or external subset.
*writeAscii
method, since it is the most convenient
* place to catch cases where a text segment ends with an unmatched
* surrogate pair half.
*/
public abstract class EncodingXmlWriter
extends XmlWriter
{
/**
* Let's use a typical default to have a compromise between large
* enough chunks to output, and minimizing memory overhead.
* 4k should be close enough to a physical page to work out
* acceptably, without causing excessive (if temporary) memory usage.
*/
final static int DEFAULT_BUFFER_SIZE = 4000;
final static byte BYTE_SPACE = (byte) ' ';
final static byte BYTE_COLON = (byte) ':';
final static byte BYTE_SEMICOLON = (byte) ';';
final static byte BYTE_LBRACKET = (byte) '[';
final static byte BYTE_RBRACKET = (byte) ']';
final static byte BYTE_QMARK = (byte) '?';
final static byte BYTE_EQ = (byte) '=';
final static byte BYTE_SLASH = (byte) '/';
final static byte BYTE_HASH = (byte) '#';
final static byte BYTE_HYPHEN = (byte) '-';
final static byte BYTE_LT = (byte) '<';
final static byte BYTE_GT = (byte) '>';
final static byte BYTE_AMP = (byte) '&';
final static byte BYTE_QUOT = (byte) '"';
final static byte BYTE_APOS = (byte) '\'';
final static byte BYTE_A = (byte) 'a';
final static byte BYTE_G = (byte) 'g';
final static byte BYTE_L = (byte) 'l';
final static byte BYTE_M = (byte) 'm';
final static byte BYTE_O = (byte) 'o';
final static byte BYTE_P = (byte) 'p';
final static byte BYTE_Q = (byte) 'q';
final static byte BYTE_S = (byte) 's';
final static byte BYTE_T = (byte) 't';
final static byte BYTE_U = (byte) 'u';
final static byte BYTE_X = (byte) 'x';
/*
////////////////////////////////////////////////
// Output state, buffering
////////////////////////////////////////////////
*/
/**
* Actual output stream to use for outputting encoded content as
* bytes.
*/
private final OutputStream mOut;
protected byte[] mOutputBuffer;
protected int mOutputPtr;
/**
* In case a split surrogate pair is output (which can only successfully
* occur with either {@code writeRaw} or
* {@code writeCharacters}), the first part is temporarily stored
* within this member variable.
*/
protected int mSurrogate = 0;
/*
////////////////////////////////////////////////
//
////////////////////////////////////////////////
*/
/**
* Passes the configuration, encoding and auto-close flag to the super
* class, keeps a reference to the output stream, and obtains the
* output buffer from the configuration object (requesting
* {@code DEFAULT_BUFFER_SIZE} bytes).
*
* @param out Stream that encoded bytes are written to
* @param cfg Configuration; also used for allocating the output buffer
* @param encoding Encoding, passed to the super class
* @param autoclose Auto-close setting, passed to the super class
*/
public EncodingXmlWriter(OutputStream out, WriterConfig cfg, String encoding,
boolean autoclose)
throws IOException
{
super(cfg, encoding, autoclose);
mOut = out;
mOutputBuffer = cfg.allocFullBBuffer(DEFAULT_BUFFER_SIZE);
mOutputPtr = 0;
}
/**
* This method is needed by the super class, to calculate hard
* byte/char offsets.
*
* @return Current value of {@code mOutputPtr}, the index in the output
*   buffer at which the next byte gets stored
*/
@Override
protected int getOutputPtr() {
return mOutputPtr;
}
/*
////////////////////////////////////////////////
// Partial API implementation
////////////////////////////////////////////////
*/
/** @return The underlying output stream passed to the constructor */
@Override
final protected OutputStream getOutputStream() {
return mOut;
}
/** @return Always null */
@Override
final protected Writer getWriter() {
// No writers are involved with these implementations...
return null;
}
/**
 * Flushes pending output, gives the output buffer back to the
 * configuration object, and closes the underlying stream if asked to.
 *
 * @param forceRealClose If true, underlying stream is closed even if
 *   auto-closing is not enabled
 */
@Override
public void close(boolean forceRealClose) throws IOException
{
    flush();

    // Give the buffer back (at most once: reference is cleared first)
    if (mOutputBuffer != null) {
        final byte[] released = mOutputBuffer;
        mOutputBuffer = null;
        mConfig.freeFullBBuffer(released);
    }

    // Underlying stream is only closed on request, or with auto-close
    if (!forceRealClose && !mAutoCloseOutput) {
        return;
    }
    /* 14-Nov-2008, TSa: Wrt [WSTX-163]; no need to
     *   check whether mOut implements CompletelyCloseable
     *   (unlike with BufferingXmlWriter)
     */
    mOut.close();
}
/**
* Writes buffered bytes to the underlying stream (via flushBuffer()),
* and then flushes that stream as well.
*/
@Override
public final void flush() throws IOException
{
flushBuffer();
mOut.flush();
}
@Override
public abstract void writeRaw(char[] cbuf, int offset, int len)
throws IOException;
@Override
public abstract void writeRaw(String str, int offset, int len)
throws IOException;
/*
//////////////////////////////////////////////////
// "Trusted" low-level output methods (that do not
// need to verify validity of input)
//////////////////////////////////////////////////
*/
/**
 * Writes the marker that opens a CDATA section.
 */
@Override
public final void writeCDataStart()
    throws IOException
{
    // Counterpart of the "]]>" end marker; an empty String here would
    // leave the section without its opening marker
    writeAscii("<![CDATA[");
}
/**
 * Writes the marker that opens a comment.
 */
@Override
public final void writeCommentStart()
    throws IOException
{
    // An empty String here would leave the comment without its
    // opening marker
    writeAscii("<!--");
}
/**
 * Writes the start of a processing instruction: the opening marker,
 * the target as given, and optionally one space.
 *
 * @param target Target of the processing instruction; written as is
 * @param addSpace Whether a single space is to be written after target
 */
@Override
public final void writePIStart(String target, boolean addSpace)
    throws IOException
{
    writeAscii(BYTE_LT, BYTE_QMARK);
    writeRaw(target);
    if (!addSpace) {
        return;
    }
    writeAscii(BYTE_SPACE);
}
/** Writes the two bytes {@code BYTE_QMARK}, {@code BYTE_GT}, which end a processing instruction. */
@Override
public final void writePIEnd() throws IOException
{
writeAscii(BYTE_QMARK, BYTE_GT);
}
/*
////////////////////////////////////////////////
// Higher-level output methods, text output
////////////////////////////////////////////////
*/
@Override
public int writeCData(String data) throws IOException
{
writeAscii("= 0) {
return ix;
}
writeAscii("]]>");
return -1;
}
@Override
public int writeCData(char[] cbuf, int offset, int len)
throws IOException
{
writeAscii("= 0) {
return ix;
}
writeAscii("]]>");
return -1;
}
/**
 * Writes text content, using the custom text writer if one has been
 * set, and the default text output method otherwise.
 *
 * @param data Text to write
 */
@Override
public final void writeCharacters(String data)
    throws IOException
{
    // Note: may get second part of a surrogate
    if (mTextWriter == null) { // no custom escaping, use default
        writeTextContent(data);
        return;
    }
    mTextWriter.write(data);
}
/**
 * Writes text content from a char array, using the custom text writer
 * if one has been set, and the default text output method otherwise.
 *
 * @param cbuf Buffer that contains the text
 * @param offset Offset of the first character to write
 * @param len Number of characters to write
 */
@Override
public final void writeCharacters(char[] cbuf, int offset, int len)
    throws IOException
{
    // Note: may get second part of a surrogate
    if (mTextWriter == null) { // no custom escaping, use default
        writeTextContent(cbuf, offset, len);
        return;
    }
    mTextWriter.write(cbuf, offset, len);
}
/**
* Method that will try to output the content as specified. If
* the content passed in has embedded "--" in it, it will either
* add an intervening space between consequtive hyphens (if content
* fixing is enabled), or return the offset of the first hyphen in
* multi-hyphen sequence.
*/
@Override
public int writeComment(String data)
throws IOException
{
writeAscii("");
return -1;
}
/**
* Writes the given String as is, using the raw write method; calls
* throwUnpairedSurrogate() first if a surrogate is still pending.
*
* @param data Content to write, in full
*/
@Override
public void writeDTD(String data)
throws IOException
{
if (mSurrogate != 0) {
throwUnpairedSurrogate();
}
writeRaw(data, 0, data.length());
}
@Override
public void writeDTD(String rootName, String systemId, String publicId,
String internalSubset)
throws IOException, XMLStreamException
{
writeAscii(" 0) {
writeAscii(BYTE_SPACE, BYTE_LBRACKET);
writeRaw(internalSubset, 0, internalSubset.length());
writeAscii(BYTE_RBRACKET);
}
writeAscii(BYTE_GT);
}
/**
* Writes an entity reference: {@code BYTE_AMP}, the name (via
* writeName()), then {@code BYTE_SEMICOLON}; calls
* throwUnpairedSurrogate() first if a surrogate is still pending.
*
* @param name Name of the entity to refer to
*/
@Override
public void writeEntityReference(String name)
throws IOException, XMLStreamException
{
if (mSurrogate != 0) {
throwUnpairedSurrogate();
}
writeAscii(BYTE_AMP);
writeName(name);
writeAscii(BYTE_SEMICOLON);
}
@Override
public void writeXmlDeclaration(String version, String encoding, String standalone)
throws IOException
{
final byte byQuote = (mUseDoubleQuotesInXmlDecl ? BYTE_QUOT : BYTE_APOS);
writeAscii(" 0) {
writeAscii(" encoding=");
writeAscii(byQuote);
// Should be ascii, but let's play it safe:
writeRaw(encoding, 0, encoding.length());
writeAscii(byQuote);
}
if (standalone != null) {
writeAscii(" standalone=");
writeAscii(byQuote);
writeAscii(standalone);
writeAscii(byQuote);
}
writeAscii(BYTE_QMARK, BYTE_GT);
}
/**
 * Writes a complete processing instruction.
 *
 * @param target Target of the processing instruction
 * @param data Data to write after the target; if null or empty,
 *   neither the separating space nor any data is written
 *
 * @return -1 if the whole instruction was written; otherwise the
 *   non-negative value returned by the data write method, in which
 *   case the end marker has NOT been written
 */
@Override
public int writePI(String target, String data)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_LT, BYTE_QMARK);
    writeName(target);

    final boolean hasData = (data != null) && (data.length() > 0);
    if (hasData) {
        writeAscii(BYTE_SPACE);
        final int problemIx = writePIData(data);
        if (problemIx >= 0) { // embedded "?>"?
            return problemIx;
        }
    }
    writeAscii(BYTE_QMARK, BYTE_GT);
    return -1;
}
/*
////////////////////////////////////////////////////
// Write methods, elements
////////////////////////////////////////////////////
*/
/**
* Writes the beginning of a start tag for an element without prefix:
* {@code BYTE_LT} followed by the name (via writeName()).
*
* @param localName Name of the element
*/
@Override
public void writeStartTagStart(String localName)
throws IOException, XMLStreamException
{
writeAscii(BYTE_LT);
writeName(localName);
}
/**
 * Writes the beginning of a start tag for a possibly prefixed element.
 *
 * @param prefix Prefix of the element; if null or empty, element is
 *   written as if the single-argument method had been called
 * @param localName Local name of the element
 */
@Override
public void writeStartTagStart(String prefix, String localName)
    throws IOException, XMLStreamException
{
    final boolean hasPrefix = (prefix != null) && (prefix.length() != 0);
    if (hasPrefix) {
        writeAscii(BYTE_LT);
        writeName(prefix);
        writeAscii(BYTE_COLON);
        writeName(localName);
    } else {
        writeStartTagStart(localName);
    }
}
/** Writes the single byte {@code BYTE_GT} that ends a start tag. */
@Override
public void writeStartTagEnd()
throws IOException
{
writeAscii(BYTE_GT);
}
/**
 * Writes the end of an empty element tag; preceded by a space if
 * {@code mAddSpaceAfterEmptyElem} is set.
 */
@Override
public void writeStartTagEmptyEnd()
    throws IOException
{
    if (!mAddSpaceAfterEmptyElem) {
        writeAscii(BYTE_SLASH, BYTE_GT);
        return;
    }
    writeAscii(" />");
}
/**
* Writes an end tag for an element without prefix. Name is written
* with writeNameUnchecked().
*
* @param localName Name of the element
*/
@Override
public void writeEndTag(String localName)
throws IOException
{
writeAscii(BYTE_LT, BYTE_SLASH);
/* At this point, it is assumed caller knows that end tag
* matches with start tag, and that it (by extension) has been
* validated if and as necessary
*/
writeNameUnchecked(localName);
writeAscii(BYTE_GT);
}
/**
 * Writes an end tag for a possibly prefixed element. Names are written
 * with the unchecked name write method.
 *
 * @param prefix Prefix of the element; prefix and colon are left out
 *   if this is null or empty
 * @param localName Local name of the element
 */
@Override
public void writeEndTag(String prefix, String localName)
    throws IOException
{
    writeAscii(BYTE_LT, BYTE_SLASH);
    /* At this point, it is assumed caller knows that end tag
     * matches with start tag, and that it (by extension) has been
     * validated if and as necessary
     */
    final boolean hasPrefix = (prefix != null) && (prefix.length() > 0);
    if (hasPrefix) {
        writeNameUnchecked(prefix);
        writeAscii(BYTE_COLON);
    }
    writeNameUnchecked(localName);
    writeAscii(BYTE_GT);
}
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
 * Writes an attribute without prefix; value is always enclosed in
 * double quotes.
 *
 * @param localName Name of the attribute
 * @param value Value of the attribute; written using the custom
 *   attribute value writer if one is set, default method otherwise
 */
@Override
public void writeAttribute(String localName, String value)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    final int valueLen = value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // no custom escaping, default
            writeAttrValue(value);
        } else {
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Writes an attribute without prefix, with value given as part of a
 * char array; value is always enclosed in double quotes.
 *
 * @param localName Name of the attribute
 * @param value Buffer that contains the value
 * @param offset Offset of the first character of the value
 * @param len Length of the value; nothing is written between the
 *   quotes unless this is positive
 */
@Override
public void writeAttribute(String localName, char[] value, int offset, int len)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    if (len > 0) {
        if (mAttrValueWriter == null) { // no custom escaping, default
            writeAttrValue(value, offset, len);
        } else {
            mAttrValueWriter.write(value, offset, len);
        }
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Writes a prefixed attribute; value is always enclosed in double
 * quotes. Prefix and colon are always written.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param value Value of the attribute; written using the custom
 *   attribute value writer if one is set, default method otherwise
 */
@Override
public void writeAttribute(String prefix, String localName, String value)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    final int valueLen = value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // no custom escaping, default
            writeAttrValue(value);
        } else {
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Writes a prefixed attribute, with value given as part of a char
 * array; value is always enclosed in double quotes. Prefix and colon
 * are always written.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param value Buffer that contains the value
 * @param offset Offset of the first character of the value
 * @param len Length of the value; nothing is written between the
 *   quotes unless this is positive
 */
@Override
public void writeAttribute(String prefix, String localName, char[] value, int offset, int len)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    if (len > 0) {
        if (mAttrValueWriter == null) { // no custom escaping, default
            writeAttrValue(value, offset, len);
        } else {
            mAttrValueWriter.write(value, offset, len);
        }
    }
    writeAscii(BYTE_QUOT);
}
/*
////////////////////////////////////////////////
// Methods used by Typed Access API
////////////////////////////////////////////////
*/
/**
 * Non-validating version of typed write method: the encoder writes
 * directly into the output buffer, which is flushed between rounds
 * until the encoder reports completion.
 *
 * @param enc Encoder that produces the value to write
 */
@Override
public final void writeTypedElement(AsciiValueEncoder enc)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    if (enc.bufferNeedsFlush(mOutputBuffer.length - mOutputPtr)) {
        flush();
    }
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    // Not completed means buffer got full: flush, then encode some more
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    }
}
/**
 * Validating version of typed write method: value is encoded into the
 * copy buffer one chunk at a time, and each chunk is first passed to
 * the validator and then written out.
 *
 * @param enc Encoder that produces the value to write
 * @param validator Validator that each encoded chunk is passed to
 * @param copyBuffer Temporary buffer to encode chunks into
 */
@Override
public final void writeTypedElement(AsciiValueEncoder enc,
        XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    /* Ok, this gets trickier: can't use efficient direct-to-bytes
     * encoding since validator won't be able to use it. Instead
     * have to use temporary copy buffer.
     */
    final int bufLen = copyBuffer.length;

    // Copy buffer should never be too small, no need to check up front
    while (true) {
        final int end = enc.encodeMore(copyBuffer, 0, bufLen);
        // False -> can't be sure it's the whole remaining text
        validator.validateText(copyBuffer, 0, end, false);
        writeRawAscii(copyBuffer, 0, end);
        if (enc.isCompleted()) {
            break;
        }
    }
}
/**
 * Writes an attribute without prefix, with the value produced by the
 * given encoder directly into the output buffer (flushed between
 * rounds until the encoder reports completion).
 *
 * @param localName Name of the attribute
 * @param enc Encoder that produces the attribute value
 */
@Override
public void writeTypedAttribute(String localName, AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    if (enc.bufferNeedsFlush(mOutputBuffer.length - mOutputPtr)) {
        flush();
    }
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Writes a prefixed attribute, with the value produced by the given
 * encoder directly into the output buffer (flushed between rounds
 * until the encoder reports completion). Prefix and colon are always
 * written.
 *
 * @param prefix Prefix of the attribute
 * @param localName Local name of the attribute
 * @param enc Encoder that produces the attribute value
 */
@Override
public void writeTypedAttribute(String prefix, String localName,
        AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    // NOTE: leftover debug print to System.err has been removed; a
    // writer must not produce console output as a side effect
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    if (enc.bufferNeedsFlush(mOutputBuffer.length - mOutputPtr)) {
        flush();
    }
    while (true) {
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
        if (enc.isCompleted()) {
            break;
        }
        flush();
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Validating version of typed attribute write method: value is encoded
 * via the copy buffer, written out, and passed to the validator as one
 * complete value.
 *
 * @param prefix Prefix of the attribute; prefix and colon are left out
 *   of the output if this is null or empty
 * @param localName Local name of the attribute
 * @param nsURI Namespace URI of the attribute; null is passed to the
 *   validator as an empty String
 * @param enc Encoder that produces the attribute value
 * @param validator Validator that the complete value is passed to
 * @param copyBuffer Temporary buffer to encode chunks into
 */
@Override
public void writeTypedAttribute(String prefix, String localName, String nsURI,
        AsciiValueEncoder enc,
        XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    boolean hasPrefix = (prefix != null && prefix.length() > 0);
    if (nsURI == null) {
        nsURI = "";
    }
    // NOTE: leftover debug print to System.err has been removed
    writeAscii(BYTE_SPACE);
    if (hasPrefix) {
        writeName(prefix);
        writeAscii(BYTE_COLON);
    }
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    /* Ok, this gets trickier: can't use efficient direct-to-bytes
     * encoding since validator won't be able to use it. Instead
     * have to use temporary copy buffer.
     * In addition, attributes to validate can not be
     * split (validators expect complete values). So, if value
     * won't fit as is, may need to aggregate using StringBuilder
     */
    final int copyBufferLen = copyBuffer.length;

    // First, let's see if one call is enough
    int last = enc.encodeMore(copyBuffer, 0, copyBufferLen);
    writeRawAscii(copyBuffer, 0, last);
    if (enc.isCompleted()) {
        /* Closing quote has to be written on this path too: without
         * it the attribute value would be left unterminated.
         */
        writeAscii(BYTE_QUOT);
        validator.validateAttribute(localName, nsURI, prefix, copyBuffer, 0, last);
        return;
    }

    // If not, must combine first
    StringBuilder sb = new StringBuilder(copyBufferLen << 1);
    sb.append(copyBuffer, 0, last);
    do {
        last = enc.encodeMore(copyBuffer, 0, copyBufferLen);
        writeRawAscii(copyBuffer, 0, last);
        sb.append(copyBuffer, 0, last);
    } while (!enc.isCompleted());
    writeAscii(BYTE_QUOT);

    // Then validate
    String valueStr = sb.toString();
    validator.validateAttribute(localName, nsURI, prefix, valueStr);
}
/*
////////////////////////////////////////////////
// Methods for sub-classes to use
////////////////////////////////////////////////
*/
/**
 * Writes the buffered bytes, if any, to the underlying stream; the
 * stream itself is not flushed. Does nothing if the buffer is empty
 * or no longer exists.
 */
protected final void flushBuffer()
    throws IOException
{
    if (mOutputPtr <= 0 || mOutputBuffer == null) {
        return;
    }
    // Pointer is reset before the write, so that buffer is considered
    // empty even if the write fails
    final int count = mOutputPtr;
    mOutputPtr = 0;
    mOut.write(mOutputBuffer, 0, count);
}
/**
 * Appends a single byte to the output buffer, flushing the buffer
 * first if it is full.
 *
 * @param b Byte to append
 */
protected final void writeAscii(byte b)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final boolean bufferFull = (mOutputPtr >= mOutputBuffer.length);
    if (bufferFull) {
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = b;
}
/**
 * Appends two bytes to the output buffer, flushing the buffer first
 * unless there is room left past the position of the second byte.
 *
 * @param b1 First byte to append
 * @param b2 Second byte to append
 */
protected final void writeAscii(byte b1, byte b2)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int secondPos = mOutputPtr + 1;
    if (secondPos >= mOutputBuffer.length) {
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = b1;
    mOutputBuffer[mOutputPtr++] = b2;
}
/**
 * Appends the characters of the given String to the output buffer,
 * narrowing each character to a single byte (so the content is expected
 * to be ASCII). Strings longer than the whole buffer are delegated to
 * {@code writeRaw(String, int, int)}.
 *
 * @param str Text to append
 * @throws IOException if a first surrogate is still pending (unpaired),
 *   or if flushing/writing fails
 */
protected final void writeAscii(String str)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int count = str.length();
    final byte[] out = mOutputBuffer;
    int outPtr = mOutputPtr;

    if ((outPtr + count) >= out.length) {
        // Would not even fit in an empty buffer? Use the general method
        if (count > out.length) {
            writeRaw(str, 0, count);
            return;
        }
        flushBuffer();
        outPtr = mOutputPtr;
    }
    mOutputPtr += count;
    int i = 0;
    while (i < count) {
        out[outPtr++] = (byte) str.charAt(i++);
    }
}
/**
 * Appends a range of characters to the output buffer, narrowing each
 * character to a single byte (so the content is expected to be ASCII).
 * Ranges longer than the whole buffer are delegated to
 * {@code writeRaw(char[], int, int)}.
 *
 * @param buf Buffer that contains the characters to write
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 * @throws IOException if a first surrogate is still pending (unpaired),
 *   or if flushing/writing fails
 */
@Override
public final void writeRawAscii(char[] buf, int offset, int len)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final byte[] out = mOutputBuffer;
    int outPtr = mOutputPtr;

    if ((outPtr + len) >= out.length) {
        if (len > out.length) {
            writeRaw(buf, offset, len);
            return;
        }
        flushBuffer();
        outPtr = mOutputPtr;
    }
    mOutputPtr += len;
    final int inEnd = offset + len;
    for (int in = offset; in < inEnd; ++in, ++outPtr) {
        out[outPtr] = (byte) buf[in];
    }
}
/**
 * Writes the given character as an entity: one of the five pre-defined
 * entities where applicable, otherwise a hexadecimal character reference
 * using the shortest digit sequence. Output consists of ascii bytes only,
 * so it is appended directly to the output buffer.
 *
 * @param c Character (code point) to write
 *
 * @return New value of mOutputPtr
 *
 * @throws IOException if flushing the output buffer fails
 */
protected final int writeAsEntity(int c)
    throws IOException
{
    final byte[] out = mOutputBuffer;
    int outPtr = mOutputPtr;
    // Longest form is "&#x" + up to 6 hex digits + ";"
    if (out.length <= (outPtr + 10)) {
        flushBuffer();
        outPtr = mOutputPtr;
    }
    out[outPtr++] = BYTE_AMP;

    switch (c) {
    // Not mandatory, but let's use pre-defined entities where possible
    case '&':
        out[outPtr++] = BYTE_A;
        out[outPtr++] = BYTE_M;
        out[outPtr++] = BYTE_P;
        break;
    case '<':
        out[outPtr++] = BYTE_L;
        out[outPtr++] = BYTE_T;
        break;
    case '>':
        out[outPtr++] = BYTE_G;
        out[outPtr++] = BYTE_T;
        break;
    case '\'':
        out[outPtr++] = BYTE_A;
        out[outPtr++] = BYTE_P;
        out[outPtr++] = BYTE_O;
        out[outPtr++] = BYTE_S;
        break;
    case '"':
        out[outPtr++] = BYTE_Q;
        out[outPtr++] = BYTE_U;
        out[outPtr++] = BYTE_O;
        out[outPtr++] = BYTE_T;
        break;
    default:
        {
            out[outPtr++] = BYTE_HASH;
            out[outPtr++] = BYTE_X;
            int lowNibble = c;
            if (c >= 256) {
                // Leading zero digits are skipped; digits for bits 4 and up
                boolean started = false;
                for (int shift = 20; shift > 0; shift -= 4) {
                    final int digit = (c >> shift) & 0xF;
                    if (started || digit > 0) {
                        out[outPtr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                        started = true;
                    }
                }
                lowNibble = c & 0xF;
            } else if (c >= 16) {
                // Two-digit form; values below 16 (tab, cr, lf) need just one
                final int digit = (c >> 4);
                out[outPtr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                lowNibble = c & 0xF;
            }
            out[outPtr++] = (byte) ((lowNibble < 10) ? ('0' + lowNibble) : (('a' - 10) + lowNibble));
        }
    }
    out[outPtr++] = BYTE_SEMICOLON;
    mOutputPtr = outPtr;
    return outPtr;
}
/**
 * Writes an element or attribute name, first verifying its validity
 * if name checking ({@code mCheckNames}) is enabled.
 *
 * @param name Name to write
 * @throws IOException if writing fails
 * @throws XMLStreamException declared for name verification failures
 */
protected final void writeName(String name)
    throws IOException, XMLStreamException
{
    final boolean verify = mCheckNames;
    if (verify) {
        verifyNameValidity(name, mNsAware);
    }
    // TODO: maybe we could reuse some previously encoded names?
    final int nameLength = name.length();
    writeRaw(name, 0, nameLength);
}
/**
 * Writes a name as-is, without any validity checks.
 *
 * @param name Name to write
 * @throws IOException if writing fails
 */
protected final void writeNameUnchecked(String name)
    throws IOException
{
    final int nameLength = name.length();
    writeRaw(name, 0, nameLength);
}
/**
 * Combines the pending first surrogate ({@code mSurrogate}) with the
 * given second surrogate into a single code point. The pending
 * surrogate is cleared whether or not the combination succeeds.
 *
 * @param secondSurr Second (low) surrogate character of the pair
 * @return Code point the surrogate pair represents
 * @throws IOException if either half is outside its legal surrogate range,
 *   or if the resulting code point is above the legal XML character range
 */
protected final int calcSurrogate(int secondSurr)
    throws IOException
{
    final int high = mSurrogate;
    mSurrogate = 0;

    // First half must be a valid first surrogate...
    final boolean highOk = (high >= SURR1_FIRST) && (high <= SURR1_LAST);
    if (!highOk) {
        throwUnpairedSurrogate(high);
    }
    // ... and second half a valid second surrogate
    final boolean lowOk = (secondSurr >= SURR2_FIRST) && (secondSurr <= SURR2_LAST);
    if (!lowOk) {
        throwUnpairedSurrogate(secondSurr);
    }
    final int highBits = (high - SURR1_FIRST) << 10;
    final int lowBits = secondSurr - SURR2_FIRST;
    final int codePoint = 0x10000 + highBits + lowBits;
    if (codePoint > XmlConsts.MAX_UNICODE_CHAR) {
        throw new IOException("Illegal surrogate character pair, resulting code 0x"+Integer.toHexString(codePoint)+" above legal XML character range");
    }
    return codePoint;
}
/**
 * Reports the currently pending first surrogate as unpaired, clearing
 * the pending state before doing so.
 *
 * @throws IOException always
 */
protected final void throwUnpairedSurrogate()
    throws IOException
{
    final int pending = mSurrogate;
    // Clear state first, so the problem is reported just once
    mSurrogate = 0;
    throwUnpairedSurrogate(pending);
}
/**
 * Flushes output and then throws an exception that reports the given
 * surrogate character as unpaired.
 *
 * @param code Surrogate character that lacks its pair
 * @throws IOException always
 */
protected final void throwUnpairedSurrogate(int code)
    throws IOException
{
    // Flushing first makes it easier to see where output stopped
    flush();
    final String hex = Integer.toHexString(code);
    throw new IOException("Unpaired surrogate character (0x"+hex+")");
}
/*
////////////////////////////////////////////////
// Abstract methods for sub-classes to define
////////////////////////////////////////////////
*/
/**
* Writes an attribute value; encoding-specific handling (including any
* escaping) is left to the concrete sub-class.
*
* @param data Attribute value to write
* @throws IOException on write failure
*/
protected abstract void writeAttrValue(String data)
throws IOException;
/**
* Writes an attribute value contained in a character buffer.
*
* @param value Buffer that contains the value
* @param offset Index of the first character of the value
* @param len Number of characters in the value
* @throws IOException on write failure
*/
protected abstract void writeAttrValue(char[] value, int offset, int len)
throws IOException;
/**
* Writes content of a CDATA section (not including start/end markers).
*
* @param data Content to write
* @return NOTE(review): the ISO-Latin1 implementation in this code base
*   returns -1 when content was written completely, and otherwise the
*   offset of an embedded "]]>" that was not fixed; presumably this is
*   the general contract -- confirm.
* @throws IOException on write failure
*/
protected abstract int writeCDataContent(String data)
throws IOException;
/**
* Writes content of a CDATA section from a character buffer.
*
* @param cbuf Buffer that contains the content
* @param start Index of the first character of the content
* @param len Number of characters of content
* @return See {@link #writeCDataContent(String)}
* @throws IOException on write failure
*/
protected abstract int writeCDataContent(char[] cbuf, int start, int len)
throws IOException;
/**
* Writes content of a comment (not including start/end markers).
*
* @param data Content to write
* @return NOTE(review): the ISO-Latin1 implementation returns -1 on
*   success, and otherwise the offset of an embedded "--" that was
*   not fixed; presumably the general contract -- confirm.
* @throws IOException on write failure
*/
protected abstract int writeCommentContent(String data)
throws IOException;
/**
* Writes data portion of a processing instruction.
*
* @param data Processing instruction data to write
* @return NOTE(review): the ISO-Latin1 implementation returns -1 on
*   success, and otherwise an offset related to an embedded "?>";
*   presumably the general contract -- confirm.
* @throws IOException on write failure
* @throws XMLStreamException declared for sub-classes to use
*/
protected abstract int writePIData(String data)
throws IOException, XMLStreamException;
/**
* Writes textual element content; encoding-specific handling (including
* any escaping) is left to the concrete sub-class.
*
* @param data Text to write
* @throws IOException on write failure
*/
protected abstract void writeTextContent(String data)
throws IOException;
/**
* Writes textual element content from a character buffer.
*
* @param cbuf Buffer that contains the text
* @param start Index of the first character of the text
* @param len Number of characters of text
* @throws IOException on write failure
*/
protected abstract void writeTextContent(char[] cbuf, int start, int len)
throws IOException;
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/ISOLatin1XmlWriter.java 0000664 0000000 0000000 00000072000 13257562550 0030073 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.*;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.io.CharsetNames;
/**
* Concrete implementation of {@link EncodingXmlWriter} used when output
* is to be encoded using ISO-8859-1, aka ISO-Latin1 encoding.
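*<p>
* (Part of this comment was lost when markup was stripped; the surviving
* fragment refers to the {@code writeRaw} methods.)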
*/
public final class ISOLatin1XmlWriter
extends EncodingXmlWriter
{
/**
* Constructs a writer that outputs ISO-8859-1 encoded content; all
* arguments are passed to the {@link EncodingXmlWriter} constructor,
* along with {@link CharsetNames#CS_ISO_LATIN1} as the encoding name.
*
* @param out Stream to pass to the base class constructor
* @param cfg Writer configuration to pass to the base class constructor
* @param autoclose Flag passed to the base class constructor; presumably
*   controls whether the stream is closed along with the writer (confirm
*   against the base class)
* @throws IOException if thrown by the base class constructor
*/
public ISOLatin1XmlWriter(OutputStream out, WriterConfig cfg, boolean autoclose)
throws IOException
{
super(out, cfg, CharsetNames.CS_ISO_LATIN1, autoclose);
}
/**
 * Writes given characters as-is (no escaping), one byte per character.
 * <p>
 * If content checking ({@code mCheckContent}) is enabled, control
 * characters other than tab, CR and LF -- and for XML 1.1 output also
 * characters in range 0x7F - 0x9F other than 0x85 -- are passed to
 * {@code handleInvalidChar}, whose return value is then written instead;
 * characters above 0xFF are rejected via {@code handleInvalidLatinChar}.
 * If checking is disabled, characters are simply narrowed to bytes.
 *
 * @param cbuf Buffer that contains characters to write
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 * @throws IOException if a first surrogate is pending (unpaired), if a
 *   character is rejected, or if the underlying stream fails
 */
@Override
public void writeRaw(char[] cbuf, int offset, int len)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    int ptr = mOutputPtr;
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        if (mCheckContent) {
            for (int inEnd = offset + max; offset < inEnd; ++offset) {
                int c = cbuf[offset];
                if (c < 32) {
                    // !!! TBI: line nr tracking for \n and \r
                    if (c != '\n' && c != '\r' && c != '\t') {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        /* Handler may flush buffered output (as
                         * handleInvalidLatinChar of this class does), which
                         * moves mOutputPtr: re-read it so that we do not
                         * continue from a stale position and re-emit bytes
                         * that were already flushed. No-op if handler did
                         * not touch the buffer.
                         */
                        ptr = mOutputPtr;
                    }
                } else if (c > 0x7E) {
                    if (c > 0xFF) {
                        mOutputPtr = ptr;
                        handleInvalidLatinChar(c);
                    } else if (mXml11) {
                        // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                        if (c <= 0x9F && c != 0x85) {
                            mOutputPtr = ptr;
                            c = handleInvalidChar(c);
                            ptr = mOutputPtr; // see above
                        }
                    }
                }
                mOutputBuffer[ptr++] = (byte) c;
            }
        } else {
            for (int inEnd = offset + max; offset < inEnd; ++offset) {
                mOutputBuffer[ptr++] = (byte) cbuf[offset];
            }
        }
        len -= max;
    }
    mOutputPtr = ptr;
}
/**
 * Writes given part of the String as-is (no escaping), one byte per
 * character.
 * <p>
 * If content checking ({@code mCheckContent}) is enabled, control
 * characters other than tab, CR and LF -- and for XML 1.1 output also
 * characters in range 0x7F - 0x9F other than 0x85 -- are passed to
 * {@code handleInvalidChar}, whose return value is then written instead;
 * characters above 0xFF are rejected via {@code handleInvalidLatinChar}.
 * If checking is disabled, characters are simply narrowed to bytes.
 *
 * @param str String that contains characters to write
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 * @throws IOException if a first surrogate is pending (unpaired), if a
 *   character is rejected, or if the underlying stream fails
 */
@Override
public void writeRaw(String str, int offset, int len)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    int ptr = mOutputPtr;
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        if (mCheckContent) {
            for (int inEnd = offset + max; offset < inEnd; ++offset) {
                int c = str.charAt(offset);
                if (c < 32) {
                    // !!! TBI: line nr tracking for \n and \r
                    if (c != '\n' && c != '\r' && c != '\t') {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        /* Handler may flush buffered output (as
                         * handleInvalidLatinChar of this class does), which
                         * moves mOutputPtr: re-read it so that we do not
                         * continue from a stale position and re-emit bytes
                         * that were already flushed. No-op if handler did
                         * not touch the buffer.
                         */
                        ptr = mOutputPtr;
                    }
                } else if (c > 0x7E) {
                    if (c > 0xFF) {
                        mOutputPtr = ptr;
                        handleInvalidLatinChar(c);
                    } else if (mXml11) {
                        // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                        if (c <= 0x9F && c != 0x85) {
                            mOutputPtr = ptr;
                            c = handleInvalidChar(c);
                            ptr = mOutputPtr; // see above
                        }
                    }
                }
                mOutputBuffer[ptr++] = (byte) c;
            }
        } else {
            for (int inEnd = offset + max; offset < inEnd; ++offset) {
                mOutputBuffer[ptr++] = (byte) str.charAt(offset);
            }
        }
        len -= max;
    }
    mOutputPtr = ptr;
}
/**
 * Writes an attribute value, escaping as necessary:
 * <ul>
 *  <li>'&lt;', '&amp;' and '"' are written as entities</li>
 *  <li>Linefeed and tab are written as character entities (to preserve
 *    them for round-tripping); CR as well if {@code mEscapeCR} is set,
 *    otherwise as-is</li>
 *  <li>Other control characters are passed to {@code handleInvalidChar}
 *    if content checking is enabled and either output is not XML 1.1 or
 *    character is null; otherwise written as character entities</li>
 *  <li>Characters 0x7F - 0x9F, and anything above 0xFF, are written as
 *    character entities; surrogate pairs are first combined</li>
 * </ul>
 * A first surrogate that ends the value is left pending in
 * {@code mSurrogate}.
 *
 * @param data Attribute value to write
 * @throws IOException on invalid content or write failure
 */
@Override
protected void writeAttrValue(String data)
    throws IOException
{
    int offset = 0;
    int len = data.length();
    int ptr = mOutputPtr;

    main_loop:
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // Do we start with a surrogate?
        if (mSurrogate != 0) {
            int sec = data.charAt(offset++);
            sec = calcSurrogate(sec);
            mOutputPtr = ptr;
            ptr = writeAsEntity(sec);
            --len;
            continue main_loop;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        inner_loop:
        for (int inEnd = offset + max; offset < inEnd; ) {
            int c = data.charAt(offset++);
            if (c < 32) {
                /* Need to quote all white space except for regular
                 * space chars, to preserve them (round-tripping)
                 */
                if (c == '\r') {
                    if (!mEscapeCR) {
                        mOutputBuffer[ptr++] = (byte) c;
                        continue;
                    }
                } else if (c != '\n' && c != '\t') {
                    if (mCheckContent) {
                        if (!mXml11 || c == 0) {
                            /* Handler may flush buffered output: publish
                             * our position first so the right bytes get
                             * flushed, and re-read it afterwards so we do
                             * not continue from a stale position. Both are
                             * no-ops if handler does not touch the buffer.
                             */
                            mOutputPtr = ptr;
                            c = handleInvalidChar(c);
                            ptr = mOutputPtr;
                            mOutputBuffer[ptr++] = (byte) c;
                            continue;
                        }
                    }
                }
                // fall-through to char entity output
            } else if (c < 0x7F) {
                if (c != '<' && c != '&' && c != '"') {
                    mOutputBuffer[ptr++] = (byte) c;
                    continue;
                }
                // otherwise fall back on quoting
            } else if (c > 0x9F && c <= 0xFF) {
                mOutputBuffer[ptr++] = (byte) c;
                continue; // [WSTX-88]
            } else {
                // Surrogate?
                if (c >= SURR1_FIRST && c <= SURR2_LAST) {
                    mSurrogate = c;
                    // Last char needs special handling:
                    if (offset == inEnd) {
                        break inner_loop;
                    }
                    c = calcSurrogate(data.charAt(offset++));
                    // Let's fall down to entity output
                }
            }
            /* Has to be escaped as char entity; as such, also need
             * to re-calc max. contiguous data we can output
             */
            mOutputPtr = ptr;
            ptr = writeAsEntity(c);
            len = data.length() - offset;
            continue main_loop;
        }
        len -= max;
    }
    mOutputPtr = ptr;
}
/**
 * Writes an attribute value from a character buffer, escaping as
 * necessary:
 * <ul>
 *  <li>'&lt;', '&amp;' and '"' are written as entities</li>
 *  <li>Linefeed and tab are written as character entities (to preserve
 *    them for round-tripping); CR as well if {@code mEscapeCR} is set,
 *    otherwise as-is</li>
 *  <li>Other control characters are passed to {@code handleInvalidChar}
 *    if content checking is enabled and either output is not XML 1.1 or
 *    character is null; otherwise written as character entities</li>
 *  <li>Characters 0x7F - 0x9F, and anything above 0xFF, are written as
 *    character entities; surrogate pairs are first combined</li>
 * </ul>
 * A first surrogate that ends the value is left pending in
 * {@code mSurrogate}.
 *
 * @param data Buffer that contains the value
 * @param offset Index of the first character of the value
 * @param len Number of characters in the value
 * @throws IOException on invalid content or write failure
 */
@Override
protected void writeAttrValue(char[] data, int offset, int len)
    throws IOException
{
    int ptr = mOutputPtr;

    main_loop:
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // Do we start with a surrogate?
        if (mSurrogate != 0) {
            int sec = data[offset++];
            sec = calcSurrogate(sec);
            mOutputPtr = ptr;
            ptr = writeAsEntity(sec);
            --len;
            continue main_loop;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        inner_loop:
        for (int inEnd = offset + max; offset < inEnd; ) {
            int c = data[offset++];
            if (c < 32) {
                /* Need to quote all white space except for regular
                 * space chars, to preserve them (round-tripping)
                 */
                if (c == '\r') {
                    if (!mEscapeCR) {
                        mOutputBuffer[ptr++] = (byte) c;
                        continue;
                    }
                } else if (c != '\n' && c != '\t') {
                    if (mCheckContent) {
                        if (!mXml11 || c == 0) {
                            /* Handler may flush buffered output: publish
                             * our position first so the right bytes get
                             * flushed, and re-read it afterwards so we do
                             * not continue from a stale position. Both are
                             * no-ops if handler does not touch the buffer.
                             */
                            mOutputPtr = ptr;
                            c = handleInvalidChar(c);
                            ptr = mOutputPtr;
                            mOutputBuffer[ptr++] = (byte) c;
                            continue;
                        }
                    }
                }
                // fall-through to char entity output
            } else if (c < 0x7F) {
                if (c != '<' && c != '&' && c != '"') {
                    mOutputBuffer[ptr++] = (byte) c;
                    continue;
                }
                // otherwise fall back on quoting
            } else if (c > 0x9F && c <= 0xFF) {
                mOutputBuffer[ptr++] = (byte) c;
                continue; // [WSTX-88]
            } else {
                // Surrogate?
                if (c >= SURR1_FIRST && c <= SURR2_LAST) {
                    mSurrogate = c;
                    // Last char needs special handling:
                    if (offset == inEnd) {
                        break inner_loop;
                    }
                    c = calcSurrogate(data[offset++]);
                    // Let's fall down to entity output
                }
            }
            /* Has to be escaped as char entity; as such, also need
             * to re-calc max. contiguous data we can output
             */
            mOutputPtr = ptr;
            ptr = writeAsEntity(c);
            max -= (inEnd - offset); // since we didn't loop completely
            break inner_loop;
        }
        len -= max;
    }
    mOutputPtr = ptr;
}
/**
 * Writes content of a CDATA section. Without content checking the data
 * is passed to {@code writeRaw} as-is. With checking, invalid
 * characters are reported via {@code handleInvalidChar} /
 * {@code handleInvalidLatinChar}, and an embedded "]]&gt;" is either
 * fixed by splitting the section in two (if {@code mFixContent} is
 * set), or reported through the return value.
 *
 * @param data Content to write
 * @return -1 if all content was written; otherwise offset of the first
 *   character of the embedded "]]&gt;" that was not fixed (in which
 *   case content handled by this call, but not yet flushed, is not
 *   committed to the output buffer)
 * @throws IOException on invalid content or write failure
 */
@Override
protected int writeCDataContent(String data)
    throws IOException
{
    // Note: mSurrogate can not be non-zero at this point, no need to check

    int offset = 0;
    int len = data.length();
    if (!mCheckContent) {
        writeRaw(data, offset, len);
        return -1;
    }
    int ptr = mOutputPtr;

    main_loop:
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        for (int inEnd = offset + max; offset < inEnd; ) {
            int c = data.charAt(offset++);
            if (c < 32) {
                // !!! TBI: line nr tracking for \n and \r
                if (c != '\n' && c != '\r' && c != '\t') {
                    mOutputPtr = ptr;
                    c = handleInvalidChar(c);
                    /* Handler may flush buffered output, which moves
                     * mOutputPtr: re-read it so we do not continue from
                     * a stale position (no-op if buffer was untouched).
                     */
                    ptr = mOutputPtr;
                }
            } else if (c > 0x7E) {
                if (c > 0xFF) {
                    mOutputPtr = ptr;
                    handleInvalidLatinChar(c);
                } else if (mXml11) {
                    // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                    if (c <= 0x9F && c != 0x85) {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        ptr = mOutputPtr; // see above
                    }
                }
            } else if (c == '>') { // embedded "]]>"?
                if (offset > 2 && data.charAt(offset-2) == ']'
                    && data.charAt(offset-3) == ']') {
                    if (!mFixContent) {
                        return offset-3;
                    }
                    /* Relatively easy fix; just need to close this
                     * section, and open a new one...
                     */
                    mOutputPtr = ptr;
                    writeCDataEnd();
                    writeCDataStart();
                    writeAscii(BYTE_GT);
                    ptr = mOutputPtr;
                    /* No guarantees there's as much free room in the
                     * output buffer, thus, need to restart loop:
                     */
                    len = data.length() - offset;
                    continue main_loop;
                }
            }
            mOutputBuffer[ptr++] = (byte) c;
        }
        len -= max;
    }
    mOutputPtr = ptr;
    return -1;
}
/**
 * Writes content of a CDATA section from a character buffer. Without
 * content checking the data is passed to {@code writeRaw} as-is. With
 * checking, invalid characters are reported via
 * {@code handleInvalidChar} / {@code handleInvalidLatinChar}, and an
 * embedded "]]&gt;" is either fixed by splitting the section in two (if
 * {@code mFixContent} is set), or reported through the return value.
 *
 * @param cbuf Buffer that contains the content
 * @param start Index of the first character of the content
 * @param len Number of characters of content
 * @return -1 if all content was written; otherwise index (within
 *   {@code cbuf}) of the first character of the embedded "]]&gt;" that
 *   was not fixed (in which case content handled by this call, but not
 *   yet flushed, is not committed to the output buffer)
 * @throws IOException on invalid content or write failure
 */
@Override
protected int writeCDataContent(char[] cbuf, int start, int len)
    throws IOException
{
    // Note: mSurrogate can not be non-zero at this point, no need to check

    if (!mCheckContent) {
        writeRaw(cbuf, start, len);
        return -1;
    }
    int ptr = mOutputPtr;
    int offset = start;

    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        inner_loop:
        for (int inEnd = offset + max; offset < inEnd; ) {
            int c = cbuf[offset++];
            if (c < 32) {
                // !!! TBI: line nr tracking for \n and \r
                if (c != '\n' && c != '\r' && c != '\t') {
                    mOutputPtr = ptr;
                    c = handleInvalidChar(c);
                    /* Handler may flush buffered output, which moves
                     * mOutputPtr: re-read it so we do not continue from
                     * a stale position (no-op if buffer was untouched).
                     */
                    ptr = mOutputPtr;
                }
            } else if (c > 0x7E) {
                if (c > 0xFF) {
                    mOutputPtr = ptr;
                    handleInvalidLatinChar(c);
                } else if (mXml11) {
                    // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                    if (c <= 0x9F && c != 0x85) {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        ptr = mOutputPtr; // see above
                    }
                }
            } else if (c == '>') { // embedded "]]>"?
                if (offset >= (start+3) && cbuf[offset-2] == ']'
                    && cbuf[offset-3] == ']') {
                    if (!mFixContent) {
                        return offset-3;
                    }
                    /* Relatively easy fix; just need to close this
                     * section, and open a new one...
                     */
                    mOutputPtr = ptr;
                    writeCDataEnd();
                    writeCDataStart();
                    writeAscii(BYTE_GT);
                    ptr = mOutputPtr;
                    /* No guarantees there's as much free room in the
                     * output buffer, thus, need to restart loop:
                     */
                    max -= (inEnd - offset);
                    break inner_loop;
                }
            }
            mOutputBuffer[ptr++] = (byte) c;
        }
        len -= max;
    }
    mOutputPtr = ptr;
    return -1;
}
/**
 * Writes content of a comment. Without content checking the data is
 * passed to {@code writeRaw} as-is. With checking, invalid characters
 * are reported via {@code handleInvalidChar} /
 * {@code handleInvalidLatinChar}, and an embedded "--" is either fixed
 * by inserting a space between the hyphens (if {@code mFixContent} is
 * set), or reported through the return value.
 *
 * @param data Content to write
 * @return -1 if all content was written; otherwise offset of the first
 *   hyphen of the embedded "--" that was not fixed (in which case
 *   content handled by this call, but not yet flushed, is not committed
 *   to the output buffer)
 * @throws IOException on invalid content or write failure
 */
@Override
protected int writeCommentContent(String data)
    throws IOException
{
    // Note: mSurrogate can not be non-zero at this point, no need to check

    int offset = 0;
    int len = data.length();
    if (!mCheckContent) {
        writeRaw(data, offset, len);
        return -1;
    }
    int ptr = mOutputPtr;

    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        inner_loop:
        for (int inEnd = offset + max; offset < inEnd; ) {
            int c = data.charAt(offset++);
            if (c < 32) {
                // !!! TBI: line nr tracking for \n and \r
                if (c != '\n' && c != '\r' && c != '\t') {
                    mOutputPtr = ptr;
                    c = handleInvalidChar(c);
                    /* Handler may flush buffered output, which moves
                     * mOutputPtr: re-read it so we do not continue from
                     * a stale position (no-op if buffer was untouched).
                     */
                    ptr = mOutputPtr;
                }
            } else if (c > 0x7E) {
                if (c > 0xFF) {
                    mOutputPtr = ptr;
                    handleInvalidLatinChar(c);
                } else if (mXml11) {
                    // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                    if (c <= 0x9F && c != 0x85) {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        ptr = mOutputPtr; // see above
                    }
                }
            } else if (c == '-') { // embedded "--"?
                if (offset > 1 && data.charAt(offset-2) == '-') {
                    if (!mFixContent) {
                        return offset-2;
                    }
                    /* Quite easy to fix: just add an extra space
                     * in front. There will be room for that char;
                     * but may need to make sure that the following
                     * '-' also fits.
                     */
                    mOutputBuffer[ptr++] = ' ';
                    if (ptr >= mOutputBuffer.length) { // whops. need to flush
                        mOutputPtr = ptr;
                        flushBuffer();
                        ptr = 0;
                    }
                    mOutputBuffer[ptr++] = BYTE_HYPHEN;
                    /* Also, since we did output an extra char, better
                     * restart the loop (since max calculation is now
                     * off)
                     */
                    max -= (inEnd - offset);
                    break inner_loop;
                }
            }
            mOutputBuffer[ptr++] = (byte) c;
        }
        len -= max;
    }
    mOutputPtr = ptr;
    return -1;
}
/**
 * Writes data portion of a processing instruction. Without content
 * checking the data is passed to {@code writeRaw} as-is. With checking,
 * invalid characters are reported via {@code handleInvalidChar} /
 * {@code handleInvalidLatinChar}, and an embedded end marker ("?&gt;")
 * is reported through the return value.
 *
 * @param data Processing instruction data to write
 * @return -1 if all content was written; otherwise offset of the '?'
 *   that starts the embedded "?&gt;" (in which case content handled by
 *   this call, but not yet flushed, is not committed to the output
 *   buffer)
 * @throws IOException on invalid content or write failure
 * @throws XMLStreamException declared by the overridden method; not
 *   thrown directly by this implementation
 */
@Override
protected int writePIData(String data)
    throws IOException, XMLStreamException
{
    // Note: mSurrogate can not be non-zero at this point, no need to check

    int offset = 0;
    int len = data.length();
    if (!mCheckContent) {
        writeRaw(data, offset, len);
        return -1;
    }
    int ptr = mOutputPtr;
    while (len > 0) {
        int max = mOutputBuffer.length - ptr;
        if (max < 1) { // output buffer full?
            mOutputPtr = ptr;
            flushBuffer();
            ptr = 0;
            max = mOutputBuffer.length;
        }
        // How much can we output?
        if (max > len) {
            max = len;
        }
        for (int inEnd = offset + max; offset < inEnd; ++offset) {
            int c = data.charAt(offset);
            if (c < 32) {
                // !!! TBI: line nr tracking for \n and \r
                if (c != '\n' && c != '\r' && c != '\t') {
                    mOutputPtr = ptr;
                    c = handleInvalidChar(c);
                    /* Handler may flush buffered output, which moves
                     * mOutputPtr: re-read it so we do not continue from
                     * a stale position (no-op if buffer was untouched).
                     */
                    ptr = mOutputPtr;
                }
            } else if (c > 0x7E) {
                if (c > 0xFF) {
                    mOutputPtr = ptr;
                    handleInvalidLatinChar(c);
                } else if (mXml11) {
                    // XML 1.1 restricted chars: 0x7F-0x84, 0x86-0x9F (inclusive)
                    if (c <= 0x9F && c != 0x85) {
                        mOutputPtr = ptr;
                        c = handleInvalidChar(c);
                        ptr = mOutputPtr; // see above
                    }
                }
            } else if (c == '>') { // enclosed end marker ("?>")?
                if (offset > 0 && data.charAt(offset-1) == '?') {
                    /* Unlike in other content methods, offset has not
                     * yet been advanced past '>' here, so '?' is at
                     * offset-1. (Using offset-2 would yield -1, i.e.
                     * "no problem", for data that starts with "?>")
                     */
                    return offset-1;
                }
            }
            mOutputBuffer[ptr++] = (byte) c;
        }
        len -= max;
    }
    mOutputPtr = ptr;
    return -1;
}
/**
* Writes textual content, escaping as necessary. Characters '<' and '&'
* are always written as entities; '>' is written as-is only when it is
* not the first character of the data and is not preceded by ']'.
* Linefeed and tab are written as-is; CR as-is unless mEscapeCR is set
* (in which case it is written as a character entity). Characters
* 0xA0 - 0xFF are written as single bytes; characters 0x7F - 0x9F and
* anything above 0xFF as character entities, with surrogate pairs first
* combined. A first surrogate that ends the data is left pending in
* mSurrogate, to be combined with the first character of the next call.
*<p>
* Unlike most other write methods of this class, this one updates
* mOutputPtr directly instead of using a local copy.
*
* @param data Text to write
* @throws IOException on invalid content or write failure
*/
@Override
protected void writeTextContent(String data)
throws IOException
{
int offset = 0;
int len = data.length();
main_loop:
while (len > 0) {
int max = mOutputBuffer.length - mOutputPtr;
if (max < 1) { // output buffer full?
flushBuffer();
max = mOutputBuffer.length;
}
// Do we start with a surrogate?
if (mSurrogate != 0) {
int sec = data.charAt(offset++);
sec = calcSurrogate(sec);
writeAsEntity(sec);
--len;
continue main_loop;
}
// How much can we output?
if (max > len) {
max = len;
}
// Chars that need just one byte are copied within this loop; anything
// that needs an entity is written after the if-chain, restarting main loop
inner_loop:
for (int inEnd = offset + max; offset < inEnd; ) {
int c = data.charAt(offset++);
if (c < 32) {
if (c == '\n' || c == '\t') { // TODO: line count
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
} else if (c == '\r') {
if (!mEscapeCR) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
} else if (!mXml11 || c == 0) { // ok in xml1.1, as entity
if (mCheckContent) {
// handler either throws, or returns the char to write instead
c = handleInvalidChar(c);
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
// otherwise... well, I guess we can just escape it
}
// \r, or xml1.1 + other whitespace, need to escape
} else if (c < 0x7F) {
if (c != '<' && c != '&') {
// '>' is only passed as-is if it can not be the end of "]]>"
if (c != '>' || (offset > 1 && data.charAt(offset-2) != ']')) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
}
// otherwise fall back on quoting
} else if (c > 0x9F && c <= 0xFF) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue; // [WSTX-88]
} else {
// Surrogate?
if (c >= SURR1_FIRST && c <= SURR2_LAST) {
mSurrogate = c;
// Last char needs special handling:
if (offset == inEnd) {
break inner_loop;
}
c = calcSurrogate(data.charAt(offset++));
// Let's fall down to entity output
}
}
/* Has to be escaped as char entity; as such, also need
* to re-calc max. contiguous data we can output
*/
writeAsEntity(c);
len = data.length() - offset;
continue main_loop;
}
len -= max;
}
}
/**
* Writes textual content from a character buffer, escaping as necessary.
* Characters '<' and '&' are always written as entities; '>' is written
* as-is only when the character before it in the buffer is not ']'
* (and it is not at buffer index 0). Linefeed and tab are written
* as-is; CR as-is unless mEscapeCR is set (in which case it is written
* as a character entity). Characters 0xA0 - 0xFF are written as single
* bytes; characters 0x7F - 0x9F and anything above 0xFF as character
* entities, with surrogate pairs first combined. A first surrogate that
* ends the range is left pending in mSurrogate, to be combined with the
* first character of the next call.
*<p>
* Unlike most other write methods of this class, this one updates
* mOutputPtr directly instead of using a local copy.
*
* @param cbuf Buffer that contains the text
* @param offset Index of the first character of the text
* @param len Number of characters of text
* @throws IOException on invalid content or write failure
*/
@Override
protected void writeTextContent(char[] cbuf, int offset, int len)
throws IOException
{
main_loop:
while (len > 0) {
int max = mOutputBuffer.length - mOutputPtr;
if (max < 1) { // output buffer full?
flushBuffer();
max = mOutputBuffer.length;
}
// Do we start with a surrogate?
if (mSurrogate != 0) {
int sec = cbuf[offset++];
sec = calcSurrogate(sec);
writeAsEntity(sec);
--len;
continue main_loop;
}
// How much can we output?
if (max > len) {
max = len;
}
// Chars that need just one byte are copied within this loop; anything
// that needs an entity is written after the if-chain, leaving the loop
inner_loop:
for (int inEnd = offset + max; offset < inEnd; ) {
int c = cbuf[offset++];
if (c < 32) {
if (c == '\n' || c == '\t') { // TODO: line count
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
} else if (c == '\r') {
if (!mEscapeCR) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
} else if (!mXml11 || c == 0) { // ok in xml1.1, as entity
if (mCheckContent) {
// handler either throws, or returns the char to write instead
c = handleInvalidChar(c);
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
// otherwise... well, I guess we can just escape it
}
// \r, or xml1.1 + other whitespace, need to escape
} else if (c < 0x7F) {
if (c !='<' && c != '&') {
/* Since we can be conservative, it doesn't matter
* if second check is not exact (it compares against
* absolute buffer index, not start of this range)
*/
if (c != '>' || (offset > 1 && cbuf[offset-2] != ']')) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue;
}
}
// otherwise fall back on quoting
} else if (c > 0x9F && c <= 0xFF) {
mOutputBuffer[mOutputPtr++] = (byte) c;
continue; // [WSTX-88]
} else {
// Surrogate?
if (c >= SURR1_FIRST && c <= SURR2_LAST) {
mSurrogate = c;
// Last char needs special handling:
if (offset == inEnd) {
break inner_loop;
}
c = calcSurrogate(cbuf[offset++]);
// Let's fall down to entity output
}
}
/* Has to be escaped as char entity; as such, also need
* to re-calc max. contiguous data we can output
*/
writeAsEntity(c);
// only count the chars actually consumed from this chunk
max -= (inEnd - offset);
break inner_loop;
}
len -= max;
}
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * Reports a character that can not be expressed as a single ISO-8859-1
 * byte: flushes pending output, then throws.
 *
 * @param c Character that can not be output as-is
 * @throws IOException always
 */
protected void handleInvalidLatinChar(int c)
    throws IOException
{
    // Flush whatever we have first, so it is easier to see where output stopped
    flush();

    /* 17-May-2006, TSa: Would really be useful if we could throw
     *   XMLStreamExceptions; esp. to indicate actual output location.
     *   However, this causes problem with methods that call us and
     *   can only throw IOExceptions (when invoked via Writer proxy).
     *   Need to figure out how to resolve this.
     */
    final String hex = Integer.toHexString(c);
    throw new IOException("Invalid XML character (0x"+hex+"); can only be output using character entity when using ISO-8859-1 encoding");
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/NonNsStreamWriter.java 0000664 0000000 0000000 00000044445 13257562550 0030132 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.sr.AttributeCollector;
import com.ctc.wstx.sr.InputElementStack;
import com.ctc.wstx.util.EmptyNamespaceContext;
import com.ctc.wstx.util.StringVector;
/**
* Implementation of {@link XMLStreamWriter} used when namespace support
* is not enabled. This means that only local names are used for elements
* and attributes; and if rudimentary namespace declarations need to be
* output, they are output using attribute writing methods.
*/
public class NonNsStreamWriter
extends TypedStreamWriter
{
/*
////////////////////////////////////////////////////
// State information
////////////////////////////////////////////////////
*/
/**
* Stack of currently open start elements; only local names
* are included.
*/
final StringVector mElements;
/**
* Container for attribute names for current element; used only
* if uniqueness of attribute names is to be enforced.
*<p>
* {@code setPrefix}, {@code writeNamespace}
* and {@code writeStartElement}; the order writers expect is a
* bit different from the order in which element information is
* passed in.
*/
/**
 * Copies a start element, including its namespace declarations and
 * attributes, from the given input element stack and attribute
 * collector. Attribute prefixes are re-resolved via
 * {@code findOrCreateAttrPrefix}, since they may have been re-mapped.
 *
 * @param elemStack Element stack that provides name and namespace
 *   declarations of the element to copy
 * @param ac Collector that provides attributes of the element to copy
 */
@Override
public final void copyStartElement(InputElementStack elemStack, AttributeCollector ac)
    throws IOException, XMLStreamException
{
    /* With repairing stream writer we can output the element first:
     * writer should be able to resolve namespace mapping for the
     * element automatically, as necessary.
     */
    final String elemPrefix = elemStack.getPrefix();
    final String elemUri = elemStack.getNsURI();
    writeStartElement(elemPrefix, elemStack.getLocalName(), elemUri);

    /* 04-Sep-2006, TSa: Although we could really just ignore all
     *   namespace declarations, some apps prefer (or even expect...)
     *   that ns bindings are preserved as much as possible. So, let's
     *   just try to output them as they are (could optimize and skip
     *   ones related to the start element [same prefix or URI], but
     *   for now let's not bother)
     */
    final int nsCount = elemStack.getCurrentNsCount();
    for (int i = 0; i < nsCount; ++i) {
        writeNamespace(elemStack.getLocalNsPrefix(i), elemStack.getLocalNsURI(i));
    }

    // Then attributes, if any (copying of default attributes is configurable)
    final int attrCount = mCfgCopyDefaultAttrs ? ac.getCount() : ac.getSpecifiedCount();

    /* Attributes can not simply be copied literally: some namespace
     * prefixes may have been re-mapped, so each prefix is validated
     * first (which may also output a namespace declaration).
     */
    for (int i = 0; i < attrCount; ++i) {
        final String attrUri = ac.getURI(i);
        String attrPrefix = ac.getPrefix(i);
        // Missing/empty prefix always means 'no namespace' for attributes
        final boolean prefixed = (attrPrefix != null) && (attrPrefix.length() > 0);
        if (prefixed) {
            attrPrefix = findOrCreateAttrPrefix(attrPrefix, attrUri, mCurrElem);
        }
        /* Since prefix used may differ from what collector has, need
         * to call XmlWriter directly:
         */
        final boolean writePrefix = (attrPrefix != null) && (attrPrefix.length() > 0);
        if (writePrefix) {
            mWriter.writeAttribute(attrPrefix, ac.getLocalName(i), ac.getValue(i));
        } else {
            mWriter.writeAttribute(ac.getLocalName(i), ac.getValue(i));
        }
    }
}
/**
 * Makes sure that the namespace URI of the given name is bound to a
 * prefix in the current element scope, writing a namespace declaration
 * if a new binding is needed.
 *
 * @param name Qualified name for which a valid prefix is needed
 * @return Prefix to use for the name
 */
@Override
public String validateQNamePrefix(QName name)
    throws XMLStreamException
{
    final String uri = name.getNamespaceURI();
    final String suggested = name.getPrefix();

    final String existing = validateElemPrefix(suggested, uri, mCurrElem);
    if (existing != null) { // suitable prefix exists
        return existing;
    }
    /* Must bind. Need to ensure that we'll pass "" as prefix, not null,
     * so that it is understood as "I want to use the default NS", not
     * as "whatever prefix, I don't care"
     */
    final String preferred = (suggested == null) ? "" : suggested;
    final String bound = generateElemPrefix(preferred, uri, mCurrElem);
    final boolean useDefaultNs = (bound == null) || (bound.length() == 0);
    if (useDefaultNs) {
        writeDefaultNamespace(uri);
    } else {
        writeNamespace(bound, uri);
    }
    return bound;
}
/*
///////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////
*/
/**
 * Method called to find an existing prefix for the given namespace,
 * if any exists in the scope. If one is found, it's returned (including
 * "" for the current default namespace); if not, null is returned.
 *
 * @param nsURI URI of namespace for which we need a prefix
 * @param elem Element whose scope is used for the lookup
 * @return Prefix bound to the namespace; "" for the default namespace;
 *   or null if there is no usable binding
 */
protected final String findElemPrefix(String nsURI, SimpleOutputElement elem)
    throws XMLStreamException
{
    /* Special case: empty NS URI can only be bound to the empty
     * prefix...
     */
    if (nsURI == null || nsURI.length() == 0) {
        String currDefNsURI = elem.getDefaultNsUri();
        if (currDefNsURI != null && currDefNsURI.length() > 0) {
            // Nope; won't do... has to be re-bound, but not here:
            return null;
        }
        return "";
    }
    /* Look the prefix up from the element that was passed in (and that
     * was used for the default namespace check above), not from the
     * current element field, so both branches consult the same scope.
     */
    return elem.getPrefix(nsURI);
}
/**
 * Method called after {@link #findElemPrefix} has returned null, to
 * choose the prefix to use for the specified namespace. Preference
 * order is: the suggested prefix (if any); the default namespace, if
 * the URI has been suggested as one; a prefix suggested earlier for
 * the URI; and finally a newly generated prefix. Outputting the
 * declaration is left to the caller.
 *
 * @param suggPrefix Preferred prefix, if any; null for "no preference"
 * @param nsURI URI of namespace for which a prefix is needed
 * @param elem Element used for generating a new mapping, if needed
 * @return Prefix to use; "" to indicate the default namespace
 */
protected final String generateElemPrefix(String suggPrefix, String nsURI,
        SimpleOutputElement elem)
    throws XMLStreamException
{
    // The empty namespace can only be bound to the empty prefix
    if (nsURI == null || nsURI.length() == 0) {
        return "";
    }
    /* With elements the preferred prefix can ALWAYS be used, since it
     * can mask preceding bindings:
     */
    if (suggPrefix != null) {
        return suggPrefix;
    }
    // Caller wants this URI to map as the default namespace?
    if (mSuggestedDefNs != null && mSuggestedDefNs.equals(nsURI)) {
        return "";
    }
    final String preferred = (mSuggestedPrefixes == null) ? null
        : mSuggestedPrefixes.get(nsURI);
    if (preferred != null) {
        return preferred;
    }
    /* 16-Oct-2005, TSa: We have 2 choices here, essentially;
     *   could make elements always try to override the def
     *   ns... or can just generate new one. Let's do latter
     *   for now.
     */
    if (mAutoNsSeq == null) {
        mAutoNsSeq = new int[] { 1 };
    }
    // Caller deals with the actual binding output
    return elem.generateMapping(mAutomaticNsPrefix, nsURI, mAutoNsSeq);
}
/**
 * Method called to somehow find a prefix for given namespace, to be
 * used for an attribute of the currently open start element; either
 * use an existing one, or generate a new one. If a new mapping needs
 * to be generated, it will also be automatically bound, and necessary
 * namespace declaration output.
 *
 * @param suggPrefix Suggested prefix to bind, if any; may be null
 *   to indicate "no preference"
 * @param nsURI URI of namespace for which we need a prefix
 * @param elem Currently open start element, on which the attribute
 *   will be added.
 * @return Prefix to use for the attribute; null if the namespace URI
 *   is null or empty (attribute has no namespace)
 */
protected final String findOrCreateAttrPrefix(String suggPrefix, String nsURI,
        SimpleOutputElement elem)
    throws XMLStreamException
{
    if (nsURI == null || nsURI.length() == 0) {
        /* Attributes never use the default namespace; missing
         * prefix always leads to the empty ns... so nothing
         * special is needed here.
         */
        return null;
    }
    // Maybe the suggested prefix is properly bound?
    if (suggPrefix != null) {
        int status = elem.isPrefixValid(suggPrefix, nsURI, false);
        if (status == SimpleOutputElement.PREFIX_OK) {
            return suggPrefix;
        }
        /* Otherwise, if the prefix is unbound, let's just bind
         * it -- if caller specified a prefix, it probably prefers
         * binding that prefix even if another prefix already existed?
         * The remaining case (already bound to another URI) we don't
         * want to touch, at least not yet: it may or not be safe
         * to change binding, so let's just not try it.
         */
        if (status == SimpleOutputElement.PREFIX_UNBOUND) {
            elem.addPrefix(suggPrefix, nsURI);
            doWriteNamespace(suggPrefix, nsURI);
            return suggPrefix;
        }
    }

    // If not, perhaps there's another existing binding available?
    String prefix = elem.getExplicitPrefix(nsURI);
    if (prefix != null) { // already had a mapping for the URI... cool.
        return prefix;
    }

    /* Nope, need to create one. First, let's see if there's a
     * preference...
     */
    if (suggPrefix != null) {
        prefix = suggPrefix;
    } else if (mSuggestedPrefixes != null) {
        prefix = mSuggestedPrefixes.get(nsURI);
        // note: def ns is never added to suggested prefix map
    }

    if (prefix != null) {
        /* Can not use default namespace for attributes.
         * Also, re-binding is tricky for attributes; can't
         * re-bind anything that's bound on this scope... or
         * used in this scope. So, to simplify life, let's not
         * re-bind anything for attributes.
         */
        if (prefix.length() == 0
            || (elem.getNamespaceURI(prefix) != null)) {
            prefix = null;
        }
    }

    if (prefix == null) {
        if (mAutoNsSeq == null) {
            mAutoNsSeq = new int[1];
            mAutoNsSeq[0] = 1;
        }
        /* Generate the mapping against the element passed in: it is
         * the one the binding gets added to right below, so uniqueness
         * of the new prefix has to be determined within its scope.
         */
        prefix = elem.generateMapping(mAutomaticNsPrefix, nsURI,
                mAutoNsSeq);
    }

    // Ok; so far so good: let's now bind and output the namespace:
    elem.addPrefix(prefix, nsURI);
    doWriteNamespace(prefix, nsURI);
    return prefix;
}
/**
 * Checks whether given prefix can be used as is for an element in
 * the given namespace.
 *
 * @return Prefix to use ("" for the unbound default namespace), or null
 *   to indicate that the caller needs to (re-)bind a prefix
 */
private final String validateElemPrefix(String prefix, String nsURI,
        SimpleOutputElement elem)
    throws XMLStreamException
{
    final boolean noNamespace = (nsURI == null) || (nsURI.length() == 0);
    if (noNamespace) {
        /* "No namespace" is only reachable via the default namespace, and
         * only as long as the default namespace is itself not bound to
         * a (non-empty) URI; otherwise re-binding is needed.
         */
        final String defaultUri = elem.getDefaultNsUri();
        return (defaultUri == null || defaultUri.length() == 0) ? "" : null;
    }
    /* If the prefix is unbound or masked, some other prefix might still
     * be bound to the URI; but no search is done here: null is returned
     * so that the caller can (try to) bind the preferred prefix.
     */
    final int validity = elem.isPrefixValid(prefix, nsURI, true);
    return (validity == SimpleOutputElement.PREFIX_OK) ? prefix : null;
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/SimpleNsStreamWriter.java 0000664 0000000 0000000 00000030005 13257562550 0030614 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE,
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.StartElement;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.sr.AttributeCollector;
import com.ctc.wstx.sr.InputElementStack;
/**
* Namespace-aware implementation of {@link XMLStreamWriter}, that does
* not do namespace repairing, ie doesn't try to resolve possible
* conflicts between prefixes and namespace URIs, or automatically
* create namespace bindings.
*/
public class SimpleNsStreamWriter
extends BaseNsStreamWriter
{
/*
////////////////////////////////////////////////////
// Life-cycle (ctors)
////////////////////////////////////////////////////
*/
/**
 * Constructs a non-repairing namespace-aware stream writer; all
 * arguments are passed to the base class constructor as is.
 */
public SimpleNsStreamWriter(XmlWriter xw, String enc, WriterConfig cfg)
{
// NOTE(review): the trailing 'false' is presumably the "repairing" flag
// of the base class (this writer does no namespace repairing) -- confirm
super(xw, enc, cfg, false);
}
/*
////////////////////////////////////////////////////
// XMLStreamWriter API
////////////////////////////////////////////////////
*/
//public NamespaceContext getNamespaceContext()
//public void setNamespaceContext(NamespaceContext context)
//public String getPrefix(String uri)
//public void setPrefix(String prefix, String uri)
//public void writeAttribute(String localName, String value)
/**
 * Writes an attribute that belongs to given namespace, using whatever
 * explicit prefix the current element has bound to the namespace URI.
 * Unless {@code mReturnNullForDefaultNamespace} is set, an unbound
 * namespace URI is reported as an output error.
 */
@Override
public void writeAttribute(String nsURI, String localName, String value)
    throws XMLStreamException
{
    // Attributes can only be added while the start element is still open;
    // no need to set mAnyOutput, nor close the element
    if (!mStartElementOpen) {
        throwOutputError(ErrorConsts.WERR_ATTR_NO_ELEM);
    }
    final String boundPrefix = mCurrElem.getExplicitPrefix(nsURI);
    if (boundPrefix == null && !mReturnNullForDefaultNamespace) {
        throwOutputError("Unbound namespace URI '" + nsURI + "'");
    }
    doWriteAttr(localName, nsURI, boundPrefix, value);
}
/**
 * Writes an attribute using the prefix given by the caller. This method
 * itself only checks that a start element is open; it does not verify
 * that the prefix is bound to the given namespace URI.
 */
@Override
public void writeAttribute(String prefix, String nsURI,
String localName, String value)
throws XMLStreamException
{
// Attributes can only be written while the start element is still open
if (!mStartElementOpen) {
throwOutputError(ErrorConsts.WERR_ATTR_NO_ELEM);
}
doWriteAttr(localName, nsURI, prefix, value);
}
//public void writeEmptyElement(String localName) throws XMLStreamException
//public void writeEmptyElement(String nsURI, String localName) throws XMLStreamException
//public void writeEmptyElement(String prefix, String localName, String nsURI) throws XMLStreamException
//public void writeEndElement() throws XMLStreamException
/**
 * Binds the default namespace to given URI for the current element, and
 * writes the matching declaration. Can only be called while a start
 * element is open; otherwise an output error is reported.
 */
@Override
public void writeDefaultNamespace(String nsURI)
throws XMLStreamException
{
if (!mStartElementOpen) {
throwOutputError(ERR_NSDECL_WRONG_STATE);
}
// 27-Mar-2007, TSa: Apparently TCK expects a binding to be added
setDefaultNamespace(nsURI);
doWriteDefaultNs(nsURI);
}
/**
 * Binds given prefix to given namespace URI and writes the matching
 * declaration. A null or empty prefix, as well as the literal "xmlns",
 * is handled as a default namespace declaration.
 */
@Override
public void writeNamespace(String prefix, String nsURI)
    throws XMLStreamException
{
    final boolean isDefaultDecl = (prefix == null)
        || (prefix.length() == 0)
        || prefix.equals(XMLConstants.XMLNS_ATTRIBUTE);
    if (isDefaultDecl) {
        writeDefaultNamespace(nsURI);
        return;
    }

    // No need to set mAnyOutput, and the element should not be closed;
    // but it does need to be open
    if (!mStartElementOpen) {
        throwOutputError(ERR_NSDECL_WRONG_STATE);
    }

    /* As per namespace specs, the 'empty' namespace URI can not be bound
     * to an actual (non-default) prefix. Namespaces 1.1 does allow it,
     * though, so the check is skipped for xml 1.1 documents.
     * (whether NS consistency could/should also be verified is an open
     * question, per original 01-Apr-2005 note)
     */
    if (!mXml11 && nsURI.length() == 0) {
        throwOutputError(ErrorConsts.ERR_NS_EMPTY);
    }

    // 27-Mar-2007, TSa: Apparently TCK expects a binding to be added
    setPrefix(prefix, nsURI);
    doWriteNamespace(prefix, nsURI);
}
/*
////////////////////////////////////////////////////
// Package methods:
////////////////////////////////////////////////////
*/
/**
 * Records given URI as the default namespace URI of the current output
 * element. Nothing is written by this method.
 */
@Override
public void setDefaultNamespace(String uri) throws XMLStreamException
{
mCurrElem.setDefaultNsUri(uri);
}
/**
 * Records a prefix-to-URI binding on the current output element.
 * Nothing is written by this method.
 */
@Override
public void doSetPrefix(String prefix, String uri) throws XMLStreamException
{
mCurrElem.addPrefix(prefix, uri);
}
@Override
public void writeStartElement(StartElement elem) throws XMLStreamException
{
QName name = elem.getName();
@SuppressWarnings("unchecked")
IteratorsetPrefix
, writeNamespace
* and writeStartElement
; the order writers expect is
* bit different from the order in which element information is
* passed in.
*/
@Override
public final void copyStartElement(InputElementStack elemStack,
        AttributeCollector attrCollector)
    throws IOException, XMLStreamException
{
    /* Three passes, since the order the writer expects differs from the
     * order in which element information is available:
     * bindings first, then the start element, then the declarations.
     */
    final int nsCount = elemStack.getCurrentNsCount();

    /* Pass 1: add prefix bindings (may not be 100% required, but makes
     * prefixes accessible to application code)
     */
    for (int i = 0; i < nsCount; ++i) {
        final String nsPrefix = elemStack.getLocalNsPrefix(i);
        final String nsUri = elemStack.getLocalNsURI(i);
        if (nsPrefix != null && nsPrefix.length() > 0) {
            setPrefix(nsPrefix, nsUri);
        } else { // default NS
            setDefaultNamespace(nsUri);
        }
    }

    // Pass 2: the start element itself
    writeStartElement(elemStack.getPrefix(),
            elemStack.getLocalName(),
            elemStack.getNsURI());

    // Pass 3: output of the actual namespace declarations
    for (int i = 0; i < nsCount; ++i) {
        final String nsPrefix = elemStack.getLocalNsPrefix(i);
        final String nsUri = elemStack.getLocalNsURI(i);
        if (nsPrefix != null && nsPrefix.length() > 0) {
            writeNamespace(nsPrefix, nsUri);
        } else { // default NS
            writeDefaultNamespace(nsUri);
        }
    }

    /* And finally attributes, if any; whether implicit (aka "default")
     * attributes are copied too is configurable.
     */
    final int attrCount;
    if (mCfgCopyDefaultAttrs) {
        attrCount = attrCollector.getCount();
    } else {
        attrCount = attrCollector.getSpecifiedCount();
    }
    for (int i = 0; i < attrCount; ++i) {
        attrCollector.writeAttribute(i, mWriter, mValidator);
    }
}
/**
 * Returns the prefix of given name as is: this (non-repairing) writer
 * does no validation or re-mapping of the prefix.
 */
@Override
public String validateQNamePrefix(QName name)
{
// Good as is, let's not complicate things
return name.getPrefix();
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/SimpleOutputElement.java 0000664 0000000 0000000 00000026276 13257562550 0030514 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2005 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.util.*;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.compat.QNameCreator;
import com.ctc.wstx.util.BijectiveNsMap;
/**
* Class that encapsulates information about a specific element in virtual
* output stack for namespace-aware writers.
* It provides support for URI-to-prefix mappings as well as namespace
* mapping generation.
*
*
*/
public abstract class XmlWriter
{
// Boundaries of UTF-16 surrogate ranges: 0xD800-0xDBFF is the first (high)
// surrogate range, 0xDC00-0xDFFF the second (low) surrogate range
protected final static int SURR1_FIRST = 0xD800;
protected final static int SURR1_LAST = 0xDBFF;
protected final static int SURR2_FIRST = 0xDC00;
protected final static int SURR2_LAST = 0xDFFF;
// NOTE(review): presumably the quote character used around attribute values -- confirm
protected final static char DEFAULT_QUOTE_CHAR = '"';
// Configuration object and encoding name, as passed to the constructor
protected final WriterConfig mConfig;
protected final String mEncoding;
// // // Operating mode: base class needs to know whether
// // // namespaces are supported (for entity/PI target validation)
protected final boolean mNsAware;
// Following flags are all derived by the constructor from config
// flags (OutputConfigFlags.CFG_VALIDATE_STRUCTURE, CFG_VALIDATE_CONTENT,
// CFG_VALIDATE_NAMES and CFG_FIX_CONTENT, respectively)
protected final boolean mCheckStructure;
protected final boolean mCheckContent;
protected final boolean mCheckNames;
protected final boolean mFixContent;
/**
* Whether to escape CR (\r) character.
*/
final boolean mEscapeCR;
/**
* Whether to add a space after empty element (before closing "/>")
* or not.
*/
final boolean mAddSpaceAfterEmptyElem;
/**
* Whether to use double quotes in XML declaration or not.
*/
final boolean mUseDoubleQuotesInXmlDecl;
/**
* Flag that defines whether close() on this writer should call
* close on the underlying output object (stream, writer)
*/
protected final boolean mAutoCloseOutput;
/**
* Optional escaping writer used for escaping characters like '<'
* '&' and '>' in textual content.
* Constructed if calling code has
* installed a special escaping writer factory for text content.
* Null if the default escaper is to be used.
*/
protected Writer mTextWriter;
/**
* Optional escaping writer used for escaping characters like '"'
* '&' and '<' in attribute values.
* Constructed if calling code has
* installed a special escaping writer factory for attribute values.
* Null if the default escaper is to be used.
*/
protected Writer mAttrValueWriter;
/**
* Indicates which xml version output is to be compliant with: if false,
* is to be xml 1.0 compliant, if true, xml 1.1 compliant.
*/
protected boolean mXml11 = false;
/**
* Lazy-constructed wrapper object, which will route all calls to
* Writer API, to matching {@code writeRaw} methods of this
* XmlWriter instance.
*/
protected XmlWriterWrapper mRawWrapper = null;
/**
* Lazy-constructed wrapper object, which will route all calls to
* Writer API, to matching {@code writeCharacters} methods of this
* XmlWriter instance.
*/
protected XmlWriterWrapper mTextWrapper = null;
/*
///////////////////////////////////////////////////////
// Output location info
///////////////////////////////////////////////////////
*/
/**
* Number of characters output prior to currently buffered output
*/
protected int mLocPastChars = 0;
// NOTE(review): presumably the current row (line) number, counted from 1 -- confirm
protected int mLocRowNr = 1;
/**
* Offset of the first character on this line. May be negative, if
* the offset was in a buffer that has been flushed out.
*/
protected int mLocRowStartOffset = 0;
/*
///////////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////////
*/
/**
 * Stores the configuration, decodes the individual output flags from
 * the config bit set, and sets up the optional custom escaping writers
 * for text content and attribute values.
 *
 * @param cfg Writer configuration to use
 * @param encoding Name of the output encoding; may be null or empty, in
 *   which case the default output encoding name is passed to escaping
 *   writer factories
 * @param autoclose Whether close() should also close the underlying
 *   output object
 */
protected XmlWriter(WriterConfig cfg, String encoding, boolean autoclose)
    throws IOException
{
    mConfig = cfg;
    mEncoding = encoding;
    mAutoCloseOutput = autoclose;

    // Individual settings are all packed in one bit set
    final int cfgFlags = cfg.getConfigFlags();
    mNsAware = (cfgFlags & OutputConfigFlags.CFG_ENABLE_NS) != 0;
    mCheckStructure = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_STRUCTURE) != 0;
    mCheckContent = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_CONTENT) != 0;
    mCheckNames = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_NAMES) != 0;
    mFixContent = (cfgFlags & OutputConfigFlags.CFG_FIX_CONTENT) != 0;
    mEscapeCR = (cfgFlags & OutputConfigFlags.CFG_ESCAPE_CR) != 0;
    mAddSpaceAfterEmptyElem = (cfgFlags & OutputConfigFlags.CFG_ADD_SPACE_AFTER_EMPTY_ELEM) != 0;
    mUseDoubleQuotesInXmlDecl = (cfgFlags & OutputConfigFlags.CFG_USE_DOUBLE_QUOTES_IN_XML_DECL) != 0;

    // Custom escaping for text content?
    final EscapingWriterFactory textFactory = mConfig.getTextEscaperFactory();
    if (textFactory != null) {
        final String encName = (mEncoding == null || mEncoding.length() == 0)
            ? WstxOutputProperties.DEFAULT_OUTPUT_ENCODING
            : mEncoding;
        mTextWriter = textFactory.createEscapingWriterFor(wrapAsRawWriter(), encName);
    } else {
        mTextWriter = null;
    }

    // And/or for attribute values?
    final EscapingWriterFactory attrFactory = mConfig.getAttrValueEscaperFactory();
    if (attrFactory != null) {
        final String encName = (mEncoding == null || mEncoding.length() == 0)
            ? WstxOutputProperties.DEFAULT_OUTPUT_ENCODING
            : mEncoding;
        mAttrValueWriter = attrFactory.createEscapingWriterFor(wrapAsRawWriter(), encName);
    } else {
        mAttrValueWriter = null;
    }
}
/*
////////////////////////////////////////////////////
// Extra configuration
////////////////////////////////////////////////////
*/
/**
 * Switches this writer to xml 1.1 mode by setting {@code mXml11};
 * there is no way to switch back to xml 1.0 mode via this method.
 */
public void enableXml11() {
mXml11 = true;
}
/*
////////////////////////////////////////////////////
// Access to underlying physical output destinations
////////////////////////////////////////////////////
*/
/**
* @return Underlying OutputStream used for physical output,
* if the writer was constructed using one
*/
protected abstract OutputStream getOutputStream();
/**
* @return Underlying Writer used for physical output,
* if the writer was constructed with one, or one was
* created to be used with an OutputStream.
*/
protected abstract Writer getWriter();
/*
////////////////////////////////////////////////////
// Basic methods for communicating with underlying
// stream or writer
////////////////////////////////////////////////////
*/
/**
* Method called to flush the buffer(s), and close the output
* sink (stream or writer) if enabled (auto-closing) or
* forced.
*
* @param forceRealClose If true, output sink is to be closed even
* if auto-closing is not enabled
*/
public abstract void close(boolean forceRealClose) throws IOException;
// NOTE(review): presumably flushes buffered output through to the
// underlying stream/writer -- implementation not visible here
public abstract void flush()
throws IOException;
/**
* Writes given part of the String as is; see section header of the
* "raw" methods: no verification is implied.
*/
public abstract void writeRaw(String str, int offset, int len)
throws IOException;
/**
* Convenience method that writes the whole String by delegating to
* {@link #writeRaw(String, int, int)}.
*/
public void writeRaw(String str)
throws IOException
{
writeRaw(str, 0, str.length());
}
/**
* Same as {@link #writeRaw(String, int, int)}, but for contents of
* a character array.
*/
public abstract void writeRaw(char[] cbuf, int offset, int len)
throws IOException;
/**
* Like {@link #writeRaw}, but caller guarantees that the contents
* additionally are known to be in 7-bit ascii range.
*/
public abstract void writeRawAscii(char[] cbuf, int offset, int len)
throws IOException;
/*
////////////////////////////////////////////////////
// Raw, non-verifying write methods; used when
// directly copying trusted content
////////////////////////////////////////////////////
*/
// NOTE(review): judging by the names, the methods below output only the
// opening or closing marker of a CDATA section, comment or processing
// instruction, so that content can be copied in between -- confirm
// against implementations
public abstract void writeCDataStart()
throws IOException;
public abstract void writeCDataEnd()
throws IOException;
public abstract void writeCommentStart()
throws IOException;
public abstract void writeCommentEnd()
throws IOException;
// NOTE(review): 'addSpace' presumably requests a separating space after
// the target (for when data follows) -- confirm
public abstract void writePIStart(String target, boolean addSpace)
throws IOException;
public abstract void writePIEnd()
throws IOException;
/*
////////////////////////////////////////////////////
// Write methods, textual:
////////////////////////////////////////////////////
*/
/**
* @param data Contents of the CDATA section to write out
* @return offset of the (first) illegal content segment ("]]>") in
* passed content and not in repairing mode; or -1 if none or is
* repairing
*/
public abstract int writeCData(String data)
throws IOException, XMLStreamException;
// NOTE(review): presumably same contract as the String variant above,
// for the given segment of the array -- confirm
public abstract int writeCData(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException;
// Methods for writing textual content of an element
public abstract void writeCharacters(String data)
throws IOException;
public abstract void writeCharacters(char[] cbuf, int offset, int len)
throws IOException;
/*
////////////////////////////////////////////////////
// Write methods, non-textual, non-elem/attr:
////////////////////////////////////////////////////
*/
/**
* Method that will try to output the content as specified. If
* the content passed in has embedded "--" in it, it will either
* add an intervening space between consecutive hyphens (if content
* fixing is enabled), or return the offset of the first hyphen in
* multi-hyphen sequence.
*/
public abstract int writeComment(String data)
throws IOException, XMLStreamException;
/**
* Older "legacy" output method for outputting DOCTYPE declaration.
* Assumes that the passed-in String contains a complete DOCTYPE
* declaration properly quoted.
*/
public abstract void writeDTD(String data)
throws IOException, XMLStreamException;
/**
* Output method for DOCTYPE declaration for which the parts (root
* name, identifiers, internal subset) are passed separately.
*/
public abstract void writeDTD(String rootName,
String systemId, String publicId,
String internalSubset)
throws IOException, XMLStreamException;
public abstract void writeEntityReference(String name)
throws IOException, XMLStreamException;
// NOTE(review): int return value presumably follows the convention of
// writeComment/writeCData (offset of illegal content, or -1) -- confirm
public abstract int writePI(String target, String data)
throws IOException, XMLStreamException;
public abstract void writeXmlDeclaration(String version, String enc, String standalone)
throws IOException;
/*
////////////////////////////////////////////////////
// Write methods, elements
////////////////////////////////////////////////////
*/
/**
 * Returns a {@link Writer} view of this XmlWriter, created on first
 * call and reused afterwards, that routes all calls to the matching
 * {@code writeRaw} method.
 */
public final Writer wrapAsRawWriter()
{
    XmlWriterWrapper wrapper = mRawWrapper;
    if (wrapper == null) { // first call, need to construct
        wrapper = XmlWriterWrapper.wrapWriteRaw(this);
        mRawWrapper = wrapper;
    }
    return wrapper;
}
/**
 * Returns a {@link Writer} view of this XmlWriter, created on first
 * call and reused afterwards, that routes all calls to the matching
 * {@code writeCharacters} method.
 */
public final Writer wrapAsTextWriter()
{
    XmlWriterWrapper wrapper = mTextWrapper;
    if (wrapper == null) { // first call, need to construct
        wrapper = XmlWriterWrapper.wrapWriteCharacters(this);
        mTextWrapper = wrapper;
    }
    return wrapper;
}
/*
////////////////////////////////////////////////////
// Helper methods for sub-classes
////////////////////////////////////////////////////
*/
/**
* Method called to verify that the name is a legal XML name.
* Problems are reported via {@code reportNwfName}, which by default
* throws; since that method is overridable, execution continues to the
* following checks if an override returns normally.
*
* @param name Name to check
* @param checkNs Passed as is to
*   {@code WstxInputData.findIllegalNameChar}; presumably enables
*   namespace-aware checking (handling of colons) -- confirm
*/
public final void verifyNameValidity(String name, boolean checkNs)
throws XMLStreamException
{
/* No empty names... caller must have dealt with optional arguments
* prior to calling this method
*/
if (name == null || name.length() == 0) {
reportNwfName(ErrorConsts.WERR_NAME_EMPTY);
}
// Negative index means that no illegal character was found
int illegalIx = WstxInputData.findIllegalNameChar(name, checkNs, mXml11);
if (illegalIx >= 0) {
// First character gets its own, more specific error message
if (illegalIx == 0) {
reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_FIRST_CHAR,
WstxInputData.getCharDesc(name.charAt(0)));
}
reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_CHAR,
WstxInputData.getCharDesc(name.charAt(illegalIx)));
}
}
/**
* This is the method called when an output method call violates
* name well-formedness checks
* and {@link WstxOutputProperties#P_OUTPUT_VALIDATE_NAMES}
* is enabled. Default implementation throws an exception via
* {@code throwOutputError}.
*/
protected void reportNwfName(String msg)
throws XMLStreamException
{
throwOutputError(msg);
}
/**
* Variant of {@link #reportNwfName(String)} for messages that take
* a single format argument.
*/
protected void reportNwfName(String msg, Object arg)
throws XMLStreamException
{
throwOutputError(msg, arg);
}
/**
* Method for reporting content well-formedness problems; default
* implementation throws an exception via {@code throwOutputError}.
*/
protected void reportNwfContent(String msg)
throws XMLStreamException
{
throwOutputError(msg);
}
/**
* Flushes pending output and then throws an {@link XMLStreamException}
* with given message. This method never returns normally.
*
* @throws XMLStreamException Always; a {@code WstxIOException} wrapping
*   the I/O problem if the flush itself fails
*/
protected void throwOutputError(String msg)
throws XMLStreamException
{
// First, let's flush any output we may have, to make debugging easier
try {
flush();
} catch (IOException ioe) {
// Flush failure takes precedence over the original error
throw new WstxIOException(ioe);
}
throw new XMLStreamException(msg);
}
/**
 * Variant of {@link #throwOutputError(String)} that first builds the
 * message by applying {@link MessageFormat} to given pattern, with
 * given value as the only format argument.
 */
protected void throwOutputError(String format, Object arg)
    throws XMLStreamException
{
    // 'arg' is declared as Object, so it is passed as the single vararg element
    throwOutputError(MessageFormat.format(format, arg));
}
/**
 * Handles an invalid character found in textual content that was
 * requested to be output (text, CDATA, attribute value, comment or
 * processing instruction data). Handling is delegated to the handler
 * configured via
 * {@link WstxOutputProperties#P_OUTPUT_INVALID_CHAR_HANDLER}; if none
 * has been configured, to the shared failing handler.
 *
 * @param c Invalid character encountered
 * @return Character the handler returns for the invalid one
 */
protected char handleInvalidChar(int c)
    throws IOException
{
    // Pending output is flushed first, to make debugging easier
    flush();
    final InvalidCharHandler configured = mConfig.getInvalidCharHandler();
    final InvalidCharHandler effective = (configured != null)
        ? configured
        : InvalidCharHandler.FailingHandler.getInstance();
    return effective.convertInvalidChar(c);
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/sw/XmlWriterWrapper.java 0000664 0000000 0000000 00000010636 13257562550 0030017 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.*;
/**
 * Adapter that makes an {@link XmlWriter} look like a {@link Writer}.
 * It is needed by the (legacy) character quoting system introduced for
 * Woodstox 2.0, which relies on having a Writer to use for outputting.
 * Two flavors exist, created via the static factory methods: one that
 * forwards to {@code writeRaw} methods, and one that forwards to
 * {@code writeCharacters} methods.
 */
public abstract class XmlWriterWrapper
    extends Writer
{
    /** Writer that all calls get forwarded to */
    protected final XmlWriter mWriter;

    /** Single-character scratch buffer for {@link #write(int)}; lazily allocated */
    private char[] mBuffer = null;

    protected XmlWriterWrapper(XmlWriter writer)
    {
        mWriter = writer;
    }

    /**
     * @return Wrapper that forwards all output to {@code writeRaw}
     *   methods of given writer
     */
    public static XmlWriterWrapper wrapWriteRaw(XmlWriter xw)
    {
        return new RawWrapper(xw);
    }

    /**
     * @return Wrapper that forwards all output to {@code writeCharacters}
     *   methods of given writer
     */
    public static XmlWriterWrapper wrapWriteCharacters(XmlWriter xw)
    {
        return new TextWrapper(xw);
    }

    /*
    //////////////////////////////////////////////////
    // Writer API
    //////////////////////////////////////////////////
     */

    @Override
    public final void close() throws IOException
    {
        // Not a forced close: wrapped writer is called with 'false'
        mWriter.close(false);
    }

    @Override
    public final void flush() throws IOException
    {
        mWriter.flush();
    }

    /* 30-Nov-2006, TSa: append(char) is intentionally NOT overridden.
     * Due to co-variance between Appendable and Writer, an override would
     * not compile with javac 1.5 in 1.4 mode (source and target set to
     * "1.4"); and since the base implementation is just fine, there is no
     * point in overriding it.
     */

    @Override
    public final void write(char[] cbuf) throws IOException
    {
        write(cbuf, 0, cbuf.length);
    }

    @Override
    public abstract void write(char[] cbuf, int off, int len) throws IOException;

    @Override
    public final void write(int c) throws IOException
    {
        char[] single = mBuffer;
        if (single == null) {
            single = new char[1];
            mBuffer = single;
        }
        single[0] = (char) c;
        write(single, 0, 1);
    }

    @Override
    public abstract void write(String str) throws IOException;

    @Override
    public abstract void write(String str, int off, int len) throws IOException;

    /*
    //////////////////////////////////////////////////
    // Implementation classes
    //////////////////////////////////////////////////
     */

    /**
     * This wrapper directs calls to {@code writeRaw} methods. Thus,
     * it is a "vanilla" writer, and no escaping is done.
     */
    private final static class RawWrapper
        extends XmlWriterWrapper
    {
        protected RawWrapper(XmlWriter writer)
        {
            super(writer);
        }

        @Override
        public void write(char[] cbuf, int off, int len) throws IOException
        {
            mWriter.writeRaw(cbuf, off, len);
        }

        @Override
        public final void write(String str) throws IOException
        {
            mWriter.writeRaw(str, 0, str.length());
        }

        @Override
        public void write(String str, int off, int len) throws IOException
        {
            mWriter.writeRaw(str, off, len);
        }
    }

    /**
     * This wrapper directs calls to {@code writeCharacters} methods.
     * This means that text content escaping (and, possibly, validation)
     * is done, using default or custom escaping code.
     */
    private static class TextWrapper
        extends XmlWriterWrapper
    {
        protected TextWrapper(XmlWriter writer)
        {
            super(writer);
        }

        @Override
        public void write(char[] cbuf, int off, int len) throws IOException
        {
            mWriter.writeCharacters(cbuf, off, len);
        }

        @Override
        public void write(String str) throws IOException
        {
            mWriter.writeCharacters(str);
        }

        @Override
        public void write(String str, int off, int len) throws IOException
        {
            // There is no (String, offset, length) variant to call, so the
            // segment is extracted first
            final String segment = str.substring(off, off + len);
            mWriter.writeCharacters(segment);
        }
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/ 0000775 0000000 0000000 00000000000 13257562550 0024174 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/ArgUtil.java 0000664 0000000 0000000 00000005762 13257562550 0026420 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
/**
 * Simple static utility class that contains (static) utility methods useful
 * when parsing non-typesafe arguments (String-only configuration, command
 * line args).
 */
public final class ArgUtil
{
    private ArgUtil() { }

    /**
     * Converts a loosely typed property value into a boolean.
     *
     * @param prop Name of the property; only used for error messages
     * @param value Value to convert: null (taken to mean false), a
     *   {@link Boolean}, or one of Strings "true" and "false" (compared
     *   ignoring case)
     * @return Boolean value of the argument
     * @throws IllegalArgumentException If the value is of unsupported
     *   type, or is a String other than "true" or "false"
     */
    public static boolean convertToBoolean(String prop, Object value)
    {
        if (value == null) {
            return false;
        }
        if (value instanceof Boolean) {
            return ((Boolean) value).booleanValue();
        }
        if (value instanceof String) {
            String str = (String) value;
            if (str.equalsIgnoreCase("false")) {
                return false;
            }
            if (str.equalsIgnoreCase("true")) {
                return true;
            }
            throw new IllegalArgumentException("Invalid String value for property '"+prop+"': expected Boolean value.");
        }
        throw new IllegalArgumentException("Invalid value type ("+value.getClass()+") for property '"+prop+"': expected Boolean value.");
    }

    /**
     * Converts a loosely typed property value into an int, and verifies
     * that it is not below the given minimum.
     *
     * @param prop Name of the property; only used for error messages
     * @param value Value to convert: null (taken to mean 0), a
     *   {@link Number} (narrowed with {@link Number#intValue()}, so
     *   fractions and out-of-range values are silently truncated), or
     *   a String that {@link Integer#parseInt(String)} accepts
     * @param minValue Smallest acceptable value
     * @return Converted value; always at least {@code minValue}
     * @throws IllegalArgumentException If the value is of unsupported
     *   type, is an unparseable String (the original
     *   {@link NumberFormatException} is available as the cause), or is
     *   below {@code minValue}
     */
    public static int convertToInt(String prop, Object value, int minValue)
    {
        int i;

        if (value == null) {
            i = 0;
        } else if (value instanceof Number) {
            i = ((Number) value).intValue();
        } else if (value instanceof String) {
            try {
                i = Integer.parseInt((String) value);
            } catch (NumberFormatException nex) {
                // Keep the parse failure as the cause, so that details are not lost
                throw new IllegalArgumentException("Invalid String value for property '"+prop+"': expected a number (Integer).", nex);
            }
        } else {
            throw new IllegalArgumentException("Invalid value type ("+value.getClass()+") for property '"+prop+"': expected Integer value.");
        }

        // Note: also applied to the default (0) used for null
        if (i < minValue) {
            throw new IllegalArgumentException("Invalid numeric value ("+i
                    +") for property '"+prop
                    +"': minimum is "+minValue+".");
        }
        return i;
    }

    /**
     * Converts a loosely typed property value into a long, and verifies
     * that it is not below the given minimum.
     *
     * @param prop Name of the property; only used for error messages
     * @param value Value to convert: null (taken to mean 0), a
     *   {@link Number} (narrowed with {@link Number#longValue()}), or
     *   a String that {@link Long#parseLong(String)} accepts
     * @param minValue Smallest acceptable value
     * @return Converted value; always at least {@code minValue}
     * @throws IllegalArgumentException If the value is of unsupported
     *   type, is an unparseable String (the original
     *   {@link NumberFormatException} is available as the cause), or is
     *   below {@code minValue}
     */
    public static long convertToLong(String prop, Object value, long minValue)
    {
        long i;

        if (value == null) {
            i = 0;
        } else if (value instanceof Number) {
            i = ((Number) value).longValue();
        } else if (value instanceof String) {
            try {
                i = Long.parseLong((String) value);
            } catch (NumberFormatException nex) {
                // Keep the parse failure as the cause, so that details are not lost
                throw new IllegalArgumentException("Invalid String value for property '"+prop+"': expected a number (Long).", nex);
            }
        } else {
            throw new IllegalArgumentException("Invalid value type ("+value.getClass()+") for property '"+prop+"': expected Long value.");
        }

        // Note: also applied to the default (0) used for null
        if (i < minValue) {
            throw new IllegalArgumentException("Invalid numeric value ("+i
                    +") for property '"+prop
                    +"': minimum is "+minValue+".");
        }
        return i;
    }
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/BaseNsContext.java 0000664 0000000 0000000 00000010361 13257562550 0027560 0 ustar 00root root 0000000 0000000 /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.util;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Namespace;
import com.ctc.wstx.cfg.ErrorConsts;
/**
* Abstract base class that defines extra features defined by most
* NamespaceContext implementations Wodstox uses.
*/
public abstract class BaseNsContext
implements NamespaceContext
{
/**
* This is the URI returned for default namespace, when it hasn't
* been explicitly declared; could be either "" or null.
*/
protected final static String UNDECLARED_NS_URI = "";
/*
/////////////////////////////////////////////
// NamespaceContext API
/////////////////////////////////////////////
*/
/**
 * Resolves given prefix into the namespace URI it is bound to. The two
 * predefined prefixes ("xml" and "xmlns") are handled here; everything
 * else is delegated to {@code doGetNamespaceURI}.
 *
 * @throws IllegalArgumentException If prefix is null
 */
@Override
public final String getNamespaceURI(String prefix)
{
    if (prefix == null) {
        throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
    }
    // Predefined bindings first (neither can match the empty prefix)
    if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
        return XMLConstants.XML_NS_URI;
    }
    if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
        return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
    }
    return doGetNamespaceURI(prefix);
}
/**
 * Finds a prefix that is bound to given namespace URI. The two
 * predefined namespace URIs (those of "xml" and "xmlns") are handled
 * here; everything else is delegated to {@code doGetPrefix}.
 *
 * @param nsURI Namespace URI to find a prefix for; must not be null
 *   or empty
 * @throws IllegalArgumentException If the namespace URI is null or empty
 */
@Override
public final String getPrefix(String nsURI)
{
    /* First the known offenders; invalid args, 2 predefined xml namespace
     * URIs
     */
    if (nsURI == null || nsURI.length() == 0) {
        // Message used to name "prefix", although the argument is the URI
        throw new IllegalArgumentException("Illegal to pass null/empty namespace URI as argument.");
    }
    if (nsURI.equals(XMLConstants.XML_NS_URI)) {
        return XMLConstants.XML_NS_PREFIX;
    }
    if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
        return XMLConstants.XMLNS_ATTRIBUTE;
    }
    return doGetPrefix(nsURI);
}
@Override
public final IteratormBuckets.length - 1
, when mBuckets.length is
* a power of two.
*/
protected int mIndexMask;
/*
////////////////////////////////////////
// Linked list info
////////////////////////////////////////
*/
// First entry of the linked list of referenced-but-undefined ids; this
// is what getFirstUndefined() returns. Null if the list is empty.
protected ElementId mHead;
// Last entry of the same list; new undefined ids are linked after it
protected ElementId mTail;
/*
////////////////////////////////////////
// Life-cycle:
////////////////////////////////////////
*/
/**
 * Constructs a map that uses the default initial size
 * ({@code DEFAULT_SIZE}).
 */
public ElementIdMap()
{
this(DEFAULT_SIZE);
}
/**
 * Constructs a map with a hash table of at least the given size; the
 * actual size is {@code MIN_SIZE} doubled until it is big enough.
 * This constructor is mainly used for testing, as it can be sized
 * appropriately to test rehashing etc.
 *
 * @param initialSize Minimum number of hash buckets to allocate
 */
public ElementIdMap(int initialSize)
{
    int capacity = MIN_SIZE;
    while (capacity < initialSize) {
        capacity <<= 1;
    }
    mTable = new ElementId[capacity];
    // With doubling from the minimum size, mask is simply size minus one
    mIndexMask = capacity - 1;
    mSizeThreshold = (capacity * FILL_PCT) / 100;
    mSize = 0;
    // Linked list of undefined ids starts out empty
    mHead = null;
    mTail = null;
}
/*
////////////////////////////////////////////////////
// Public API
////////////////////////////////////////////////////
*/
/**
 * @return Head of the linked list of undefined ids; null if the list
 *   is empty
 */
public ElementId getFirstUndefined()
{
/* Since the linked list is pruned to always start with
* the first (in doc order) undefined id, we can just
* return head:
*/
return mHead;
}
/**
 * Method called when a reference to an id is encountered. If an entry
 * (reference or definition) for the id already exists, it is returned
 * as is; otherwise a new reference marker is added to the map, and
 * appended to the linked list of undefined ids.
 *
 * @param buffer Buffer that contains characters of the id
 * @param start Offset of the first character of the id
 * @param len Number of characters in the id
 * @param hash Hash code of the id, as calculated by the caller
 * @return Existing or newly added entry for the id
 */
public ElementId addReferenced(char[] buffer, int start, int len, int hash,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    int bucket = (hash & mIndexMask);
    for (ElementId curr = mTable[bucket]; curr != null; curr = curr.nextColliding()) {
        if (curr.idMatches(buffer, start, len)) { // found existing one
            return curr;
        }
    }

    // Not found: placeholder needed. But is there room for it?
    if (mSize >= mSizeThreshold) {
        rehash();
        // Table size changed, so bucket has to be recalculated
        bucket = (hash & mIndexMask);
    }
    ++mSize;

    final ElementId placeholder = new ElementId(new String(buffer, start, len),
            loc, false, elemName, attrName);

    // All ids are reachable via the hash table: added as head of the bucket chain
    placeholder.setNextColliding(mTable[bucket]);
    mTable[bucket] = placeholder;

    // And undefined ones are additionally appended at the end of the list
    if (mHead == null) {
        mHead = placeholder;
    } else {
        mTail.linkUndefined(placeholder);
    }
    mTail = placeholder;
    return placeholder;
}
/**
 * Variant of the reference-adding method that takes the id as a String,
 * and calculates the hash code itself (using {@code calcHash}).
 * If no entry exists for the id, a new entry marked as not defined is
 * added both to the hash table and to the end of the linked list of
 * undefined ids.
 *
 * @param idStr Id that was referenced
 * @param loc Location passed to the entry, if one gets created
 * @param elemName Element name passed to the entry, if one gets created
 * @param attrName Attribute name passed to the entry, if one gets created
 *
 * @return Existing entry if there was one; otherwise the newly
 *   created placeholder entry
 */
public ElementId addReferenced(String idStr,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    final int hash = calcHash(idStr);

    // Existing entry, if any, is in the collision chain of the bucket
    for (ElementId curr = mTable[hash & mIndexMask]; curr != null;
            curr = curr.nextColliding()) {
        if (curr.idMatches(idStr)) {
            return curr;
        }
    }

    // No such entry; make room first if the table is full enough
    if (mSize >= mSizeThreshold) {
        rehash();
    }
    // (calculated only now, since rehashing changes the mask)
    final int bucket = hash & mIndexMask;
    ++mSize;

    final ElementId placeholder = new ElementId(idStr, loc, false, elemName, attrName);
    // All entries get chained to the hash table...
    placeholder.setNextColliding(mTable[bucket]);
    mTable[bucket] = placeholder;

    // ... and undefined ones also get appended to the linked list
    if (mHead == null) {
        mHead = placeholder;
    } else {
        mTail.linkUndefined(placeholder);
    }
    mTail = placeholder;
    return placeholder;
}
/**
 * Method called when an id definition is encountered. There are
 * three possible outcomes:
 *<ul>
 * <li>No entry exists for the id: a new entry marked as defined is added
 *   to the hash table (but not to the linked list of undefined ids)
 *  </li>
 * <li>Entry exists but was only a reference marker: it is marked as
 *   defined; and if it was the head of the undefined list, head is moved
 *   forward past all entries that have been defined
 *  </li>
 * <li>Entry exists and is already defined: it is returned as is, and
 *   caller is expected to check for the duplicate and report the error
 *  </li>
 *</ul>
 *
 * @param buffer Buffer that contains characters of the id
 * @param start Offset of the first character of the id
 * @param len Number of characters in the id
 * @param hash Hash code caller has calculated for the id
 * @param loc Location of the definition
 * @param elemName Element name passed to the entry, if one gets created
 * @param attrName Attribute name passed to the entry, if one gets created
 *
 * @return Entry for the id; either pre-existing or newly created one
 */
public ElementId addDefined(char[] buffer, int start, int len, int hash,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    ElementId id = mTable[hash & mIndexMask];
    while (id != null && !id.idMatches(buffer, start, len)) {
        id = id.nextColliding();
    }

    if (id == null) {
        // Brand new id; only goes to the hash table, as it is defined
        if (mSize >= mSizeThreshold) {
            rehash();
        }
        // (calculated only now, since rehashing changes the mask)
        final int bucket = hash & mIndexMask;
        ++mSize;
        id = new ElementId(new String(buffer, start, len),
                loc, true, elemName, attrName);
        id.setNextColliding(mTable[bucket]);
        mTable[bucket] = id;
        return id;
    }

    // Already defined? Nothing to do here; caller handles duplicates
    if (id.isDefined()) {
        return id;
    }

    // Was just a reference marker so far, needs to be upgraded
    id.markDefined(loc);
    if (id == mHead) {
        /* It was the first undefined one; need to skip it, along with
         * any directly following entries that got defined earlier on.
         */
        do {
            mHead = mHead.nextUndefined();
        } while (mHead != null && mHead.isDefined());
        // Nothing left undefined? Then there is no tail either
        if (mHead == null) {
            mTail = null;
        }
    }
    return id;
}
/**
 * Variant of the definition-adding method that takes the id as a
 * String, and calculates the hash code itself (using {@code calcHash}).
 * If no entry exists, a new one marked as defined is added to the hash
 * table; an existing reference marker is marked as defined (and the
 * undefined list pruned if the entry was its head); an entry that is
 * already defined is returned as is, for caller to report as duplicate.
 *
 * @param idStr Id that is being defined
 * @param loc Location of the definition
 * @param elemName Element name passed to the entry, if one gets created
 * @param attrName Attribute name passed to the entry, if one gets created
 *
 * @return Entry for the id; either pre-existing or newly created one
 */
public ElementId addDefined(String idStr,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    final int hash = calcHash(idStr);
    ElementId id = mTable[hash & mIndexMask];
    while (id != null && !id.idMatches(idStr)) {
        id = id.nextColliding();
    }

    if (id == null) {
        // Brand new id; only goes to the hash table, as it is defined
        if (mSize >= mSizeThreshold) {
            rehash();
        }
        // (calculated only now, since rehashing changes the mask)
        final int bucket = hash & mIndexMask;
        ++mSize;
        id = new ElementId(idStr, loc, true, elemName, attrName);
        id.setNextColliding(mTable[bucket]);
        mTable[bucket] = id;
        return id;
    }

    // Already defined? Nothing to do here; caller handles duplicates
    if (id.isDefined()) {
        return id;
    }

    // Was just a reference marker so far, needs to be upgraded
    id.markDefined(loc);
    if (id == mHead) {
        /* It was the first undefined one; need to skip it, along with
         * any directly following entries that got defined earlier on.
         */
        do {
            mHead = mHead.nextUndefined();
        } while (mHead != null && mHead.isDefined());
        // Nothing left undefined? Then there is no tail either
        if (mHead == null) {
            mTail = null;
        }
    }
    return id;
}
/**
* Implementation of a hashing method for variable length
* Strings. Most of the time intention is that this calculation
* is done by caller during parsing, not here; however, sometimes
* it needs to be done for parsed "String" too.
*
* Finally, rehashing is also more expensive, as hash codes are not
* stored; rehashing requires all entries' hash codes to be recalculated.
* Reason for not storing hash codes is reduced memory usage, hoping
* for better memory locality.
* {@code mBuckets.length - 1}, when {@code mBuckets.length} is
* a power of two.
*/
protected int mIndexMask;
/*
////////////////////////////////////////
// Information about concurrency
////////////////////////////////////////
*/
/**
* Version of this table instance; used when deriving new concurrently
* used versions from existing 'master' instance.
*/
protected int mThisVersion;
/**
* Flag that indicates if any changes have been made to the data;
* used to both determine if bucket array needs to be copied when
* (first) change is made, and potentially if updated bucket list
* is to be resync'ed back to master instance.
*/
protected boolean mDirty;
/*
////////////////////////////////////////
// Life-cycle:
////////////////////////////////////////
*/
/**
* Method for constructing a master symbol table instance; this one
* will create master instance with default size, and with interning
* enabled.
*<p>
* Delegates to {@link #SymbolTable(boolean)}, passing {@code true}.
*/
public SymbolTable() {
this(true);
}
/**
* Method for constructing a master symbol table instance, using
* {@code DEFAULT_TABLE_SIZE} as the initial size.
*
* @param internStrings Whether Strings to add are intern()ed or not
*/
public SymbolTable(boolean internStrings) {
this(internStrings, DEFAULT_TABLE_SIZE);
}
/**
* Method for constructing a master symbol table instance, using
* {@code DEFAULT_FILL_FACTOR} as the fill factor.
*
* @param internStrings Whether Strings to add are intern()ed or not
* @param initialSize Minimum initial size requested; passed as is to
*   the main constructor, which validates and rounds it
*/
public SymbolTable(boolean internStrings, int initialSize) {
this(internStrings, initialSize, DEFAULT_FILL_FACTOR);
}
/**
* Main method for constructing a master symbol table instance; will
* be called by other public constructors.
*
* @param internStrings Whether Strings to add are intern()ed or not
* @param initialSize Minimum initial size for bucket array; internally
* will always use a power of two equal to or bigger than this value.
* @param fillFactor Maximum fill factor allowed for bucket table;
* when more entries are added, table will be expanded.
*/
public SymbolTable(boolean internStrings, int initialSize,
float fillFactor)
{
mInternStrings = internStrings;
// Let's start versions from 1
mThisVersion = 1;
// And we'll also set flags so no copying of buckets is needed:
mDirty = true;
// No point in requesting funny initial sizes...
if (initialSize < 1) {
throw new IllegalArgumentException("Can not use negative/zero initial size: "+initialSize);
}
/* Initial size has to be a power of two. Also, let's not honour
* sizes that are ridiculously small...
*/
{
int currSize = 4;
while (currSize < initialSize) {
currSize += currSize;
}
initialSize = currSize;
}
mSymbols = new String[initialSize];
mBuckets = new Bucket[initialSize >> 1];
// Mask is easy to calc for powers of two.
mIndexMask = initialSize - 1;
mSize = 0;
// Sanity check for fill factor:
if (fillFactor < 0.01f) {
throw new IllegalArgumentException("Fill factor can not be lower than 0.01.");
}
if (fillFactor > 10.0f) { // just to catch stupid values, ie. useless from performance perspective
throw new IllegalArgumentException("Fill factor can not be higher than 10.0.");
}
mSizeThreshold = (int) (initialSize * fillFactor + 0.5);
}
/**
 * Internal constructor used when creating child instances. All state
 * is taken as is from the arguments; arrays are not copied here, and
 * the instance is flagged as not dirty, so that copies of the arrays
 * can be made if/when new entries are added.
 *
 * @param internStrings Whether Strings to add are intern()ed or not
 * @param symbols Symbol array to use
 * @param buckets Bucket array to use
 * @param size Number of entries
 * @param sizeThreshold Entry count at which table is to be expanded
 * @param indexMask Mask used for calculating index from hash
 * @param version Version number for this instance
 */
private SymbolTable(boolean internStrings, String[] symbols,
        Bucket[] buckets, int size, int sizeThreshold,
        int indexMask, int version)
{
    // Configuration and versioning:
    mInternStrings = internStrings;
    mThisVersion = version;

    // Table contents, used as is:
    mSymbols = symbols;
    mBuckets = buckets;

    // Sizing information:
    mSize = size;
    mSizeThreshold = sizeThreshold;
    mIndexMask = indexMask;

    // Nothing modified yet, meaning arrays need to be copied on change
    mDirty = false;
}
/**
* "Factory" method; will create a new child instance of this symbol
* table. It will be a copy-on-write instance, ie. it will only use
* read-only copy of parent's data, but when changes are needed, a
* copy will be created.
*
*
* {@code end - start} equals word length (similar
* to the way {@code String.substring()} has).
*
* @return (Shared) string instance of the word, if it exists in
* the word set; null if not.
*/
@SuppressWarnings("cast")
public String find(char[] str, final int start, final int end)
{
// Looks up the word contained in str, from offset start up to (but not
// including) offset end, by walking the compressed branch data in mData.
// Returns the shared String instance from mWords on match, null otherwise.
char[] data = mData;
// 03-Jan-2006, TSa: Special case; one entry
// (no branch data at all; compare directly against the only word)
if (data == null) {
return findFromOne(str, start, end);
}
int ptr = 0; // pointer to compressed set data
int offset = start;
while (true) {
// At this point ptr refers to the header (entry count) of a branch;
// header is followed by (char, link) pairs.
// End of input String? Need to match the runt entry!
if (offset == end) {
// Runt is marked by CHAR_NULL as the char of the first pair; its
// link is index of the word in mWords, offset by NEGATIVE_OFFSET
if (data[ptr+1] == CHAR_NULL) {
return mWords[data[ptr+2] - NEGATIVE_OFFSET];
}
return null;
}
// Number of (char, link) pairs in this branch; ptr then moves to first pair
int count = data[ptr++];
// Need to find the branch to follow, if any
char c = str[offset++];
inner_block:
do { // dummy loop, need to have break
// Linear or binary search?
if (count < MIN_BINARY_SEARCH) {
// always at least two branches; never less
// (so first two pairs can be checked without checking bounds)
if (data[ptr] == c) {
ptr = (int) data[ptr+1];
break inner_block;
}
if (data[ptr+2] == c) {
ptr = (int) data[ptr+3];
break inner_block;
}
// Each pair takes 2 chars, so this is the end of this branch's pairs
int branchEnd = ptr + (count << 1);
// Starts from entry #3, if such exists
for (ptr += 4; ptr < branchEnd; ptr += 2) {
if (data[ptr] == c) {
ptr = (int) data[ptr+1];
break inner_block;
}
}
return null; // No match!
} else { // Ok, binary search:
// (relies on chars of the pairs being in ascending order)
int low = 0;
int high = count-1;
int mid;
while (low <= high) {
mid = (low + high) >> 1;
// Offset of the char of pair number 'mid'
int ix = ptr + (mid << 1);
int diff = data[ix] - c;
if (diff > 0) { // char was 'higher', need to go down
high = mid-1;
} else if (diff < 0) { // lower, need to go up
low = mid+1;
} else { // match (so far)
ptr = (int) data[ix+1];
break inner_block;
}
}
return null; // No match!
}
} while (false);
// Ok; now, is it the end?
// Links at or above NEGATIVE_OFFSET do not point to another branch,
// but identify the single candidate word (index in mWords)
if (ptr >= NEGATIVE_OFFSET) {
String word = mWords[ptr - NEGATIVE_OFFSET];
int expLen = (end - start);
if (word.length() != expLen) {
return null;
}
// Chars before offset were matched via branches; only need to
// compare the remaining ones
for (int i = offset - start; offset < end; ++i, ++offset) {
if (word.charAt(i) != str[offset]) {
return null;
}
}
return word;
}
// Otherwise link points to the next branch; continue from there
}
// never gets here
}
/**
 * Helper method used for matching input against the first entry of
 * the word array, without using the compressed branch data.
 *
 * @param str Buffer that contains the word to check
 * @param start Offset of the first character of the word
 * @param end Offset after the last character of the word
 *
 * @return The first word of the word array, if its contents equal
 *   the input; null otherwise
 */
private String findFromOne(char[] str, final int start, final int end)
{
    final String only = mWords[0];
    final int expected = end - start;

    // Lengths have to match, before bothering with contents
    if (only.length() != expected) {
        return null;
    }
    int src = start;
    for (int i = 0; i < expected; ++i, ++src) {
        if (str[src] != only.charAt(i)) {
            return null;
        }
    }
    return only;
}
/**
* Looks up given word by walking the compressed branch data in
* <code>mData</code>.
*
* @param str Word to look for
*
* @return (Shared) string instance of the word, if it exists in
* the word set; null if not.
*/
@SuppressWarnings("cast")
public String find(String str)
{
char[] data = mData;
// 03-Jan-2006, TSa: Special case; one entry
// (no branch data at all; compare directly against the only word)
if (data == null) {
String word = mWords[0];
return word.equals(str) ? word : null;
}
int ptr = 0; // pointer to compressed set data
int offset = 0;
int end = str.length();
while (true) {
// At this point ptr refers to the header (entry count) of a branch;
// header is followed by (char, link) pairs.
// End of input String? Need to match the runt entry!
if (offset == end) {
// Runt is marked by CHAR_NULL as the char of the first pair; its
// link is index of the word in mWords, offset by NEGATIVE_OFFSET
if (data[ptr+1] == CHAR_NULL) {
return mWords[data[ptr+2] - NEGATIVE_OFFSET];
}
return null;
}
// Number of (char, link) pairs in this branch; ptr then moves to first pair
int count = data[ptr++];
// Need to find the branch to follow, if any
char c = str.charAt(offset++);
inner_block:
do { // dummy loop, need to have break
// Linear or binary search?
if (count < MIN_BINARY_SEARCH) {
// always at least two branches; never less
// (so first two pairs can be checked without checking bounds)
if (data[ptr] == c) {
ptr = (int) data[ptr+1];
break inner_block;
}
if (data[ptr+2] == c) {
ptr = (int) data[ptr+3];
break inner_block;
}
// Each pair takes 2 chars, so this is the end of this branch's pairs
int branchEnd = ptr + (count << 1);
// Starts from entry #3, if such exists
for (ptr += 4; ptr < branchEnd; ptr += 2) {
if (data[ptr] == c) {
ptr = (int) data[ptr+1];
break inner_block;
}
}
return null; // No match!
} else { // Ok, binary search:
// (relies on chars of the pairs being in ascending order)
int low = 0;
int high = count-1;
int mid;
while (low <= high) {
mid = (low + high) >> 1;
// Offset of the char of pair number 'mid'
int ix = ptr + (mid << 1);
int diff = data[ix] - c;
if (diff > 0) { // char was 'higher', need to go down
high = mid-1;
} else if (diff < 0) { // lower, need to go up
low = mid+1;
} else { // match (so far)
ptr = (int) data[ix+1];
break inner_block;
}
}
return null; // No match!
}
} while (false);
// Ok; now, is it the end?
// Links at or above NEGATIVE_OFFSET do not point to another branch,
// but identify the single candidate word (index in mWords)
if (ptr >= NEGATIVE_OFFSET) {
String word = mWords[ptr - NEGATIVE_OFFSET];
if (word.length() != str.length()) {
return null;
}
// Chars before offset were matched via branches; only need to
// compare the remaining ones
for (; offset < end; ++offset) {
if (word.charAt(offset) != str.charAt(offset)) {
return null;
}
}
return word;
}
// Otherwise link points to the next branch; continue from there
}
// never gets here
}
/*
////////////////////////////////////////////////
// Re-defined public methods
////////////////////////////////////////////////
*/
/**
 * Returns all entries of the word array, in array order, separated
 * by a comma and a space.
 */
@Override
public String toString()
{
    final String[] words = mWords;
    // Rough size estimate: 8 chars per word, plus some slack
    final StringBuilder out = new StringBuilder(16 + (words.length << 3));
    String separator = "";
    for (String word : words) {
        out.append(separator).append(word);
        separator = ", ";
    }
    return out.toString();
}
/*
////////////////////////////////////////////////
// Helper classes
////////////////////////////////////////////////
*/
private final static class Builder
{
final String[] mWords;
char[] mData;
/**
* Number of characters currently used from mData
*/
int mSize;
public Builder(TreeSetend - start - 1
*/
@SuppressWarnings("cast")
private void constructBranch(int charIndex, int start, int end)
{
// Appends to mData the branch structure for words with indexes from
// start up to (but not including) end, branching on the character at
// position charIndex. Output is: entry count, followed by (char, link)
// pairs; works in two passes, with the first one storing group sizes
// in the link slots, and the second one replacing them with the links.
// Note: mData is always accessed via the field, since expand() (called
// here and by recursive calls) replaces the array.
// If more than one entry, need to divide into groups
// First, need to add placeholder for branch count:
if (mSize >= mData.length) {
expand(1);
}
mData[mSize++] = 0; // placeholder!
/* structStart will point to second char of first entry
* (which will temporarily have entry count, eventually 'link'
* to continuation)
*/
int structStart = mSize + 1;
int groupCount = 0;
int groupStart = start;
String[] words = mWords;
boolean gotRunt;
/* First thing we need to do is a special check for the
* first entry -- it may be "runt" word, one that has no
* more chars but also has a longer version ("id" vs.
* "identifier"). If so, it needs to be marked; this is done
* by adding a special entry before other entries (since such
* entry would always be ordered first alphabetically)
*/
if (words[groupStart].length() == charIndex) { // yup, got one:
if ((mSize + 2) > mData.length) {
expand(2);
}
/* First null marks the "missing" char (or, end-of-word);
* and then we need the index
*/
mData[mSize++] = CHAR_NULL;
mData[mSize++] = (char) (NEGATIVE_OFFSET + groupStart);
// Ok, let's then ignore that entry
++groupStart;
++groupCount;
gotRunt = true;
} else {
gotRunt = false;
}
// Ok, then, let's find the ('real') groupings:
// (first pass: a group is a run of consecutive words that have the
// same character at charIndex)
while (groupStart < end) {
// Inner loop, let's find the group:
char c = words[groupStart].charAt(charIndex);
int j = groupStart+1;
while (j < end && words[j].charAt(charIndex) == c) {
++j;
}
/* Ok, let's store the char in there, along with count;
* count will be needed in second, and will then get
* overwritten with actual data later on
*/
if ((mSize + 2) > mData.length) {
expand(2);
}
mData[mSize++] = c;
mData[mSize++] = (char) (j - groupStart); // entries in group
groupStart = j;
++groupCount;
}
/* Ok, groups found; need to loop through them, recursively
* calling branch and/or leaf methods
*/
// first let's output the header, ie. group count:
// (structStart-2 is the slot of the placeholder added at the beginning)
mData[structStart-2] = (char) groupCount;
groupStart = start;
// Do we have the "runt" to skip?
// (its entry is already complete, no group size to replace)
if (gotRunt) {
structStart += 2;
++groupStart;
}
// Pairs of this branch end here; data for sub-branches gets appended after
int structEnd = mSize;
++charIndex;
// Second pass: replace group sizes with links
for (; structStart < structEnd; structStart += 2) {
groupCount = (int) mData[structStart]; // no sign expansion, is ok
/* Ok, count gotten, can either create a branch (if more than
* one entry) or leaf (just one entry)
*/
if (groupCount == 1) {
// Leaf link: index of the word, offset by NEGATIVE_OFFSET
mData[structStart] = (char) (NEGATIVE_OFFSET + groupStart);
} else {
// Branch link: offset at which the sub-branch will be written
mData[structStart] = (char) mSize;
constructBranch(charIndex, groupStart,
groupStart + groupCount);
}
groupStart += groupCount;
}
// done!
}
/**
 * Replaces the data buffer with a bigger one, retaining current
 * contents. Buffer size is doubled while it is below 4096 chars, and
 * increased by 50% after that.
 *
 * @param needSpace Number of additional chars (beyond the
 *   <code>mSize</code> ones already in use) that have to fit in
 *
 * @return The new data buffer
 */
private char[] expand(int needSpace)
{
    final char[] previous = mData;
    final int oldLen = previous.length;
    int grownLen = (oldLen < 4096) ? (oldLen + oldLen) : (oldLen + (oldLen >> 1));

    // Should always be big enough already; but better safe than sorry
    final int required = mSize + needSpace;
    if (grownLen < required) {
        grownLen = required + 64;
    }
    final char[] grown = new char[grownLen];
    System.arraycopy(previous, 0, grown, 0, oldLen);
    mData = grown;
    return grown;
}
}
/*
////////////////////////////////////////////////////
// Simple test driver, useful for debugging
// (uncomment if needed -- commented out so it won't
// affect coverage testing)
////////////////////////////////////////////////////
*/
/*
public static void main(String[] args)
{
if (args.length < 2) {
System.err.println("Usage: "+WordResolver.class+" word1 [word2] ... [wordN] keyword");
System.exit(1);
}
String key = args[args.length-1];
TreeSet words = new TreeSet();
for (int i = 0; i < args.length-1; ++i) {
words.add(args[i]);
}
WordResolver set = WordResolver.constructInstance(words);
//outputData(set.mData);
// Ok, and then the test!
char[] keyA = new char[key.length() + 4];
key.getChars(0, key.length(), keyA, 2);
//System.out.println("Word '"+key+"' found via array search: "+WordResolver.find(data, keyA, 2, key.length() + 2));
System.out.println("Word '"+key+"' found via array search: "+set.find(keyA, 2, key.length() + 2));
}
static void outputData(char[] data)
{
for (int i = 0; i < data.length; ++i) {
char c = data[i];
System.out.print(Integer.toHexString(i)+" ["+Integer.toHexString(c)+"]");
if (c > 32 && c <= 127) { // printable char (letter)
System.out.println(" -> '"+c+"'");
} else {
System.out.println();
}
}
}
*/
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/WordSet.java 0000664 0000000 0000000 00000037630 13257562550 0026437 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
import java.util.*;
/**
* An efficient (both memory and time) implementation of a Set used to
* verify that a given
* word is contained within the set. The general usage pattern is expected
* to be such that most checks are positive, ie. that the word indeed
* is contained in the set.
*
*
*end - start - 1
*/
@SuppressWarnings("cast")
private void constructBranch(int charIndex, int start, int end)
{
// Appends to mData the branch structure for words with indexes from
// start up to (but not including) end, branching on the character at
// position charIndex. Output is: entry count, followed by (char, link)
// pairs; works in two passes, with the first one storing group sizes
// in the link slots, and the second one replacing them with the links.
// Note: mData is always accessed via the field, since expand() (called
// here, by constructLeaf() and by recursive calls) replaces the array.
// If more than one entry, need to divide into groups
// First, need to add placeholder for branch count:
if (mSize >= mData.length) {
expand(1);
}
mData[mSize++] = 0; // placeholder!
/* structStart will point to second char of first entry
* (which will temporarily have entry count, eventually 'link'
* to continuation)
*/
int structStart = mSize + 1;
int groupCount = 0;
int groupStart = start;
String[] words = mWords;
/* First thing we need to do is a special check for the
* first entry -- it may be "runt" word, one that has no
* more chars but also has a longer version ("id" vs.
* "identifier"). If there is such a word, it'll always
* be first in alphabetic ordering:
*/
if (words[groupStart].length() == charIndex) { // yup, got one:
if ((mSize + 2) > mData.length) {
expand(2);
}
/* Nulls mark both imaginary branching null char and
* "missing link" to the rest
*/
mData[mSize++] = CHAR_NULL;
mData[mSize++] = CHAR_NULL;
// Ok, let's then ignore that entry
++groupStart;
++groupCount;
}
// Ok, then, let's find the ('real') groupings:
// (first pass: a group is a run of consecutive words that have the
// same character at charIndex)
while (groupStart < end) {
// Inner loop, let's find the group:
char c = words[groupStart].charAt(charIndex);
int j = groupStart+1;
while (j < end && words[j].charAt(charIndex) == c) {
++j;
}
/* Ok, let's store the char in there, along with count;
* count will be needed in second, and will then get
* overwritten with actual data later on
*/
if ((mSize + 2) > mData.length) {
expand(2);
}
mData[mSize++] = c;
mData[mSize++] = (char) (j - groupStart); // entries in group
groupStart = j;
++groupCount;
}
/* Ok, groups found; need to loop through them, recursively
* calling branch and/or leaf methods
*/
// first let's output the header, ie. group count:
// (structStart-2 is the slot of the placeholder added at the beginning)
mData[structStart-2] = (char) groupCount;
groupStart = start;
// Do we have the "runt" to skip?
// (link slot of a real group holds group size, which is at least 1,
// so CHAR_NULL there can only be the runt entry)
if (mData[structStart] == CHAR_NULL) {
structStart += 2;
++groupStart;
}
// Pairs of this branch end here; data for leaves and sub-branches
// gets appended after
int structEnd = mSize;
++charIndex;
// Second pass: replace group sizes with links
for (; structStart < structEnd; structStart += 2) {
groupCount = (int) mData[structStart]; // no sign expansion, is ok
// Ok, count gotten, can now put the 'link' (pointer) in there
// (offset at which the leaf or sub-branch will be written)
mData[structStart] = (char) mSize;
if (groupCount == 1) {
/* One optimization; if it'd lead to a single runt
* entry, we can just add 'null' link:
*/
String word = words[groupStart];
if (word.length() == charIndex) {
mData[structStart] = CHAR_NULL;
} else { // otherwise, let's just create end state:
constructLeaf(charIndex, groupStart);
}
} else {
constructBranch(charIndex, groupStart,
groupStart + groupCount);
}
groupStart += groupCount;
}
// done!
}
/**
 * Method called to add leaf entry to word set; basically
 * "here is the rest of the only matching word". Entry consists of
 * a header char (number of remaining chars, offset by
 * <code>NEGATIVE_OFFSET</code>), followed by the remaining chars.
 *
 * @param charIndex Index of the first character of the word that
 *   has not yet been output via branches
 * @param wordIndex Index of the word in the word array
 */
private void constructLeaf(int charIndex, int wordIndex)
{
    final String word = mWords[wordIndex];
    final int total = word.length();

    // need room for 1 header char, rest of the word
    char[] buf = mData;
    if ((mSize + total + 1) >= buf.length) {
        buf = expand(total + 1);
    }
    int pos = mSize;
    buf[pos++] = (char) (NEGATIVE_OFFSET + (total - charIndex));
    for (int i = charIndex; i < total; ++i) {
        buf[pos++] = word.charAt(i);
    }
    mSize = pos;
}
/**
 * Replaces the data buffer with a bigger one, retaining current
 * contents. Buffer size is doubled while it is below 4096 chars, and
 * increased by 50% after that.
 *
 * @param needSpace Number of additional chars (beyond the
 *   <code>mSize</code> ones already in use) that have to fit in
 *
 * @return The new data buffer
 */
private char[] expand(int needSpace)
{
    final char[] previous = mData;
    final int oldLen = previous.length;
    int grownLen = (oldLen < 4096) ? (oldLen + oldLen) : (oldLen + (oldLen >> 1));

    // Should always be big enough already; but better safe than sorry
    final int required = mSize + needSpace;
    if (grownLen < required) {
        grownLen = required + 64;
    }
    final char[] grown = new char[grownLen];
    System.arraycopy(previous, 0, grown, 0, oldLen);
    mData = grown;
    return grown;
}
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/XmlChars.java 0000664 0000000 0000000 00000053522 13257562550 0026567 0 ustar 00root root 0000000 0000000 package com.ctc.wstx.util;
/**
* Simple utility class that encapsulates logic of determining validity
* of characters outside basic 7-bit range of Unicode, for XML 1.0
*/
public final class XmlChars
{
// We don't need full 64k bits... (0x80 - 0x312C) / 32. But to
// simplify things, let's just include first 0x80 entries in there etc
final static int SIZE = (0x3140 >> 5); // 32 bits per int
final static int[] sXml10StartChars = new int[SIZE];
static {
SETBITS(sXml10StartChars, 0xC0, 0xD6);
SETBITS(sXml10StartChars, 0xD8, 0xF6);
SETBITS(sXml10StartChars, 0xF8, 0xFF);
SETBITS(sXml10StartChars, 0x100, 0x131);
SETBITS(sXml10StartChars, 0x134, 0x13e);
SETBITS(sXml10StartChars, 0x141, 0x148);
SETBITS(sXml10StartChars, 0x14a, 0x17e);
SETBITS(sXml10StartChars, 0x180, 0x1c3);
SETBITS(sXml10StartChars, 0x1cd, 0x1f0);
SETBITS(sXml10StartChars, 0x1f4, 0x1f5);
SETBITS(sXml10StartChars, 0x1fa, 0x217);
SETBITS(sXml10StartChars, 0x250, 0x2a8);
SETBITS(sXml10StartChars, 0x2bb, 0x2c1);
SETBITS(sXml10StartChars, 0x386);
SETBITS(sXml10StartChars, 0x388, 0x38a);
SETBITS(sXml10StartChars, 0x38c);
SETBITS(sXml10StartChars, 0x38e, 0x3a1);
SETBITS(sXml10StartChars, 0x3a3, 0x3ce);
SETBITS(sXml10StartChars, 0x3d0, 0x3d6);
SETBITS(sXml10StartChars, 0x3da);
SETBITS(sXml10StartChars, 0x3dc);
SETBITS(sXml10StartChars, 0x3de);
SETBITS(sXml10StartChars, 0x3e0);
SETBITS(sXml10StartChars, 0x3e2, 0x3f3);
SETBITS(sXml10StartChars, 0x401, 0x40c);
SETBITS(sXml10StartChars, 0x40e, 0x44f);
SETBITS(sXml10StartChars, 0x451, 0x45c);
SETBITS(sXml10StartChars, 0x45e, 0x481);
SETBITS(sXml10StartChars, 0x490, 0x4c4);
SETBITS(sXml10StartChars, 0x4c7, 0x4c8);
SETBITS(sXml10StartChars, 0x4cb, 0x4cc);
SETBITS(sXml10StartChars, 0x4d0, 0x4eb);
SETBITS(sXml10StartChars, 0x4ee, 0x4f5);
SETBITS(sXml10StartChars, 0x4f8, 0x4f9);
SETBITS(sXml10StartChars, 0x531, 0x556);
SETBITS(sXml10StartChars, 0x559);
SETBITS(sXml10StartChars, 0x561, 0x586);
SETBITS(sXml10StartChars, 0x5d0, 0x5ea);
SETBITS(sXml10StartChars, 0x5f0, 0x5f2);
SETBITS(sXml10StartChars, 0x621, 0x63a);
SETBITS(sXml10StartChars, 0x641, 0x64a);
SETBITS(sXml10StartChars, 0x671, 0x6b7);
SETBITS(sXml10StartChars, 0x6ba, 0x6be);
SETBITS(sXml10StartChars, 0x6c0, 0x6ce);
SETBITS(sXml10StartChars, 0x6d0, 0x6d3);
SETBITS(sXml10StartChars, 0x6d5);
SETBITS(sXml10StartChars, 0x6e5, 0x6e6);
SETBITS(sXml10StartChars, 0x905, 0x939);
SETBITS(sXml10StartChars, 0x93d);
SETBITS(sXml10StartChars, 0x958, 0x961);
SETBITS(sXml10StartChars, 0x985, 0x98c);
SETBITS(sXml10StartChars, 0x98f, 0x990);
SETBITS(sXml10StartChars, 0x993, 0x9a8);
SETBITS(sXml10StartChars, 0x9aa, 0x9b0);
SETBITS(sXml10StartChars, 0x9b2);
SETBITS(sXml10StartChars, 0x9b6, 0x9b9);
SETBITS(sXml10StartChars, 0x9dc);
SETBITS(sXml10StartChars, 0x9dd);
SETBITS(sXml10StartChars, 0x9df, 0x9e1);
SETBITS(sXml10StartChars, 0x9f0); SETBITS(sXml10StartChars, 0x9f1);
SETBITS(sXml10StartChars, 0xA05, 0xA0A);
SETBITS(sXml10StartChars, 0xA0F); SETBITS(sXml10StartChars, 0xA10);
SETBITS(sXml10StartChars, 0xA13, 0xA28);
SETBITS(sXml10StartChars, 0xA2A, 0xA30);
SETBITS(sXml10StartChars, 0xA32); SETBITS(sXml10StartChars, 0xA33);
SETBITS(sXml10StartChars, 0xA35); SETBITS(sXml10StartChars, 0xA36);
SETBITS(sXml10StartChars, 0xA38); SETBITS(sXml10StartChars, 0xA39);
SETBITS(sXml10StartChars, 0xA59, 0xA5C);
SETBITS(sXml10StartChars, 0xA5E);
SETBITS(sXml10StartChars, 0xA72, 0xA74);
SETBITS(sXml10StartChars, 0xA85, 0xA8B);
SETBITS(sXml10StartChars, 0xA8D);
SETBITS(sXml10StartChars, 0xA8F, 0xA91);
SETBITS(sXml10StartChars, 0xA93, 0xAA8);
SETBITS(sXml10StartChars, 0xAAA, 0xAB0);
SETBITS(sXml10StartChars, 0xAB2, 0xAB3);
SETBITS(sXml10StartChars, 0xAB5, 0xAB9);
SETBITS(sXml10StartChars, 0xABD);
SETBITS(sXml10StartChars, 0xAE0);
SETBITS(sXml10StartChars, 0xB05, 0xB0C);
SETBITS(sXml10StartChars, 0xB0F); SETBITS(sXml10StartChars, 0xB10);
SETBITS(sXml10StartChars, 0xB13, 0xB28);
SETBITS(sXml10StartChars, 0xB2A, 0xB30);
SETBITS(sXml10StartChars, 0xB32); SETBITS(sXml10StartChars, 0xB33);
SETBITS(sXml10StartChars, 0xB36, 0xB39);
SETBITS(sXml10StartChars, 0xB3D);
SETBITS(sXml10StartChars, 0xB5C); SETBITS(sXml10StartChars, 0xB5D);
SETBITS(sXml10StartChars, 0xB5F, 0xB61);
SETBITS(sXml10StartChars, 0xB85, 0xB8A);
SETBITS(sXml10StartChars, 0xB8E, 0xB90);
SETBITS(sXml10StartChars, 0xB92, 0xB95);
SETBITS(sXml10StartChars, 0xB99, 0xB9A);
SETBITS(sXml10StartChars, 0xB9C);
SETBITS(sXml10StartChars, 0xB9E); SETBITS(sXml10StartChars, 0xB9F);
SETBITS(sXml10StartChars, 0xBA3); SETBITS(sXml10StartChars, 0xBA4);
SETBITS(sXml10StartChars, 0xBA8, 0xBAA);
SETBITS(sXml10StartChars, 0xBAE, 0xBB5);
SETBITS(sXml10StartChars, 0xBB7, 0xBB9);
SETBITS(sXml10StartChars, 0xC05, 0xC0C);
SETBITS(sXml10StartChars, 0xC0E, 0xC10);
SETBITS(sXml10StartChars, 0xC12, 0xC28);
SETBITS(sXml10StartChars, 0xC2A, 0xC33);
SETBITS(sXml10StartChars, 0xC35, 0xC39);
SETBITS(sXml10StartChars, 0xC60); SETBITS(sXml10StartChars, 0xC61);
SETBITS(sXml10StartChars, 0xC85, 0xC8C);
SETBITS(sXml10StartChars, 0xC8E, 0xC90);
SETBITS(sXml10StartChars, 0xC92, 0xCA8);
SETBITS(sXml10StartChars, 0xCAA, 0xCB3);
SETBITS(sXml10StartChars, 0xCB5, 0xCB9);
SETBITS(sXml10StartChars, 0xCDE);
SETBITS(sXml10StartChars, 0xCE0); SETBITS(sXml10StartChars, 0xCE1);
SETBITS(sXml10StartChars, 0xD05, 0xD0C);
SETBITS(sXml10StartChars, 0xD0E, 0xD10);
SETBITS(sXml10StartChars, 0xD12, 0xD28);
SETBITS(sXml10StartChars, 0xD2A, 0xD39);
SETBITS(sXml10StartChars, 0xD60); SETBITS(sXml10StartChars, 0xD61);
SETBITS(sXml10StartChars, 0xE01, 0xE2E);
SETBITS(sXml10StartChars, 0xE30);
SETBITS(sXml10StartChars, 0xE32); SETBITS(sXml10StartChars, 0xE33);
SETBITS(sXml10StartChars, 0xE40, 0xE45);
SETBITS(sXml10StartChars, 0xE81); SETBITS(sXml10StartChars, 0xE82);
SETBITS(sXml10StartChars, 0xE84);
SETBITS(sXml10StartChars, 0xE87); SETBITS(sXml10StartChars, 0xE88);
SETBITS(sXml10StartChars, 0xE8A); SETBITS(sXml10StartChars, 0xE8D);
SETBITS(sXml10StartChars, 0xE94, 0xE97);
SETBITS(sXml10StartChars, 0xE99, 0xE9F);
SETBITS(sXml10StartChars, 0xEA1, 0xEA3);
SETBITS(sXml10StartChars, 0xEA5); SETBITS(sXml10StartChars, 0xEA7);
SETBITS(sXml10StartChars, 0xEAA); SETBITS(sXml10StartChars, 0xEAB);
SETBITS(sXml10StartChars, 0xEAD); SETBITS(sXml10StartChars, 0xEAE);
SETBITS(sXml10StartChars, 0xEB0);
SETBITS(sXml10StartChars, 0xEB2); SETBITS(sXml10StartChars, 0xEB3);
SETBITS(sXml10StartChars, 0xEBD);
SETBITS(sXml10StartChars, 0xEC0, 0xEC4);
SETBITS(sXml10StartChars, 0xF40, 0xF47);
SETBITS(sXml10StartChars, 0xF49, 0xF69);
SETBITS(sXml10StartChars, 0x10a0, 0x10c5);
SETBITS(sXml10StartChars, 0x10d0, 0x10f6);
SETBITS(sXml10StartChars, 0x1100);
SETBITS(sXml10StartChars, 0x1102, 0x1103);
SETBITS(sXml10StartChars, 0x1105, 0x1107);
SETBITS(sXml10StartChars, 0x1109);
SETBITS(sXml10StartChars, 0x110b, 0x110c);
SETBITS(sXml10StartChars, 0x110e, 0x1112);
SETBITS(sXml10StartChars, 0x113c);
SETBITS(sXml10StartChars, 0x113e);
SETBITS(sXml10StartChars, 0x1140);
SETBITS(sXml10StartChars, 0x114c);
SETBITS(sXml10StartChars, 0x114e);
SETBITS(sXml10StartChars, 0x1150);
SETBITS(sXml10StartChars, 0x1154, 0x1155);
SETBITS(sXml10StartChars, 0x1159);
SETBITS(sXml10StartChars, 0x115f, 0x1161);
SETBITS(sXml10StartChars, 0x1163);
SETBITS(sXml10StartChars, 0x1165);
SETBITS(sXml10StartChars, 0x1167);
SETBITS(sXml10StartChars, 0x1169);
SETBITS(sXml10StartChars, 0x116d, 0x116e);
SETBITS(sXml10StartChars, 0x1172, 0x1173);
SETBITS(sXml10StartChars, 0x1175);
SETBITS(sXml10StartChars, 0x119e);
SETBITS(sXml10StartChars, 0x11a8);
SETBITS(sXml10StartChars, 0x11ab);
SETBITS(sXml10StartChars, 0x11ae, 0x11af);
SETBITS(sXml10StartChars, 0x11b7, 0x11b8);
SETBITS(sXml10StartChars, 0x11ba);
SETBITS(sXml10StartChars, 0x11bc, 0x11c2);
SETBITS(sXml10StartChars, 0x11eb);
SETBITS(sXml10StartChars, 0x11f0);
SETBITS(sXml10StartChars, 0x11f9);
SETBITS(sXml10StartChars, 0x1e00, 0x1e9b);
SETBITS(sXml10StartChars, 0x1ea0, 0x1ef9);
SETBITS(sXml10StartChars, 0x1f00, 0x1f15);
SETBITS(sXml10StartChars, 0x1f18, 0x1f1d);
SETBITS(sXml10StartChars, 0x1f20, 0x1f45);
SETBITS(sXml10StartChars, 0x1f48, 0x1f4d);
SETBITS(sXml10StartChars, 0x1f50, 0x1f57);
SETBITS(sXml10StartChars, 0x1f59);
SETBITS(sXml10StartChars, 0x1f5b);
SETBITS(sXml10StartChars, 0x1f5d);
SETBITS(sXml10StartChars, 0x1f5f, 0x1f7d);
SETBITS(sXml10StartChars, 0x1f80, 0x1fb4);
SETBITS(sXml10StartChars, 0x1fb6, 0x1fbc);
SETBITS(sXml10StartChars, 0x1fbe);
SETBITS(sXml10StartChars, 0x1fc2, 0x1fc4);
SETBITS(sXml10StartChars, 0x1fc6, 0x1fcc);
SETBITS(sXml10StartChars, 0x1fd0, 0x1fd3);
SETBITS(sXml10StartChars, 0x1fd6, 0x1fdb);
SETBITS(sXml10StartChars, 0x1fe0, 0x1fec);
SETBITS(sXml10StartChars, 0x1ff2, 0x1ff4);
SETBITS(sXml10StartChars, 0x1ff6, 0x1ffc);
SETBITS(sXml10StartChars, 0x2126);
SETBITS(sXml10StartChars, 0x212a, 0x212b);
SETBITS(sXml10StartChars, 0x212e);
SETBITS(sXml10StartChars, 0x2180, 0x2182);
SETBITS(sXml10StartChars, 0x3041, 0x3094);
SETBITS(sXml10StartChars, 0x30a1, 0x30fa);
SETBITS(sXml10StartChars, 0x3105, 0x312c);
// note: AC00 - D7A3 handled separately
// [86] Ideographic (but note: > 0x312c handled separately)
SETBITS(sXml10StartChars, 0x3007);
SETBITS(sXml10StartChars, 0x3021, 0x3029);
}
/**
 * Bit set of characters accepted as (non-first) XML 1.0 name characters.
 * Each code point <code>c</code> maps to bit <code>(c &amp; 31)</code> of
 * element <code>(c &gt;&gt; 5)</code>, which is the layout the SETBITS
 * helpers write and {@link #is10NameChar} reads.
 *<p>
 * Contents are a copy of the name start char table, extended with the
 * CombiningChar [87], Digit [88] and Extender [89] productions listed
 * below. {@link #is10NameChar} only consults this table for code points
 * up to 0x312C; anything above is decided by range checks.
 *<p>
 * NOTE(review): ASCII digits (0x30 - 0x39) as well as '.', '-' are not
 * added in this block; presumably callers deal with ASCII name characters
 * before consulting this table -- confirm against the callers.
 */
final static int[] sXml10Chars = new int[SIZE];
static {
// Let's start with all valid start chars:
System.arraycopy(sXml10StartChars, 0, sXml10Chars, 0, SIZE);
// [87] CombiningChar ::=
SETBITS(sXml10Chars, 0x300, 0x345);
SETBITS(sXml10Chars, 0x360, 0x361);
SETBITS(sXml10Chars, 0x483, 0x486);
SETBITS(sXml10Chars, 0x591, 0x5a1);
SETBITS(sXml10Chars, 0x5a3, 0x5b9);
SETBITS(sXml10Chars, 0x5bb, 0x5bd);
SETBITS(sXml10Chars, 0x5bf);
SETBITS(sXml10Chars, 0x5c1, 0x5c2);
SETBITS(sXml10Chars, 0x5c4);
SETBITS(sXml10Chars, 0x64b, 0x652);
SETBITS(sXml10Chars, 0x670);
// (next three ranges are contiguous: together they cover 0x6d6 - 0x6e4)
SETBITS(sXml10Chars, 0x6d6, 0x6dc);
SETBITS(sXml10Chars, 0x6dd, 0x6df);
SETBITS(sXml10Chars, 0x6e0, 0x6e4);
SETBITS(sXml10Chars, 0x6e7, 0x6e8);
SETBITS(sXml10Chars, 0x6ea, 0x6ed);
SETBITS(sXml10Chars, 0x901, 0x903);
SETBITS(sXml10Chars, 0x93c);
SETBITS(sXml10Chars, 0x93e, 0x94c);
SETBITS(sXml10Chars, 0x94d);
SETBITS(sXml10Chars, 0x951, 0x954);
SETBITS(sXml10Chars, 0x962); SETBITS(sXml10Chars, 0x963);
SETBITS(sXml10Chars, 0x981, 0x983);
SETBITS(sXml10Chars, 0x9bc);
SETBITS(sXml10Chars, 0x9be); SETBITS(sXml10Chars, 0x9bf);
SETBITS(sXml10Chars, 0x9c0, 0x9c4);
SETBITS(sXml10Chars, 0x9c7); SETBITS(sXml10Chars, 0x9c8);
SETBITS(sXml10Chars, 0x9cb, 0x9cd);
SETBITS(sXml10Chars, 0x9d7);
SETBITS(sXml10Chars, 0x9e2); SETBITS(sXml10Chars, 0x9e3);
SETBITS(sXml10Chars, 0xA02);
SETBITS(sXml10Chars, 0xA3C);
SETBITS(sXml10Chars, 0xA3E); SETBITS(sXml10Chars, 0xA3F);
SETBITS(sXml10Chars, 0xA40, 0xA42);
SETBITS(sXml10Chars, 0xA47); SETBITS(sXml10Chars, 0xA48);
SETBITS(sXml10Chars, 0xA4B, 0xA4D);
SETBITS(sXml10Chars, 0xA70); SETBITS(sXml10Chars, 0xA71);
SETBITS(sXml10Chars, 0xA81, 0xA83);
SETBITS(sXml10Chars, 0xABC);
SETBITS(sXml10Chars, 0xABE, 0xAC5);
SETBITS(sXml10Chars, 0xAC7, 0xAC9);
SETBITS(sXml10Chars, 0xACB, 0xACD);
SETBITS(sXml10Chars, 0xB01, 0xB03);
SETBITS(sXml10Chars, 0xB3C);
SETBITS(sXml10Chars, 0xB3E, 0xB43);
SETBITS(sXml10Chars, 0xB47); SETBITS(sXml10Chars, 0xB48);
SETBITS(sXml10Chars, 0xB4B, 0xB4D);
SETBITS(sXml10Chars, 0xB56); SETBITS(sXml10Chars, 0xB57);
SETBITS(sXml10Chars, 0xB82); SETBITS(sXml10Chars, 0xB83);
SETBITS(sXml10Chars, 0xBBE, 0xBC2);
SETBITS(sXml10Chars, 0xBC6, 0xBC8);
SETBITS(sXml10Chars, 0xBCA, 0xBCD);
SETBITS(sXml10Chars, 0xBD7);
SETBITS(sXml10Chars, 0xC01, 0xC03);
SETBITS(sXml10Chars, 0xC3E, 0xC44);
SETBITS(sXml10Chars, 0xC46, 0xC48);
SETBITS(sXml10Chars, 0xC4A, 0xC4D);
SETBITS(sXml10Chars, 0xC55, 0xC56);
SETBITS(sXml10Chars, 0xC82, 0xC83);
SETBITS(sXml10Chars, 0xCBE, 0xCC4);
SETBITS(sXml10Chars, 0xCC6, 0xCC8);
SETBITS(sXml10Chars, 0xCCA, 0xCCD);
SETBITS(sXml10Chars, 0xCD5, 0xCD6);
SETBITS(sXml10Chars, 0xD02, 0xD03);
SETBITS(sXml10Chars, 0xD3E, 0xD43);
SETBITS(sXml10Chars, 0xD46, 0xD48);
SETBITS(sXml10Chars, 0xD4A, 0xD4D);
SETBITS(sXml10Chars, 0xD57);
SETBITS(sXml10Chars, 0xE31);
SETBITS(sXml10Chars, 0xE34, 0xE3A);
SETBITS(sXml10Chars, 0xE47, 0xE4E);
SETBITS(sXml10Chars, 0xEB1);
SETBITS(sXml10Chars, 0xEB4, 0xEB9);
SETBITS(sXml10Chars, 0xEBB, 0xEBC);
SETBITS(sXml10Chars, 0xEC8, 0xECD);
SETBITS(sXml10Chars, 0xF18, 0xF19);
SETBITS(sXml10Chars, 0xF35); SETBITS(sXml10Chars, 0xF37);
SETBITS(sXml10Chars, 0xF39);
SETBITS(sXml10Chars, 0xF3E); SETBITS(sXml10Chars, 0xF3F);
SETBITS(sXml10Chars, 0xF71, 0xF84);
SETBITS(sXml10Chars, 0xF86, 0xF8B);
SETBITS(sXml10Chars, 0xF90, 0xF95);
SETBITS(sXml10Chars, 0xF97);
SETBITS(sXml10Chars, 0xF99, 0xFAD);
SETBITS(sXml10Chars, 0xFB1, 0xFB7);
SETBITS(sXml10Chars, 0xFB9);
SETBITS(sXml10Chars, 0x20D0, 0x20DC);
SETBITS(sXml10Chars, 0x20E1);
SETBITS(sXml10Chars, 0x302A, 0x302F);
SETBITS(sXml10Chars, 0x3099); SETBITS(sXml10Chars, 0x309A);
// [88] Digit:
SETBITS(sXml10Chars, 0x660, 0x669);
SETBITS(sXml10Chars, 0x6f0, 0x6f9);
SETBITS(sXml10Chars, 0x966, 0x96f);
SETBITS(sXml10Chars, 0x9e6, 0x9ef);
SETBITS(sXml10Chars, 0xa66, 0xa6f);
SETBITS(sXml10Chars, 0xae6, 0xaef);
SETBITS(sXml10Chars, 0xb66, 0xb6f);
SETBITS(sXml10Chars, 0xbe7, 0xbef);
SETBITS(sXml10Chars, 0xc66, 0xc6f);
SETBITS(sXml10Chars, 0xce6, 0xcef);
SETBITS(sXml10Chars, 0xd66, 0xd6f);
SETBITS(sXml10Chars, 0xe50, 0xe59);
SETBITS(sXml10Chars, 0xed0, 0xed9);
SETBITS(sXml10Chars, 0xf20, 0xf29);
// [89] Extender:
SETBITS(sXml10Chars, 0xb7);
SETBITS(sXml10Chars, 0x2d0);
SETBITS(sXml10Chars, 0x2d1);
SETBITS(sXml10Chars, 0x387);
SETBITS(sXml10Chars, 0x640);
SETBITS(sXml10Chars, 0xE46);
SETBITS(sXml10Chars, 0xEC6);
SETBITS(sXml10Chars, 0x3005);
SETBITS(sXml10Chars, 0x3031, 0x3035);
SETBITS(sXml10Chars, 0x309d, 0x309e);
SETBITS(sXml10Chars, 0x30fc, 0x30fe);
}
// Private, empty constructor: prevents instantiation from outside this class
// (the members visible here are all static).
private XmlChars() { }
/**
 * Checks whether given character is accepted as the first character of
 * an XML 1.0 name.
 *<p>
 * Code points up to 0x312C are resolved with a single bit lookup from
 * the precomputed start char table; higher ones with range checks:
 * ideographs 0x4E00 - 0x9FA5 and 0xAC00 - 0xD7A3 are accepted, as are
 * high surrogates (0xD800 - 0xDBFF). Pairing of surrogates is not
 * verified here; low surrogates are rejected since they can only follow
 * a high one.
 *
 * @param c Character (UTF-16 code unit) to check
 *
 * @return True if the character may start an XML 1.0 name according to
 *   the table and ranges above; false otherwise
 */
public final static boolean is10NameStartChar(char c)
{
    if (c <= 0x312C) {
        // Common case: one bit per code point, 32 code points per int
        final int code = c;
        final int word = sXml10StartChars[code >> 5];
        return ((word >>> (code & 31)) & 1) != 0;
    }
    if (c >= 0xAC00) {
        // 0xAC00 - 0xD7A3 are valid base chars; past those, only high
        // surrogates (0xD800 - 0xDBFF) are let through
        return (c <= 0xD7A3) || (c >= 0xD800 && c <= 0xDBFF);
    }
    // Between the table and 0xAC00 only the ideograph block is valid
    return (c >= 0x4E00 && c <= 0x9FA5);
}
/**
 * Checks whether given character is accepted as a non-first character
 * of an XML 1.0 name.
 *<p>
 * Code points up to 0x312C are resolved with a single bit lookup from
 * the precomputed name char table; higher ones with range checks:
 * ideographs 0x4E00 - 0x9FA5 and 0xAC00 - 0xD7A3 are accepted, and so
 * are both high and low surrogates (0xD800 - 0xDFFF). Proper pairing of
 * surrogates is not verified here.
 *
 * @param c Character (UTF-16 code unit) to check
 *
 * @return True if the character may appear within an XML 1.0 name
 *   according to the table and ranges above; false otherwise
 */
public final static boolean is10NameChar(char c)
{
    if (c <= 0x312C) {
        // Common case: one bit per code point, 32 code points per int
        final int code = c;
        final int word = sXml10Chars[code >> 5];
        return ((word >>> (code & 31)) & 1) != 0;
    }
    if (c >= 0xAC00) {
        // 0xAC00 - 0xD7A3 are valid base chars; past those, any
        // surrogate (0xD800 - 0xDFFF) is let through
        return (c <= 0xD7A3) || (c >= 0xD800 && c <= 0xDFFF);
    }
    // Between the table and 0xAC00 only the ideograph block is valid
    return (c >= 0x4E00 && c <= 0x9FA5);
}
/**
 * Checks whether given character is accepted as the first character of
 * an XML 1.1 name, using plain range checks (no lookup table).
 *<p>
 * Note that everything below 0xC0 -- including ASCII letters, ':' and
 * '_' -- is reported as invalid by this method; presumably callers
 * handle the ASCII range themselves before calling this (verify against
 * callers). High surrogates (0xD800 - 0xDBFF) are accepted without
 * checking that a low surrogate follows; low surrogates are rejected
 * since they can not start a name.
 *
 * @param c Character (UTF-16 code unit) to check
 *
 * @return True if the character falls within one of the accepted
 *   ranges; false otherwise
 */
public final static boolean is11NameStartChar(char c)
{
    // Ranges are checked in ascending order; each test relies on the
    // earlier ones having excluded everything below its lower bound.
    if (c < 0x00C0) {
        return false;
    }
    if (c < 0x0300) { // 0xC0 - 0x2FF, except for the two math operators
        return (c != 0xD7 && c != 0xF7);
    }
    if (c < 0x0370) { // 0x300 - 0x36F invalid as first char
        return false;
    }
    if (c < 0x2000) { // 0x370 - 0x37D, 0x37F - 0x1FFF
        return (c != 0x37E);
    }
    if (c < 0x2070) { // of 0x2000 - 0x206F, only 0x200C and 0x200D
        return (c == 0x200C || c == 0x200D);
    }
    if (c <= 0x218F) { // 0x2070 - 0x218F
        return true;
    }
    if (c < 0x2C00) { // 0x2190 - 0x2BFF invalid
        return false;
    }
    if (c <= 0x2FEF) { // 0x2C00 - 0x2FEF
        return true;
    }
    if (c < 0x3001) { // 0x2FF0 - 0x3000 invalid
        return false;
    }
    if (c <= 0xDBFF) { // 0x3001 - 0xD7FF (chars), 0xD800 - 0xDBFF (high surrogates)
        return true;
    }
    if (c < 0xF900 || c > 0xFFFD) { // low surrogates .. 0xF8FF, and 0xFFFE, 0xFFFF
        return false;
    }
    // 0xF900 - 0xFFFD, minus the 0xFDD0 - 0xFDEF gap
    return (c <= 0xFDCF || c >= 0xFDF0);
}
/**
 * Checks whether given character is accepted as a non-first character
 * of an XML 1.1 name, using plain range checks (no lookup table).
 *<p>
 * Accepted set is that of {@link #is11NameStartChar}, extended with
 * 0xB7, 0x300 - 0x36F, 0x203F - 0x2040 and low surrogates. Specifically,
 * 0xD7 and 0xF7 (multiplication and division signs) are NOT valid name
 * characters, since XML 1.1 production [4a] (NameChar) does not add them
 * back to the set excluded by production [4] (NameStartChar).
 *<p>
 * Note that apart from 0xB7, everything below 0xC0 -- including ASCII
 * letters, digits, '-' and '.' -- is reported as invalid by this method;
 * presumably callers handle the ASCII range themselves before calling
 * this (verify against callers). Surrogates (0xD800 - 0xDFFF) are
 * accepted without checking that they are properly paired.
 *
 * @param c Character (UTF-16 code unit) to check
 *
 * @return True if the character falls within one of the accepted
 *   ranges; false otherwise
 */
public final static boolean is11NameChar(char c)
{
    // Others are checked block-by-block:
    if (c <= 0x2FEF) {
        if (c < 0x2000) {
            if (c < 0x00C0) { // of chars below 0xC0, only middle dot is ok
                return (c == 0xB7);
            }
            // 0xC0 - 0x1FFF are ok, except for 0xD7, 0xF7 and 0x37E
            return (c != 0xD7 && c != 0xF7 && c != 0x37E);
        }
        if (c >= 0x2C00) {
            // 0x2C00 - 0x2FEF are ok
            return true;
        }
        if (c < 0x200C || c > 0x218F) {
            // 0x2000 - 0x200B, 0x2190 - 0x2BFF invalid
            return false;
        }
        if (c >= 0x2070) {
            // 0x2070 - 0x218F are ok
            return true;
        }
        // And finally, 0x200C - 0x200D, 0x203F - 0x2040 are ok
        return (c == 0x200C || c == 0x200D
                || c == 0x203F || c == 0x2040);
    }
    // 0x3000 and above (0x2FF0 - 0x3000 themselves are invalid):
    if (c >= 0x3001) {
        /* Surrogates are allowed here without checking that they have
         * proper ordering: for non-first name chars both high and low
         * ones can occur in valid names. Crude basic support, but
         * allows valid combinations; just doesn't catch invalid ones.
         */
        if (c <= 0xDFFF) { // 0x3001 - 0xD7FF (chars),
            // 0xD800 - 0xDFFF (high, low surrogate) are ok:
            return true;
        }
        if (c >= 0xF900 && c <= 0xFFFD) {
            /* Check above removes other invalid chars (below valid
             * range), and byte-order markers (0xFFFE, 0xFFFF); what
             * remains to exclude is the 0xFDD0 - 0xFDEF gap.
             */
            return (c <= 0xFDCF || c >= 0xFDF0);
        }
    }
    return false;
}
/**
 * Marks all code points in the inclusive range [start, end] in the
 * given bit set, where code point <code>c</code> is represented by bit
 * <code>(c &amp; 31)</code> of element <code>(c &gt;&gt; 5)</code>.
 *
 * @param array Bit set to modify
 * @param start First code point to mark
 * @param end Last code point to mark
 */
private static void SETBITS(int[] array, int start, int end)
{
    final int lowBit = (start & 31);
    final int highBit = (end & 31);
    final int firstWord = (start >> 5);
    final int lastWord = (end >> 5);

    if (firstWord == lastWord) {
        // Whole range within one element: bits lowBit..highBit. Guard
        // ensures the array is left untouched if the bit range is empty.
        if (lowBit <= highBit) {
            array[firstWord] |= (-1 << lowBit) & (-1 >>> (31 - highBit));
        }
        return;
    }
    // Partial first element: bits lowBit..31
    array[firstWord] |= (-1 << lowBit);
    // Elements strictly in between get all their bits set
    for (int word = firstWord + 1; word < lastWord; ++word) {
        array[word] = -1;
    }
    // Partial last element: bits 0..highBit
    array[lastWord] |= (-1 >>> (31 - highBit));
}
/**
 * Marks a single code point in the given bit set: sets bit
 * <code>(point &amp; 31)</code> of element <code>(point &gt;&gt; 5)</code>.
 *
 * @param array Bit set to modify
 * @param point Code point to mark
 */
private static void SETBITS(int[] array, int point) {
    final int mask = 1 << (point & 31);
    array[point >> 5] |= mask;
}
}
woodstox-woodstox-core-5.1.0/src/main/java/com/ctc/wstx/util/package.html 0000664 0000000 0000000 00000000370 13257562550 0026455 0 ustar 00root root 0000000 0000000
Contains utility classes that are not directly Woodstox specific, but are
for now only used by Woodstox.
com.ctc
package.
woodstox-woodstox-core-5.1.0/src/main/resources/ 0000775 0000000 0000000 00000000000 13257562550 0021754 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/ 0000775 0000000 0000000 00000000000 13257562550 0023114 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/LICENSE 0000664 0000000 0000000 00000000501 13257562550 0024115 0 ustar 00root root 0000000 0000000 This copy of Jackson JSON processor databind module is licensed under the
Apache (Software) License, version 2.0 ("the License").
See the License for details about distribution rights, and the
specific rights regarding derivate works.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services/ 0000775 0000000 0000000 00000000000 13257562550 0024737 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services/javax.xml.stream.XMLEventFactory 0000664 0000000 0000000 00000000043 13257562550 0033052 0 ustar 00root root 0000000 0000000 com.ctc.wstx.stax.WstxEventFactory
woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services/javax.xml.stream.XMLInputFactory 0000664 0000000 0000000 00000000042 13257562550 0033067 0 ustar 00root root 0000000 0000000 com.ctc.wstx.stax.WstxInputFactory woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services/javax.xml.stream.XMLOutputFactory 0000664 0000000 0000000 00000000043 13257562550 0033271 0 ustar 00root root 0000000 0000000 com.ctc.wstx.stax.WstxOutputFactory org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd 0000664 0000000 0000000 00000000042 13257562550 0040246 0 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services com.ctc.wstx.dtd.DTDSchemaFactory
org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng 0000664 0000000 0000000 00000000046 13257562550 0041137 0 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services com.ctc.wstx.msv.RelaxNGSchemaFactory
org.codehaus.stax2.validation.XMLValidationSchemaFactory.w3c 0000664 0000000 0000000 00000000042 13257562550 0040167 0 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/main/resources/META-INF/services com.ctc.wstx.msv.W3CSchemaFactory
woodstox-woodstox-core-5.1.0/src/test/ 0000775 0000000 0000000 00000000000 13257562550 0017775 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/test/java/ 0000775 0000000 0000000 00000000000 13257562550 0020716 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/test/java/failing/ 0000775 0000000 0000000 00000000000 13257562550 0022327 5 ustar 00root root 0000000 0000000 woodstox-woodstox-core-5.1.0/src/test/java/failing/TestBasicSax.java 0000664 0000000 0000000 00000003750 13257562550 0025534 0 ustar 00root root 0000000 0000000 package failing;
import java.io.*;
import javax.xml.parsers.SAXParser;
import org.xml.sax.*;
import org.xml.sax.ext.DefaultHandler2;
import com.ctc.wstx.sax.*;
import wstxtest.BaseWstxTest;
/**
* Simple unit tests to verify that most fundamental parsing functionality
* works via Woodstox SAX implementation.
*/
public class TestBasicSax
extends BaseWstxTest
{
/**
* Test for [WSTX_227]
*/
public void testCData() throws Exception
{
SAXParser parser = new WstxSAXParser();
StringBuffer buffer = new StringBuffer("