*
* @author Tatu Saloranta
*/
public abstract class BasicStreamReader
extends StreamScanner
implements StreamReaderImpl, DTDInfo, LocationInfo
{
/*
///////////////////////////////////////////////////////////////////////
// Constants
///////////////////////////////////////////////////////////////////////
*/
// // // Standalone values:
final static int DOC_STANDALONE_UNKNOWN = 0;
final static int DOC_STANDALONE_YES = 1;
final static int DOC_STANDALONE_NO = 2;
// // // Main state consts:
final static int STATE_PROLOG = 0; // Before root element
final static int STATE_TREE = 1; // Parsing actual XML tree
final static int STATE_EPILOG = 2; // After root element has been closed
final static int STATE_MULTIDOC_HACK = 3; // State "between" multiple documents (in multi-doc mode)
final static int STATE_CLOSED = 4; // After reader has been closed
// // // Tokenization state consts:
// no idea as to what comes next (unknown type):
final static int TOKEN_NOT_STARTED = 0;
// token type figured out, but not long enough:
final static int TOKEN_STARTED = 1;
/* minimum token length returnable achieved; only used for CDATA and
* CHARACTERS events which allow fragments to be returned
*/
final static int TOKEN_PARTIAL_SINGLE = 2;
/* a single physical event has been successfully tokenized; as with
* partial, only used with CDATA and CHARACTERS (meaningless for others,
* which should only use TOKEN_FULL_COALESCED, TOKEN_NOT_STARTED or
* TOKEN_STARTED).
*/
final static int TOKEN_FULL_SINGLE = 3;
/* all adjacent (text) events have been tokenized and coalesced (for
* CDATA and CHARACTERS), or that the full event has been parsed (for
* others)
*/
final static int TOKEN_FULL_COALESCED = 4;
// // // Bit masks used for quick type comparisons
/**
* This mask covers all types for which basic {@link #getText} method
* can be called.
*/
final protected static int MASK_GET_TEXT =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE);
/**
* This mask covers all types for which extended getTextXxx
* methods can be called; which is less than those for which
* {@link #getText} can be called. Specifically, <code>DTD</code>
* and <code>ENTITY_REFERENCE</code> types do not support these
* extended methods.
*/
final protected static int MASK_GET_TEXT_XXX =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE) | (1 << COMMENT);
/**
* This mask is used with Stax2 getText() method (one that takes
* Writer as an argument): accepts even wider range of event types.
*/
final protected static int MASK_GET_TEXT_WITH_WRITER =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE)
| (1 << PROCESSING_INSTRUCTION);
/**
* This mask covers the event types that {@link #getElementText}
* accepts as textual content of the element being read; any other
* type (except for comments and processing instructions, which that
* method skips) makes it fail.
*/
final protected static int MASK_GET_ELEMENT_TEXT =
(1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)
| (1 << ENTITY_REFERENCE);
// // // Indicator of type of text in text event (WRT white space)
final static int ALL_WS_UNKNOWN = 0x0000;
final static int ALL_WS_YES = 0x0001;
final static int ALL_WS_NO = 0x0002;
/* 2 magic constants used for enabling/disabling indentation checks:
* (to minimize negative impact for both small docs, and large
* docs with non-regular white space)
*/
private final static int INDENT_CHECK_START = 16;
private final static int INDENT_CHECK_MAX = 40;
// // // Shared namespace symbols
final protected static String sPrefixXml = DefaultXmlSymbolTable.getXmlSymbol();
final protected static String sPrefixXmlns = DefaultXmlSymbolTable.getXmlnsSymbol();
/*
///////////////////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////////////////
*/
// note: mConfig defined in base class
/**
* Set of locally stored configuration flags
*/
protected final int mConfigFlags;
// // // Various extracted settings:
protected final boolean mCfgCoalesceText;
protected final boolean mCfgReportTextAsChars;
protected final boolean mCfgLazyParsing;
/**
* Minimum number of characters parser can return as partial text
* segment, IF it's not required to coalesce adjacent text
* segments.
*/
protected final int mShortestTextSegment;
/*
///////////////////////////////////////////////////////////////////////
// Symbol handling:
///////////////////////////////////////////////////////////////////////
*/
/**
* Object to notify about shared stuff, such as symbol tables, as well
* as to query for additional config settings if necessary.
*/
final protected ReaderCreator mOwner;
/*
///////////////////////////////////////////////////////////////////////
// Additional XML document information, in addition
// to what StreamScanner has
///////////////////////////////////////////////////////////////////////
*/
/**
* Status about "stand-aloneness" of document; set to 'yes'/'no'/'unknown'
* based on whether there was xml declaration, and if so, whether
* it had standalone attribute.
*/
protected int mDocStandalone = DOC_STANDALONE_UNKNOWN;
/*
///////////////////////////////////////////////////////////////////////
// DOCTYPE information from document type declaration
// (if any found)
///////////////////////////////////////////////////////////////////////
*/
/**
* Prefix of root element, as dictated by DOCTYPE declaration; null
* if no DOCTYPE declaration, or no root prefix
*/
String mRootPrefix;
/**
* Local name of root element, as dictated by DOCTYPE declaration; null
* if no DOCTYPE declaration.
*/
String mRootLName;
/**
* Public id of the DTD, if one exists and has been parsed.
*/
protected String mDtdPublicId;
/**
* System id of the DTD, if one exists and has been parsed.
*/
protected String mDtdSystemId;
/*
///////////////////////////////////////////////////////////////////////
// Information about currently open subtree, content
///////////////////////////////////////////////////////////////////////
*/
/**
* TextBuffer mostly used to collect non-element textual content
* (text, CDATA, comment content, pi data)
*/
final protected TextBuffer mTextBuffer;
/**
* Currently open element tree
*/
final protected InputElementStack mElementStack;
/**
* Object that stores information about currently accessible attributes.
*/
final protected AttributeCollector mAttrCollector;
/*
///////////////////////////////////////////////////////////////////////
// Tokenization state
///////////////////////////////////////////////////////////////////////
*/
/// Flag set when DOCTYPE declaration has been parsed
protected boolean mStDoctypeFound = false;
/**
* State of the current token; one of the TOKEN_xxx constants
* defined above.
*
* Initially set to fully tokenized, since it's the virtual
* START_DOCUMENT event that we fully know by now (parsed by
* bootstrapper)
*/
protected int mTokenState = TOKEN_FULL_COALESCED;
/**
* Threshold value that defines tokenization state that needs to be
* achieved to "finish" current logical text segment (which
* may consist of adjacent CDATA and text segments; or be a complete
* physical segment; or just even a fragment of such a segment)
*/
protected final int mStTextThreshold;
/// Flag that indicates current start element is an empty element
protected boolean mStEmptyElem = false;
/**
* Main parsing/tokenization state (STATE_xxx)
*/
protected int mParseState;
/**
* Current state of the stream, ie token value returned by
* {@link #getEventType}. Needs to be initialized to START_DOCUMENT,
* since that's the state it starts in.
*/
protected int mCurrToken = START_DOCUMENT;
/**
* Additional information sometimes stored (when generating dummy
* events in multi-doc mode, for example) temporarily when
* {@link #mCurrToken} is already populated.
*/
protected int mSecondaryToken = START_DOCUMENT;
/**
* Status of current (text) token's "whitespaceness", that is,
* whether it is or is not all white space.
*/
protected int mWsStatus;
/**
* Flag that indicates that textual content (CDATA, CHARACTERS) is to
* be validated within current element's scope. Enabled if one of
* validators returns {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT},
* and will prevent lazy parsing of text.
*/
protected boolean mValidateText = false;
/**
* Counter used for determining whether we are to try to heuristically
* "intern" white space that seems to be used for indentation purposes
*/
protected int mCheckIndentation;
/**
* Due to the way Stax API does not allow throwing stream exceptions
* from many methods for which Woodstox would need to throw one
* (especially <code>getText</code> and its variations), we may need
* to delay throwing an exception until {@link #next} is called next
* time. If so, this variable holds the pending stream exception.
*/
protected XMLStreamException mPendingException = null;
/*
///////////////////////////////////////////////////////////////////////
// DTD information (entities, content spec stub)
///////////////////////////////////////////////////////////////////////
*/
/**
* Entities parsed from internal/external DTD subsets. Although it
* will remain null for this class, extended classes make use of it,
* plus, to be able to share some of entity resolution code, instance
* is left here even though it semantically belongs to the sub-class.
*/
protected Map mGeneralEntities = null;
/**
* Mode information needed at this level; mostly to check what kind
* of textual content (if any) is allowed in current element
* context. Constants come from
* {@link XMLValidator},
* (like {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT}).
* Only used inside tree; ignored for prolog/epilog (which
* have straight-forward static rules).
*/
protected int mVldContent = XMLValidator.CONTENT_ALLOW_ANY_TEXT;
/**
* Configuration from
* {@link WstxInputProperties#P_RETURN_NULL_FOR_DEFAULT_NAMESPACE};
* when true, {@link #getNamespacePrefix} reports the default
* namespace prefix as null instead of as an empty String.
*
* @since 4.1.2
*/
protected boolean mReturnNullForDefaultNamespace;
/*
///////////////////////////////////////////////////////////////////////
// Instance construction, initialization
///////////////////////////////////////////////////////////////////////
*/
/**
 * Initializes the reader from the given configuration and from the xml
 * declaration information already collected by the bootstrapper; no
 * further content is parsed here.
 *
 * @param bs Bootstrapper from which declared xml version, input encoding,
 *   declared encoding and standalone pseudo-attribute are read
 * @param input Input source for the document; its input location is
 *   initialized against this reader
 * @param owner Object to notify about shared state (such as symbol
 *   tables); stored as {@link #mOwner}
 * @param cfg Configuration settings to use
 * @param elemStack Input element stack to use; if null, will create
 *   instance locally (see {@link #createElementStack}).
 * @param forER Override indicator; if true, this stream reader will be
 *   used by an event reader, and should modify some of the base config
 *   settings appropriately. If false, configuration settings are to
 *   be used as is.
 *
 * @throws XMLStreamException declared for problems during initialization
 */
protected BasicStreamReader(InputBootstrapper bs,
        BranchingReaderSource input, ReaderCreator owner,
        ReaderConfig cfg, InputElementStack elemStack,
        boolean forER)
    throws XMLStreamException
{
    super(input, cfg, cfg.getEntityResolver());

    mOwner = owner;

    mTextBuffer = TextBuffer.createRecyclableBuffer(cfg);

    // // // First, configuration settings:

    mConfigFlags = cfg.getConfigFlags();
    mCfgCoalesceText = (mConfigFlags & CFG_COALESCE_TEXT) != 0;
    mCfgReportTextAsChars = (mConfigFlags & CFG_REPORT_CDATA) == 0;
    mXml11 = cfg.isXml11();

    // Can only use canonical white space if we are normalizing lfs
    mCheckIndentation = mNormalizeLFs ? INDENT_CHECK_START : 0;

    /* 30-Sep-2005, TSa: Let's not do lazy parsing when access is via
     *   Event API. Reason is that there will be no performance benefit
     *   (event objects always access full info right after traversal),
     *   but the wrapping of stream exceptions within runtime exception
     *   wrappers would happen, which is inconvenient (loss of stack trace,
     *   not catching all exceptions as expected)
     */
    mCfgLazyParsing = !forER && ((mConfigFlags & CFG_LAZY_PARSING) != 0);

    /* There are a few derived settings used during tokenization that
     * need to be initialized now...
     */
    if (mCfgCoalesceText) {
        mStTextThreshold = TOKEN_FULL_COALESCED;
        mShortestTextSegment = Integer.MAX_VALUE;
    } else {
        mStTextThreshold = TOKEN_PARTIAL_SINGLE;
        if (forER) {
            /* 30-Sep-2005, TSa: No point in returning runt segments for
             *   event readers (due to event object overhead, less
             *   convenient); let's just force returning of full length
             *   segments.
             */
            mShortestTextSegment = Integer.MAX_VALUE;
        } else {
            mShortestTextSegment = cfg.getShortestReportedTextSegment();
        }
    }

    // // // Then handling of xml declaration data:

    mDocXmlVersion = bs.getDeclaredVersion();
    mDocInputEncoding = bs.getInputEncoding();
    mDocXmlEncoding = bs.getDeclaredEncoding();

    // No standalone pseudo-attribute means 'unknown'; anything but "yes" means 'no'
    String sa = bs.getStandalone();
    if (sa == null) {
        mDocStandalone = DOC_STANDALONE_UNKNOWN;
    } else if (XmlConsts.XML_SA_YES.equals(sa)) {
        mDocStandalone = DOC_STANDALONE_YES;
    } else {
        mDocStandalone = DOC_STANDALONE_NO;
    }

    /* Ok; either we got declaration or not, but in either case we can
     * now initialize prolog parsing settings, without having to really
     * parse anything more.
     */
    /* 07-Oct-2005, TSa: Except, if we are in fragment mode, in which
     *   case we are kind of "in tree" mode...
     */
    mParseState = mConfig.inputParsingModeFragment() ?
        STATE_TREE : STATE_PROLOG;

    // // // And then connecting element stack and attribute collector

    /* Documented contract: a null stack means "create one locally".
     * Without this the dereferences below would fail with a
     * NullPointerException.
     */
    if (elemStack == null) {
        elemStack = createElementStack(cfg);
    }
    mElementStack = elemStack;
    mAttrCollector = elemStack.getAttrCollector();

    // And finally, location information may have offsets:
    input.initInputLocation(this, mCurrDepth);

    elemStack.connectReporter(this);

    // Only an explicit Boolean.TRUE enables null-for-default-namespace reporting
    Object value = getProperty(WstxInputProperties.P_RETURN_NULL_FOR_DEFAULT_NAMESPACE);
    mReturnNullForDefaultNamespace = (value instanceof Boolean) && ((Boolean) value).booleanValue();
}
protected static InputElementStack createElementStack(ReaderConfig cfg)
{
    // Namespace handling of the new stack follows the reader configuration
    final boolean nsAware = cfg.willSupportNamespaces();
    return new InputElementStack(cfg, nsAware);
}
/*
///////////////////////////////////////////////////////////////////////
// XMLStreamReader, document info
///////////////////////////////////////////////////////////////////////
*/
/**
* As per Stax (1.0) specs, needs to return whatever xml declaration
* claimed encoding is, if any; or null if no xml declaration found.
*
* Note: method name is rather confusing (compare to {@link #getEncoding}). */ public String getCharacterEncodingScheme() { return mDocXmlEncoding; } /** * As per Stax (1.0) specs, needs to return whatever parser determined * the encoding was, if it was able to figure it out. If not (there are * cases where this can not be found; specifically when being passed a * {@link Reader}), it should return null. */ public String getEncoding() { return mDocInputEncoding; } public String getVersion() { if (mDocXmlVersion == XmlConsts.XML_V_10) { return XmlConsts.XML_V_10_STR; } if (mDocXmlVersion == XmlConsts.XML_V_11) { return XmlConsts.XML_V_11_STR; } return null; // unknown } public boolean isStandalone() { return mDocStandalone == DOC_STANDALONE_YES; } public boolean standaloneSet() { return mDocStandalone != DOC_STANDALONE_UNKNOWN; } /* /////////////////////////////////////////////////////////////////////// // Public API, configuration /////////////////////////////////////////////////////////////////////// */ public Object getProperty(String name) { /* 18-Nov-2008, TSa: As per [WSTX-50], should report the * actual Base URL. It can be overridden by matching * setProperty, but if not, is set to actual source * of content being parsed. 
*/ if (WstxInputProperties.P_BASE_URL.equals(name)) { return mInput.getSource(); } /* 23-Apr-2008, TSa: Let's NOT throw IllegalArgumentException * for unknown property; JavaDocs do not suggest it needs * to be done (different from that of XMLInputFactory * and XMLStreamWriter specification) */ return mConfig.safeGetProperty(name); } /* /////////////////////////////////////////////////////////////////////// // XMLStreamReader, current state /////////////////////////////////////////////////////////////////////// */ // // // Attribute access: public int getAttributeCount() { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.getCount(); } public String getAttributeLocalName(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.getLocalName(index); } public QName getAttributeName(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.getQName(index); } public String getAttributeNamespace(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } // Internally it's marked as null, externally need to see "" String uri = mAttrCollector.getURI(index); return (uri == null) ? XmlConsts.ATTR_NO_NS_URI : uri; } public String getAttributePrefix(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } // Internally it's marked as null, externally need to see "" String p = mAttrCollector.getPrefix(index); return (p == null) ? XmlConsts.ATTR_NO_PREFIX : p; } public String getAttributeType(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } // Attr. 
collector doesn't know it, elem stack does: return mElementStack.getAttributeType(index); } public String getAttributeValue(int index) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.getValue(index); } public String getAttributeValue(String nsURI, String localName) { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.getValue(nsURI, localName); } /** * From StAX specs: *
* Reads the content of a text-only element, an exception is thrown if * this is not a text-only element. * Regardless of value of javax.xml.stream.isCoalescing this method always * returns coalesced content. **/ public String getElementText() throws XMLStreamException { if (mCurrToken != START_ELEMENT) { throwParseError(ErrorConsts.ERR_STATE_NOT_STELEM, null, null); } /* Ok, now: with START_ELEMENT we know that it's not partially * processed; that we are in-tree (not prolog or epilog). * The only possible complication would be: */ if (mStEmptyElem) { /* And if so, we'll then get 'virtual' close tag; things * are simple as location info was set when dealing with * empty start element; and likewise, validation (if any) * has been taken care of */ mStEmptyElem = false; mCurrToken = END_ELEMENT; return ""; } // First need to find a textual event while (true) { int type = next(); if (type == END_ELEMENT) { return ""; } if (type == COMMENT || type == PROCESSING_INSTRUCTION) { continue; } if (((1 << type) & MASK_GET_ELEMENT_TEXT) == 0) { throw _constructUnexpectedInTyped(type); } break; } if (mTokenState < TOKEN_FULL_SINGLE) { readCoalescedText(mCurrToken, false); } /* Ok: then a quick check; if it looks like we are directly * followed by the end tag, we need not construct String * quite yet. 
*/ if ((mInputPtr + 1) < mInputEnd && mInputBuffer[mInputPtr] == '<' && mInputBuffer[mInputPtr+1] == '/') { // Note: next() has validated text, no need for more validation mInputPtr += 2; mCurrToken = END_ELEMENT; // must first get text, as call to readEndElem may break it: String result = mTextBuffer.contentsAsString(); // Can by-pass next(), nextFromTree(), in this case: readEndElem(); // and then return results return result; } // Otherwise, we'll need to do slower processing int extra = 1 + (mTextBuffer.size() >> 1); // let's add 50% space StringBuffer sb = mTextBuffer.contentsAsStringBuffer(extra); int type; while ((type = next()) != END_ELEMENT) { if (((1 << type) & MASK_GET_ELEMENT_TEXT) != 0) { if (mTokenState < mStTextThreshold) { finishToken(false); } mTextBuffer.contentsToStringBuffer(sb); continue; } if (type != COMMENT && type != PROCESSING_INSTRUCTION) { throw _constructUnexpectedInTyped(type); } } // Note: calls next() have validated text, no need for more validation return sb.toString(); } /** * Returns type of the last event returned; or START_DOCUMENT before * any events has been explicitly returned. */ public int getEventType() { /* Only complication -- multi-part coalesced text is to be reported * as CHARACTERS always, never as CDATA (StAX specs). */ if (mCurrToken == CDATA) { if (mCfgCoalesceText || mCfgReportTextAsChars) { return CHARACTERS; } } return mCurrToken; } public String getLocalName() { // Note: for this we need not (yet) finish reading element if (mCurrToken == START_ELEMENT || mCurrToken == END_ELEMENT) { return mElementStack.getLocalName(); } if (mCurrToken == ENTITY_REFERENCE) { /* 30-Sep-2005, TSa: Entity will be null in non-expanding mode * if no definition was found: */ return (mCurrEntity == null) ? 
mCurrName: mCurrEntity.getName(); } throw new IllegalStateException("Current state not START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE"); } // // // getLocation() defined in StreamScanner public QName getName() { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } return mElementStack.getCurrentElementName(); } // // // Namespace access public NamespaceContext getNamespaceContext() { /* Unlike other getNamespaceXxx methods, this is available * for all events. * Note that the context is "live", ie. remains active (but not * static) even through calls to next(). StAX compliant apps * should not count on this behaviour, however. */ return mElementStack; } public int getNamespaceCount() { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } return mElementStack.getCurrentNsCount(); } public String getNamespacePrefix(int index) { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } // Internally it's marked as null, externally need to see "" or null, depending String p = mElementStack.getLocalNsPrefix(index); if (p == null) { return mReturnNullForDefaultNamespace ? null : XmlConsts.ATTR_NO_PREFIX; } return p; } public String getNamespaceURI() { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } // Internally it's marked as null, externally need to see "" String uri = mElementStack.getNsURI(); return (uri == null) ? XmlConsts.ELEM_NO_NS_URI : uri; } public String getNamespaceURI(int index) { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } // Internally it's marked as null, externally need to see "" String uri = mElementStack.getLocalNsURI(index); return (uri == null) ? 
XmlConsts.ATTR_NO_NS_URI : uri; } public String getNamespaceURI(String prefix) { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } /* Note: this will need to return null if no URI found for * the prefix, so we can't mask it. */ return mElementStack.getNamespaceURI(prefix); } public String getPIData() { if (mCurrToken != PROCESSING_INSTRUCTION) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI); } if (mTokenState <= TOKEN_STARTED) { safeFinishToken(); } return mTextBuffer.contentsAsString(); } public String getPITarget() { if (mCurrToken != PROCESSING_INSTRUCTION) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI); } // Target is always parsed automatically, not lazily... return mCurrName; } public String getPrefix() { if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM); } // Internally it's marked as null, externally need to see "" String p = mElementStack.getPrefix(); return (p == null) ? XmlConsts.ELEM_NO_PREFIX : p; } public String getText() { if (((1 << mCurrToken) & MASK_GET_TEXT) == 0) { throwNotTextual(mCurrToken); } if (mTokenState < mStTextThreshold) { safeFinishToken(); } if (mCurrToken == ENTITY_REFERENCE) { return (mCurrEntity == null) ? null : mCurrEntity.getReplacementText(); } if (mCurrToken == DTD) { /* 16-Aug-2004, TSa: Hmmh. Specs are bit ambiguous on whether this * should return just the internal subset, or the whole * thing... 
*/ return getDTDInternalSubset(); } return mTextBuffer.contentsAsString(); } public char[] getTextCharacters() { if (((1 << mCurrToken) & MASK_GET_TEXT_XXX) == 0) { throwNotTextXxx(mCurrToken); } if (mTokenState < mStTextThreshold) { safeFinishToken(); } if (mCurrToken == ENTITY_REFERENCE) { return mCurrEntity.getReplacementChars(); } if (mCurrToken == DTD) { return getDTDInternalSubsetArray(); } return mTextBuffer.getTextBuffer(); } public int getTextCharacters(int sourceStart, char[] target, int targetStart, int len) { if (((1 << mCurrToken) & MASK_GET_TEXT_XXX) == 0) { throwNotTextXxx(mCurrToken); } if (mTokenState < mStTextThreshold) { safeFinishToken(); } return mTextBuffer.contentsToArray(sourceStart, target, targetStart, len); } public int getTextLength() { if (((1 << mCurrToken) & MASK_GET_TEXT_XXX) == 0) { throwNotTextXxx(mCurrToken); } if (mTokenState < mStTextThreshold) { safeFinishToken(); } return mTextBuffer.size(); } public int getTextStart() { if (((1 << mCurrToken) & MASK_GET_TEXT_XXX) == 0) { throwNotTextXxx(mCurrToken); } if (mTokenState < mStTextThreshold) { safeFinishToken(); } return mTextBuffer.getTextStart(); } public boolean hasName() { return (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT); } public boolean hasNext() { /* 08-Oct-2005, TSa: In multi-doc mode, we have different * criteria... */ return (mCurrToken != END_DOCUMENT) || (mParseState == STATE_MULTIDOC_HACK); } public boolean hasText() { return (((1 << mCurrToken) & MASK_GET_TEXT) != 0); } public boolean isAttributeSpecified(int index) { /* No need to check for ATTRIBUTE since we never return that... */ if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mAttrCollector.isSpecified(index); } public boolean isCharacters() { /* 21-Dec-2005, TSa: Changed for 3.0 to work the same way as stax * ref impl. 
*/ //return (mCurrToken == CHARACTERS || mCurrToken == CDATA || mCurrToken == SPACE); /* 21-Apr-2009, TSa: As per [WSTX-201], should be consistent with * what getEventType() returns (affects CDATA, SPACE, in * coalescing mode or when explicitly asked to return CDATA * as CHARACTERS) */ return (getEventType() == CHARACTERS); } public boolean isEndElement() { return (mCurrToken == END_ELEMENT); } public boolean isStartElement() { return (mCurrToken == START_ELEMENT); } /** *
Precondition: the current event is START_ELEMENT. *
Postcondition: the current event is the corresponding END_ELEMENT. *
* 05-Apr-2004, TSa: Could try to determine status when text is actually * read. That'd prevent double reads... but would it slow down that * one reading so that net effect would be negative? */ public boolean isWhiteSpace() { if (mCurrToken == CHARACTERS || mCurrToken == CDATA) { if (mTokenState < mStTextThreshold) { safeFinishToken(); } if (mWsStatus == ALL_WS_UNKNOWN) { mWsStatus = mTextBuffer.isAllWhitespace() ? ALL_WS_YES : ALL_WS_NO; } return mWsStatus == ALL_WS_YES; } return (mCurrToken == SPACE); } public void require(int type, String nsUri, String localName) throws XMLStreamException { int curr = mCurrToken; /* There are some special cases; specifically, CDATA * is sometimes reported as CHARACTERS. Let's be lenient by * allowing both 'real' and reported types, for now. */ if (curr != type) { if (curr == CDATA) { if (mCfgCoalesceText || mCfgReportTextAsChars) { curr = CHARACTERS; } } else if (curr == SPACE) { // Hmmh. Should we require it to be empty or something? //curr = CHARACTERS; // For now, let's not change the check } } if (type != curr) { throwParseError("Expected type "+tokenTypeDesc(type) +", current type " +tokenTypeDesc(curr)); } if (localName != null) { if (curr != START_ELEMENT && curr != END_ELEMENT && curr != ENTITY_REFERENCE) { throwParseError("Expected non-null local name, but current token not a START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE (was "+tokenTypeDesc(mCurrToken)+")"); } String n = getLocalName(); if (n != localName && !n.equals(localName)) { throwParseError("Expected local name '"+localName+"'; current local name '"+n+"'."); } } if (nsUri != null) { if (curr != START_ELEMENT && curr != END_ELEMENT) { throwParseError("Expected non-null NS URI, but current token not a START_ELEMENT or END_ELEMENT (was "+tokenTypeDesc(curr)+")"); } String uri = mElementStack.getNsURI(); // No namespace? 
if (nsUri.length() == 0) { if (uri != null && uri.length() > 0) { throwParseError("Expected empty namespace, instead have '"+uri+"'."); } } else { if ((nsUri != uri) && !nsUri.equals(uri)) { throwParseError("Expected namespace '"+nsUri+"'; have '" +uri+"'."); } } } // Ok, fine, all's good } /* //////////////////////////////////////////////////// // XMLStreamReader, iterating //////////////////////////////////////////////////// */ public final int next() throws XMLStreamException { /* 24-Sep-2006, TSa: We may have deferred an exception that occurred * during parsing of the previous event. If so, now it needs to * be thrown. */ if (mPendingException != null) { XMLStreamException strEx = mPendingException; mPendingException = null; throw strEx; } /* Note: can not yet accurately record the location, since the * previous event might not yet be completely finished... */ if (mParseState == STATE_TREE) { int type = nextFromTree(); mCurrToken = type; if (mTokenState < mStTextThreshold) { // incomplete? /* Can remain incomplete if lazy parsing is enabled, * and this is not a validatable text segment; otherwise * must finish */ if (!mCfgLazyParsing || (mValidateText && (type == CHARACTERS || type == CDATA))) { finishToken(false); } } /* Special cases -- sometimes (when coalescing text, or * when specifically configured to do so), CDATA and SPACE are * to be reported as CHARACTERS, although we still will * internally keep track of the real type. 
*/ if (type == CDATA) { if (mValidateText) { mElementStack.validateText(mTextBuffer, false); } if (mCfgCoalesceText || mCfgReportTextAsChars) { return CHARACTERS; } /* } else if (type == SPACE) { //if (mValidateText) { throw new IllegalStateException("Internal error: trying to validate SPACE event"); } */ } else if (type == CHARACTERS) { if (mValidateText) { /* We may be able to determine that there will be * no more text coming for this element: but only * seeing the end tag marker ("") is certain * (PIs and comments won't do, nor CDATA; start * element possibly... but that indicates mixed * content that's generally non-validatable) */ if ((mInputPtr+1) < mInputEnd && mInputBuffer[mInputPtr] == '<' && mInputBuffer[mInputPtr+1] == '/') { // yup, it's all there is mElementStack.validateText(mTextBuffer, true); } else { mElementStack.validateText(mTextBuffer, false); } } } return type; } if (mParseState == STATE_PROLOG) { nextFromProlog(true); } else if (mParseState == STATE_EPILOG) { if (nextFromProlog(false)) { // We'll return END_DOCUMENT, need to mark it 'as consumed' mSecondaryToken = 0; } } else if (mParseState == STATE_MULTIDOC_HACK) { mCurrToken = nextFromMultiDocState(); } else { // == STATE_CLOSED if (mSecondaryToken == END_DOCUMENT) { // marker mSecondaryToken = 0; // mark end doc as consumed return END_DOCUMENT; } throw new java.util.NoSuchElementException(); } return mCurrToken; } public int nextTag() throws XMLStreamException { while (true) { int next = next(); switch (next) { case SPACE: case COMMENT: case PROCESSING_INSTRUCTION: continue; case CDATA: case CHARACTERS: if (isWhiteSpace()) { continue; } throwParseError("Received non-all-whitespace CHARACTERS or CDATA event in nextTag()."); break; // never gets here, but jikes complains without case START_ELEMENT: case END_ELEMENT: return next; } throwParseError("Received event "+ErrorConsts.tokenTypeDesc(next) +", instead of START_ELEMENT or END_ELEMENT."); } } /** *
* Note: as per StAX 1.0 specs, this method does NOT close the underlying
* input reader. That is, unless the new StAX2 property
* {@link org.codehaus.stax2.XMLInputFactory2#P_AUTO_CLOSE_INPUT} is
* set to true.
*/
public void close()
    throws XMLStreamException
{
    // Closing is a one-time operation; later calls are no-ops
    if (mParseState == STATE_CLOSED) {
        return;
    }
    mParseState = STATE_CLOSED;

    /* Unless end-of-document was already reached, mark both the
     * current and the secondary token as END_DOCUMENT, and check
     * whether the factory should be told that the symbol table has
     * new entries (so that it may reuse this table instead of the
     * current root table).
     */
    if (mCurrToken != END_DOCUMENT) {
        mSecondaryToken = END_DOCUMENT;
        mCurrToken = END_DOCUMENT;
        if (mSymbols.isDirty()) {
            mOwner.updateSymbolTable(mSymbols);
        }
    }

    /* All dependent input sources need to be closed first, followed
     * by close() on the root input source object; the root source
     * only does a real close if that was enabled earlier, and it
     * also guards against multiple close() calls on the underlying
     * source, so no extra checks are needed here.
     */
    closeAllInput(false);

    // Last: low-level (text) buffers can now be recycled
    mTextBuffer.recycle(true);
}
/*
////////////////////////////////////////////////////
// XMLStreamReader2 (StAX2) implementation
////////////////////////////////////////////////////
*/
// // // StAX2, per-reader configuration
/**
 * Accessor for reader features; since no readable features are
 * defined, every call fails.
 *
 * @param name Name of the feature to access
 *
 * @throws IllegalArgumentException Always, with a message built from
 *   {@link ErrorConsts#ERR_UNKNOWN_FEATURE} and the given name
 */
public Object getFeature(String name)
{
    // No readable features defined yet, so any name is unknown
    final Object[] msgArgs = new Object[] { name };
    final String msg = MessageFormat.format(ErrorConsts.ERR_UNKNOWN_FEATURE, msgArgs);
    throw new IllegalArgumentException(msg);
}
/**
 * Mutator for reader features; the base class has no settable
 * features, so every call fails.
 *
 * @param name Name of the feature to set
 * @param value Value to set (never used)
 *
 * @throws IllegalArgumentException Always, with a message built from
 *   {@link ErrorConsts#ERR_UNKNOWN_FEATURE} and the given name
 */
public void setFeature(String name, Object value)
{
    // Base class has no settable features at this point
    final Object[] msgArgs = new Object[] { name };
    final String msg = MessageFormat.format(ErrorConsts.ERR_UNKNOWN_FEATURE, msgArgs);
    throw new IllegalArgumentException(msg);
}
// NOTE: getProperty() defined in Stax 1.0 interface
/**
* Checks whether the named property is recognized, by delegating the
* question to the reader configuration object.
*
* @param name Name of the property to check
*
* @return Whatever the configuration object reports for the name
*/
public boolean isPropertySupported(String name) {
// !!! TBI: not all these properties are really supported
// (that is: a true result here is not a guarantee of full support)
return mConfig.isPropertySupported(name);
}
/**
 * Sets the named property on the reader configuration; a successful
 * change of the base URL property is additionally propagated to the
 * current input source.
 *
 * @param name Name of the property to set
 * @param value Value to set property to.
 *
 * @return True, if the specified property was successfully
 *   set to specified value; false if its value was not changed
 */
public boolean setProperty(String name, Object value)
{
    if (!mConfig.setProperty(name, value)) {
        return false;
    }
    /* To make [WSTX-50] work fully dynamically (i.e. allow
     * setting BASE_URL after stream reader has been constructed),
     * the new value has to be forced on the input source as well.
     */
    if (WstxInputProperties.P_BASE_URL.equals(name)) {
        // Easiest to read it back from config: may have come in as a String etc
        mInput.overrideSource(mConfig.getBaseURL());
    }
    return true;
}
// // // StAX2, additional traversal methods
/**
 * Skips over the element the reader currently points to, including
 * everything nested inside it: events are read and discarded until
 * the END_ELEMENT that balances the current START_ELEMENT has been
 * returned by {@link #next}.
 *
 * @throws IllegalStateException If the current event is not
 *   START_ELEMENT
 */
public void skipElement() throws XMLStreamException
{
    if (mCurrToken != START_ELEMENT) {
        throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
    }
    // One open element to begin with; done once count drops to zero
    int openCount = 1;
    do {
        int event = next();
        if (event == START_ELEMENT) {
            ++openCount;
        } else if (event == END_ELEMENT) {
            --openCount;
        }
    } while (openCount > 0);
}
// // // StAX2, additional attribute access
/**
 * Accessor for attribute information of the current start element.
 *
 * @return The element stack, which serves as the attribute
 *   information object
 *
 * @throws IllegalStateException If the current event is not
 *   START_ELEMENT
 */
public AttributeInfo getAttributeInfo() throws XMLStreamException
{
    if (mCurrToken == START_ELEMENT) {
        /* The attribute collector knows the specific parsed
         * information, but it is the element stack that has the
         * DTD-derived information (if any), and that knows how to
         * call the attribute collector when necessary.
         */
        return mElementStack;
    }
    throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
}
// // // StAX2, Additional DTD access
/**
 * Since this class implements {@link DTDInfo}, method can just
 * return <code>this</code>; but only while the reader points to
 * a DTD event.
 *
 * @return This reader if the current event is DTD (after making
 *   sure the event has been fully read); null for all other events
 */
public DTDInfo getDTDInfo() throws XMLStreamException
{
    if (mCurrToken == DTD) {
        // Event may have been only partially read so far: complete it
        if (mTokenState < TOKEN_FULL_SINGLE) {
            finishToken(false);
        }
        return this;
    }
    /* Access during other events is not allowed -- that way callers
     * won't count on the information being available afterwards.
     */
    return null;
}
// // // StAX2, Additional location information
/**
* Location information is always accessible, for this reader: the
* reader class itself implements {@link LocationInfo}, so no separate
* object needs to be constructed.
*
* @return This reader instance
*/
public final LocationInfo getLocationInfo() {
return this;
}
// // // StAX2, Pass-through text accessors
/**
 * Method similar to {@link #getText()}, except
 * that it just uses provided Writer to write all textual content.
 * For further optimization, it may also be allowed to do true
 * pass-through, thus possibly avoiding one temporary copy of the
 * data.
 *<p>
 * TODO: try to optimize to allow completely streaming pass-through
 * in all cases: when contents need to be preserved (and for event
 * types other than CHARACTERS and CDATA), all data is still read
 * in memory buffers before outputting.
 *
 * @param w Writer to use for writing textual contents
 * @param preserveContents If true, reader has to preserve contents
 *   so that further calls to <code>getText</code> will return
 *   proper contents. If false, reader is allowed to skip creation
 *   of such copies: this can improve performance, but it also means
 *   that further calls to <code>getText</code> are not guaranteed to
 *   return meaningful data.
 *
 * @return Number of characters written to the writer
 *
 * @throws IOException If the Writer fails
 * @throws XMLStreamException If reading the rest of the event fails
 */
public int getText(Writer w, boolean preserveContents)
    throws IOException, XMLStreamException
{
    if (((1 << mCurrToken) & MASK_GET_TEXT_WITH_WRITER) == 0) {
        throwNotTextual(mCurrToken);
    }
    /* May need to be able to do fully streaming... but only for
     * text events that have not yet been fully read; for other
     * types there's less benefit, and for fully read ones, we
     * already have everything ready.
     */
    if (!preserveContents
        && (mCurrToken == CHARACTERS || mCurrToken == CDATA)) {
        // Decided once, up front; CHARACTERS and CDATA only differ in readers used
        final boolean isCData = (mCurrToken == CDATA);
        // First: whatever has been buffered so far
        int count = mTextBuffer.rawContentsTo(w);
        /* Let's also clear whatever was collected (as allowed by
         * method contract) previously, to both save memory, and
         * to ensure caller doesn't accidentally try to access it
         * (and get otherwise 'random' results).
         */
        mTextBuffer.resetWithEmpty();
        // Then the unread remainder of this physical segment, if any
        if (mTokenState < TOKEN_FULL_SINGLE) {
            count += isCData ? readAndWriteCData(w) : readAndWriteText(w);
        }
        // And finally adjacent text segments, but only in coalescing mode
        if (mCfgCoalesceText && (mTokenState < TOKEN_FULL_COALESCED)) {
            count += readAndWriteCoalesced(w, isCData);
        }
        return count;
    }
    if (mTokenState < mStTextThreshold) {
        /* Otherwise, let's just finish the token; and due to guarantee
         * by streaming method, let's try ensure we get it all.
         */
        finishToken(false); // false -> shouldn't defer errors
    }
    if (mCurrToken == ENTITY_REFERENCE) {
        return mCurrEntity.getReplacementText(w);
    }
    if (mCurrToken == DTD) {
        // Internal subset may be missing, in which case nothing is written
        char[] ch = getDTDInternalSubsetArray();
        if (ch != null) {
            w.write(ch);
            return ch.length;
        }
        return 0;
    }
    return mTextBuffer.rawContentsTo(w);
}
// // // StAX 2, Other accessors
/**
* Accessor for the current element nesting depth, as tracked by the
* element stack (to which the call is delegated).
*
* @return Number of open elements in the stack; 0 when parser is in
* prolog/epilog, 1 inside root element and so on.
*/
public int getDepth() {
/* Note: we can not necessarily use mCurrDepth, since it is
* directly synchronized to the input (to catch unbalanced entity
* expansion WRT element nesting), and not to actual token values
* returned.
*/
return mElementStack.getDepth();
}
/**
 * Checks whether the current start element was written using the
 * 'empty' element notation (ends with '/&gt;').
 *
 * @return Value of the empty-element flag when the cursor points to
 *   a START_ELEMENT; false for every other event type
 */
public boolean isEmptyElement() throws XMLStreamException
{
    // Flag is only consulted for start elements
    if (mCurrToken != START_ELEMENT) {
        return false;
    }
    return mStEmptyElem;
}
/**
* Accessor for a namespace context object constructed by the element
* stack.
* NOTE(review): judging by the name, the returned context presumably
* stays valid after the reader advances -- confirm against the
* element stack implementation.
*
* @return Namespace context created by the element stack, without
* any location information
*/
public NamespaceContext getNonTransientNamespaceContext()
{
// null -> no Location info, not needed with basic API
return mElementStack.createNonTransientNsContext(null);
}
/**
 * Accessor for the name of the current event, in its prefixed form
 * where applicable.
 *
 * @return For START_ELEMENT and END_ELEMENT, local name of the element,
 *   preceded by prefix and a colon if element has a prefix; for
 *   ENTITY_REFERENCE, same as {@link #getLocalName}; for
 *   PROCESSING_INSTRUCTION, same as {@link #getPITarget}; for DTD,
 *   same as {@link #getDTDRootName}
 *
 * @throws IllegalStateException If the current event is none of the
 *   types listed above
 */
public String getPrefixedName()
{
    final int type = mCurrToken;

    if (type == START_ELEMENT || type == END_ELEMENT) {
        String prefix = mElementStack.getPrefix();
        String localName = mElementStack.getLocalName();
        // No prefix? Then local name is all there is
        if (prefix == null) {
            return localName;
        }
        // Otherwise "prefix:localName", with exactly sized buffer
        StringBuffer qname = new StringBuffer(localName.length() + 1 + prefix.length());
        qname.append(prefix).append(':').append(localName);
        return qname.toString();
    }
    if (type == ENTITY_REFERENCE) {
        return getLocalName();
    }
    if (type == PROCESSING_INSTRUCTION) {
        return getPITarget();
    }
    if (type == DTD) {
        return getDTDRootName();
    }
    throw new IllegalStateException("Current state not START_ELEMENT, END_ELEMENT, ENTITY_REFERENCE, PROCESSING_INSTRUCTION or DTD");
}
/**
* Variant of {@link #close} that only closes the input sources, passing
* <code>true</code> (instead of <code>false</code>, as
* <code>close()</code> does) to the input closing method.
* NOTE(review): the flag presumably forces closing of the underlying
* input source even when auto-closing is not enabled -- confirm
* against closeAllInput(). Unlike <code>close()</code>, this method
* does not update the parse state or recycle text buffers.
*/
public void closeCompletely() throws XMLStreamException
{
closeAllInput(true);
}
/*
////////////////////////////////////////////////////
// DTDInfo implementation (StAX 2)
////////////////////////////////////////////////////
*/
/**
*
* Note: DTD-handling sub-classes need to override this method. */ public Object getProcessedDTD() { return null; } public String getDTDRootName() { if (mRootPrefix == null) { return mRootLName; } return mRootPrefix + ":" + mRootLName; } public String getDTDPublicId() { return mDtdPublicId; } public String getDTDSystemId() { return mDtdSystemId; } /** * @return Internal subset portion of the DOCTYPE declaration, if any; * empty String if none */ public String getDTDInternalSubset() { if (mCurrToken != DTD) { return null; } return mTextBuffer.contentsAsString(); } /** * Internal method used by implementation */ private char[] getDTDInternalSubsetArray() { /* Note: no checks for current state, but only because it's * an internal method and callers are known to ensure it's ok * to call this */ return mTextBuffer.contentsAsArray(); } // // StAX2, v2.0 /** * Sub-class will override this method */ public DTDValidationSchema getProcessedDTDSchema() { return null; } /* //////////////////////////////////////////////////// // LocationInfo implementation (StAX 2) //////////////////////////////////////////////////// */ // // // First, the "raw" offset accessors: public long getStartingByteOffset() { /* 15-Apr-2005, TSa: No way to reliably keep track of byte offsets, * at least for variable-length encodings... so let's just * return -1 for now */ return -1L; } public long getStartingCharOffset() { return mTokenInputTotal; } public long getEndingByteOffset() throws XMLStreamException { /* 15-Apr-2005, TSa: No way to reliably keep track of byte offsets, * at least for variable-length encodings... 
so let's just * return -1 for now */ return -1; } public long getEndingCharOffset() throws XMLStreamException { // Need to get to the end of the token, if not there yet if (mTokenState < mStTextThreshold) { finishToken(false); } return mCurrInputProcessed + mInputPtr; } // // // and then the object-based access methods: public final Location getLocation() { return getStartLocation(); } // public XMLStreamLocation2 getStartLocation() // from base class // public XMLStreamLocation2 getCurrentLocation() // - "" - public final XMLStreamLocation2 getEndLocation() throws XMLStreamException { // Need to get to the end of the token, if not there yet if (mTokenState < mStTextThreshold) { finishToken(false); } // And then we just need the current location! return getCurrentLocation(); } /* //////////////////////////////////////////////////// // Stax2 validation //////////////////////////////////////////////////// */ public XMLValidator validateAgainst(XMLValidationSchema schema) throws XMLStreamException { // Not implemented by the basic reader: return null; } public XMLValidator stopValidatingAgainst(XMLValidationSchema schema) throws XMLStreamException { // Not implemented by the basic reader: return null; } public XMLValidator stopValidatingAgainst(XMLValidator validator) throws XMLStreamException { // Not implemented by the basic reader: return null; } public ValidationProblemHandler setValidationProblemHandler(ValidationProblemHandler h) { // Not implemented by the basic reader: return null; } /* ////////////////////////////////////////////////////// // StreamReaderImpl implementation ////////////////////////////////////////////////////// */ public EntityDecl getCurrentEntityDecl() { return mCurrEntity; } /** * Method called by {@link com.ctc.wstx.evt.DefaultEventAllocator} * to get double-indirection necessary for constructing start element * events. * * @return Null, if stream does not point to start element; whatever * callback returns otherwise. 
*/ public Object withStartElement(ElemCallback cb, Location loc) { if (mCurrToken != START_ELEMENT) { return null; } return cb.withStartElement(loc, getName(), mElementStack.createNonTransientNsContext(loc), mAttrCollector.buildAttrOb(), mStEmptyElem); } public boolean isNamespaceAware() { return mCfgNsEnabled; } /** * Method needed by classes (like stream writer implementations) * that want to have efficient direct access to element stack * implementation */ public InputElementStack getInputElementStack() { return mElementStack; } /** * Method needed by classes (like stream writer implementations) * that want to have efficient direct access to attribute collector * Object, for optimal attribute name and value access. */ public AttributeCollector getAttributeCollector() { return mAttrCollector; } /* ////////////////////////////////////////////////////// // Support for SAX XMLReader implementation ////////////////////////////////////////////////////// */ public void fireSaxStartElement(ContentHandler h, Attributes attrs) throws SAXException { if (h != null) { // First; any ns declarations? int nsCount = mElementStack.getCurrentNsCount(); for (int i = 0; i < nsCount; ++i) { String prefix = mElementStack.getLocalNsPrefix(i); String uri = mElementStack.getLocalNsURI(i); h.startPrefixMapping((prefix == null) ? "" : prefix, uri); } // Then start-elem event itself: String uri = mElementStack.getNsURI(); // Sax requires "" (not null) for ns uris... h.startElement((uri == null) ? "" : uri, mElementStack.getLocalName(), getPrefixedName(), attrs); } } public void fireSaxEndElement(ContentHandler h) throws SAXException { if (h != null) { /* Order of events is reversed (wrt. start-element): first * the end tag event, then unbound prefixes */ String uri = mElementStack.getNsURI(); // Sax requires "" (not null) for ns uris... h.endElement((uri == null) ? "" : uri, mElementStack.getLocalName(), getPrefixedName()); // Any expiring ns declarations? 
int nsCount = mElementStack.getCurrentNsCount(); for (int i = 0; i < nsCount; ++i) { String prefix = mElementStack.getLocalNsPrefix(i); //String nsUri = mElementStack.getLocalNsURI(i); h.endPrefixMapping((prefix == null) ? "" : prefix); } } } public void fireSaxCharacterEvents(ContentHandler h) throws XMLStreamException, SAXException { if (h != null) { if (mPendingException != null) { XMLStreamException sex = mPendingException; mPendingException = null; throw sex; } /* Let's not defer errors; SAXTest implies * it's expected errors are thrown right away */ if (mTokenState < mStTextThreshold) { finishToken(false); } mTextBuffer.fireSaxCharacterEvents(h); } } public void fireSaxSpaceEvents(ContentHandler h) throws XMLStreamException, SAXException { if (h != null) { if (mTokenState < mStTextThreshold) { finishToken(false); // no error deferring } mTextBuffer.fireSaxSpaceEvents(h); } } public void fireSaxCommentEvent(LexicalHandler h) throws XMLStreamException, SAXException { if (h != null) { if (mTokenState < mStTextThreshold) { finishToken(false); // no error deferring } mTextBuffer.fireSaxCommentEvent(h); } } public void fireSaxPIEvent(ContentHandler h) throws XMLStreamException, SAXException { if (h != null) { if (mTokenState < mStTextThreshold) { finishToken(false); // no error deferring } h.processingInstruction(mCurrName, mTextBuffer.contentsAsString()); } } /* //////////////////////////////////////////////////// // Internal methods, config access //////////////////////////////////////////////////// */ protected final boolean hasConfigFlags(int flags) { return (mConfigFlags & flags) == flags; } /* //////////////////////////////////////////////////// // Internal methods, parsing helper methods //////////////////////////////////////////////////// */ /** * @return Null, if keyword matches ok; String that contains erroneous * keyword if not. 
*/ protected String checkKeyword(char c, String expected) throws XMLStreamException { int ptr = 0; int len = expected.length(); while (expected.charAt(ptr) == c && ++ptr < len) { if (mInputPtr < mInputEnd) { c = mInputBuffer[mInputPtr++]; } else { int ci = getNext(); if (ci < 0) { // EOF break; } c = (char) ci; } } if (ptr == len) { // Probable match... but let's make sure keyword is finished: int i = peekNext(); if (i < 0 || (!isNameChar((char) i) && i != ':')) { return null; } // Nope, continues, need to find the rest: } StringBuffer sb = new StringBuffer(expected.length() + 16); sb.append(expected.substring(0, ptr)); if (ptr < len) { sb.append(c); } while (true) { if (mInputPtr < mInputEnd) { c = mInputBuffer[mInputPtr++]; } else { int ci = getNext(); if (ci < 0) { // EOF break; } c = (char) ci; } if (!isNameChar(c)) { // Let's push it back then --mInputPtr; break; } sb.append(c); } return sb.toString(); } protected void checkCData() throws XMLStreamException { String wrong = checkKeyword(getNextCharFromCurrent(SUFFIX_IN_CDATA), "CDATA"); if (wrong != null) { throwParseError("Unrecognized XML directive '"+wrong+"'; expected 'CDATA'."); } // Plus, need the bracket too: char c = getNextCharFromCurrent(SUFFIX_IN_CDATA); if (c != '[') { throwUnexpectedChar(c, "excepted '[' after '= 3 && (ch = resolveSimpleEntity(true)) != 0) { // Ok, fine, c is whatever it is ; } else { // full entity just changes buffer... 
ch = fullyResolveEntity(false); if (ch == 0) { // need to skip output, thusly (expanded to new input source) continue; } } if (ch <= 0xFFFF) { c = (char) ch; } else { ch -= 0x10000; if (outPtr >= outLen) { outBuf = tb.bufferFull(1); outLen = outBuf.length; } outBuf[outPtr++] = (char) ((ch >> 10) + 0xD800); c = (char) ((ch & 0x3FF) + 0xDC00); } } } else if (c == '<') { throwParseError("Unexpected '<' "+SUFFIX_IN_ATTR_VALUE); } // Ok, let's just add char in, whatever it was if (outPtr >= outLen) { outBuf = tb.bufferFull(1); outLen = outBuf.length; } outBuf[outPtr++] = c; } // Fine; let's tell TextBuild we're done: tb.setBufferSize(outPtr); } /* ///////////////////////////////////////////////////// // Internal methods, parsing prolog (before root) and // epilog ///////////////////////////////////////////////////// */ /** * Method called to find type of next token in prolog; either reading * just enough information to know the type (lazy parsing), or the * full contents (non-lazy) * * @return True if we hit EOI, false otherwise */ private boolean nextFromProlog(boolean isProlog) throws XMLStreamException { int i; // First, do we need to finish currently open token? if (mTokenState < mStTextThreshold) { mTokenState = TOKEN_FULL_COALESCED; i = skipToken(); // note: skipToken() updates the start location } else { // Need to update the start location... mTokenInputTotal = mCurrInputProcessed + mInputPtr; mTokenInputRow = mCurrInputRow; mTokenInputCol = mInputPtr - mCurrInputRowStart; i = getNext(); } // Any white space to parse or skip? if (i <= CHAR_SPACE && i >= 0) { // Need to return as an event? if (hasConfigFlags(CFG_REPORT_PROLOG_WS)) { mCurrToken = SPACE; if (readSpacePrimary((char) i, true)) { /* no need to worry about coalescing, since CDATA is not * allowed at this level... 
*/ mTokenState = TOKEN_FULL_COALESCED; } else { if (mCfgLazyParsing) { /* Let's not even bother checking if it's * "long enough"; shouldn't usually matter, but few * apps care to get multiple adjacent SPACE events... */ mTokenState = TOKEN_STARTED; } else { readSpaceSecondary(true); mTokenState = TOKEN_FULL_COALESCED; } } return false; } // If not, can skip it right away --mInputPtr; // to handle linefeeds gracefully i = getNextAfterWS(); if (i >= 0) { // ... after which location has to be reset properly: /* 11-Apr-2005, TSa: But note that we need to "move back" * column and total offset values by one, to compensate * for the char that was read (row can not have changed, * since it's non-WS, and thus non-lf/cr char) */ mTokenInputTotal = mCurrInputProcessed + mInputPtr - 1; mTokenInputRow = mCurrInputRow; mTokenInputCol = mInputPtr - mCurrInputRowStart - 1; } } // Did we hit EOI? if (i < 0) { handleEOF(isProlog); mParseState = STATE_CLOSED; return true; } // Now we better have a lt... if (i != '<') { throwUnexpectedChar(i, (isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG) +"; expected '<'"); } // And then it should be easy to figure out type: char c = getNextChar(isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG); if (c == '?') { // proc. inst mCurrToken = readPIPrimary(); } else if (c == '!') { // DOCTYPE or comment (or CDATA, but not legal here) // Need to figure out bit more first... nextFromPrologBang(isProlog); } else if (c == '/') { // end tag not allowed... if (isProlog) { throwParseError("Unexpected character combination '' in prolog."); } throwParseError("Unexpected character combination '' in epilog (extra close tag?)."); } else if (c == ':' || isNameStartChar(c)) { // Root element, only allowed after prolog if (!isProlog) { /* This call will throw an exception if there's a problem; * otherwise set up everything properly */ mCurrToken = handleExtraRoot(c); // will check input parsing mode... 
return false; } handleRootElem(c); mCurrToken = START_ELEMENT; } else { throwUnexpectedChar(c, (isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG) +", after '<'."); } // Ok; final twist, maybe we do NOT want lazy parsing? if (!mCfgLazyParsing && mTokenState < mStTextThreshold) { finishToken(false); } return false; } protected void handleRootElem(char c) throws XMLStreamException { mParseState = STATE_TREE; initValidation(); handleStartElem(c); // Does name match with DOCTYPE declaration (if any)? // 20-Jan-2006, TSa: Only check this is we are (DTD) validating... if (mRootLName != null) { if (hasConfigFlags(CFG_VALIDATE_AGAINST_DTD)) { if (!mElementStack.matches(mRootPrefix, mRootLName)) { String actual = (mRootPrefix == null) ? mRootLName : (mRootPrefix + ":" + mRootLName); reportValidationProblem(ErrorConsts.ERR_VLD_WRONG_ROOT, actual, mRootLName); } } } } /** * Method called right before the document root element is handled. * The default implementation is empty; validating stream readers * should override the method and do whatever initialization is * necessary */ protected void initValidation() throws XMLStreamException { ; // nothing to do here } protected int handleEOF(boolean isProlog) throws XMLStreamException { /* 19-Aug-2006, TSa: mSecondaryToken needs to be initialized to * END_DOCUMENT so we'll know it hasn't been yet accessed. */ mCurrToken = mSecondaryToken = END_DOCUMENT; /* Although buffers have most likely already been recycled, * let's call this again just in case. At this point we can * safely discard any contents */ mTextBuffer.recycle(true); // true -> clean'n recycle // It's ok to get EOF from epilog but not from prolog if (isProlog) { throwUnexpectedEOF(SUFFIX_IN_PROLOG); } return mCurrToken; } /** * Method called if a root-level element is found after the main * root element was closed. This is legal in multi-doc parsing * mode (and in fragment mode), but not in the default single-doc * mode. 
* * @return Token to return */ private int handleExtraRoot(char c) throws XMLStreamException { if (!mConfig.inputParsingModeDocuments()) { /* Has to be single-doc mode, since fragment mode * should never get here (since fragment mode never has epilog * or prolog modes) */ throwParseError("Illegal to have multiple roots (start tag in epilog?)."); } // Need to push back the char, since it is the first char of elem name --mInputPtr; return handleMultiDocStart(START_ELEMENT); } /** * Method called when an event was encountered that indicates document * boundary in multi-doc mode. Needs to trigger dummy * END_DOCUMENT/START_DOCUMENT event combination, followed by the * handling of the original event. * * @return Event type to return */ protected int handleMultiDocStart(int nextEvent) { mParseState = STATE_MULTIDOC_HACK; mTokenState = TOKEN_FULL_COALESCED; // this is a virtual event after all... mSecondaryToken = nextEvent; return END_DOCUMENT; } /** * Method called to get the next event when we are "multi-doc hack" mode, * during which extra END_DOCUMENT/START_DOCUMENT events need to be * returned. */ private int nextFromMultiDocState() throws XMLStreamException { if (mCurrToken == END_DOCUMENT) { /* Ok; this is the initial step; need to advance: need to parse * xml declaration if that was the cause, otherwise just clear * up values. */ if (mSecondaryToken == START_DOCUMENT) { handleMultiDocXmlDecl(); } else { // Nah, DOCTYPE or start element... just need to clear decl info: mDocXmlEncoding = null; mDocXmlVersion = XmlConsts.XML_V_UNKNOWN; mDocStandalone = DOC_STANDALONE_UNKNOWN; } return START_DOCUMENT; } if (mCurrToken == START_DOCUMENT) { mParseState = STATE_PROLOG; // yup, we are now officially in prolog again... // Had an xml decl (ie. 
"real" START_DOCUMENT event) if (mSecondaryToken == START_DOCUMENT) { // was a real xml decl nextFromProlog(true); return mCurrToken; } // Nah, start elem or DOCTYPE if (mSecondaryToken == START_ELEMENT) { handleRootElem(getNextChar(SUFFIX_IN_ELEMENT)); return START_ELEMENT; } if (mSecondaryToken == DTD) { mStDoctypeFound = true; startDTD(); return DTD; } } throw new IllegalStateException("Internal error: unexpected state; current event " +tokenTypeDesc(mCurrToken)+", sec. state: "+tokenTypeDesc(mSecondaryToken)); } protected void handleMultiDocXmlDecl() throws XMLStreamException { // Let's default these first mDocStandalone = DOC_STANDALONE_UNKNOWN; mDocXmlEncoding = null; char c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL); String wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_VERSION); if (wrong != null) { throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_VERSION); } c = skipEquals(XmlConsts.XML_DECL_KW_VERSION, SUFFIX_IN_XML_DECL); TextBuffer tb = mTextBuffer; tb.resetInitialized(); parseQuoted(XmlConsts.XML_DECL_KW_VERSION, c, tb); if (tb.equalsString(XmlConsts.XML_V_10_STR)) { mDocXmlVersion = XmlConsts.XML_V_10; mXml11 = false; } else if (tb.equalsString(XmlConsts.XML_V_11_STR)) { mDocXmlVersion = XmlConsts.XML_V_11; mXml11 = true; } else { mDocXmlVersion = XmlConsts.XML_V_UNKNOWN; mXml11 = false; throwParseError("Unexpected xml version '"+tb.toString()+"'; expected '"+XmlConsts.XML_V_10_STR+"' or '"+XmlConsts.XML_V_11_STR+"'"); } c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL); if (c != '?') { // '?' signals end... if (c == 'e') { // encoding wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_ENCODING); if (wrong != null) { throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_ENCODING); } c = skipEquals(XmlConsts.XML_DECL_KW_ENCODING, SUFFIX_IN_XML_DECL); tb.resetWithEmpty(); parseQuoted(XmlConsts.XML_DECL_KW_ENCODING, c, tb); mDocXmlEncoding = tb.toString(); /* should we verify encoding at this point? 
let's not, for now; * since it's for information only, first declaration from * bootstrapper is used for the whole stream. */ c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL); } else if (c != 's') { throwUnexpectedChar(c, " in xml declaration; expected either 'encoding' or 'standalone' pseudo-attribute"); } // Standalone? if (c == 's') { wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_STANDALONE); if (wrong != null) { throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_STANDALONE); } c = skipEquals(XmlConsts.XML_DECL_KW_STANDALONE, SUFFIX_IN_XML_DECL); tb.resetWithEmpty(); parseQuoted(XmlConsts.XML_DECL_KW_STANDALONE, c, tb); if (tb.equalsString(XmlConsts.XML_SA_YES)) { mDocStandalone = DOC_STANDALONE_YES; } else if (tb.equalsString(XmlConsts.XML_SA_NO)) { mDocStandalone = DOC_STANDALONE_NO; } else { throwParseError("Unexpected xml '"+XmlConsts.XML_DECL_KW_STANDALONE+"' pseudo-attribute value '" +tb.toString()+"'; expected '"+XmlConsts.XML_SA_YES+"' or '"+ XmlConsts.XML_SA_NO+"'"); } c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL); } } if (c != '?') { throwUnexpectedChar(c, " in xml declaration; expected '?>' as the end marker"); } c = getNextCharFromCurrent(SUFFIX_IN_XML_DECL); if (c != '>') { throwUnexpectedChar(c, " in xml declaration; expected '>' to close the declaration"); } } /** * Method that checks that input following is of form * '[S]* '=' [S]*' (as per XML specs, production #25). * Will push back non-white space characters as necessary, in * case no equals char is encountered. */ protected final char skipEquals(String name, String eofMsg) throws XMLStreamException { char c = getNextInCurrAfterWS(eofMsg); if (c != '=') { throwUnexpectedChar(c, " in xml declaration; expected '=' to follow pseudo-attribute '"+name+"'"); } // trailing space? return getNextInCurrAfterWS(eofMsg); } /** * Method called to parse quoted xml declaration pseudo-attribute values. 
* Works similar to attribute value parsing, except no entities can be * included, and in general need not be as picky (since caller is to * verify contents). One exception is that we do check for linefeeds * and lt chars, since they generally would indicate problems and * are useful to catch early on (can happen if a quote is missed etc) *
* Note: since it'll be called at most 3 times per document, this method * is not optimized too much. */ protected final void parseQuoted(String name, char quoteChar, TextBuffer tbuf) throws XMLStreamException { if (quoteChar != '"' && quoteChar != '\'') { throwUnexpectedChar(quoteChar, " in xml declaration; waited ' or \" to start a value for pseudo-attribute '"+name+"'"); } char[] outBuf = tbuf.getCurrentSegment(); int outPtr = 0; while (true) { char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_IN_XML_DECL); if (c == quoteChar) { break; } if (c < CHAR_SPACE || c == '<') { throwUnexpectedChar(c, SUFFIX_IN_XML_DECL); } else if (c == CHAR_NULL) { throwNullChar(); } if (outPtr >= outBuf.length) { outBuf = tbuf.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = c; } tbuf.setCurrentLength(outPtr); } /** * Called after character sequence '<!' has been found; expectation is * that it'll either be DOCTYPE declaration (if we are in prolog and * haven't yet seen one), or a comment. CDATA is not legal here; * it would start same way otherwise. */ private void nextFromPrologBang(boolean isProlog) throws XMLStreamException { int i = getNext(); if (i < 0) { throwUnexpectedEOF(SUFFIX_IN_PROLOG); } if (i == 'D') { // Doctype declaration? String keyw = checkKeyword('D', "DOCTYPE"); if (keyw != null) { throwParseError("Unrecognized XML directive ' * *. And we have already read the DOCTYPE token. 
*/ char c = getNextInCurrAfterWS(SUFFIX_IN_DTD); if (mCfgNsEnabled) { String str = parseLocalName(c); c = getNextChar(SUFFIX_IN_DTD); if (c == ':') { // Ok, got namespace and local name mRootPrefix = str; mRootLName = parseLocalName(getNextChar(SUFFIX_EOF_EXP_NAME)); } else if (c <= CHAR_SPACE || c == '[' || c == '>') { // ok to get white space or '[', or closing '>' --mInputPtr; // pushback mRootPrefix = null; mRootLName = str; } else { throwUnexpectedChar(c, " in DOCTYPE declaration; expected '[' or white space."); } } else { mRootLName = parseFullName(c); mRootPrefix = null; } // Ok, fine, what next? c = getNextInCurrAfterWS(SUFFIX_IN_DTD); if (c != '[' && c != '>') { String keyw = null; if (c == 'P') { keyw = checkKeyword(getNextChar(SUFFIX_IN_DTD), "UBLIC"); if (keyw != null) { keyw = "P" + keyw; } else { if (!skipWS(getNextChar(SUFFIX_IN_DTD))) { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a space between PUBLIC keyword and public id"); } c = getNextCharFromCurrent(SUFFIX_IN_DTD); if (c != '"' && c != '\'') { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a public identifier."); } mDtdPublicId = parsePublicId(c, SUFFIX_IN_DTD); if (mDtdPublicId.length() == 0) { // According to XML specs, this isn't illegal? // however, better report it as empty, not null. //mDtdPublicId = null; } if (!skipWS(getNextChar(SUFFIX_IN_DTD))) { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a space between public and system identifiers"); } c = getNextCharFromCurrent(SUFFIX_IN_DTD); if (c != '"' && c != '\'') { throwParseError(SUFFIX_IN_DTD+"; expected a system identifier."); } mDtdSystemId = parseSystemId(c, mNormalizeLFs, SUFFIX_IN_DTD); if (mDtdSystemId.length() == 0) { // According to XML specs, this isn't illegal? // however, better report it as empty, not null. 
//mDtdSystemId = null; } } } else if (c == 'S') { mDtdPublicId = null; keyw = checkKeyword(getNextChar(SUFFIX_IN_DTD), "YSTEM"); if (keyw != null) { keyw = "S" + keyw; } else { c = getNextInCurrAfterWS(SUFFIX_IN_DTD); if (c != '"' && c != '\'') { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a system identifier."); } mDtdSystemId = parseSystemId(c, mNormalizeLFs, SUFFIX_IN_DTD); if (mDtdSystemId.length() == 0) { // According to XML specs, this isn't illegal? mDtdSystemId = null; } } } else { if (!isNameStartChar(c)) { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected keywords 'PUBLIC' or 'SYSTEM'."); } else { --mInputPtr; keyw = checkKeyword(c, "SYSTEM"); // keyword passed in doesn't matter } } if (keyw != null) { // error: throwParseError("Unexpected keyword '"+keyw+"'; expected 'PUBLIC' or 'SYSTEM'"); } // Ok, should be done with external DTD identifier: c = getNextInCurrAfterWS(SUFFIX_IN_DTD); } if (c == '[') { // internal subset ; } else { if (c != '>') { throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected closing '>'."); } } /* Actually, let's just push whatever char it is, back; this way * we can lazily initialize text buffer with DOCTYPE declaration * if/as necessary, even if there's no internal subset. */ --mInputPtr; // pushback mTokenState = TOKEN_STARTED; } /** * This method gets called to handle remainder of DOCTYPE declaration, * essentially the optional internal subset. This class implements the * basic "ignore it" functionality, but can optionally still store copy * of the contents to the read buffer. *
* NOTE: Since this default implementation will be overridden by * some sub-classes, make sure you do NOT change the method signature. * * @param copyContents If true, will copy contents of the internal * subset of DOCTYPE declaration * in the text buffer; if false, will just completely ignore the * subset (if one found). */ protected void finishDTD(boolean copyContents) throws XMLStreamException { /* We know there are no spaces, as this char was read and pushed * back earlier... */ char c = getNextChar(SUFFIX_IN_DTD); if (c == '[') { // Do we need to get contents as text too? if (copyContents) { ((BranchingReaderSource) mInput).startBranch(mTextBuffer, mInputPtr, mNormalizeLFs); } try { MinimalDTDReader.skipInternalSubset(this, mInput, mConfig); } finally { /* Let's close branching in any and every case (may allow * graceful recovery in error cases in future */ if (copyContents) { /* Need to "push back" ']' got in the succesful case * (that's -1 part below); * in error case it'll just be whatever last char was. */ ((BranchingReaderSource) mInput).endBranch(mInputPtr-1); } } // And then we need closing '>' c = getNextCharAfterWS(SUFFIX_IN_DTD_INTERNAL); } if (c != '>') { throwUnexpectedChar(c, "; expected '>' to finish DOCTYPE declaration."); } } /* //////////////////////////////////////////////////// // Internal methods, main parsing (inside root) //////////////////////////////////////////////////// */ /** * Method called to parse beginning of the next event within * document tree, and return its type. */ private final int nextFromTree() throws XMLStreamException { int i; // First, do we need to finish currently open token? if (mTokenState < mStTextThreshold) { // No need to update state... 
will get taken care of /* 03-Mar-2006, TSa: Let's add a sanity check here, temporarily, * to ensure we never skip any textual content when it is * to be validated */ if (mVldContent == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT) { if (mCurrToken == CHARACTERS || mCurrToken == CDATA) { // should never happen throwParseError("Internal error: skipping validatable text"); } } i = skipToken(); // note: skipToken() updates the start location } else { /* Start/end elements are never unfinished (ie. are always * completely read in) */ if (mCurrToken == START_ELEMENT) { // Start tag may be an empty tag: if (mStEmptyElem) { // and if so, we'll then get 'virtual' close tag: mStEmptyElem = false; // ... and location info is correct already /* 27-Feb-2009, TSa: but we do have to handle validation * of the end tag now */ int vld = mElementStack.validateEndElement(); mVldContent = vld; mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT); return END_ELEMENT; } } else if (mCurrToken == END_ELEMENT) { // Close tag removes current element from stack if (!mElementStack.pop()) { // false if root closed // if so, we'll get to epilog, unless in fragment mode if (!mConfig.inputParsingModeFragment()) { return closeContentTree(); } // in fragment mode, fine, we'll just continue } } else if (mCurrToken == CDATA && mTokenState <= TOKEN_PARTIAL_SINGLE) { /* Just returned a partial CDATA... that's ok, just need to * know we won't get opening marker etc. * The tricky part here is just to ensure there's at least * one character; if not, need to just discard the empty * 'event' (note that it is possible to have an initial * empty CDATA event for truly empty CDATA block; but not * partial ones!). Let's just read it like a new * CData section first: */ // First, need to update the start location... mTokenInputTotal = mCurrInputProcessed + mInputPtr; mTokenInputRow = mCurrInputRow; mTokenInputCol = mInputPtr - mCurrInputRowStart; char c = (mInputPtr < mInputEnd) ? 
mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_IN_CDATA); if (readCDataPrimary(c)) { // got it all! /* note: can not be in coalescing mode at this point; * as we can never have partial cdata without unfinished * token */ // ... still need to have gotten at least 1 char though: if (mTextBuffer.size() > 0) { return CDATA; } // otherwise need to continue and parse the next event } else { /* Hmmh. Have to verify we get at least one char from * CData section; if so, we are good to go for now; * if not, need to get that damn char first: */ if (mTextBuffer.size() == 0 && readCDataSecondary(mCfgLazyParsing ? 1 : mShortestTextSegment)) { // Ok, all of it read if (mTextBuffer.size() > 0) { // And had some contents mTokenState = TOKEN_FULL_SINGLE; return CDATA; } // if nothing read, we'll just fall back (see below) } else { // good enough! mTokenState = TOKEN_PARTIAL_SINGLE; return CDATA; } } /* If we get here, it was the end of the section, without * any more text inside CDATA, so let's just continue */ } // Once again, need to update the start location info: mTokenInputTotal = mCurrInputProcessed + mInputPtr; mTokenInputRow = mCurrInputRow; mTokenInputCol = mInputPtr - mCurrInputRowStart; i = getNext(); } if (i < 0) { /* 07-Oct-2005, TSa: May be ok in fragment mode (not otherwise), * but we can just check if element stack has anything, as that * handles all cases */ if (!mElementStack.isEmpty()) { throwUnexpectedEOF(); } return handleEOF(false); } /* 26-Aug-2004, TSa: We have to deal with entities, usually, if * they are the next thing; even in non-expanding mode there * are entities and then there are entities... :-) * Let's start with char entities; they can be expanded right away. */ while (i == '&') { mWsStatus = ALL_WS_UNKNOWN; /* 30-Aug-2004, TSa: In some contexts entities are not * allowed in any way, shape or form: */ if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) { /* May be char entity, general entity; whatever it is it's * invalid! 
*/ reportInvalidContent(ENTITY_REFERENCE); } /* Need to call different methods based on whether we can do * automatic entity expansion or not: */ int ch = mCfgReplaceEntities ? fullyResolveEntity(true) : resolveCharOnlyEntity(true); if (ch != 0) { /* Char-entity... need to initialize text output buffer, then; * independent of whether it'll be needed or not. */ /* 30-Aug-2004, TSa: In some contexts only white space is * accepted... */ if (mVldContent <= XMLValidator.CONTENT_ALLOW_WS) { // As per xml specs, only straight white space is legal if (ch > CHAR_SPACE) { /* 21-Sep-2008, TSa: Used to also require a call to * 'mElementStack.reallyValidating', if only ws * allowed, to cover the case where non-typing-dtd * was only used to discover SPACE type. But * now that we have CONTENT_ALLOW_WS_NONSTRICT, * shouldn't be needed. */ //if (mVldContent < XMLValidator.CONTENT_ALLOW_WS || mElementStack.reallyValidating()) { reportInvalidContent(CHARACTERS); } } TextBuffer tb = mTextBuffer; tb.resetInitialized(); if (ch <= 0xFFFF) { tb.append((char) ch); } else { ch -= 0x10000; tb.append((char) ((ch >> 10) + 0xD800)); tb.append((char) ((ch & 0x3FF) + 0xDC00)); } mTokenState = TOKEN_STARTED; return CHARACTERS; } /* Nope; was a general entity... in auto-mode, it's now been * expanded; in non-auto, need to figure out entity itself. */ if (!mCfgReplaceEntities|| mCfgTreatCharRefsAsEntities) { if (!mCfgTreatCharRefsAsEntities) { final EntityDecl ed = resolveNonCharEntity(); // Note: ed may still be null at this point mCurrEntity = ed; } // Note: ed may still be null at this point mTokenState = TOKEN_FULL_COALESCED; /* // let's not worry about non-parsed entities, since this is unexpanded mode // ... although it'd be an error either way? Should we report it? 
if (ed != null && !ed.isParsed()) { throwParseError("Reference to unparsed entity '"+ed.getName()+"' from content not allowed."); } */ return ENTITY_REFERENCE; } // Otherwise automatic expansion fine; just need the next char: i = getNextChar(SUFFIX_IN_DOC); } if (i == '<') { // Markup // And then it should be easy to figure out type: char c = getNextChar(SUFFIX_IN_ELEMENT); if (c == '?') { // proc. inst // 30-Aug-2004, TSa: Not legal for EMPTY elements if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) { reportInvalidContent(PROCESSING_INSTRUCTION); } return readPIPrimary(); } if (c == '!') { // CDATA or comment // Need to figure out bit more first... int type = nextFromTreeCommentOrCData(); // 30-Aug-2004, TSa: Not legal for EMPTY elements if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) { reportInvalidContent(type); } return type; } if (c == '/') { // always legal (if name matches etc) readEndElem(); return END_ELEMENT; } if (c == ':' || isNameStartChar(c)) { /* Note: checking for EMPTY content type is done by the * validator, no need to check here */ handleStartElem(c); return START_ELEMENT; } if (c == '[') { throwUnexpectedChar(c, " in content after '<' (malformed = mShortestTextSegment) { mTokenState = TOKEN_PARTIAL_SINGLE; } else { mTokenState = TOKEN_STARTED; } } return CHARACTERS; } /** * Method called when advacing stream past the end tag that closes * the root element of the open document. * Document can be either the singular one, in regular mode, or one of * possibly multiple, in multi-doc mode: this method is never called * in fragment mode. Method needs to update state properly and * parse following epilog event (if any). * * @return Event following end tag of the root elemennt, if any; * END_DOCUMENT otherwis.e */ private int closeContentTree() throws XMLStreamException { mParseState = STATE_EPILOG; // this call will update the location too... 
if (nextFromProlog(false)) { mSecondaryToken = 0; } /* 10-Apr-2006, TSa: Let's actually try to update * SymbolTable here (after main xml tree); caller * may not continue parsing after this. */ if (mSymbols.isDirty()) { mOwner.updateSymbolTable(mSymbols); } /* May be able to recycle, but not certain; and * definitely can not just clean contents (may * contain space(s) read) */ mTextBuffer.recycle(false); return mCurrToken; } /** * Method that takes care of parsing of start elements; including * full parsing of namespace declarations and attributes, as well as * namespace resolution. */ private final void handleStartElem(char c) throws XMLStreamException { mTokenState = TOKEN_FULL_COALESCED; boolean empty; if (mCfgNsEnabled) { String str = parseLocalName(c); c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME); if (c == ':') { // Ok, got namespace and local name c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME); mElementStack.push(str, parseLocalName(c)); c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); } else { mElementStack.push(null, str); // c is fine as } /* Enough about element name itself; let's then parse attributes * and namespace declarations. Split into another method for clarity, * and so that maybe JIT has easier time to optimize it separately. */ /* 04-Jul-2005, TSa: But hold up: we can easily check for a fairly * common case of no attributes showing up, and us getting the * closing '>' right away. Let's do that, since it can save * a call to a rather long method. */ empty = (c == '>') ? false : handleNsAttrs(c); } else { // Namespace handling not enabled: mElementStack.push(null, parseFullName(c)); c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); empty = (c == '>') ? 
false : handleNonNsAttrs(c); } if (!empty) { ++mCurrDepth; // needed to match nesting with entity expansion } mStEmptyElem = empty; /* 27-Feb-2009, TSa: [WSTX-191]: We used to validate virtual * end element here for empty elements, but it really should * occur later on when actually returning that end element. */ int vld = mElementStack.resolveAndValidateElement(); mVldContent = vld; mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT); } /** * @return True if this is an empty element; false if not */ private final boolean handleNsAttrs(char c) throws XMLStreamException { AttributeCollector ac = mAttrCollector; while (true) { if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } else if (c != '/' && c != '>') { throwUnexpectedChar(c, " excepted space, or '>' or \"/>\""); } if (c == '/') { c = getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c != '>') { throwUnexpectedChar(c, " expected '>'"); } return true; } else if (c == '>') { return false; } else if (c == '<') { throwParseError("Unexpected '<' character in element (missing closing '>'?)"); } String prefix, localName; String str = parseLocalName(c); c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME); if (c == ':') { // Ok, got namespace and local name prefix = str; c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME); localName = parseLocalName(c); } else { --mInputPtr; // pushback prefix = null; localName = str; } c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } if (c != '=') { throwUnexpectedChar(c, " expected '='"); } c = (mInputPtr < mInputEnd) ? 
mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } // And then a quote: if (c != '"' && c != '\'') { throwUnexpectedChar(c, SUFFIX_IN_ELEMENT+" Expected a quote"); } // And then the actual value int startLen = -1; TextBuilder tb; if (prefix == sPrefixXmlns) { // non-default namespace declaration tb = ac.getNsBuilder(localName); // returns null if it's a dupe: if (null == tb) { throwParseError("Duplicate declaration for namespace prefix '"+localName+"'."); } startLen = tb.getCharSize(); } else if (localName == sPrefixXmlns && prefix == null) { tb = ac.getDefaultNsBuilder(); // returns null if default ns was already declared if (null == tb) { throwParseError("Duplicate default namespace declaration."); } } else { tb = ac.getAttrBuilder(prefix, localName); } parseAttrValue(c, tb); /* 19-Jul-2004, TSa: Need to check that non-default namespace * URI is NOT empty, as per XML namespace specs, #2, * ("...In such declarations, the namespace name may not * be empty.") */ /* (note: startLen is only set to first char position for * non-default NS declarations, see above...) */ /* 04-Feb-2005, TSa: Namespaces 1.1 does allow this, though, * so for xml 1.1 documents we need to allow it */ if (!mXml11) { if (startLen >= 0 && tb.getCharSize() == startLen) { // is empty! throwParseError(ErrorConsts.ERR_NS_EMPTY); } } // and then we need to iterate some more c = (mInputPtr < mInputEnd) ? 
mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); } // never gets here } /** * @return True if this is an empty element; false if not */ private final boolean handleNonNsAttrs(char c) throws XMLStreamException { AttributeCollector ac = mAttrCollector; while (true) { if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } else if (c != '/' && c != '>') { throwUnexpectedChar(c, " excepted space, or '>' or \"/>\""); } if (c == '/') { c = getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c != '>') { throwUnexpectedChar(c, " expected '>'"); } return true; } else if (c == '>') { return false; } else if (c == '<') { throwParseError("Unexpected '<' character in element (missing closing '>'?)"); } String name = parseFullName(c); TextBuilder tb = ac.getAttrBuilder(null, name); c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } if (c != '=') { throwUnexpectedChar(c, " expected '='"); } c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c); } // And then a quote: if (c != '"' && c != '\'') { throwUnexpectedChar(c, SUFFIX_IN_ELEMENT+" Expected a quote"); } // And then the actual value parseAttrValue(c, tb); // and then we need to iterate some more c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT); } // never gets here } /** * Method called to completely read a close tag, and update element * stack appropriately (including checking that tag matches etc). */ protected final void readEndElem() throws XMLStreamException { mTokenState = TOKEN_FULL_COALESCED; // will be read completely if (mElementStack.isEmpty()) { // Let's just offline this for clarity reportExtraEndElem(); return; // never gets here } char c = (mInputPtr < mInputEnd) ? 
mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); // Quick check first; missing name? if (!isNameStartChar(c) && c != ':') { if (c <= CHAR_SPACE) { // space throwUnexpectedChar(c, "; missing element name?"); } throwUnexpectedChar(c, "; expected an element name."); } /* Ok, now; good thing is we know exactly what to compare * against... */ String expPrefix = mElementStack.getPrefix(); String expLocalName = mElementStack.getLocalName(); // Prefix to match? if (expPrefix != null && expPrefix.length() > 0) { int len = expPrefix.length(); int i = 0; while (true){ if (c != expPrefix.charAt(i)) { reportWrongEndPrefix(expPrefix, expLocalName, i); return; // never gets here } if (++i >= len) { break; } c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); } // And then we should get a colon c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); if (c != ':') { reportWrongEndPrefix(expPrefix, expLocalName, i); return; } c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); } // Ok, then, does the local name match? int len = expLocalName.length(); int i = 0; while (true){ if (c != expLocalName.charAt(i)) { // Not a match... reportWrongEndElem(expPrefix, expLocalName, i); return; // never gets here } if (++i >= len) { break; } c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); } // Let's see if end element still continues, however? c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT); if (c <= CHAR_SPACE) { c = getNextInCurrAfterWS(SUFFIX_IN_CLOSE_ELEMENT, c); } else if (c == '>') { ; } else if (c == ':' || isNameChar(c)) { reportWrongEndElem(expPrefix, expLocalName, len); } // Ok, fine, match ok; now we just need the closing gt char. 
if (c != '>') { throwUnexpectedChar(c, SUFFIX_IN_CLOSE_ELEMENT+" Expected '>'."); } // Finally, let's let validator detect if things are ok int vld = mElementStack.validateEndElement(); mVldContent = vld; mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT); // Plus verify WFC that start and end tags came from same entity /* 13-Feb-2006, TSa: Are we about to close an element that * started within a parent element? * That's a GE/element nesting WFC violation... */ if (mCurrDepth == mInputTopDepth) { handleGreedyEntityProblem(mInput); } --mCurrDepth; } private void reportExtraEndElem() throws XMLStreamException { String name = parseFNameForError(); throwParseError("Unbalanced close tag "+name+">; no open start tag."); } private void reportWrongEndPrefix(String prefix, String localName, int done) throws XMLStreamException { --mInputPtr; // pushback String fullName = prefix + ":" + localName; String rest = parseFNameForError(); String actName = fullName.substring(0, done) + rest; throwParseError("Unexpected close tag "+actName+">; expected " +fullName+">."); } private void reportWrongEndElem(String prefix, String localName, int done) throws XMLStreamException { --mInputPtr; // pushback String fullName; if (prefix != null && prefix.length() > 0) { fullName = prefix + ":" + localName; done += 1 + prefix.length(); } else { fullName = localName; } String rest = parseFNameForError(); String actName = fullName.substring(0, done) + rest; throwParseError("Unexpected close tag "+actName+">; expected " +fullName+">."); } /** *
* Note: According to StAX 1.0, coalesced text events are always to be
* returned as CHARACTERS, never as CDATA. And since at this point we
* don't really know if there's anything to coalesce (but there may
* be), let's convert CDATA if necessary.
*/
private int nextFromTreeCommentOrCData()
throws XMLStreamException
{
char c = getNextCharFromCurrent(SUFFIX_IN_DOC);
if (c == '[') {
checkCData();
/* Good enough; it is a CDATA section... but let's just also
* parse the easy ("free") stuff:
*/
c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
: getNextCharFromCurrent(SUFFIX_IN_CDATA);
readCDataPrimary(c); // sets token state appropriately...
return CDATA;
}
if (c == '-' && getNextCharFromCurrent(SUFFIX_IN_DOC) == '-') {
mTokenState = TOKEN_STARTED;
return COMMENT;
}
throwParseError("Unrecognized XML directive; expected CDATA or comment ('
* Note: this method must accurately update the location information
* so that it reflects where the next event will start (or, in case of
* EOF, where EOF was encountered, i.e. where an event would have started).
*
* @return Next character after node has been skipped, or -1 if EOF
* follows
*/
private int skipToken()
    throws XMLStreamException
{
    /* Outcome of skipping the current token:
     *   0  -> token fully consumed; the following char has not been read
     *  -1  -> EOF was hit
     *  >0  -> the character that follows the token (already consumed)
     */
    int next;

    dispatch:
    switch (mCurrToken) {
    case CDATA:
        /* 30-Aug-2004, TSa: the section itself may already have been
         *   read (we may only be coalescing); the closing "]]>" is only
         *   searched for when the token state says it is still pending.
         */
        if (mTokenState <= TOKEN_PARTIAL_SINGLE) {
            skipCommentOrCData(SUFFIX_IN_CDATA, ']', false);
        }
        next = getNext();
        // When coalescing, adjacent text/CDATA belongs to this event too
        if (mCfgCoalesceText) {
            next = skipCoalescedText(next);
        }
        break;

    case CHARACTERS:
        next = skipTokenText(getNext());
        // When coalescing, adjacent text/CDATA belongs to this event too
        if (mCfgCoalesceText) {
            next = skipCoalescedText(next);
        }
        break;

    case COMMENT:
        skipCommentOrCData(SUFFIX_IN_COMMENT, '-', true);
        next = 0;
        break;

    case DTD:
        finishDTD(false);
        next = 0;
        break;

    case PROCESSING_INSTRUCTION:
        // Scan for the closing "?>", checking control characters on the way
        for (;;) {
            char ch;
            if (mInputPtr < mInputEnd) {
                ch = mInputBuffer[mInputPtr++];
            } else {
                ch = getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);
            }
            if (ch == '?') {
                // Swallow a run of question marks, then see what ends it
                do {
                    if (mInputPtr < mInputEnd) {
                        ch = mInputBuffer[mInputPtr++];
                    } else {
                        ch = getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);
                    }
                } while (ch == '?');
                if (ch == '>') {
                    next = 0;
                    break dispatch;
                }
            }
            if (ch < CHAR_SPACE) {
                if (ch == '\n' || ch == '\r') {
                    skipCRLF(ch);
                } else if (ch != '\t') {
                    throwInvalidSpace(ch);
                }
            }
        }
        // loop above only exits via 'break dispatch' or an exception

    case SPACE:
        for (;;) {
            // Drain whatever is buffered, stopping at first non-space char
            while (mInputPtr < mInputEnd) {
                char ch = mInputBuffer[mInputPtr++];
                if (ch > CHAR_SPACE) { // first char of the next event
                    next = ch;
                    break dispatch;
                }
                if (ch == '\n' || ch == '\r') {
                    skipCRLF(ch);
                } else if (ch != CHAR_SPACE && ch != '\t') {
                    throwInvalidSpace(ch);
                }
            }
            if (!loadMore()) { // no more input available
                next = -1;
                break dispatch;
            }
        }
        // loop above only exits via 'break dispatch' or an exception

    case ENTITY_REFERENCE: // none of these should ever need skipping
    case ENTITY_DECLARATION:
    case NOTATION_DECLARATION:
    case START_DOCUMENT:
    case END_DOCUMENT:
        throw new IllegalStateException("skipToken() called when current token is "+tokenTypeDesc(mCurrToken));

    case ATTRIBUTE: // never returned by this class
    case NAMESPACE: // never returned by this class
    case START_ELEMENT: // start elements are handled differently
    case END_ELEMENT: // end elements are always read completely up front
    default:
        throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken));
    }

    /* Record where the next event starts. If a following character was
     * already consumed (positive value), the start is one char behind
     * the current input position; for 0 and -1 it is the current
     * position itself.
     */
    final int backtrack = (next < 1) ? 0 : 1;
    mTokenInputRow = mCurrInputRow;
    mTokenInputTotal = mCurrInputProcessed + mInputPtr - backtrack;
    mTokenInputCol = mInputPtr - mCurrInputRowStart - backtrack;

    // Only the "fully consumed" outcome still needs the next char fetched
    if (next == 0) {
        return getNext();
    }
    return next;
}
/**
 * Skips input up to and including the end marker of a comment or a
 * CDATA section: two consecutive <code>endChar</code> characters
 * followed by '&gt;'. Linefeeds encountered are passed to
 * <code>skipCRLF</code>; other characters below
 * <code>CHAR_SPACE</code>, except tab, are reported via
 * <code>throwInvalidSpace</code>.
 *
 * @param errorMsg Value handed to the character-reading helpers
 *   (presumably used to describe the context if input ends
 *   unexpectedly)
 * @param endChar Character that is doubled in the end marker; callers
 *   in this class pass '-' for comments and ']' for CDATA
 * @param preventDoubles If true, a doubled <code>endChar</code> that
 *   is not immediately followed by '&gt;' is reported as a parse error;
 *   if false, scanning simply continues
 */
private void skipCommentOrCData(String errorMsg, char endChar, boolean preventDoubles)
    throws XMLStreamException
{
    for (;;) {
        char ch;

        // Step 1: advance until the first candidate marker character
        do {
            if (mInputPtr < mInputEnd) {
                ch = mInputBuffer[mInputPtr++];
            } else {
                ch = getNextCharFromCurrent(errorMsg);
            }
            if (ch < CHAR_SPACE) {
                if (ch == '\n' || ch == '\r') {
                    skipCRLF(ch);
                } else if (ch != '\t') {
                    throwInvalidSpace(ch);
                }
            }
        } while (ch != endChar);

        // Step 2: is the marker character doubled?
        ch = getNextChar(errorMsg);
        if (ch == endChar) {
            // Most likely the end marker; '>' would complete it
            ch = getNextChar(errorMsg);
            if (ch == '>') {
                return;
            }
            if (preventDoubles) { // doubled marker without '>' is an error
                throwParseError("String '--' not allowed in comment (missing '>'?)");
            }
            // Longer run of marker chars: the end may still follow it
            while (ch == endChar) {
                if (mInputPtr < mInputEnd) {
                    ch = mInputBuffer[mInputPtr++];
                } else {
                    ch = getNextCharFromCurrent(errorMsg);
                }
            }
            if (ch == '>') {
                return;
            }
        }

        // Step 3: not the end; the char just read still needs its checks
        if (ch < CHAR_SPACE) {
            if (ch == '\n' || ch == '\r') {
                skipCRLF(ch);
            } else if (ch != '\t') {
                throwInvalidSpace(ch);
            }
        }
        // ... and then start over
    }
}
/**
* Method called to skip past all following text and CDATA segments,
* until encountering something else (including a general entity,
* which may in turn expand to text).
*
* @return Character following all the skipped text and CDATA segments,
* if any; or -1 to denote EOF
*/
private int skipCoalescedText(int i)
throws XMLStreamException
{
while (true) {
// Ok, plain text or markup?
if (i == '<') { // markup, maybe CDATA?
// Need to distinguish "= 3
&& resolveSimpleEntity(true) != 0) {
;
} else {
i = fullyResolveEntity(true);
/* Either way, it's just fine; we don't care about
* returned single-char value.
*/
}
} else {
/* Can only skip character entities; others need to
* be returned separately.
*/
if (resolveCharOnlyEntity(true) == 0) {
/* Now points to the char after ampersand, and we need
* to return the ampersand itself
*/
return i;
}
}
} else if (i < CHAR_SPACE) {
if (i == '\r' || i == '\n') {
skipCRLF((char) i);
} else if (i < 0) { // EOF
return i;
} else if (i != '\t') {
throwInvalidSpace(i);
}
}
// Hmmh... let's do quick looping here:
while (mInputPtr < mInputEnd) {
char c = mInputBuffer[mInputPtr++];
if (c < CHAR_FIRST_PURE_TEXT) { // need to check it
i = c;
continue main_loop;
}
}
i = getNext();
}
// never gets here...
}
/*
////////////////////////////////////////////////////
// Internal methods, parsing
////////////////////////////////////////////////////
*/
/**
 * Completes reading of the current token if its tokenization state is
 * still below the configured threshold (<code>mStTextThreshold</code>);
 * otherwise does nothing. Errors are thrown immediately, not deferred.
 */
protected void ensureFinishToken()
    throws XMLStreamException
{
    if (mTokenState >= mStTextThreshold) {
        return; // already read far enough
    }
    finishToken(false);
}
/**
 * Variant of {@link #ensureFinishToken} that declares no checked
 * exception: when the current token still needs to be completed, the
 * work is delegated to {@link #safeFinishToken}.
 */
protected void safeEnsureFinishToken()
{
    if (mTokenState >= mStTextThreshold) {
        return; // already read far enough
    }
    safeFinishToken();
}
/**
 * Finishes the current token without declaring a checked exception:
 * any {@link XMLStreamException} raised while finishing is handed to
 * <code>throwLazyError</code>.
 */
protected void safeFinishToken()
{
    try {
        /* 24-Sep-2006, TSa: To cut down on unchecked (wrapped)
         *   exceptions, errors are deferred where possible. Currently
         *   that is only done for CHARACTERS, as such a segment can
         *   always legally be split; other types could follow later.
         */
        finishToken(mCurrToken == CHARACTERS);
    } catch (XMLStreamException strex) {
        throwLazyError(strex);
    }
}
/**
 * Reads the rest of the current token, in case it has only been
 * partially tokenized so far, and updates the tokenization state to
 * match. Typically needed when the caller wants more than the plain
 * event type (text contents and such); never applicable to element
 * events.
 *
 * @param deferErrors If true, an exception encountered while reading
 *   is to be stored in a variable instead of being thrown right away;
 *   if false, it is thrown and not stored. The flag is only passed on
 *   to the text-reading methods.
 *
 * @throws IllegalStateException If the current token is of a type
 *   that never needs finishing (or is not a known type at all)
 */
protected void finishToken(boolean deferErrors)
    throws XMLStreamException
{
    switch (mCurrToken) {
    case CDATA:
        if (!mCfgCoalesceText) {
            // Single segment: result tells whether all of it was read
            mTokenState = readCDataSecondary(mShortestTextSegment)
                ? TOKEN_FULL_SINGLE : TOKEN_PARTIAL_SINGLE;
            return;
        }
        readCoalescedText(mCurrToken, deferErrors);
        return;

    case CHARACTERS:
        if (!mCfgCoalesceText) {
            // Single segment: result tells whether all of it was read
            mTokenState = readTextSecondary(mShortestTextSegment, deferErrors)
                ? TOKEN_FULL_SINGLE : TOKEN_PARTIAL_SINGLE;
            return;
        }
        /* 21-Sep-2005, TSa: Shortcut that often applies: a '<' that
         *   is NOT followed by '!' can not start a CDATA section, so
         *   there is nothing more to coalesce.
         */
        if (mTokenState == TOKEN_FULL_SINGLE
            && (mInputPtr + 1) < mInputEnd
            && mInputBuffer[mInputPtr+1] != '!') {
            mTokenState = TOKEN_FULL_COALESCED;
            return;
        }
        readCoalescedText(mCurrToken, deferErrors);
        return;

    case SPACE:
        /* Checking for non-whitespace text only matters for 'real'
         * ignorable white space (validating mode, implied here); the
         * argument tells the reader whether we are outside the tree.
         */
        readSpaceSecondary(mParseState != STATE_TREE);
        mTokenState = TOKEN_FULL_COALESCED;
        return;

    case COMMENT:
        readComment();
        mTokenState = TOKEN_FULL_COALESCED;
        return;

    case PROCESSING_INSTRUCTION:
        readPI();
        mTokenState = TOKEN_FULL_COALESCED;
        return;

    case DTD:
        /* 05-Jan-2006, TSa: 'finally' should not strictly be needed,
         *   but is more robust: after a parsing problem we do not want
         *   to stay in 'DTD partially read' state, but rather mark the
         *   token as done and let the rest be skipped.
         */
        try {
            finishDTD(true);
        } finally {
            mTokenState = TOKEN_FULL_COALESCED;
        }
        return;

    case START_ELEMENT: // elements should never end up in here...
    case END_ELEMENT:
    case ENTITY_REFERENCE:
    case ENTITY_DECLARATION:
    case NOTATION_DECLARATION:
    case START_DOCUMENT:
    case END_DOCUMENT:
        throw new IllegalStateException("finishToken() called when current token is "+tokenTypeDesc(mCurrToken));

    case ATTRIBUTE: // never returned by this class
    case NAMESPACE: // never returned by this class
    default:
        throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken));
    }
}
/**
 * Reads contents of a comment into the text buffer. First tries to
 * locate the closing "--&gt;" within the currently buffered input, in
 * which case the text buffer can simply share the input buffer; if
 * that is not possible, what was scanned so far is copied and
 * {@link #readComment2} takes over.
 */
private void readComment()
    throws XMLStreamException
{
    final char[] buf = mInputBuffer;
    final int end = mInputEnd;
    final int start = mInputPtr;
    int pos = start;

    // Fast path: scan buffered input, hoping to share the buffer
    while (pos < end) {
        char ch = buf[pos++];
        if (ch > '-') { // neither control char nor hyphen
            continue;
        }

        if (ch < CHAR_SPACE) {
            if (ch == '\n') {
                markLF(pos);
            } else if (ch == '\r') {
                /* Can only stay on the fast path if the linefeed needs
                 * no conversion and the next char is available too
                 */
                if (mNormalizeLFs || pos >= end) {
                    --pos; // pushback
                    break;
                }
                if (buf[pos] == '\n') {
                    ++pos;
                }
                markLF(pos);
            } else if (ch != '\t') {
                throwInvalidSpace(ch);
            }
            continue;
        }

        if (ch != '-') {
            continue;
        }

        // Hyphen: "-->" ends the comment, whereas "--" alone is illegal
        if ((pos + 1) >= end) {
            /* Next 2 chars are not both available; push the hyphen
             * back and let the slower code deal with it
             */
            --pos;
            break;
        }
        if (buf[pos] != '-') {
            // Must not skip this char, it might be LF/CR
            continue;
        }
        // Double hyphen: either '>' follows, or it is an error
        if (buf[pos + 1] != '>') {
            throwParseError("String '--' not allowed in comment (missing '>'?)");
        }
        mTextBuffer.resetWithShared(buf, start, pos - start - 1);
        mInputPtr = pos + 2;
        return;
    }

    // Slow path: keep what has been scanned, and continue from there
    mInputPtr = pos;
    mTextBuffer.resetWithCopy(buf, start, pos - start);
    readComment2(mTextBuffer);
}
/**
 * Slow-path continuation of {@link #readComment}: appends the rest of
 * the comment to the given text buffer one character at a time, reading
 * more input when the current buffer runs out, until the closing
 * comment marker has been consumed.
 *
 * @param tb Text buffer to append to; the caller is expected to have
 *   stored the already-scanned part of the comment in it
 *
 * @throws XMLStreamException If an invalid control character is found,
 *   or if two hyphens are not immediately followed by '&gt;'
 */
private void readComment2(TextBuffer tb)
    throws XMLStreamException
{
    /* Output pointers; calls will also ensure that the buffer is
     * not shared, AND has room for at least one more char
     */
    char[] outBuf = tb.getCurrentSegment();
    int outPtr = tb.getCurrentSegmentSize();
    int outLen = outBuf.length;

    while (true) {
        char c = (mInputPtr < mInputEnd) ?
            mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_COMMENT);

        if (c < CHAR_SPACE) {
            if (c == '\n') {
                markLF();
            } else if (c == '\r') {
                if (skipCRLF(c)) { // got 2 char LF
                    if (!mNormalizeLFs) {
                        // Not converting: output the \r here, \n below
                        if (outPtr >= outLen) { // need more room?
                            outBuf = tb.finishCurrentSegment();
                            outLen = outBuf.length;
                            outPtr = 0;
                        }
                        outBuf[outPtr++] = c;
                    }
                    // And let's let default output the 2nd char
                    c = '\n';
                } else if (mNormalizeLFs) { // just \r, but need to convert
                    c = '\n'; // For Mac text
                }
            } else if (c != '\t') {
                throwInvalidSpace(c);
            }
        } else if (c == '-') { // Ok; need to get '->', can not get '--'
            c = getNextCharFromCurrent(SUFFIX_IN_COMMENT);
            if (c == '-') { // Ok, has to be end marker then:
                // Either get '>' or error:
                c = getNextCharFromCurrent(SUFFIX_IN_COMMENT);
                if (c != '>') {
                    throwParseError(ErrorConsts.ERR_HYPHENS_IN_COMMENT);
                }
                break;
            }

            /* Not the end marker; let's just output the first hyphen,
             * push the second char back, and let main
             * code handle it.
             */
            c = '-';
            --mInputPtr;
        }

        // Need more room?
        if (outPtr >= outLen) {
            outBuf = tb.finishCurrentSegment();
            outLen = outBuf.length;
            outPtr = 0;
        }
        // Ok, let's add char to output:
        outBuf[outPtr++] = c;
    }

    // Ok, all done, then!
    tb.setCurrentLength(outPtr);
}
/**
 * Method that reads the primary part of a PI, ie. target, and also
 * skips white space between target and data (if any data).
 *<p>
 * Side effects: stores the target in <code>mCurrName</code>, and sets
 * <code>mTokenState</code> to <code>TOKEN_STARTED</code> if white space
 * followed the target (data may still follow), or to
 * <code>TOKEN_FULL_COALESCED</code> (with empty text) if the PI was
 * closed right after the target.
 *
 * @return Usually <code>PROCESSING_INSTRUCTION</code>; but may be
 *   different in multi-doc mode, if we actually hit a secondary
 *   xml declaration.
 *
 * @throws XMLStreamException If the target is missing, if the target
 *   is (case-insensitively) "xml" outside multi-document mode, or if the
 *   target is followed by something other than white space or the
 *   PI end marker
 */
private final int readPIPrimary()
    throws XMLStreamException
{
    // Ok, first we need the name:
    String target = parseFullName();
    mCurrName = target;

    if (target.length() == 0) {
        throwParseError(ErrorConsts.ERR_WF_PI_MISSING_TARGET);
    }

    // As per XML specs, #17, case-insensitive 'xml' is illegal:
    if (target.equalsIgnoreCase("xml")) {
        // 07-Oct-2005, TSa: Still legal in multi-doc mode...
        if (!mConfig.inputParsingModeDocuments()) {
            throwParseError(ErrorConsts.ERR_WF_PI_XML_TARGET, target, null);
        }
        // Ok, let's just verify we get space then
        char c = getNextCharFromCurrent(SUFFIX_IN_XML_DECL);
        if (!isSpaceChar(c)) {
            throwUnexpectedChar(c, "expected a space in xml declaration after 'xml'");
        }
        return handleMultiDocStart(START_DOCUMENT);
    }

    // And then either white space before data, or end marker:
    char c = (mInputPtr < mInputEnd) ?
        mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);
    if (isSpaceChar(c)) { // Ok, space to skip
        mTokenState = TOKEN_STARTED;
        // Need to skip the WS...
        skipWS(c);
    } else { // Nope; apparently finishes right away...
        mTokenState = TOKEN_FULL_COALESCED;
        mTextBuffer.resetWithEmpty();
        // or does it? Only acceptable continuation is the end marker
        if (c != '?') {
            throwUnexpectedChar(c, ErrorConsts.ERR_WF_PI_XML_MISSING_SPACE);
        }
        c = getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);
        if (c != '>') {
            /* Report the character that actually broke the end marker,
             * rather than the (legal) question mark preceding it.
             */
            throwUnexpectedChar(c, ErrorConsts.ERR_WF_PI_XML_MISSING_SPACE);
        }
    }

    return PROCESSING_INSTRUCTION;
}
/**
 * Method that parses a processing instruction's data portion; at this
 * point target has been parsed.
 *<p>
 * Fast path: if the end marker is found within the current input buffer
 * without hitting a linefeed that can not be handled in place, the data
 * is exposed by sharing the input buffer (no copy), with the trailing
 * end marker excluded. Otherwise the part scanned so far is copied into
 * <code>mTextBuffer</code> and {@link #readPI2} reads the remainder.
 *
 * @throws XMLStreamException If an invalid control character is found
 *   in the data
 */
private void readPI()
    throws XMLStreamException
{
    int ptr = mInputPtr;
    int start = ptr;
    char[] inputBuf = mInputBuffer;
    int inputLen = mInputEnd;

    outer_loop:
    while (ptr < inputLen) {
        char c = inputBuf[ptr++];
        if (c < CHAR_SPACE) {
            if (c == '\n') {
                markLF(ptr);
            } else if (c == '\r') {
                if (ptr < inputLen && !mNormalizeLFs) {
                    // Not converting, and can peek: keep \r (and \r\n) as is
                    if (inputBuf[ptr] == '\n') {
                        ++ptr;
                    }
                    markLF(ptr);
                } else {
                    // Needs conversion or can't peek; slow path handles it
                    --ptr; // pushback
                    break;
                }
            } else if (c != '\t') {
                /* Sync the shared input pointer before raising the error:
                 * until now only the local copy has advanced, and the
                 * field would still point at the start of the PI data
                 * (presumably skewing the reported error location).
                 */
                mInputPtr = ptr;
                throwInvalidSpace(c);
            }
        } else if (c == '?') {
            // K; now just need '>' after zero or more '?'s
            while (true) {
                if (ptr >= inputLen) {
                    /* end of buffer; need to push back at least one of
                     * question marks (not all, since just one is needed
                     * to close the PI)
                     */
                    --ptr;
                    break outer_loop;
                }
                c = inputBuf[ptr++];
                if (c == '>') {
                    mInputPtr = ptr;
                    // Need to discard trailing '?>'
                    mTextBuffer.resetWithShared(inputBuf, start, ptr-start-2);
                    return;
                }
                if (c != '?') {
                    // Not end, can continue, but need to push back last char, in case it's LF/CR
                    --ptr;
                    break;
                }
            }
        }
    }

    mInputPtr = ptr;
    // No point in trying to share... let's just append
    mTextBuffer.resetWithCopy(inputBuf, start, ptr-start);
    readPI2(mTextBuffer);
}
/**
 * Slow-path continuation of {@link #readPI}: appends the rest of the
 * processing instruction data to the given text buffer, reading more
 * input when the current buffer runs out, until the end marker has been
 * consumed. The end marker itself is not included in the buffer.
 *
 * @param tb Text buffer to append to; the caller is expected to have
 *   stored the already-scanned part of the data in it
 *
 * @throws XMLStreamException If an invalid control character is found
 *   in the data
 */
private void readPI2(TextBuffer tb)
    throws XMLStreamException
{
    char[] inputBuf = mInputBuffer;
    int inputLen = mInputEnd;
    int inputPtr = mInputPtr;

    /* Output pointers; calls will also ensure that the buffer is
     * not shared, AND has room for one more char
     */
    char[] outBuf = tb.getCurrentSegment();
    int outPtr = tb.getCurrentSegmentSize();

    main_loop:
    while (true) {
        // Let's first ensure we have some data in there...
        if (inputPtr >= inputLen) {
            loadMoreFromCurrent(SUFFIX_IN_PROC_INSTR);
            inputBuf = mInputBuffer;
            inputPtr = mInputPtr;
            inputLen = mInputEnd;
        }

        // And then do chunks
        char c = inputBuf[inputPtr++];
        if (c < CHAR_SPACE) {
            if (c == '\n') {
                markLF(inputPtr);
            } else if (c == '\r') {
                mInputPtr = inputPtr;
                if (skipCRLF(c)) { // got 2 char LF
                    if (!mNormalizeLFs) {
                        // Special handling, to output 2 chars at a time:
                        if (outPtr >= outBuf.length) { // need more room?
                            outBuf = tb.finishCurrentSegment();
                            outPtr = 0;
                        }
                        outBuf[outPtr++] = c;
                    }
                    // And let's let default output the 2nd char, either way
                    c = '\n';
                } else if (mNormalizeLFs) { // just \r, but need to convert
                    c = '\n'; // For Mac text
                }
                /* Since skipCRLF() needs to peek(), buffer may have
                 * changed, even if there was no CR+LF.
                 */
                inputPtr = mInputPtr;
                inputBuf = mInputBuffer;
                inputLen = mInputEnd;
            } else if (c != '\t') {
                /* Only the local pointer has advanced so far; sync the
                 * field before raising the error so it reflects what has
                 * actually been consumed.
                 */
                mInputPtr = inputPtr;
                throwInvalidSpace(c);
            }
        } else if (c == '?') { // Ok, just need '>' after zero or more '?'s
            mInputPtr = inputPtr; // to allow us to call getNextChar

            qmLoop:
            while (true) {
                c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
                    : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);
                if (c == '>') { // got it!
                    break main_loop;
                } else if (c == '?') {
                    // One more '?': output one, keep one pending
                    if (outPtr >= outBuf.length) { // need more room?
                        outBuf = tb.finishCurrentSegment();
                        outPtr = 0;
                    }
                    outBuf[outPtr++] = c;
                } else {
                    /* Hmmh. Wasn't end mark after all. Thus, need to
                     * fall back to normal processing, with one more
                     * question mark (first one matched that wasn't
                     * yet output),
                     * reset variables, and go back to main loop.
                     */
                    inputPtr = --mInputPtr; // push back last char
                    inputBuf = mInputBuffer;
                    inputLen = mInputEnd;
                    c = '?';
                    break qmLoop;
                }
            }
        } // if (c == '?)

        // Need more room?
        if (outPtr >= outBuf.length) {
            outBuf = tb.finishCurrentSegment();
            outPtr = 0;
        }
        // Ok, let's add char to output:
        outBuf[outPtr++] = c;

    } // while (true)

    tb.setCurrentLength(outPtr);
}
/**
* Method called to read the content of both current CDATA/CHARACTERS
* events, and all following consecutive events into the text buffer.
* At this point the current type is known, prefix (for CDATA) skipped,
* and initial consecutive contents (if any) read in.
*
* @param deferErrors Flag to enable storing an exception to a
* variable, instead of immediately throwing it. If true, will
* just store the exception; if false, will not store, just throw.
*/
protected void readCoalescedText(int currType, boolean deferErrors)
throws XMLStreamException
{
boolean wasCData;
// Ok; so we may need to combine adjacent text/CDATA chunks.
if (currType == CHARACTERS || currType == SPACE) {
readTextSecondary(Integer.MAX_VALUE, deferErrors);
wasCData = false;
} else if (currType == CDATA) {
/* We may have actually really finished it, but just left
* the 'unfinished' flag due to need to coalesce...
*/
if (mTokenState <= TOKEN_PARTIAL_SINGLE) {
readCDataSecondary(Integer.MAX_VALUE);
}
wasCData = true;
} else {
throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken)+"; expected CHARACTERS, CDATA or SPACE.");
}
// But how about additional text?
while (!deferErrors || (mPendingException == null)) {
if (mInputPtr >= mInputEnd) {
mTextBuffer.ensureNotShared();
if (!loadMore()) {
// ??? Likely an error but let's just break
break;
}
}
// Let's peek, ie. not advance it yet
char c = mInputBuffer[mInputPtr];
if (c == '<') { // CDATA, maybe?
// Need to distinguish ") or until
* first 'hole' in text (buffer end, 2-char lf to convert, entity).
*
* When the method is called, it's expected that the first character * has been read as is in the current input buffer just before current * pointer * * @param c First character in the CDATA segment (possibly part of end * marker for empty segments * * @return True if the whole CDATA segment was completely read; this * happens only if lt-char is hit; false if it's possible that * it wasn't read (ie. end-of-buffer or entity encountered). */ private final boolean readCDataPrimary(char c) throws XMLStreamException { mWsStatus = (c <= CHAR_SPACE) ? ALL_WS_UNKNOWN : ALL_WS_NO; int ptr = mInputPtr; int inputLen = mInputEnd; char[] inputBuf = mInputBuffer; int start = ptr-1; while (true) { if (c < CHAR_SPACE) { if (c == '\n') { markLF(ptr); } else if (c == '\r') { if (ptr >= inputLen) { // can't peek? --ptr; break; } if (mNormalizeLFs) { // can we do in-place Mac replacement? if (inputBuf[ptr] == '\n') { // nope, 2 char lf --ptr; break; } inputBuf[ptr-1] = '\n'; // yup } else { // No LF normalization... can we just skip it? if (inputBuf[ptr] == '\n') { ++ptr; } } markLF(ptr); } else if (c != '\t') { throwInvalidSpace(c); } } else if (c == ']') { // Ok; need to get one or more ']'s, then '>' if ((ptr + 1) >= inputLen) { // not enough room? need to push it back --ptr; break; } // Needs to be followed by another ']'... if (inputBuf[ptr] == ']') { ++ptr; inner_loop: while (true) { if (ptr >= inputLen) { /* Need to push back last 2 right brackets; it may * be end marker divided by input buffer boundary */ ptr -= 2; break inner_loop; } c = inputBuf[ptr++]; if (c == '>') { // Ok, got it! mInputPtr = ptr; ptr -= (start+3); mTextBuffer.resetWithShared(inputBuf, start, ptr); mTokenState = TOKEN_FULL_SINGLE; return true; } if (c != ']') { // Need to re-check this char (may be linefeed) --ptr; break inner_loop; } // Fall through to next round } } } if (ptr >= inputLen) { // end-of-buffer? 
break; } c = inputBuf[ptr++]; } mInputPtr = ptr; /* If we end up here, we either ran out of input, or hit something * which would leave 'holes' in buffer... fine, let's return then; * we can still update shared buffer copy: would be too early to * make a copy since caller may not even be interested in the * stuff. */ int len = ptr - start; mTextBuffer.resetWithShared(inputBuf, start, len); if (mCfgCoalesceText || (mTextBuffer.size() < mShortestTextSegment)) { mTokenState = TOKEN_STARTED; } else { mTokenState = TOKEN_PARTIAL_SINGLE; } return false; } /** * @return True if the whole CData section was completely read (we * hit the end marker); false if a shorter segment was returned. */ protected boolean readCDataSecondary(int shortestSegment) throws XMLStreamException { // Input pointers char[] inputBuf = mInputBuffer; int inputLen = mInputEnd; int inputPtr = mInputPtr; /* Output pointers; calls will also ensure that the buffer is * not shared, AND has room for one more char */ char[] outBuf = mTextBuffer.getCurrentSegment(); int outPtr = mTextBuffer.getCurrentSegmentSize(); while (true) { if (inputPtr >= inputLen) { loadMore(SUFFIX_IN_CDATA); inputBuf = mInputBuffer; inputPtr = mInputPtr; inputLen = mInputEnd; } char c = inputBuf[inputPtr++]; if (c < CHAR_SPACE) { if (c == '\n') { markLF(inputPtr); } else if (c == '\r') { mInputPtr = inputPtr; if (skipCRLF(c)) { // got 2 char LF if (!mNormalizeLFs) { // Special handling, to output 2 chars at a time: outBuf[outPtr++] = c; if (outPtr >= outBuf.length) { // need more room? outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } } // And let's let default output the 2nd char, either way c = '\n'; } else if (mNormalizeLFs) { // just \r, but need to convert c = '\n'; // For Mac text } /* Since skipCRLF() needs to peek(), buffer may have * changed, even if there was no CR+LF. 
*/ inputPtr = mInputPtr; inputBuf = mInputBuffer; inputLen = mInputEnd; } else if (c != '\t') { throwInvalidSpace(c); } } else if (c == ']') { // Ok; need to get ']>' mInputPtr = inputPtr; if (checkCDataEnd(outBuf, outPtr)) { return true; } inputPtr = mInputPtr; inputBuf = mInputBuffer; inputLen = mInputEnd; outBuf = mTextBuffer.getCurrentSegment(); outPtr = mTextBuffer.getCurrentSegmentSize(); continue; // need to re-process last (non-bracket) char } // Ok, let's add char to output: outBuf[outPtr++] = c; // Need more room? if (outPtr >= outBuf.length) { TextBuffer tb = mTextBuffer; // Perhaps we have now enough to return? if (!mCfgCoalesceText) { tb.setCurrentLength(outBuf.length); if (tb.size() >= shortestSegment) { mInputPtr = inputPtr; return false; } } // If not, need more buffer space: outBuf = tb.finishCurrentSegment(); outPtr = 0; } } // never gets here } /** * Method that will check, given the starting ']', whether there is * ending ']]>' (including optional extra ']'s); if so, will updated * output buffer with extra ]s, if not, will make sure input and output * are positioned for further checking. * * @return True, if we hit the end marker; false if not. */ private boolean checkCDataEnd(char[] outBuf, int outPtr) throws XMLStreamException { int bracketCount = 0; char c; do { ++bracketCount; c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CDATA); } while (c == ']'); boolean match = (bracketCount >= 2 && c == '>'); if (match) { bracketCount -= 2; } while (bracketCount > 0) { --bracketCount; outBuf[outPtr++] = ']'; if (outPtr >= outBuf.length) { /* Can't really easily return, even if we have enough * stuff here, since we've more than one char... */ outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } } mTextBuffer.setCurrentLength(outPtr); // Match? Can break, then: if (match) { return true; } // No match, need to push the last char back and admit defeat... 
--mInputPtr; return false; } /** * Method called to read in consequtive beginning parts of a text * segment, up to either end of the segment (lt char) or until * first 'hole' in text (buffer end, 2-char lf to convert, entity). *
* When the method is called, it's expected that the first character * has been read as is in the current input buffer just before current * pointer * * @param c First character of the text segment * * @return True if the whole text segment was completely read; this * happens only if lt-char is hit; false if it's possible that * it wasn't read (ie. end-of-buffer or entity encountered). */ private final boolean readTextPrimary(char c) throws XMLStreamException { int ptr = mInputPtr; int start = ptr-1; // First: can we heuristically canonicalize ws used for indentation? if (c <= CHAR_SPACE) { int len = mInputEnd; /* Even without indentation removal, it's good idea to * 'convert' \r or \r\n into \n (by replacing or skipping first * char): this may allow reusing the buffer. * But note that conversion MUST be enabled -- this is toggled * by code that includes internal entities, to prevent replacement * of CRs from int. general entities, as applicable. */ do { // We'll need at least one char, no matter what: if (ptr < len && mNormalizeLFs) { if (c == '\r') { c = '\n'; if (mInputBuffer[ptr] == c) { /* Ok, whatever happens, can 'skip' \r, to * point to following \n: */ ++start; // But if that's buffer end, can't skip that if (++ptr >= len) { break; } } else { mInputBuffer[start] = c; } } else if (c != '\n') { break; } markLF(ptr); if (mCheckIndentation > 0) { ptr = readIndentation(c, ptr); if (ptr < 0) { // success! return true; } } // If we got this far, we skipped a lf, need to read next char c = mInputBuffer[ptr++]; } } while (false); // can we figure out indentation? 
mWsStatus = ALL_WS_UNKNOWN; } else { mWsStatus = ALL_WS_NO; } char[] inputBuf = mInputBuffer; int inputLen = mInputEnd; // Let's first see if we can just share input buffer: while (true) { if (c < CHAR_FIRST_PURE_TEXT) { if (c == '<') { mInputPtr = --ptr; mTextBuffer.resetWithShared(inputBuf, start, ptr-start); return true; } if (c < CHAR_SPACE) { if (c == '\n') { markLF(ptr); } else if (c == '\r') { if (ptr >= inputLen) { // can't peek? --ptr; break; } if (mNormalizeLFs) { // can we do in-place Mac replacement? if (inputBuf[ptr] == '\n') { // nope, 2 char lf --ptr; break; } /* This would otherwise be risky (may modify value * of a shared entity value), but since DTDs are * cached/accessed based on properties including * lf-normalization there's no harm in 'fixing' it * in place. */ inputBuf[ptr-1] = '\n'; // yup } else { // No LF normalization... can we just skip it? if (inputBuf[ptr] == '\n') { ++ptr; } } markLF(ptr); } else if (c != '\t') { // Should consume invalid char, but not include in result mInputPtr = ptr; mTextBuffer.resetWithShared(inputBuf, start, ptr-start-1); /* Let's defer exception, provided we got at least * one valid character (if not, better throw * exception right away) */ boolean deferErrors = (ptr - start) > 1; mPendingException = throwInvalidSpace(c, deferErrors); return true; } } else if (c == '&') { // Let's push it back and break --ptr; break; } else if (c == '>') { // Let's see if we got ']]>'? if ((ptr - start) >= 3) { if (inputBuf[ptr-3] == ']' && inputBuf[ptr-2] == ']') { /* Let's include ']]' in there, not '>' (since that * makes it non-wellformed): but need to consume * that char nonetheless */ mInputPtr = ptr; mTextBuffer.resetWithShared(inputBuf, start, ptr-start-1); mPendingException = throwWfcException(ErrorConsts.ERR_BRACKET_IN_TEXT, true); return true; // and we are fully done } } } } // if (char in lower code range) if (ptr >= inputLen) { // end-of-buffer? 
break; } c = inputBuf[ptr++]; } mInputPtr = ptr; /* If we end up here, we either ran out of input, or hit something * which would leave 'holes' in buffer... fine, let's return then; * we can still update shared buffer copy: would be too early to * make a copy since caller may not even be interested in the * stuff. */ mTextBuffer.resetWithShared(inputBuf, start, ptr - start); return false; } /** * * @param deferErrors Flag to enable storing an exception to a * variable, instead of immediately throwing it. If true, will * just store the exception; if false, will not store, just throw. * * @return True if the text segment was completely read ('<' was hit, * or in non-entity-expanding mode, a non-char entity); false if * it may still continue */ protected final boolean readTextSecondary(int shortestSegment, boolean deferErrors) throws XMLStreamException { /* Output pointers; calls will also ensure that the buffer is * not shared, AND has room for at least one more char */ char[] outBuf = mTextBuffer.getCurrentSegment(); int outPtr = mTextBuffer.getCurrentSegmentSize(); int inputPtr = mInputPtr; char[] inputBuffer = mInputBuffer; int inputLen = mInputEnd; while (true) { if (inputPtr >= inputLen) { /* 07-Oct-2005, TSa: Let's not throw an exception for EOF from * here -- in fragment mode, it shouldn't be thrown, and in * other modes we might as well first return text, and only * then throw an exception: no need to do that yet. */ mInputPtr = inputPtr; if (!loadMore()) { break; } inputPtr = mInputPtr; inputBuffer = mInputBuffer; inputLen = mInputEnd; } char c = inputBuffer[inputPtr++]; // Most common case is we don't have special char, thus: if (c < CHAR_FIRST_PURE_TEXT) { if (c < CHAR_SPACE) { if (c == '\n') { markLF(inputPtr); } else if (c == '\r') { mInputPtr = inputPtr; if (skipCRLF(c)) { // got 2 char LF if (!mNormalizeLFs) { // Special handling, to output 2 chars at a time: outBuf[outPtr++] = c; if (outPtr >= outBuf.length) { // need more room? 
outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } } // And let's let default output the 2nd char c = '\n'; } else if (mNormalizeLFs) { // just \r, but need to convert c = '\n'; // For Mac text } /* note: skipCRLF() may change ptr and len, but since * it does not close input source, it won't change * actual buffer object: */ //inputBuffer = mInputBuffer; inputLen = mInputEnd; inputPtr = mInputPtr; } else if (c != '\t') { mTextBuffer.setCurrentLength(outPtr); mInputPtr = inputPtr; mPendingException = throwInvalidSpace(c, deferErrors); break; } } else if (c == '<') { // end is nigh! mInputPtr = inputPtr-1; break; } else if (c == '&') { mInputPtr = inputPtr; int ch; if (mCfgReplaceEntities) { // can we expand all entities? if ((inputLen - inputPtr) >= 3 && (ch = resolveSimpleEntity(true)) != 0) { // Ok, it's fine then } else { ch = fullyResolveEntity(true); if (ch == 0) { // Input buffer changed, nothing to output quite yet: inputBuffer = mInputBuffer; inputLen = mInputEnd; inputPtr = mInputPtr; continue; } // otherwise char is now fine... } } else { /* Nope, can only expand char entities; others need * to be separately handled. */ ch = resolveCharOnlyEntity(true); if (ch == 0) { // some other entity... /* can't expand; underlying pointer now points to * char after ampersand, need to rewind */ --mInputPtr; break; } // .. otherwise we got char we needed } if (ch <= 0xFFFF) { c = (char) ch; } else { ch -= 0x10000; // need more room? if (outPtr >= outBuf.length) { outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } outBuf[outPtr++] = (char) ((ch >> 10) + 0xD800); c = (char) ((ch & 0x3FF) + 0xDC00); } inputPtr = mInputPtr; // not quite sure why this is needed... but it is: inputLen = mInputEnd; } else if (c == '>') { // Let's see if we got ']]>'? /* 21-Apr-2005, TSa: But we can NOT check the output buffer * as it contains _expanded_ stuff... only input side. * For now, 98% accuracy has to do, as we may not be able * to access previous buffer's contents. 
But at least we * won't produce false positives from entity expansion */ if (inputPtr > 2) { // can we do it here? // Since mInputPtr has been advanced, -1 refers to '>' if (inputBuffer[inputPtr-3] == ']' && inputBuffer[inputPtr-2] == ']') { mInputPtr = inputPtr; /* We have already added ']]' into output buffer... * should be ok, since only with '>' does it become * non-wellformed. */ mTextBuffer.setCurrentLength(outPtr); mPendingException = throwWfcException(ErrorConsts.ERR_BRACKET_IN_TEXT, deferErrors); break; } } else { /* 21-Apr-2005, TSa: No good way to verify it, * at this point. Should come back and think of how * to properly handle this (rare) possibility. */ ; } } } // Ok, let's add char to output: outBuf[outPtr++] = c; // Need more room? if (outPtr >= outBuf.length) { TextBuffer tb = mTextBuffer; // Perhaps we have now enough to return? tb.setCurrentLength(outBuf.length); if (tb.size() >= shortestSegment) { mInputPtr = inputPtr; return false; } // If not, need more buffer space: outBuf = tb.finishCurrentSegment(); outPtr = 0; } } mTextBuffer.setCurrentLength(outPtr); return true; } /** * Method called to try to parse and canonicalize white space that * has a good chance of being white space with somewhat regular * structure; specifically, something that looks like typical * indentation. *
* Note: Caller guarantees that there will be at least 2 characters * available in the input buffer. And method has to ensure that if * it does not find a match, it will return pointer value such * that there is at least one valid character remaining. * * @return -1, if the content was determined to be canonicalizable * (indentation) white space; and thus fully parsed. Otherwise * pointer (value to set to mInputPtr) to the next character * to process (not processed by this method) */ private final int readIndentation(char c, int ptr) throws XMLStreamException { /* We need to verify that: * (a) we can read enough contiguous data to do determination * (b) sequence is a linefeed, with either zero or more following * spaces, or zero or more tabs; and followed by non-directive * tag (start/end tag) * and if so, we can use a canonical shared representation of * this even. */ final int inputLen = mInputEnd; final char[] inputBuf = mInputBuffer; int start = ptr-1; final char lf = c; // Note: caller guarantees at least one more char in the input buffer ws_loop: do { // dummy loop to allow for break (which indicates failure) c = inputBuf[ptr++]; if (c == ' ' || c == '\t') { // indentation? // Need to limit to maximum int lastIndCharPos = (c == ' ') ? TextBuffer.MAX_INDENT_SPACES : TextBuffer.MAX_INDENT_TABS; lastIndCharPos += ptr; if (lastIndCharPos > inputLen) { lastIndCharPos = inputLen; } inner_loop: while (true) { if (ptr >= lastIndCharPos) { // overflow; let's backtrack --ptr; break ws_loop; } char d = inputBuf[ptr++]; if (d != c) { if (d == '<') { // yup, got it! break inner_loop; } --ptr; // caller needs to reprocess it break ws_loop; // nope, blew it } } // This means we had success case; let's fall through } else if (c != '<') { // nope, can not be --ptr; // simpler if we just push it back; needs to be processed later on break ws_loop; } // Ok; we got '<'... just need any other char than '!'... if (ptr < inputLen && inputBuf[ptr] != '!') { // Voila! 
mInputPtr = --ptr; // need to push back that '<' too mTextBuffer.resetWithIndentation(ptr - start - 1, c); // One more thing: had a positive match, need to note it if (mCheckIndentation < INDENT_CHECK_MAX) { mCheckIndentation += INDENT_CHECK_START; } mWsStatus = ALL_WS_YES; return -1; } // Nope: need to push '<' back, then --ptr; } while (false); // Ok, nope... caller can/need to take care of it: /* Also, we may need to subtract indentation check count to possibly * disable this check if it doesn't seem to work. */ --mCheckIndentation; /* Also; if lf we got was \r, need to convert it now (this * method only gets called in lf converting mode) * (and yes, it is safe to modify input buffer at this point; * see calling method for details) */ if (lf == '\r') { inputBuf[start] = '\n'; } return ptr; } /** * Reading whitespace should be very similar to reading normal text; * although couple of simplifications can be made. Further, since this * method is very unlikely to be of much performance concern, some * optimizations are left out, where it simplifies code. * * @param c First white space characters; known to contain white space * at this point * @param prologWS If true, is reading white space outside XML tree, * and as such can get EOF. If false, should not get EOF, nor be * followed by any other char than < * * @return True if the whole white space segment was read; false if * something prevented that (end of buffer, replaceable 2-char lf) */ private final boolean readSpacePrimary(char c, boolean prologWS) throws XMLStreamException { int ptr = mInputPtr; char[] inputBuf = mInputBuffer; int inputLen = mInputEnd; int start = ptr-1; // Let's first see if we can just share input buffer: while (true) { /* 30-Aug-2006, TSa: Let's not check for validity errors yet, * even if we could detect problems at this point. 
* This because it's not always * an error (in dtd-aware, non-validating mode); but also since * that way we can first return all space we got, and only * indicate error when next token is to be accessed. */ if (c > CHAR_SPACE) { // End of whitespace mInputPtr = --ptr; mTextBuffer.resetWithShared(mInputBuffer, start, ptr-start); return true; } if (c == '\n') { markLF(ptr); } else if (c == '\r') { if (ptr >= mInputEnd) { // can't peek? --ptr; break; } if (mNormalizeLFs) { // can we do in-place Mac replacement? if (inputBuf[ptr] == '\n') { // nope, 2 char lf --ptr; break; } inputBuf[ptr-1] = '\n'; // yup } else { // No LF normalization... can we just skip it? if (inputBuf[ptr] == '\n') { ++ptr; } } markLF(ptr); } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } if (ptr >= inputLen) { // end-of-buffer? break; } c = inputBuf[ptr++]; } mInputPtr = ptr; /* Ok, couldn't read it completely, let's just return whatever * we did get as shared data */ mTextBuffer.resetWithShared(inputBuf, start, ptr - start); return false; } /** * This is very similar to readSecondaryText(); called when we need * to read in rest of (ignorable) white space segment. * * @param prologWS True if the ignorable white space is within prolog * (or epilog); false if it's within xml tree. */ private void readSpaceSecondary(boolean prologWS) throws XMLStreamException { /* Let's not bother optimizing input. However, we can easily optimize * output, since it's easy to do, yet has more effect on performance * than localizing input variables. */ char[] outBuf = mTextBuffer.getCurrentSegment(); int outPtr = mTextBuffer.getCurrentSegmentSize(); while (true) { if (mInputPtr >= mInputEnd) { /* 07-Oct-2005, TSa: Let's not throw an exception yet -- * can return SPACE, and let exception be thrown * when trying to fetch next event. */ if (!loadMore()) { break; } } char c = mInputBuffer[mInputPtr]; if (c > CHAR_SPACE) { // end of WS? 
break; } ++mInputPtr; if (c == '\n') { markLF(); } else if (c == '\r') { if (skipCRLF(c)) { if (!mNormalizeLFs) { // Special handling, to output 2 chars at a time: outBuf[outPtr++] = c; if (outPtr >= outBuf.length) { // need more room? outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } } c = '\n'; } else if (mNormalizeLFs) { c = '\n'; // For Mac text } } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } // Ok, let's add char to output: outBuf[outPtr++] = c; // Need more room? if (outPtr >= outBuf.length) { outBuf = mTextBuffer.finishCurrentSegment(); outPtr = 0; } } mTextBuffer.setCurrentLength(outPtr); } /** * Method called to read the contents of the current CHARACTERS * event, and write all contents using the specified Writer. * * @param w Writer to use for writing out textual content parsed * * @return Total number of characters written using the writer */ private int readAndWriteText(Writer w) throws IOException, XMLStreamException { mTokenState = TOKEN_FULL_SINGLE; // we'll read it all /* We should be able to mostly just use the input buffer at this * point; exceptions being two-char linefeeds (when converting * to single ones) and entities (which likewise can expand or * shrink), both of which require flushing and/or single byte * output. */ int start = mInputPtr; int count = 0; main_loop: while (true) { char c; // Reached the end of buffer? Need to flush, then if (mInputPtr >= mInputEnd) { int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } c = getNextChar(SUFFIX_IN_TEXT); start = mInputPtr-1; // needs to be prior to char we got } else { c = mInputBuffer[mInputPtr++]; } // Most common case is we don't have a special char, thus: if (c < CHAR_FIRST_PURE_TEXT) { if (c < CHAR_SPACE) { if (c == '\n') { markLF(); } else if (c == '\r') { char d; if (mInputPtr >= mInputEnd) { /* If we can't peek easily, let's flush past stuff * and load more... 
(have to flush, since new read * will overwrite inbut buffers) */ int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } d = getNextChar(SUFFIX_IN_TEXT); start = mInputPtr; // to mark 'no past content' } else { d = mInputBuffer[mInputPtr++]; } if (d == '\n') { if (mNormalizeLFs) { /* Let's flush content prior to 2-char LF, and * start the new segment on the second char... * this way, no mods are needed for the buffer, * AND it'll also work on split 2-char lf! */ int len = mInputPtr - 2 - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } start = mInputPtr-1; // so '\n' is the first char } else { ; // otherwise it's good as is } } else { // not 2-char... need to replace? --mInputPtr; if (mNormalizeLFs) { mInputBuffer[mInputPtr-1] = '\n'; } } markLF(); } else if (c != '\t') { throwInvalidSpace(c); } } else if (c == '<') { // end is nigh! break main_loop; } else if (c == '&') { /* Have to flush all stuff, since entities pretty much * force it; input buffer won't be contiguous */ int len = mInputPtr - 1 - start; // -1 to remove ampersand if (len > 0) { w.write(mInputBuffer, start, len); count += len; } int ch; if (mCfgReplaceEntities) { // can we expand all entities? if ((mInputEnd - mInputPtr) < 3 || (ch = resolveSimpleEntity(true)) == 0) { ch = fullyResolveEntity(true); } } else { ch = resolveCharOnlyEntity(true); if (ch == 0) { // some other entity... /* can't expand, so, let's just bail out... but * let's also ensure no text is added twice, as * all prev text was just flushed, but resolve * may have moved input buffer around. */ start = mInputPtr; break main_loop; } } if (ch != 0) { if (ch <= 0xFFFF) { c = (char) ch; } else { ch -= 0x10000; w.write((char) ((ch >> 10) + 0xD800)); c = (char) ((ch & 0x3FF) + 0xDC00); } w.write(c); ++count; } start = mInputPtr; } else if (c == '>') { // did we get ']]>'? 
/* 21-Apr-2005, TSa: But we can NOT check the output buffer * (see comments in readTextSecondary() for details) */ if (mInputPtr >= 2) { // can we do it here? if (mInputBuffer[mInputPtr-2] == ']' && mInputBuffer[mInputPtr-1] == ']') { // Anything to flush? int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); } throwParseError(ErrorConsts.ERR_BRACKET_IN_TEXT); } } else { ; // !!! TBI: how to check past boundary? } } else if (c == CHAR_NULL) { throwNullChar(); } } } // while (true) /* Need to push back '<' or '&', whichever caused us to * get out... */ --mInputPtr; // Anything left to flush? int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } return count; } /** * Method called to read the contents of the current (possibly partially * read) CDATA * event, and write all contents using the specified Writer. * * @param w Writer to use for writing out textual content parsed * * @return Total number of characters written using the writer for * the current CDATA event */ private int readAndWriteCData(Writer w) throws IOException, XMLStreamException { mTokenState = TOKEN_FULL_SINGLE; // we'll read it all /* Ok; here we can basically have 2 modes; first the big loop to * gather all data up until a ']'; and then another loop to see * if ']' is part of ']]>', and after this if no end marker found, * go back to the first part. */ char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_IN_CDATA); int count = 0; main_loop: while (true) { int start = mInputPtr-1; quick_loop: while (true) { if (c > CHAR_CR_LF_OR_NULL) { if (c == ']') { break quick_loop; } } else { if (c < CHAR_SPACE) { if (c == '\n') { markLF(); } else if (c == '\r') { char d; if (mInputPtr >= mInputEnd) { /* If we can't peek easily, let's flush past stuff * and load more... 
(have to flush, since new read * will overwrite inbut buffers) */ int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } d = getNextChar(SUFFIX_IN_CDATA); start = mInputPtr; // to mark 'no past content' } else { d = mInputBuffer[mInputPtr++]; } if (d == '\n') { if (mNormalizeLFs) { /* Let's flush content prior to 2-char LF, and * start the new segment on the second char... * this way, no mods are needed for the buffer, * AND it'll also work on split 2-char lf! */ int len = mInputPtr - 2 - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } start = mInputPtr-1; // so '\n' is the first char } else { // otherwise it's good as is } } else { // not 2-char... need to replace? --mInputPtr; if (mNormalizeLFs) { mInputBuffer[mInputPtr-1] = '\n'; } } markLF(); } else if (c != '\t') { throwInvalidSpace(c); } } } // Reached the end of buffer? Need to flush, then if (mInputPtr >= mInputEnd) { int len = mInputPtr - start; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } start = 0; c = getNextChar(SUFFIX_IN_CDATA); } else { c = mInputBuffer[mInputPtr++]; } } // while (true) // Anything to flush once we hit ']'? { /* -1 since the last char in there (a '[') is NOT to be * output at this point */ int len = mInputPtr - start - 1; if (len > 0) { w.write(mInputBuffer, start, len); count += len; } } /* Ok; we only get this far when we hit a ']'. We got one, * so let's see if we can find at least one more bracket, * immediately followed by '>'... */ int bracketCount = 0; do { ++bracketCount; c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_CDATA); } while (c == ']'); boolean match = (bracketCount >= 2 && c == '>'); if (match) { bracketCount -= 2; } while (bracketCount > 0) { --bracketCount; w.write(']'); ++count; } if (match) { break main_loop; } /* Otherwise we'll just loop; now c is properly set to be * the next char as well. 
*/ } // while (true) return count; } /** * @return Number of characters written to Writer during the call */ private int readAndWriteCoalesced(Writer w, boolean wasCData) throws IOException, XMLStreamException { mTokenState = TOKEN_FULL_COALESCED; int count = 0; /* Ok, so what do we have next? CDATA, CHARACTERS, or something * else? */ main_loop: while (true) { if (mInputPtr >= mInputEnd) { if (!loadMore()) { /* Shouldn't normally happen, but let's just let * caller deal with it... */ break main_loop; } } // Let's peek, ie. not advance it yet char c = mInputBuffer[mInputPtr]; if (c == '<') { // CDATA, maybe? // Need to distinguish " CHAR_SPACE) { return false; } while (true) { // Linefeed? if (c == '\n' || c == '\r') { skipCRLF(c); } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } if (mInputPtr >= mInputEnd) { // Let's see if current source has more if (!loadMoreFromCurrent()) { return true; } } c = mInputBuffer[mInputPtr]; if (c > CHAR_SPACE) { // not WS? Need to return return true; } ++mInputPtr; } } /* /////////////////////////////////////////////////////////////////////// // Abstract method implementations /////////////////////////////////////////////////////////////////////// */ // @Override protected EntityDecl findEntity(String id, Object arg) throws XMLStreamException { EntityDecl ed = (EntityDecl) mConfig.findCustomInternalEntity(id); if (ed == null && mGeneralEntities != null) { ed = (EntityDecl) mGeneralEntities.get(id); } /* 05-Mar-2006, TSa: Externally declared entities are illegal * if we were declared as "standalone='yes'"... */ if (mDocStandalone == DOC_STANDALONE_YES) { if (ed != null && ed.wasDeclaredExternally()) { throwParseError(ErrorConsts.ERR_WF_ENTITY_EXT_DECLARED, ed.getName(), null); } } return ed; } protected void handleUndeclaredEntity(String id) throws XMLStreamException { throwParseError(((mDocStandalone == DOC_STANDALONE_YES) ? 
ErrorConsts.ERR_WF_GE_UNDECLARED_SA : ErrorConsts.ERR_WF_GE_UNDECLARED), id, null); } protected void handleIncompleteEntityProblem(WstxInputSource closing) throws XMLStreamException { String top = mElementStack.isEmpty() ? "[ROOT]" : mElementStack.getTopElementDesc(); throwParseError("Unexpected end of entity expansion for entity &{0}; was expecting a close tag for element <{1}>", closing.getEntityId(), top); } /* /////////////////////////////////////////////////////////////////////// // Internal methods, validation, error handling and reporting /////////////////////////////////////////////////////////////////////// */ /** * This problem gets reported if an entity tries to expand to * a close tag matching start tag that did not came from the same * entity (but from parent). */ protected void handleGreedyEntityProblem(WstxInputSource input) throws XMLStreamException { String top = mElementStack.isEmpty() ? "[ROOT]" : mElementStack.getTopElementDesc(); throwParseError("Improper GE/element nesting: entity &" +input.getEntityId()+" contains closing tag for <"+top+">"); } private void throwNotTextual(int type) { throw new IllegalStateException("Not a textual event (" +tokenTypeDesc(mCurrToken)+")"); } private void throwNotTextXxx(int type) { throw new IllegalStateException("getTextXxx() methods can not be called on " +tokenTypeDesc(mCurrToken)); } protected void throwNotTextualOrElem(int type) { throw new IllegalStateException(MessageFormat.format(ErrorConsts.ERR_STATE_NOT_ELEM_OR_TEXT, new Object[] { tokenTypeDesc(type) })); } /** * Method called when we get an EOF within content tree */ protected void throwUnexpectedEOF() throws WstxException { throwUnexpectedEOF("; was expecting a close tag for element <"+mElementStack.getTopElementDesc()+">"); } /** * Method called to report a problem with */ protected XMLStreamException _constructUnexpectedInTyped(int nextToken) { if (nextToken == START_ELEMENT) { return _constructTypeException("Element content can not contain 
child START_ELEMENT when using Typed Access methods", null); } return _constructTypeException("Expected a text token, got "+tokenTypeDesc(nextToken), null); } protected TypedXMLStreamException _constructTypeException(String msg, String lexicalValue) { return new TypedXMLStreamException(lexicalValue, msg, getStartLocation()); } /** * Stub method implemented by validating parsers, to report content * that's not valid for current element context. Defined at this * level since some such problems need to be caught at low-level; * however, details of error reports are not needed here. */ protected void reportInvalidContent(int evtType) throws XMLStreamException { // should never happen; sub-class has to override: throwParseError("Internal error: sub-class should override method"); } } woodstox-4.1.3/src/java/com/ctc/wstx/sr/ReaderCreator.java 0000644 0001750 0001750 00000002020 11745427074 023675 0 ustar giovanni giovanni package com.ctc.wstx.sr; import com.ctc.wstx.dtd.DTDId; import com.ctc.wstx.dtd.DTDSubset; import com.ctc.wstx.util.SymbolTable; /** * Interface that defines callbacks readers can use to access settings * of the input factory that created them, as well as update cached * data factory may store (shared symbol tables, cached DTDs etc). *
* Note that readers in general should only access the configuration info * when they are created (from constructor). */ public interface ReaderCreator { /* /////////////////////////////////////////////////////// // Methods for accessing configuration info /////////////////////////////////////////////////////// */ public DTDSubset findCachedDTD(DTDId id); /* /////////////////////////////////////////////////////// // Methods for updating information factory has /////////////////////////////////////////////////////// */ public void updateSymbolTable(SymbolTable t); public void addCachedDTD(DTDId id, DTDSubset extSubset); } woodstox-4.1.3/src/java/com/ctc/wstx/sr/StreamScanner.java 0000644 0001750 0001750 00000247715 11745427074 023746 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.ctc.wstx.sr; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; import java.text.MessageFormat; import java.util.Collections; import java.util.HashMap; import java.util.Map; import javax.xml.stream.Location; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLReporter; import javax.xml.stream.XMLResolver; import javax.xml.stream.XMLStreamException; import org.codehaus.stax2.XMLReporter2; import org.codehaus.stax2.XMLStreamLocation2; import org.codehaus.stax2.validation.XMLValidationProblem; import com.ctc.wstx.api.ReaderConfig; import com.ctc.wstx.cfg.ErrorConsts; import com.ctc.wstx.cfg.InputConfigFlags; import com.ctc.wstx.cfg.ParsingErrorMsgs; import com.ctc.wstx.cfg.XmlConsts; import com.ctc.wstx.dtd.MinimalDTDReader; import com.ctc.wstx.ent.EntityDecl; import com.ctc.wstx.ent.IntEntity; import com.ctc.wstx.exc.*; import com.ctc.wstx.io.DefaultInputResolver; import com.ctc.wstx.io.WstxInputData; import com.ctc.wstx.io.WstxInputLocation; import com.ctc.wstx.io.WstxInputSource; import com.ctc.wstx.util.ExceptionUtil; import com.ctc.wstx.util.SymbolTable; import com.ctc.wstx.util.TextBuffer; /** * Abstract base class that defines some basic functionality that all * Woodstox reader classes (main XML reader, DTD reader) extend from. 
*/ public abstract class StreamScanner extends WstxInputData implements InputProblemReporter, InputConfigFlags, ParsingErrorMsgs { // // // Some well-known chars: /** * Last (highest) char code of the three, LF, CR and NULL */ public final static char CHAR_CR_LF_OR_NULL = (char) 13; public final static int INT_CR_LF_OR_NULL = 13; /** * Character that allows quick check of whether a char can potentially * be some kind of markup, WRT input stream processing; * has to contain linefeeds, &, < and > (">" only matters when * quoting text, as part of "]]>") */ protected final static char CHAR_FIRST_PURE_TEXT = (char) ('>' + 1); /** * First character in Unicode (ie one with lowest id) that is legal * as part of a local name (all valid name chars minus ':'). Used * for doing quick check for local name end; usually name ends in * a whitespace or equals sign. */ protected final static char CHAR_LOWEST_LEGAL_LOCALNAME_CHAR = '-'; /* /////////////////////////////////////////////////////////// // Character validity constants, structs /////////////////////////////////////////////////////////// */ /** * We will only use validity array for first 256 characters, mostly * because after those characters it's easier to do fairly simple * block checks. */ private final static int VALID_CHAR_COUNT = 0x100; private final static byte NAME_CHAR_INVALID_B = (byte) 0; private final static byte NAME_CHAR_ALL_VALID_B = (byte) 1; private final static byte NAME_CHAR_VALID_NONFIRST_B = (byte) -1; private final static byte[] sCharValidity = new byte[VALID_CHAR_COUNT]; static { /* First, since all valid-as-first chars are also valid-as-other chars, * we'll initialize common chars: */ sCharValidity['_'] = NAME_CHAR_ALL_VALID_B; for (int i = 0, last = ('z' - 'a'); i <= last; ++i) { sCharValidity['A' + i] = NAME_CHAR_ALL_VALID_B; sCharValidity['a' + i] = NAME_CHAR_ALL_VALID_B; } for (int i = 0xC0; i < 0xF6; ++i) { // not all are fully valid, but sCharValidity[i] = NAME_CHAR_ALL_VALID_B; } // ... 
now we can 'revert' ones not fully valid: sCharValidity[0xD7] = NAME_CHAR_INVALID_B; sCharValidity[0xF7] = NAME_CHAR_INVALID_B; /* And then we can proceed with ones only valid-as-other. */ sCharValidity['-'] = NAME_CHAR_VALID_NONFIRST_B; sCharValidity['.'] = NAME_CHAR_VALID_NONFIRST_B; sCharValidity[0xB7] = NAME_CHAR_VALID_NONFIRST_B; for (int i = '0'; i <= '9'; ++i) { sCharValidity[i] = NAME_CHAR_VALID_NONFIRST_B; } } /** * Public identifiers only use 7-bit ascii range. */ private final static int VALID_PUBID_CHAR_COUNT = 0x80; private final static byte[] sPubidValidity = new byte[VALID_PUBID_CHAR_COUNT]; // private final static byte PUBID_CHAR_INVALID_B = (byte) 0; private final static byte PUBID_CHAR_VALID_B = (byte) 1; static { for (int i = 0, last = ('z' - 'a'); i <= last; ++i) { sPubidValidity['A' + i] = PUBID_CHAR_VALID_B; sPubidValidity['a' + i] = PUBID_CHAR_VALID_B; } for (int i = '0'; i <= '9'; ++i) { sPubidValidity[i] = PUBID_CHAR_VALID_B; } // 3 main white space types are valid sPubidValidity[0x0A] = PUBID_CHAR_VALID_B; sPubidValidity[0x0D] = PUBID_CHAR_VALID_B; sPubidValidity[0x20] = PUBID_CHAR_VALID_B; // And many of punctuation/separator ascii chars too: sPubidValidity['-'] = PUBID_CHAR_VALID_B; sPubidValidity['\''] = PUBID_CHAR_VALID_B; sPubidValidity['('] = PUBID_CHAR_VALID_B; sPubidValidity[')'] = PUBID_CHAR_VALID_B; sPubidValidity['+'] = PUBID_CHAR_VALID_B; sPubidValidity[','] = PUBID_CHAR_VALID_B; sPubidValidity['.'] = PUBID_CHAR_VALID_B; sPubidValidity['/'] = PUBID_CHAR_VALID_B; sPubidValidity[':'] = PUBID_CHAR_VALID_B; sPubidValidity['='] = PUBID_CHAR_VALID_B; sPubidValidity['?'] = PUBID_CHAR_VALID_B; sPubidValidity[';'] = PUBID_CHAR_VALID_B; sPubidValidity['!'] = PUBID_CHAR_VALID_B; sPubidValidity['*'] = PUBID_CHAR_VALID_B; sPubidValidity['#'] = PUBID_CHAR_VALID_B; sPubidValidity['@'] = PUBID_CHAR_VALID_B; sPubidValidity['$'] = PUBID_CHAR_VALID_B; sPubidValidity['_'] = PUBID_CHAR_VALID_B; sPubidValidity['%'] = PUBID_CHAR_VALID_B; } /* 
/////////////////////////////////////////////////////////// // Basic configuration /////////////////////////////////////////////////////////// */ /** * Copy of the configuration object passed by the factory. * Contains immutable settings for this reader (or in case * of DTD parsers, reader that uses it) */ protected final ReaderConfig mConfig; // // // Various extracted settings: /** * If true, Reader is namespace aware, and should do basic checks * (usually enforcing limitations on having colons in names) */ protected final boolean mCfgNsEnabled; // Extracted standard on/off settings: /** * note: left non-final on purpose: sub-class may need to modify * the default value after construction. */ protected boolean mCfgReplaceEntities; /* /////////////////////////////////////////////////////////// // Symbol handling, if applicable /////////////////////////////////////////////////////////// */ final SymbolTable mSymbols; /** * Local full name for the event, if it has one (note: element events * do NOT use this variable; those names are stored in element stack): * target for processing instructions. *
* Currently used for proc. instr. target, and entity name (at least * when current entity reference is null). *
* Note: this variable is generally not cleared, since it comes from * a symbol table, ie. this won't be the only reference. */ protected String mCurrName; /* /////////////////////////////////////////////////////////// // Input handling /////////////////////////////////////////////////////////// */ /** * Currently active input source; contains link to parent (nesting) input * sources, if any. */ protected WstxInputSource mInput; /** * Top-most input source this reader can use; due to input source * chaining, this is not necessarily the root of all input; for example, * external DTD subset reader's root input still has original document * input as its parent. */ protected final WstxInputSource mRootInput; /** * Custom resolver used to handle external entities that are to be expanded * by this reader (external param/general entity expander) */ XMLResolver mEntityResolver = null; /** * This is the current depth of the input stack (same as what input * element stack would return as its depth). * It is used to enforce input scope constraints for nesting of * elements (for xml reader) and dtd declaration (for dtd reader) * with regards to input block (entity expansion) boundaries. *
* Basically this value is compared to {@link #mInputTopDepth}, which * indicates what was the depth at the point where the currently active * input scope/block was started. */ protected int mCurrDepth = 0; protected int mInputTopDepth = 0; /** * Flag that indicates whether linefeeds in the input data are to * be normalized or not. * Xml specs mandate that the line feeds are only normalized * when they are from the external entities (main doc, external * general/parsed entities), so normalization has to be * suppressed when expanding internal general/parsed entities. */ protected boolean mNormalizeLFs; /* /////////////////////////////////////////////////////////// // Buffer(s) for local name(s) and text content /////////////////////////////////////////////////////////// */ /** * Temporary buffer used if local name can not be just directly * constructed from input buffer (name is on a boundary or such). */ protected char[] mNameBuffer = null; /* /////////////////////////////////////////////////////////// // Information about starting location of event // Reader is pointing to; updated on-demand /////////////////////////////////////////////////////////// */ // // // Location info at point when current token was started /** * Total number of characters read before start of current token. * For big (gigabyte-sized) sizes are possible, needs to be long, * unlike pointers and sizes related to in-memory buffers. 
*/ protected long mTokenInputTotal = 0; /** * Input row on which current token starts, 1-based */ protected int mTokenInputRow = 1; /** * Column on input row that current token starts; 0-based (although * in the end it'll be converted to 1-based) */ protected int mTokenInputCol = 0; /* /////////////////////////////////////////////////////////// // XML document information (from doc decl if one // was found) common to all entities (main xml // document, external DTD subset) /////////////////////////////////////////////////////////// */ /** * Input stream encoding, if known (passed in, or determined by * auto-detection); null if not. */ String mDocInputEncoding = null; /** * Character encoding from xml declaration, if any; null if no * declaration, or it didn't specify encoding. */ String mDocXmlEncoding = null; /** * XML version as declared by the document; one of constants * from {@link XmlConsts} (like {@link XmlConsts#XML_V_10}). */ protected int mDocXmlVersion = XmlConsts.XML_V_UNKNOWN; /** * Cache of internal character entities; */ protected Map mCachedEntities; /** * Flag for whether or not character references should be treated as entities */ protected boolean mCfgTreatCharRefsAsEntities; /** * Entity reference stream currently points to. */ protected EntityDecl mCurrEntity; /* /////////////////////////////////////////////////////////// // Life-cycle /////////////////////////////////////////////////////////// */ /** * Constructor used when creating a complete new (main-level) reader that * does not share its input buffers or state with another reader. 
*/ protected StreamScanner(WstxInputSource input, ReaderConfig cfg, XMLResolver res) { super(); mInput = input; // 17-Jun-2004, TSa: Need to know root-level input source mRootInput = input; mConfig = cfg; mSymbols = cfg.getSymbols(); int cf = cfg.getConfigFlags(); mCfgNsEnabled = (cf & CFG_NAMESPACE_AWARE) != 0; mCfgReplaceEntities = (cf & CFG_REPLACE_ENTITY_REFS) != 0; mNormalizeLFs = mConfig.willNormalizeLFs(); mInputBuffer = null; mInputPtr = mInputEnd = 0; mEntityResolver = res; mCfgTreatCharRefsAsEntities = mConfig.willTreatCharRefsAsEnts(); mCachedEntities = mCfgTreatCharRefsAsEntities ? new HashMap() : Collections.EMPTY_MAP; } /* /////////////////////////////////////////////////////////// // Package API /////////////////////////////////////////////////////////// */ /** * Method that returns location of the last character returned by this * reader; that is, location "one less" than the currently pointed to * location. */ protected WstxInputLocation getLastCharLocation() { return mInput.getLocation(mCurrInputProcessed + mInputPtr - 1, mCurrInputRow, mInputPtr - mCurrInputRowStart); } protected URL getSource() { return mInput.getSource(); } protected String getSystemId() { return mInput.getSystemId(); } /* /////////////////////////////////////////////////////////// // Partial LocationInfo implementation (not implemented // by this base class, but is by some sub-classes) /////////////////////////////////////////////////////////// */ /** * Returns location of last properly parsed token; as per StAX specs, * apparently needs to be the end of current event, which is the same * as the start of the following event (or EOF if that's next). */ public abstract Location getLocation(); public XMLStreamLocation2 getStartLocation() { // note: +1 is used as columns are 1-based... 
return mInput.getLocation(mTokenInputTotal, mTokenInputRow, mTokenInputCol + 1); } public XMLStreamLocation2 getCurrentLocation() { return mInput.getLocation(mCurrInputProcessed + mInputPtr, mCurrInputRow, mInputPtr - mCurrInputRowStart + 1); } /* /////////////////////////////////////////////////////////// // InputProblemReporter implementation /////////////////////////////////////////////////////////// */ public WstxException throwWfcException(String msg, boolean deferErrors) throws WstxException { WstxException ex = constructWfcException(msg); if (!deferErrors) { throw ex; } return ex; } public void throwParseError(String msg) throws XMLStreamException { throwParseError(msg, null, null); } /** * Throws generic parse error with specified message and current parsing * location. *
* Note: public access only because core code in other packages needs * to access it. */ public void throwParseError(String format, Object arg, Object arg2) throws XMLStreamException { String msg = (arg != null || arg2 != null) ? MessageFormat.format(format, new Object[] { arg, arg2 }) : format; throw constructWfcException(msg); } public void reportProblem(String probType, String format, Object arg, Object arg2) throws XMLStreamException { XMLReporter rep = mConfig.getXMLReporter(); if (rep != null) { _reportProblem(rep, probType, MessageFormat.format(format, new Object[] { arg, arg2 }), null); } } public void reportProblem(Location loc, String probType, String format, Object arg, Object arg2) throws XMLStreamException { XMLReporter rep = mConfig.getXMLReporter(); if (rep != null) { String msg = (arg != null || arg2 != null) ? MessageFormat.format(format, new Object[] { arg, arg2 }) : format; _reportProblem(rep, probType, msg, loc); } } protected void _reportProblem(XMLReporter rep, String probType, String msg, Location loc) throws XMLStreamException { if (loc == null) { loc = getLastCharLocation(); } _reportProblem(rep, new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR, probType)); } protected void _reportProblem(XMLReporter rep, XMLValidationProblem prob) throws XMLStreamException { if (rep != null) { Location loc = prob.getLocation(); if (loc == null) { loc = getLastCharLocation(); prob.setLocation(loc); } // Backwards-compatibility fix: add non-null type, if missing: if (prob.getType() == null) { prob.setType(ErrorConsts.WT_VALIDATION); } // [WSTX-154]: was catching and dropping thrown exception: shouldn't. // [WTSX-157]: need to support XMLReporter2 if (rep instanceof XMLReporter2) { ((XMLReporter2) rep).report(prob); } else { rep.report(prob.getMessage(), prob.getType(), prob, loc); } } } /** *
* Note: this is the base implementation used for implementing
* ValidationContext
*/
public void reportValidationProblem(XMLValidationProblem prob)
    throws XMLStreamException
{
    // !!! TBI: Fail-fast vs. deferred modes?

    /* Historical note: the first cut reported only warnings via
     * XMLReporter and threw on everything else. Since [WSTX-153]
     * (27-May-2008) the Stax XMLReporter contract is followed instead:
     * warnings and non-fatal errors (which covers all validation
     * errors) go to the reporter, and only problems more severe than
     * SEVERITY_ERROR by-pass it with an immediate stream exception.
     */
    final boolean fatal = prob.getSeverity() > XMLValidationProblem.SEVERITY_ERROR;
    if (fatal) {
        throw WstxValidationException.create(prob);
    }

    final XMLReporter reporter = mConfig.getXMLReporter();
    if (reporter != null) {
        // Reporter configured: it gets to decide what to do
        _reportProblem(reporter, prob);
        return;
    }

    /* No reporter registered: for backwards compatibility, regular
     * (non-fatal) errors are still surfaced as exceptions; anything
     * less severe is dropped.
     */
    if (prob.getSeverity() >= XMLValidationProblem.SEVERITY_ERROR) {
        throw WstxValidationException.create(prob);
    }
}
/**
 * Convenience variant that wraps the message and severity into a
 * validation problem located at the last character read, and passes
 * it on to {@link #reportValidationProblem(XMLValidationProblem)}.
 *
 * @param msg Description of the problem
 * @param severity Severity to assign to the constructed problem
 */
public void reportValidationProblem(String msg, int severity)
    throws XMLStreamException
{
    final WstxInputLocation where = getLastCharLocation();
    final XMLValidationProblem problem = new XMLValidationProblem(where, msg, severity);
    reportValidationProblem(problem);
}
/**
 * Convenience variant that reports the message as a problem of
 * severity {@link XMLValidationProblem#SEVERITY_ERROR}, located at
 * the last character read.
 *
 * @param msg Description of the problem
 */
public void reportValidationProblem(String msg)
    throws XMLStreamException
{
    final WstxInputLocation where = getLastCharLocation();
    final XMLValidationProblem problem = new XMLValidationProblem(where, msg,
            XMLValidationProblem.SEVERITY_ERROR);
    reportValidationProblem(problem);
}
/**
 * Convenience variant for callers that already know where the problem
 * occurred. Severity is whatever the two-argument
 * {@link XMLValidationProblem} constructor assigns by default.
 *
 * @param loc Location to attach to the problem
 * @param msg Description of the problem
 */
public void reportValidationProblem(Location loc, String msg)
    throws XMLStreamException
{
    final XMLValidationProblem problem = new XMLValidationProblem(loc, msg);
    reportValidationProblem(problem);
}
/**
 * Convenience variant that first expands a {@link MessageFormat}
 * pattern and then reports the resulting message via
 * {@link #reportValidationProblem(String)}.
 * Note that the pattern is always run through the formatter, even
 * when both arguments are null.
 *
 * @param format Message pattern, in {@link MessageFormat} syntax
 * @param arg Value for placeholder {0}
 * @param arg2 Value for placeholder {1}
 */
public void reportValidationProblem(String format, Object arg, Object arg2)
    throws XMLStreamException
{
    final Object[] formatArgs = new Object[] { arg, arg2 };
    final String msg = MessageFormat.format(format, formatArgs);
    reportValidationProblem(msg);
}
/*
///////////////////////////////////////////////////////////
// Other error reporting methods
///////////////////////////////////////////////////////////
*/
/**
 * Builds (but does not throw) a parsing exception carrying the given
 * message and the location of the last character read.
 *
 * @param msg Message for the exception
 *
 * @return Newly constructed exception; caller decides whether to
 *   throw it
 */
protected WstxException constructWfcException(String msg)
{
    final WstxInputLocation where = getLastCharLocation();
    return new WstxParsingException(msg, where);
}
/**
* Construct and return a {@link XMLStreamException} to throw
* as a result of a failed Typed Access operation (but one not
* caused by a Well-Formedness Constraint or Validation Constraint
* problem)
*/
/*
protected WstxException _constructTypeException(String msg)
{
// Hmmh. Should there be a distinct sub-type?
return new WstxParsingException(msg, getLastCharLocation());
}
*/
/**
 * Wraps a low-level I/O failure into the stream exception type used
 * by this package, without throwing it.
 *
 * @param ioe I/O exception to wrap
 *
 * @return Wrapper exception constructed from <code>ioe</code>
 */
protected WstxException constructFromIOE(IOException ioe)
{
    final WstxIOException wrapped = new WstxIOException(ioe);
    return wrapped;
}
/**
 * Builds (but does not throw) the exception used to signal that a
 * NULL character was encountered in the input; location is that of
 * the last character read.
 *
 * @return Newly constructed exception
 */
protected WstxException constructNullCharException()
{
    final String msg = "Illegal character (NULL, unicode 0) encountered: not valid in any content";
    final WstxInputLocation where = getLastCharLocation();
    return new WstxUnexpectedCharException(msg, where, CHAR_NULL);
}
/**
 * Throws an exception indicating that the given character was not
 * expected at the location of the last character read.
 *
 * @param i Code of the offending character; narrowed to a
 *   <code>char</code> before being described and reported
 * @param msg Text appended, as is, right after the character
 *   description (so it normally starts with its own separator); may
 *   be null, in which case nothing is appended
 */
protected void throwUnexpectedChar(int i, String msg)
    throws WstxException
{
    char c = (char) i;
    /* Guard against null the same way throwUnexpectedEOF() and
     * throwUnexpectedEOB() do: plain concatenation would otherwise
     * put the literal text "null" into the message.
     */
    String suffix = (msg == null) ? "" : msg;
    String excMsg = "Unexpected character " + getCharDesc(c) + suffix;
    throw new WstxUnexpectedCharException(excMsg, getLastCharLocation(), c);
}
/**
 * Throws the exception built by {@link #constructNullCharException}.
 */
protected void throwNullChar()
    throws WstxException
{
    final WstxException nullCharProblem = constructNullCharException();
    throw nullCharProblem;
}
/**
 * Reports an illegal character immediately; same as calling
 * {@link #throwInvalidSpace(int, boolean)} with error deferral
 * turned off.
 *
 * @param i Code of the offending character
 */
protected void throwInvalidSpace(int i)
    throws WstxException
{
    // Not deferring, so the call below throws instead of returning
    final boolean deferErrors = false;
    throwInvalidSpace(i, deferErrors);
}
/**
 * Constructs the exception describing an illegal character, and
 * either throws it right away or hands it back to the caller.
 *
 * @param i Code of the offending character; narrowed to a
 *   <code>char</code>
 * @param deferErrors If true, the exception is returned instead of
 *   being thrown
 *
 * @return The constructed exception, when <code>deferErrors</code>
 *   is true; otherwise the method does not return normally
 */
protected WstxException throwInvalidSpace(int i, boolean deferErrors)
    throws WstxException
{
    final char ch = (char) i;
    final WstxException problem;

    if (ch != CHAR_NULL) {
        String desc = "Illegal character (" + getCharDesc(ch) + ")";
        if (mXml11) {
            desc = desc + " [note: in XML 1.1, it could be included via entity expansion]";
        }
        problem = new WstxUnexpectedCharException(desc, getLastCharLocation(), ch);
    } else {
        // NULL gets its own, more specific message
        problem = constructNullCharException();
    }

    if (deferErrors) {
        return problem;
    }
    throw problem;
}
/**
 * Throws an exception signalling that input ended unexpectedly,
 * located at the last character read.
 *
 * @param msg Optional text appended, as is, after "Unexpected EOF";
 *   null means nothing is appended
 */
protected void throwUnexpectedEOF(String msg)
    throws WstxException
{
    final String suffix = (msg != null) ? msg : "";
    final WstxInputLocation where = getLastCharLocation();
    throw new WstxEOFException("Unexpected EOF" + suffix, where);
}
/**
 * Counterpart of {@link #throwUnexpectedEOF} for the case where it is
 * only the current input block that has ended. Meant for tokens that
 * are not allowed to span input block boundaries (ie. that can not
 * continue past the end of an entity expansion).
 *
 * @param msg Optional text appended, as is, after
 *   "Unexpected end of input block"; null means nothing is appended
 */
protected void throwUnexpectedEOB(String msg)
    throws WstxException
{
    final String suffix = (msg != null) ? msg : "";
    final WstxInputLocation where = getLastCharLocation();
    throw new WstxEOFException("Unexpected end of input block" + suffix, where);
}
/**
 * Wraps the given I/O exception into a {@link WstxIOException} and
 * throws the wrapper.
 *
 * @param ioe I/O exception to wrap
 */
protected void throwFromIOE(IOException ioe)
    throws WstxException
{
    final WstxIOException wrapped = new WstxIOException(ioe);
    throw wrapped;
}
/**
 * Rethrows a stream exception as a {@link WstxException}: instances
 * that already are of that type are thrown unchanged, anything else
 * is wrapped first, with the original set as the cause.
 *
 * @param strex Exception to rethrow
 */
protected void throwFromStrE(XMLStreamException strex)
    throws WstxException
{
    if (!(strex instanceof WstxException)) {
        // Foreign stream exception: wrap it, keeping the root cause
        final WstxException wrapper = new WstxException(strex);
        ExceptionUtil.setInitCause(wrapper, strex);
        throw wrapper;
    }
    throw (WstxException) strex;
}
/**
* Method called to report an error, when caller's signature only
* allows runtime exceptions to be thrown.
* Stream exceptions are handed to
* {@link WstxLazyException#throwLazily}; every exception that gets
* past that point is handed to
* {@link ExceptionUtil#throwRuntimeException}.
*
* @param e Exception to report
*/
protected void throwLazyError(Exception e)
{
if (e instanceof XMLStreamException) {
// NOTE(review): presumably this call never returns normally;
// if it ever did, control would fall through to the generic
// handling below -- confirm against WstxLazyException
WstxLazyException.throwLazily((XMLStreamException) e);
}
// Everything else (looks like: runtime and other checked
// exceptions) is left for the utility method to deal with
ExceptionUtil.throwRuntimeException(e);
}
/**
 * Returns the description {@link ErrorConsts#tokenTypeDesc} gives
 * for the specified token type.
 *
 * @param type Token type to describe
 *
 * @return Description to use in messages
 */
protected String tokenTypeDesc(int type)
{
    final String desc = ErrorConsts.tokenTypeDesc(type);
    return desc;
}
/*
///////////////////////////////////////////////////////////
// Input buffer handling
///////////////////////////////////////////////////////////
*/
/**
* Accessor for the input source this reader is currently reading from.
*
* Note: public only because some implementations are on different
* package.
*/
public final WstxInputSource getCurrentInput()
{
    final WstxInputSource current = mInput;
    return current;
}

/**
* Returns the number of characters left unread in the input buffer
* (distance between input pointer and end of valid content).
*/
protected final int inputInBuffer()
{
    final int remaining = mInputEnd - mInputPtr;
    return remaining;
}

/**
* Returns the next character and advances the input pointer, loading
* more input via {@link #loadMore} if the buffer has been exhausted.
*
* @return Next character as an int; or -1 if no more input could be
*   loaded
*/
protected final int getNext()
    throws XMLStreamException
{
    // Only try to load more when the buffer is used up; give up on EOF
    if (mInputPtr >= mInputEnd && !loadMore()) {
        return -1;
    }
    final char next = mInputBuffer[mInputPtr++];
    return (int) next;
}

/**
* Similar to {@link #getNext}, but does not advance pointer
* in input buffer.
*
* Note: this method only peeks within current input source; * it does not close it and check nested input source (if any). * This is necessary when checking keywords, since they can never * cross input block boundary. */ protected final int peekNext() throws XMLStreamException { if (mInputPtr >= mInputEnd) { if (!loadMoreFromCurrent()) { return -1; } } return (int) mInputBuffer[mInputPtr]; } protected final char getNextChar(String errorMsg) throws XMLStreamException { if (mInputPtr >= mInputEnd) { loadMore(errorMsg); } return mInputBuffer[mInputPtr++]; } /** * Similar to {@link #getNextChar}, but will not read more characters * from parent input source(s) if the current input source doesn't * have more content. This is often needed to prevent "runaway" content, * such as comments that start in an entity but do not have matching * close marker inside entity; XML specification specifically states * such markup is not legal. */ protected final char getNextCharFromCurrent(String errorMsg) throws XMLStreamException { if (mInputPtr >= mInputEnd) { loadMoreFromCurrent(errorMsg); } return mInputBuffer[mInputPtr++]; } /** * Method that will skip through zero or more white space characters, * and return either the character following white space, or -1 to * indicate EOF (end of the outermost input source)/ */ protected final int getNextAfterWS() throws XMLStreamException { if (mInputPtr >= mInputEnd) { if (!loadMore()) { return -1; } } char c = mInputBuffer[mInputPtr++]; while (c <= CHAR_SPACE) { // Linefeed? if (c == '\n' || c == '\r') { skipCRLF(c); } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } // Still a white space? if (mInputPtr >= mInputEnd) { if (!loadMore()) { return -1; } } c = mInputBuffer[mInputPtr++]; } return (int) c; } protected final char getNextCharAfterWS(String errorMsg) throws XMLStreamException { if (mInputPtr >= mInputEnd) { loadMore(errorMsg); } char c = mInputBuffer[mInputPtr++]; while (c <= CHAR_SPACE) { // Linefeed? 
if (c == '\n' || c == '\r') { skipCRLF(c); } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } // Still a white space? if (mInputPtr >= mInputEnd) { loadMore(errorMsg); } c = mInputBuffer[mInputPtr++]; } return c; } protected final char getNextInCurrAfterWS(String errorMsg) throws XMLStreamException { return getNextInCurrAfterWS(errorMsg, getNextCharFromCurrent(errorMsg)); } protected final char getNextInCurrAfterWS(String errorMsg, char c) throws XMLStreamException { while (c <= CHAR_SPACE) { // Linefeed? if (c == '\n' || c == '\r') { skipCRLF(c); } else if (c != CHAR_SPACE && c != '\t') { throwInvalidSpace(c); } // Still a white space? if (mInputPtr >= mInputEnd) { loadMoreFromCurrent(errorMsg); } c = mInputBuffer[mInputPtr++]; } return c; } /** * Method called when a CR has been spotted in input; checks if next * char is LF, and if so, skips it. Note that next character has to * come from the current input source, to qualify; it can never come * from another (nested) input source. * * @return True, if passed in char is '\r' and next one is '\n'. */ protected final boolean skipCRLF(char c) throws XMLStreamException { boolean result; if (c == '\r' && peekNext() == '\n') { ++mInputPtr; result = true; } else { result = false; } ++mCurrInputRow; mCurrInputRowStart = mInputPtr; return result; } protected final void markLF() { ++mCurrInputRow; mCurrInputRowStart = mInputPtr; } protected final void markLF(int inputPtr) { ++mCurrInputRow; mCurrInputRowStart = inputPtr; } /** * Method to push back last character read; can only be called once, * that is, no more than one char can be guaranteed to be succesfully * returned. */ protected final void pushback() { --mInputPtr; } /* /////////////////////////////////////////////////////////// // Sub-class overridable input handling methods /////////////////////////////////////////////////////////// */ /** * Method called when an entity has been expanded (new input source * has been created). 
Needs to initialize location information and change * active input source. * * @param entityId Name of the entity being expanded */ protected void initInputSource(WstxInputSource newInput, boolean isExt, String entityId) throws XMLStreamException { mInput = newInput; // Let's make sure new input will be read next time input is needed: mInputPtr = 0; mInputEnd = 0; /* Plus, reset the input location so that'll be accurate for * error reporting etc. */ mInputTopDepth = mCurrDepth; mInput.initInputLocation(this, mCurrDepth); /* 21-Feb-2006, TSa: Linefeeds are NOT normalized when expanding * internal entities (XML, 2.11) */ if (isExt) { mNormalizeLFs = true; } else { mNormalizeLFs = false; } } /** * Method that will try to read one or more characters from currently * open input sources; closing input sources if necessary. * * @return true if reading succeeded (or may succeed), false if * we reached EOF. */ protected boolean loadMore() throws XMLStreamException { WstxInputSource input = mInput; do { /* Need to make sure offsets are properly updated for error * reporting purposes, and do this now while previous amounts * are still known. */ mCurrInputProcessed += mInputEnd; mCurrInputRowStart -= mInputEnd; int count; try { count = input.readInto(this); if (count > 0) { return true; } input.close(); } catch (IOException ioe) { throw constructFromIOE(ioe); } if (input == mRootInput) { /* Note: no need to check entity/input nesting in this * particular case, since it will be handled by higher level * parsing code (results in an unexpected EOF) */ return false; } WstxInputSource parent = input.getParent(); if (parent == null) { // sanity check! throwNullParent(input); } /* 13-Feb-2006, TSa: Ok, do we violate a proper nesting constraints * with this input block closure? 
*/ if (mCurrDepth != input.getScopeId()) { handleIncompleteEntityProblem(input); } mInput = input = parent; input.restoreContext(this); mInputTopDepth = input.getScopeId(); /* 21-Feb-2006, TSa: Since linefeed normalization needs to be * suppressed for internal entity expansion, we may need to * change the state... */ if (!mNormalizeLFs) { mNormalizeLFs = !input.fromInternalEntity(); } // Maybe there are leftovers from that input in buffer now? } while (mInputPtr >= mInputEnd); return true; } protected final boolean loadMore(String errorMsg) throws XMLStreamException { if (!loadMore()) { throwUnexpectedEOF(errorMsg); } return true; } protected boolean loadMoreFromCurrent() throws XMLStreamException { // Need to update offsets properly mCurrInputProcessed += mInputEnd; mCurrInputRowStart -= mInputEnd; try { int count = mInput.readInto(this); return (count > 0); } catch (IOException ie) { throw constructFromIOE(ie); } } protected final boolean loadMoreFromCurrent(String errorMsg) throws XMLStreamException { if (!loadMoreFromCurrent()) { throwUnexpectedEOB(errorMsg); } return true; } /** * Method called to make sure current main-level input buffer has at * least specified number of characters available consequtively, * without having to call {@link #loadMore}. It can only be called * when input comes from main-level buffer; further, call can shift * content in input buffer, so caller has to flush any data still * pending. In short, caller has to know exactly what it's doing. :-) *
* Note: method does not check for any other input sources than the * current one -- if current source can not fulfill the request, a * failure is indicated. * * @return true if there's now enough data; false if not (EOF) */ protected boolean ensureInput(int minAmount) throws XMLStreamException { int currAmount = mInputEnd - mInputPtr; if (currAmount >= minAmount) { return true; } try { return mInput.readMore(this, minAmount); } catch (IOException ie) { throw constructFromIOE(ie); } } protected void closeAllInput(boolean force) throws XMLStreamException { WstxInputSource input = mInput; while (true) { try { if (force) { input.closeCompletely(); } else { input.close(); } } catch (IOException ie) { throw constructFromIOE(ie); } if (input == mRootInput) { break; } WstxInputSource parent = input.getParent(); if (parent == null) { // sanity check! throwNullParent(input); } mInput = input = parent; } } protected void throwNullParent(WstxInputSource curr) { throw new IllegalStateException(ErrorConsts.ERR_INTERNAL); //throw new IllegalStateException("Internal error: null parent for input source '"+curr+"'; should never occur (should have stopped at root input '"+mRootInput+"')."); } /* /////////////////////////////////////////////////////////// // Entity resolution /////////////////////////////////////////////////////////// */ /** * Method that tries to resolve a character entity, or (if caller so * specifies), a pre-defined internal entity (lt, gt, amp, apos, quot). * It will succeed iff: *
* Note: On entry we are guaranteed there are at least 3 more characters * in this buffer; otherwise we shouldn't be called. * * @param checkStd If true, will check pre-defined internal entities * (gt, lt, amp, apos, quot); if false, will only check actual * character entities. * * @return (Valid) character value, if entity is a character reference, * and could be resolved from current input buffer (does not span * buffer boundary); null char (code 0) if not (either non-char * entity, or spans input buffer boundary). */ protected int resolveSimpleEntity(boolean checkStd) throws XMLStreamException { char[] buf = mInputBuffer; int ptr = mInputPtr; char c = buf[ptr++]; // Numeric reference? if (c == '#') { c = buf[ptr++]; int value = 0; int inputLen = mInputEnd; if (c == 'x') { // hex while (ptr < inputLen) { c = buf[ptr++]; if (c == ';') { break; } value = value << 4; if (c <= '9' && c >= '0') { value += (c - '0'); } else if (c >= 'a' && c <= 'f') { value += (10 + (c - 'a')); } else if (c >= 'A' && c <= 'F') { value += (10 + (c - 'A')); } else { mInputPtr = ptr; // so error points to correct char throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F)."); } /* Need to check for overflow; easiest to do right as * it happens... */ if (value > MAX_UNICODE_CHAR) { reportUnicodeOverflow(); } } } else { // numeric (decimal) while (c != ';') { if (c <= '9' && c >= '0') { value = (value * 10) + (c - '0'); // Overflow? if (value > MAX_UNICODE_CHAR) { reportUnicodeOverflow(); } } else { mInputPtr = ptr; // so error points to correct char throwUnexpectedChar(c, "; expected a decimal number."); } if (ptr >= inputLen) { break; } c = buf[ptr++]; } } /* We get here either if we got it all, OR if we ran out of * input in current buffer. */ if (c == ';') { // got the full thing mInputPtr = ptr; validateChar(value); return value; } /* If we ran out of input, need to just fall back, gets * resolved via 'full' resolution mechanism. 
*/ } else if (checkStd) { /* Caller may not want to resolve these quite yet... * (when it wants separate events for non-char entities) */ if (c == 'a') { // amp or apos? c = buf[ptr++]; if (c == 'm') { // amp? if (buf[ptr++] == 'p') { if (ptr < mInputEnd && buf[ptr++] == ';') { mInputPtr = ptr; return '&'; } } } else if (c == 'p') { // apos? if (buf[ptr++] == 'o') { int len = mInputEnd; if (ptr < len && buf[ptr++] == 's') { if (ptr < len && buf[ptr++] == ';') { mInputPtr = ptr; return '\''; } } } } } else if (c == 'g') { // gt? if (buf[ptr++] == 't' && buf[ptr++] == ';') { mInputPtr = ptr; return '>'; } } else if (c == 'l') { // lt? if (buf[ptr++] == 't' && buf[ptr++] == ';') { mInputPtr = ptr; return '<'; } } else if (c == 'q') { // quot? if (buf[ptr++] == 'u' && buf[ptr++] == 'o') { int len = mInputEnd; if (ptr < len && buf[ptr++] == 't') { if (ptr < len && buf[ptr++] == ';') { mInputPtr = ptr; return '"'; } } } } } return 0; } /** * Method called to resolve character entities, and only character * entities (except that pre-defined char entities -- amp, apos, lt, * gt, quote -- MAY be "char entities" in this sense, depending on * arguments). * Otherwise it is to return the null char; if so, * the input pointer will point to the same point as when method * entered (char after ampersand), plus the ampersand itself is * guaranteed to be in the input buffer (so caller can just push it * back if necessary). *
* Most often this method is called when reader is not to expand * non-char entities automatically, but to return them as separate * events. *
* Main complication here is that we need to do 5-char lookahead. This * is problematic if chars are on input buffer boundary. This is ok * for the root level input buffer, but not for some nested buffers. * However, according to XML specs, such split entities are actually * illegal... so we can throw an exception in those cases. * * @param checkStd If true, will check pre-defined internal entities * (gt, lt, amp, apos, quot) as character entities; if false, will only * check actual 'real' character entities. * * @return (Valid) character value, if entity is a character reference, * and could be resolved from current input buffer (does not span * buffer boundary); null char (code 0) if not (either non-char * entity, or spans input buffer boundary). */ protected int resolveCharOnlyEntity(boolean checkStd) throws XMLStreamException { //int avail = inputInBuffer(); int avail = mInputEnd - mInputPtr; if (avail < 6) { // split entity, or buffer boundary /* Don't want to lose leading '&' (in case we can not expand * the entity), so let's push it back first */ --mInputPtr; /* Shortest valid reference would be 3 chars ('&a;'); which * would only be legal from an expanded entity... */ if (!ensureInput(6)) { avail = inputInBuffer(); if (avail < 3) { throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF); } } else { avail = 6; } // ... and now we can move pointer back as well: ++mInputPtr; } /* Ok, now we have one more character to check, and that's enough * to determine type decisively. */ char c = mInputBuffer[mInputPtr]; // A char reference? if (c == '#') { // yup ++mInputPtr; return resolveCharEnt(null); } // nope... except may be a pre-def? 
if (checkStd) { if (c == 'a') { char d = mInputBuffer[mInputPtr+1]; if (d == 'm') { if (avail >= 4 && mInputBuffer[mInputPtr+2] == 'p' && mInputBuffer[mInputPtr+3] == ';') { mInputPtr += 4; return '&'; } } else if (d == 'p') { if (avail >= 5 && mInputBuffer[mInputPtr+2] == 'o' && mInputBuffer[mInputPtr+3] == 's' && mInputBuffer[mInputPtr+4] == ';') { mInputPtr += 5; return '\''; } } } else if (c == 'l') { if (avail >= 3 && mInputBuffer[mInputPtr+1] == 't' && mInputBuffer[mInputPtr+2] == ';') { mInputPtr += 3; return '<'; } } else if (c == 'g') { if (avail >= 3 && mInputBuffer[mInputPtr+1] == 't' && mInputBuffer[mInputPtr+2] == ';') { mInputPtr += 3; return '>'; } } else if (c == 'q') { if (avail >= 5 && mInputBuffer[mInputPtr+1] == 'u' && mInputBuffer[mInputPtr+2] == 'o' && mInputBuffer[mInputPtr+3] == 't' && mInputBuffer[mInputPtr+4] == ';') { mInputPtr += 5; return '"'; } } } return 0; } /** * Reverse of {@link #resolveCharOnlyEntity}; will only resolve entity * if it is NOT a character entity (or pre-defined 'generic' entity; * amp, apos, lt, gt or quot). Only used in cases where entities * are to be separately returned unexpanded (in non-entity-replacing * mode); which means it's never called from dtd handler. */ protected EntityDecl resolveNonCharEntity() throws XMLStreamException { //int avail = inputInBuffer(); int avail = mInputEnd - mInputPtr; if (avail < 6) { // split entity, or buffer boundary /* Don't want to lose leading '&' (in case we can not expand * the entity), so let's push it back first */ --mInputPtr; /* Shortest valid reference would be 3 chars ('&a;'); which * would only be legal from an expanded entity... */ if (!ensureInput(6)) { avail = inputInBuffer(); if (avail < 3) { throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF); } } else { avail = 6; } // ... 
and now we can move pointer back as well: ++mInputPtr; } // We don't care about char entities: char c = mInputBuffer[mInputPtr]; if (c == '#') { return null; } /* 19-Aug-2004, TSa: Need special handling for pre-defined * entities; they are not counted as 'real' general parsed * entities, but more as character entities... */ // have chars at least up to mInputPtr+4 by now if (c == 'a') { char d = mInputBuffer[mInputPtr+1]; if (d == 'm') { if (avail >= 4 && mInputBuffer[mInputPtr+2] == 'p' && mInputBuffer[mInputPtr+3] == ';') { // If not automatically expanding: //return sEntityAmp; // mInputPtr += 4; return null; } } else if (d == 'p') { if (avail >= 5 && mInputBuffer[mInputPtr+2] == 'o' && mInputBuffer[mInputPtr+3] == 's' && mInputBuffer[mInputPtr+4] == ';') { return null; } } } else if (c == 'l') { if (avail >= 3 && mInputBuffer[mInputPtr+1] == 't' && mInputBuffer[mInputPtr+2] == ';') { return null; } } else if (c == 'g') { if (avail >= 3 && mInputBuffer[mInputPtr+1] == 't' && mInputBuffer[mInputPtr+2] == ';') { return null; } } else if (c == 'q') { if (avail >= 5 && mInputBuffer[mInputPtr+1] == 'u' && mInputBuffer[mInputPtr+2] == 'o' && mInputBuffer[mInputPtr+3] == 't' && mInputBuffer[mInputPtr+4] == ';') { return null; } } // Otherwise, let's just parse in generic way: ++mInputPtr; // since we already read the first letter String id = parseEntityName(c); mCurrName = id; return findEntity(id, null); } /** * Method that does full resolution of an entity reference, be it * character entity, internal entity or external entity, including * updating of input buffers, and depending on whether result is * a character entity (or one of 5 pre-defined entities), returns * char in question, or null character (code 0) to indicate it had * to change input source. * * @param allowExt If true, is allowed to expand external entities * (expanding text); if false, is not (expanding attribute value). 
* * @return Either single-character replacement (which is NOT to be * reparsed), or null char (0) to indicate expansion is done via * input source. */ protected int fullyResolveEntity(boolean allowExt) throws XMLStreamException { char c = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF); // Do we have a (numeric) character entity reference? if (c == '#') { // numeric final StringBuffer originalSurface = new StringBuffer("#"); int ch = resolveCharEnt(originalSurface); if (mCfgTreatCharRefsAsEntities) { final char[] originalChars = new char[originalSurface.length()]; originalSurface.getChars(0, originalSurface.length(), originalChars, 0); mCurrEntity = getIntEntity(ch, originalChars); return 0; } return ch; } String id = parseEntityName(c); // Perhaps we have a pre-defined char reference? c = id.charAt(0); /* * 16-May-2004, TSa: Should custom entities (or ones defined in int/ext subset) override * pre-defined settings for these? */ char d = CHAR_NULL; if (c == 'a') { // amp or apos? if (id.equals("amp")) { d = '&'; } else if (id.equals("apos")) { d = '\''; } } else if (c == 'g') { // gt? if (id.length() == 2 && id.charAt(1) == 't') { d = '>'; } } else if (c == 'l') { // lt? if (id.length() == 2 && id.charAt(1) == 't') { d = '<'; } } else if (c == 'q') { // quot? if (id.equals("quot")) { d = '"'; } } if (d != CHAR_NULL) { if (mCfgTreatCharRefsAsEntities) { final char[] originalChars = new char[id.length()]; id.getChars(0, id.length(), originalChars, 0); mCurrEntity = getIntEntity(d, originalChars); return 0; } return d; } final EntityDecl e = expandEntity(id, allowExt, null); if (mCfgTreatCharRefsAsEntities) { mCurrEntity = e; } return 0; } /** * Returns an entity (possibly from cache) for the argument character using the encoded * representation in mInputBuffer[entityStartPos ... mInputPtr-1]. 
*/ protected EntityDecl getIntEntity(int ch, final char[] originalChars) { String cacheKey = new String(originalChars); IntEntity entity = (IntEntity) mCachedEntities.get(cacheKey); if (entity == null) { String repl; if (ch <= 0xFFFF) { repl = Character.toString((char) ch); } else { StringBuffer sb = new StringBuffer(2); ch -= 0x10000; sb.append((char) ((ch >> 10) + 0xD800)); sb.append((char) ((ch & 0x3FF) + 0xDC00)); repl = sb.toString(); } entity = IntEntity.create(new String(originalChars), repl); mCachedEntities.put(cacheKey, entity); } return entity; } /** * Helper method that will try to expand a parsed entity (parameter or * generic entity). *
* note: called by sub-classes (dtd parser), needs to be protected. * * @param id Name of the entity being expanded * @param allowExt Whether external entities can be expanded or not; if * not, and the entity to expand would be external one, an exception * will be thrown */ protected EntityDecl expandEntity(String id, boolean allowExt, Object extraArg) throws XMLStreamException { mCurrName = id; EntityDecl ed = findEntity(id, extraArg); if (ed == null) { /* 30-Sep-2005, TSa: As per [WSTX-5], let's only throw exception * if we have to resolve it (otherwise it's just best-effort, * and null is ok) */ /* 02-Oct-2005, TSa: Plus, [WSTX-4] adds "undeclared entity * resolver" */ if (mCfgReplaceEntities) { mCurrEntity = expandUnresolvedEntity(id); } return null; } if (!mCfgTreatCharRefsAsEntities || this instanceof MinimalDTDReader) { expandEntity(ed, allowExt); } return ed; } /** * *
* note: defined as private for documentation, ie. it's just called * from within this class (not sub-classes), from one specific method * (see above) * * @param ed Entity to be expanded * @param allowExt Whether external entities are allowed or not. */ private void expandEntity(EntityDecl ed, boolean allowExt) throws XMLStreamException { String id = ed.getName(); /* Very first thing; we can immediately check if expanding * this entity would result in infinite recursion: */ if (mInput.isOrIsExpandedFrom(id)) { throwRecursionError(id); } /* Should not refer unparsed entities from attribute values * or text content (except via notation mechanism, but that's * not parsed here) */ if (!ed.isParsed()) { throwParseError("Illegal reference to unparsed external entity \"{0}\"", id, null); } // 28-Jun-2004, TSa: Do we support external entity expansion? boolean isExt = ed.isExternal(); if (isExt) { if (!allowExt) { // never ok in attribute value... throwParseError("Encountered a reference to external parsed entity \"{0}\" when expanding attribute value: not legal as per XML 1.0/1.1 #3.1", id, null); } if (!mConfig.willSupportExternalEntities()) { throwParseError("Encountered a reference to external entity \"{0}\", but stream reader has feature \"{1}\" disabled", id, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); } } // First, let's give current context chance to save its stuff WstxInputSource oldInput = mInput; oldInput.saveContext(this); WstxInputSource newInput = null; try { newInput = ed.expand(oldInput, mEntityResolver, mConfig, mDocXmlVersion); } catch (FileNotFoundException fex) { /* Let's catch and rethrow this just so we get more meaningful * description (with input source position etc) */ throwParseError("(was {0}) {1}", fex.getClass().getName(), fex.getMessage()); } catch (IOException ioe) { throw constructFromIOE(ioe); } /* And then we'll need to make sure new input comes from the new * input source */ initInputSource(newInput, isExt, id); } /** *
* note: only called from the local expandEntity() method */ private EntityDecl expandUnresolvedEntity(String id) throws XMLStreamException { XMLResolver resolver = mConfig.getUndeclaredEntityResolver(); if (resolver != null) { /* Ok, we can check for recursion here; but let's only do that * if there is any chance that it might get resolved by * the special resolver (it must have been resolved this way * earlier, too...) */ if (mInput.isOrIsExpandedFrom(id)) { throwRecursionError(id); } WstxInputSource oldInput = mInput; oldInput.saveContext(this); // null, null -> no public or system ids int xmlVersion = mDocXmlVersion; // 05-Feb-2006, TSa: If xmlVersion not explicitly known, defaults to 1.0 if (xmlVersion == XmlConsts.XML_V_UNKNOWN) { xmlVersion = XmlConsts.XML_V_10; } WstxInputSource newInput; try { newInput = DefaultInputResolver.resolveEntityUsing (oldInput, id, null, null, resolver, mConfig, xmlVersion); if (mCfgTreatCharRefsAsEntities) { return new IntEntity(WstxInputLocation.getEmptyLocation(), newInput.getEntityId(), newInput.getSource(), new char[]{}, WstxInputLocation.getEmptyLocation()); } } catch (IOException ioe) { throw constructFromIOE(ioe); } if (newInput != null) { // true -> is external initInputSource(newInput, true, id); return null; } } handleUndeclaredEntity(id); return null; } /* /////////////////////////////////////////////////////////// // Abstract methods for sub-classes to implement /////////////////////////////////////////////////////////// */ /** * Abstract method for sub-classes to implement, for finding * a declared general or parsed entity. * * @param id Identifier of the entity to find * @param arg Optional argument passed from caller; needed by DTD * reader. */ protected abstract EntityDecl findEntity(String id, Object arg) throws XMLStreamException; /** * This method gets called if a declaration for an entity was not * found in entity expanding mode (enabled by default for xml reader, * always enabled for dtd reader). 
*/ protected abstract void handleUndeclaredEntity(String id) throws XMLStreamException; protected abstract void handleIncompleteEntityProblem(WstxInputSource closing) throws XMLStreamException; /* /////////////////////////////////////////////////////////// // Basic tokenization /////////////////////////////////////////////////////////// */ /** * Method that will parse name token (roughly equivalent to XML specs; * although bit lenier for more efficient handling); either uri prefix, * or local name. *
* Much of complexity in this method has to do with the intention to * try to avoid any character copies. In this optimal case algorithm * would be fairly simple. However, this only works if all data is * already in input buffer... if not, copy has to be made halfway * through parsing, and that complicates things. *
* One thing to note is that String returned has been canonicalized * and (if necessary) added to symbol table. It can thus be compared * against other such (usually id) Strings, with simple equality operator. * * @param c First character of the name; not yet checked for validity * * @return Canonicalized name String (which may have length 0, if * EOF or non-name-start char encountered) */ protected String parseLocalName(char c) throws XMLStreamException { /* Has to start with letter, or '_' (etc); we won't allow ':' as that * is taken as namespace separator; no use trying to optimize * heavily as it's 98% likely it is a valid char... */ if (!isNameStartChar(c)) { if (c == ':') { throwUnexpectedChar(c, " (missing namespace prefix?)"); } throwUnexpectedChar(c, " (expected a name start character)"); } int ptr = mInputPtr; int hash = (int) c; final int inputLen = mInputEnd; int startPtr = ptr-1; // already read previous char final char[] inputBuf = mInputBuffer; /* After which there may be zero or more name chars * we have to consider */ while (true) { if (ptr >= inputLen) { /* Ok, identifier may continue past buffer end, need * to continue with part 2 (separate method, as this is * not as common as having it all in buffer) */ mInputPtr = ptr; return parseLocalName2(startPtr, hash); } // Ok, we have the char... is it a name char? c = inputBuf[ptr]; if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) { break; } if (!isNameChar(c)) { break; } hash = (hash * 31) + (int) c; ++ptr; } mInputPtr = ptr; return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash); } /** * Second part of name token parsing; called when name can continue * past input buffer end (so only part was read before calling this * method to read the rest). *
* Note that this isn't heavily optimized, on assumption it's not * called very often. */ protected String parseLocalName2(int start, int hash) throws XMLStreamException { int ptr = mInputEnd - start; // Let's assume fairly short names char[] outBuf = getNameBuffer(ptr+8); if (ptr > 0) { System.arraycopy(mInputBuffer, start, outBuf, 0, ptr); } int outLen = outBuf.length; while (true) { // note: names can not cross input block (entity) boundaries... if (mInputPtr >= mInputEnd) { if (!loadMoreFromCurrent()) { break; } } char c = mInputBuffer[mInputPtr]; if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) { break; } if (!isNameChar(c)) { break; } ++mInputPtr; if (ptr >= outLen) { mNameBuffer = outBuf = expandBy50Pct(outBuf); outLen = outBuf.length; } outBuf[ptr++] = c; hash = (hash * 31) + (int) c; } // Still need to canonicalize the name: return mSymbols.findSymbol(outBuf, 0, ptr, hash); } /** * Method that will parse 'full' name token; what full means depends on * whether reader is namespace aware or not. If it is, full name means * local name with no namespace prefix (PI target, entity/notation name); * if not, name can contain arbitrary number of colons. Note that * element and attribute names are NOT parsed here, so actual namespace * prefix separation can be handled properly there. *
* Similar to {@link #parseLocalName}, much of complexity stems from * trying to avoid copying name characters from input buffer. *
* Note that returned String will be canonicalized, similar to
* {@link #parseLocalName}, but without separating prefix/local name.
*
* @return Canonicalized name String (which may have length 0, if
* EOF or non-name-start char encountered)
*/
protected String parseFullName()
    throws XMLStreamException
{
    // NOTE(review): the result of loadMoreFromCurrent() is ignored here,
    // unlike in parseFullName2(); confirm how end-of-input is meant to be
    // handled before the buffer read below.
    if (mInputPtr >= mInputEnd) {
        loadMoreFromCurrent();
    }
    return parseFullName(mInputBuffer[mInputPtr++]);
}

/**
 * Parses a full name (colons are accepted only when namespace support is
 * disabled) whose first character has already been read by the caller.
 *
 * @param c First character of the name; the name is taken to start one
 *   position before the current input pointer
 *
 * @return Name as returned by <code>mSymbols.findSymbol()</code>
 */
protected String parseFullName(char c)
    throws XMLStreamException
{
    // First char has special handling:
    if (!isNameStartChar(c)) {
        if (c == ':') { // no name.... generally an error:
            if (mCfgNsEnabled) {
                throwNsColonException(parseFNameForError());
            }
            // Ok, that's fine actually
        } else {
            if (c <= CHAR_SPACE) {
                throwUnexpectedChar(c, " (missing name?)");
            }
            throwUnexpectedChar(c, " (expected a name start character)");
        }
    }

    int ptr = mInputPtr;
    // Hash is accumulated as (hash * 31) + char, seeded with the first char
    int hash = (int) c;
    int inputLen = mInputEnd;
    int startPtr = ptr-1; // to account for the first char

    /* After which there may be zero or more name chars
     * we have to consider
     */
    while (true) {
        if (ptr >= inputLen) {
            /* Ok, identifier may continue past buffer end, need
             * to continue with part 2 (separate method, as this is
             * not as common as having it all in buffer)
             */
            mInputPtr = ptr;
            return parseFullName2(startPtr, hash);
        }
        c = mInputBuffer[ptr];
        if (c == ':') { // colon only allowed in non-NS mode
            if (mCfgNsEnabled) {
                // Input pointer is left at the colon, so that the rest of
                // the name (colon included) ends up in the error message
                mInputPtr = ptr;
                throwNsColonException(new String(mInputBuffer, startPtr, ptr - startPtr) + parseFNameForError());
            }
        } else {
            if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
                break;
            }
            if (!isNameChar(c)) {
                break;
            }
        }
        hash = (hash * 31) + (int) c;
        ++ptr;
    }
    mInputPtr = ptr;
    return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash);
}

/**
 * Continuation of {@link #parseFullName(char)} for names that reach the
 * end of the currently buffered input: the part seen so far is copied
 * into the name buffer, and the remaining name characters are appended
 * to it one by one.
 *
 * @param start Offset in the input buffer at which the name starts
 * @param hash Hash accumulated over the characters seen so far
 */
protected String parseFullName2(int start, int hash)
    throws XMLStreamException
{
    // Number of name chars already available in the input buffer; also
    // used as the write offset into the output buffer from here on
    int ptr = mInputEnd - start;
    // Let's assume fairly short names
    char[] outBuf = getNameBuffer(ptr+8);

    if (ptr > 0) {
        System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);
    }

    int outLen = outBuf.length;
    while (true) {
        /* 06-Sep-2004, TSa: Name tokens are not allowed to continue
         * past entity expansion ranges... that is, all characters
         * have to come from the same input source. Thus, let's only
         * load things from same input level
         */
        if (mInputPtr >= mInputEnd) {
            if (!loadMoreFromCurrent()) {
                break;
            }
        }
        char c = mInputBuffer[mInputPtr];
        if (c == ':') { // colon only allowed in non-NS mode
            if (mCfgNsEnabled) {
                // NOTE(review): the input pointer has not been advanced past
                // the colon here, so parseFNameForError() appears to read it
                // again after it was already appended via 'c'; confirm.
                throwNsColonException(new String(outBuf, 0, ptr) + c + parseFNameForError());
            }
        } else if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
            break;
        } else if (!isNameChar(c)) {
            break;
        }
        ++mInputPtr;

        if (ptr >= outLen) {
            // Expanded buffer is also stored back as the shared name buffer
            mNameBuffer = outBuf = expandBy50Pct(outBuf);
            outLen = outBuf.length;
        }
        outBuf[ptr++] = c;
        hash = (hash * 31) + (int) c;
    }

    // Still need to canonicalize the name:
    return mSymbols.findSymbol(outBuf, 0, ptr, hash);
}

/**
 * Method called to read in full name, including unlimited number of
 * namespace separators (':'), for the purpose of displaying name in
 * an error message. Won't do any further validations, and parsing
 * is not optimized: main need is just to get more meaningful error
 * messages.
 */
protected String parseFNameForError()
    throws XMLStreamException
{
    StringBuffer sb = new StringBuffer(100);
    while (true) {
        char c;

        if (mInputPtr < mInputEnd) {
            c = mInputBuffer[mInputPtr++];
        } else { // can't error here, so let's accept EOF for now:
            int i = getNext();
            if (i < 0) {
                break;
            }
            c = (char) i;
        }
        if (c != ':' && !isNameChar(c)) {
            // Not part of the name: push it back and stop
            --mInputPtr;
            break;
        }
        sb.append(c);
    }
    return sb.toString();
}

/**
 * Parses the name of an entity reference, including the semicolon that
 * has to follow the name.
 *
 * @param c First character of the entity name
 *
 * @return Name of the entity, as returned by {@link #parseFullName(char)}
 */
protected final String parseEntityName(char c)
    throws XMLStreamException
{
    String id = parseFullName(c);
    // Needs to be followed by a semi-colon, too.. from same input source:
    if (mInputPtr >= mInputEnd) {
        if (!loadMoreFromCurrent()) {
            throwParseError("Missing semicolon after reference for entity \"{0}\"", id, null);
        }
    }
    c = mInputBuffer[mInputPtr++];
    if (c != ';') {
        throwUnexpectedChar(c, "; expected a semi-colon after the reference for entity '"+id+"'");
    }
    return id;
}

/**
 * Note: does not check for number of colons, amongst other things.
 * Main idea is to skip through what superficially seems like a valid
 * id, nothing more. This is only done when really skipping through
 * something we do not care about at all: not even whether names/ids
 * would be valid (for example, when ignoring internal DTD subset).
 *
 * @return Length of skipped name.
 */
protected int skipFullName(char c)
    throws XMLStreamException
{
    if (!isNameStartChar(c)) {
        // Not a name at all: push the character back
        --mInputPtr;
        return 0;
    }

    /* After which there may be zero or more name chars
     * we have to consider
     */
    int count = 1;
    while (true) {
        c = (mInputPtr < mInputEnd) ?
            mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_EOF_EXP_NAME);
        // Note: the character that ends the name is not pushed back here
        if (c != ':' && !isNameChar(c)) {
            break;
        }
        ++count;
    }
    return count;
}

/**
* Simple parsing method that parses system ids, which are generally
* used in entities (from DOCTYPE declaration to internal/external
* subsets).
*
* NOTE: returned String is not canonicalized, on assumption that
* external ids may be longish, and are not shared all that often, as
* they are generally just used for resolving paths, if anything.
*
* Also note that this method is not heavily optimized, as it's not
* likely to be a bottleneck for parsing.
*/
protected final String parseSystemId(char quoteChar, boolean convertLFs,
                                     String errorMsg)
    throws XMLStreamException
{
    // Contents are collected into the shared name buffer (grown locally
    // as needed), and copied into a new String at the end.
    char[] buf = getNameBuffer(-1);
    int ptr = 0;

    while (true) {
        char c = (mInputPtr < mInputEnd) ?
            mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
        if (c == quoteChar) {
            break;
        }
        /* ??? 14-Jun-2004, TSa: Should we normalize linefeeds or not?
         * It seems like we should, for all input... so that's the way it
         * works.
         */
        if (c == '\n') {
            markLF();
        } else if (c == '\r') {
            if (peekNext() == '\n') {
                ++mInputPtr;
                if (!convertLFs) {
                    /* The only tricky thing; need to preserve 2-char LF; need to
                     * output one char from here, then can fall back to default:
                     */
                    if (ptr >= buf.length) {
                        buf = expandBy50Pct(buf);
                    }
                    buf[ptr++] = '\r';
                }
                c = '\n';
            } else if (convertLFs) {
                c = '\n';
            }
            /* Lone CR and CR+LF end a line just like a plain LF does, so
             * the row bookkeeping has to be updated for them as well
             * (parseUntil() does the same). Input pointer is already past
             * the whole linefeed sequence at this point.
             */
            markLF();
        }

        // Other than that, let's just append it:
        if (ptr >= buf.length) {
            buf = expandBy50Pct(buf);
        }
        buf[ptr++] = c;
    }

    return (ptr == 0) ? "" : new String(buf, 0, ptr);
}
/**
* Simple parsing method that parses public ids, which are generally
* used in entities (from DOCTYPE declaration to internal/external
* subsets).
*
* As per xml specs, the contents are actually normalized.
*
* NOTE: returned String is not canonicalized, on assumption that
* external ids may be longish, and are not shared all that often, as
* they are generally just used for resolving paths, if anything.
*
* Also note that this method is not heavily optimized, as it's not
* likely to be a bottleneck for parsing.
*/
protected final String parsePublicId(char quoteChar, String errorMsg)
    throws XMLStreamException
{
    char[] buf = getNameBuffer(-1);
    int ptr = 0;
    // Set when white space has been seen but not yet output
    boolean spaceToAdd = false;

    while (true) {
        char c = (mInputPtr < mInputEnd) ?
            mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
        if (c == quoteChar) {
            break;
        }
        // All white space (linefeeds included) only marks a pending space:
        if (c == '\n') {
            markLF();
            spaceToAdd = true;
            continue;
        }
        if (c == '\r') {
            if (peekNext() == '\n') {
                ++mInputPtr;
            }
            /* Lone CR and CR+LF end a line as well, so row bookkeeping
             * needs to be updated here too (as parseUntil() does); input
             * pointer is already past the whole linefeed sequence.
             */
            markLF();
            spaceToAdd = true;
            continue;
        }
        if (c == CHAR_SPACE) {
            spaceToAdd = true;
            continue;
        }

        // Verify it's a legal pubid char (see XML spec, #13, from 2.3)
        if ((c >= VALID_PUBID_CHAR_COUNT)
            || sPubidValidity[c] != PUBID_CHAR_VALID_B) {
            throwUnexpectedChar(c, " in public identifier");
        }

        /* Space-normalization means scrapping leading and trailing
         * white space, and coalescing remaining ws into single spaces.
         * Since pending white space is only output right before the next
         * non-space character, trailing white space is dropped, and
         * leading white space is skipped by checking that something has
         * already been output.
         */
        if (spaceToAdd) {
            spaceToAdd = false;
            if (ptr > 0) {
                if (ptr >= buf.length) {
                    buf = expandBy50Pct(buf);
                }
                buf[ptr++] = CHAR_SPACE;
            }
        }

        // Other than that, let's just append it:
        if (ptr >= buf.length) {
            buf = expandBy50Pct(buf);
        }
        buf[ptr++] = c;
    }

    return (ptr == 0) ? "" : new String(buf, 0, ptr);
}
/**
 * Method that reads input up to the first occurrence of the given end
 * character, appending everything before it to the given text buffer.
 * The end character itself is consumed, but not appended.
 *
 * @param tb Buffer that the content is appended to
 * @param endChar Character that ends the content
 * @param convertLFs If true, "\r" and "\r\n" are appended as a single
 *   '\n'; if false, they are appended as they were found
 * @param errorMsg Passed on to <code>loadMore()</code> and
 *   <code>getNextChar()</code> when more input is needed (presumably
 *   used for reporting unexpected end of input)
 */
protected final void parseUntil(TextBuffer tb, char endChar, boolean convertLFs,
                                String errorMsg)
    throws XMLStreamException
{
    // Let's first ensure we have some data in there...
    if (mInputPtr >= mInputEnd) {
        loadMore(errorMsg);
    }
    while (true) {
        // Let's loop consecutive 'easy' spans:
        char[] inputBuf = mInputBuffer;
        int inputLen = mInputEnd;
        int ptr = mInputPtr;
        int startPtr = ptr;
        while (ptr < inputLen) {
            char c = inputBuf[ptr++];
            if (c == endChar) {
                int thisLen = ptr - startPtr - 1;
                if (thisLen > 0) {
                    tb.append(inputBuf, startPtr, thisLen);
                }
                mInputPtr = ptr;
                return;
            }
            if (c == '\n') {
                mInputPtr = ptr; // markLF() requires this
                markLF();
            } else if (c == '\r') {
                if (!convertLFs && ptr < inputLen) {
                    // Nothing to convert, and next char is at hand: the
                    // linefeed simply stays part of the current span
                    if (inputBuf[ptr] == '\n') {
                        ++ptr;
                    }
                    mInputPtr = ptr;
                    markLF();
                } else {
                    // Need to flush the span before the CR, and then
                    // output the linefeed separately
                    int thisLen = ptr - startPtr - 1;
                    if (thisLen > 0) {
                        tb.append(inputBuf, startPtr, thisLen);
                    }
                    mInputPtr = ptr;
                    c = getNextChar(errorMsg);
                    if (c != '\n') {
                        --mInputPtr; // pushback
                        tb.append(convertLFs ? '\n' : '\r');
                    } else {
                        if (convertLFs) {
                            tb.append('\n');
                        } else {
                            tb.append('\r');
                            tb.append('\n');
                        }
                    }
                    /* getNextChar() may have had to load more input (when
                     * the CR was the last buffered char), in which case
                     * the locally cached buffer and its end are no longer
                     * valid: need to re-sync them along with the pointers,
                     * so as not to read stale content or skip over part
                     * of the newly loaded input.
                     */
                    inputBuf = mInputBuffer;
                    inputLen = mInputEnd;
                    startPtr = ptr = mInputPtr;
                    markLF();
                }
            }
        }
        // Ran out of buffered input: flush what we have, and get more
        int thisLen = ptr - startPtr;
        if (thisLen > 0) {
            tb.append(inputBuf, startPtr, thisLen);
        }
        loadMore(errorMsg);
    }
}
/*
///////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////
*/
/**
 * Method that decodes the numeric part of a character reference, up to
 * and including the terminating semicolon. First character read decides
 * the notation: 'x' means hexadecimal digits follow, anything else is
 * handled as the first character of a decimal number.
 *
 * @param originalCharacters If not null, characters of the reference
 *   are appended to it as they are read (terminating semicolon is not
 *   appended, unless it is the very first character read)
 *
 * @return Decoded character code, after it has been passed through
 *   {@link #validateChar}
 */
private int resolveCharEnt(StringBuffer originalCharacters)
    throws XMLStreamException
{
    final boolean keepOriginal = (originalCharacters != null);
    int code = 0;
    char ch = getNextChar(SUFFIX_IN_ENTITY_REF);

    if (keepOriginal) {
        originalCharacters.append(ch);
    }

    if (ch != 'x') { // numeric (decimal)
        while (ch != ';') {
            if (ch < '0' || ch > '9') {
                throwUnexpectedChar(ch, "; expected a decimal number.");
            } else {
                code = (code * 10) + (ch - '0');
                // Checked for every digit, so that the value can not wrap
                if (code > MAX_UNICODE_CHAR) {
                    reportUnicodeOverflow();
                }
            }
            if (mInputPtr < mInputEnd) {
                ch = mInputBuffer[mInputPtr++];
            } else {
                ch = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
            }
            if (keepOriginal && ch != ';') {
                originalCharacters.append(ch);
            }
        }
    } else { // hex
        while (true) {
            if (mInputPtr < mInputEnd) {
                ch = mInputBuffer[mInputPtr++];
            } else {
                ch = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
            }
            if (ch == ';') {
                break;
            }
            if (keepOriginal) {
                originalCharacters.append(ch);
            }
            code <<= 4;

            int nibble = -1;
            if (ch >= '0' && ch <= '9') {
                nibble = ch - '0';
            } else if (ch >= 'a' && ch <= 'f') {
                nibble = 10 + (ch - 'a');
            } else if (ch >= 'A' && ch <= 'F') {
                nibble = 10 + (ch - 'A');
            }
            if (nibble < 0) {
                throwUnexpectedChar(ch, "; expected a hex digit (0-9a-fA-F).");
            } else {
                code += nibble;
            }
            // Overflow?
            if (code > MAX_UNICODE_CHAR) {
                reportUnicodeOverflow();
            }
        }
    }

    validateChar(code);
    return code;
}
/**
* Method that will verify that expanded Unicode codepoint is a valid
* XML content character.
*/
private final void validateChar(int value)
    throws XMLStreamException
{
    if (value < 0xD800) {
        // Below surrogate range, only control characters need checking
        if (value >= 32) {
            return;
        }
        if (value == 0) {
            throwParseError("Invalid character reference: null character not allowed in XML content.");
        }
        // XML 1.1 allows most other chars; 1.0 only tab, LF and CR:
        boolean allowedIn10 = (value == 0x9 || value == 0xA || value == 0xD);
        if (!mXml11 && !allowedIn10) {
            reportIllegalChar(value);
        }
        return;
    }

    /* 24-Jan-2006, TSa: Ok, "high" Unicode chars are problematic,
     * need to be reported by a surrogate pair..
     */
    if (value < 0xE000) { // no surrogates via entity expansion
        reportIllegalChar(value);
    }
    if (value <= 0xFFFF) {
        if (value >= 0xFFFE) { // 0xFFFE and 0xFFFF are illegal too
            reportIllegalChar(value);
        }
    } else if (value > MAX_UNICODE_CHAR) { // Within valid range at all?
        reportUnicodeOverflow();
    }
    // Ok, fine as is
}
/**
 * Method for getting the shared name buffer, (re)allocating it if
 * it does not exist yet, or if its length does not exceed the requested
 * minimum size.
 * Note that when a new buffer is allocated, contents of the old one
 * are not copied over.
 *
 * @param minSize Minimum size needed; buffer returned will be longer
 *   than this
 *
 * @return Buffer that is also stored as the current name buffer
 */
protected final char[] getNameBuffer(int minSize)
{
    char[] current = mNameBuffer;

    if (current == null) { // first use; start with at least 64 chars
        int initialSize = (minSize > 48) ? (minSize+16) : 64;
        current = new char[initialSize];
        mNameBuffer = current;
        return current;
    }
    if (minSize < current.length) { // big enough as is
        return current;
    }
    // Too small (let's allow one char extra...): grow by 50%, or to the
    // requested size plus some slack, if that is not enough
    int grownSize = current.length + (current.length >> 1);
    int newSize = (minSize >= grownSize) ? (minSize+16) : grownSize;
    current = new char[newSize];
    mNameBuffer = current;
    return current;
}
/**
 * Method that allocates a new array that is 50% longer than the given
 * one (and always at least one element longer), and copies contents of
 * the given array to the beginning of the new one.
 *
 * @param buf Array to expand; not modified
 *
 * @return Newly allocated, longer array with the contents of
 *   <code>buf</code>
 */
protected final char[] expandBy50Pct(char[] buf)
{
    int len = buf.length;
    /* For arrays shorter than 2 elements (len >> 1) is zero, which would
     * result in an array that is no longer than the original: callers
     * write to the returned array right away, so need to ensure that it
     * always grows.
     */
    int growth = Math.max(1, len >> 1);
    char[] newBuf = new char[len + growth];
    System.arraycopy(buf, 0, newBuf, 0, len);
    return newBuf;
}
/**
* Method called to throw an exception indicating that a name that
* should not be namespace-qualified (PI target, entity/notation name)
* is one, and reader is namespace aware.
*/
private void throwNsColonException(String name)
    throws XMLStreamException
{
    // Offending name is passed as the first (and only) message argument
    final String msgTemplate = "Illegal name \"{0}\" (PI target, entity/notation name): can not contain a colon (XML Namespaces 1.0#6)";
    throwParseError(msgTemplate, name, null);
}
/**
 * Method called to report that expansion of an entity refers back to
 * the entity itself.
 *
 * @param entityName Name of the entity, passed as the message argument
 */
private void throwRecursionError(String entityName)
    throws XMLStreamException
{
    final String msgTemplate = "Illegal entity expansion: entity \"{0}\" expands itself recursively.";
    throwParseError(msgTemplate, entityName, null);
}
/**
 * Method called to report that value of a character entity is above
 * the highest allowed character code; the limit is included in the
 * message, in hexadecimal notation.
 */
private void reportUnicodeOverflow()
    throws XMLStreamException
{
    final String maxAsHex = Integer.toHexString(MAX_UNICODE_CHAR);
    throwParseError("Illegal character entity: value higher than max allowed (0x{0})", maxAsHex, null);
}
/**
 * Method called to report that a character entity expands to a
 * character that is not allowed; character code is included in the
 * message, in hexadecimal notation.
 *
 * @param value Character code the entity expands to
 */
private void reportIllegalChar(int value)
    throws XMLStreamException
{
    // Note: closing parenthesis was missing from the message
    throwParseError("Illegal character entity: expansion character (code 0x{0})", Integer.toHexString(value), null);
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/sr/Element.java 0000644 0001750 0001750 00000004302 11745427074 022551 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sr;
/**
* Container for information collected regarding a single
* (start) element instance.
*
* This class is not exposed outside of the package and is considered
* part of internal implementation.
*
* @since 4.1
*/
final class Element
{
    // // // Element name

    /**
     * Local name of this element
     */
    protected String mLocalName;

    /**
     * Prefix this element has, if any; null if none
     */
    protected String mPrefix;

    /**
     * Namespace this element is in
     */
    protected String mNamespaceURI;

    /**
     * Default namespace for this element.
     */
    protected String mDefaultNsURI;

    // // // Namespace support

    /**
     * Offset within namespace array, maintained by
     * {@link InputElementStack} that owns this element.
     */
    protected int mNsOffset;

    // // // Back links to parent element(s)

    /**
     * Parent element, if any; null for root
     */
    protected Element mParent;

    /*
    /////////////////////////////////////////////////////////
    // Life-cycle
    /////////////////////////////////////////////////////////
     */

    /**
     * Note: only parent, namespace offset, prefix and local name are
     * assigned here; namespace URI fields are left as they are.
     */
    public Element(Element parent, int nsOffset, String prefix, String ln)
    {
        mParent = parent;
        mNsOffset = nsOffset;
        mPrefix = prefix;
        mLocalName = ln;
    }

    /**
     * Method for re-initializing this instance; assigns the same fields
     * as the constructor does.
     */
    public void reset(Element parent, int nsOffset, String prefix, String ln)
    {
        mParent = parent;
        mNsOffset = nsOffset;
        mPrefix = prefix;
        mLocalName = ln;
    }

    /**
     * Method called to temporarily "store" this Element for later reuse.
     */
    public void relink(Element next)
    {
        // Parent link doubles as the link to the next stored instance
        mParent = next;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/sr/AttributeCollector.java 0000644 0001750 0001750 00000114700 11745427074 024776 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/ package com.ctc.wstx.sr; import java.io.IOException; import java.util.Arrays; import javax.xml.XMLConstants; import javax.xml.stream.Location; import javax.xml.stream.XMLStreamException; import javax.xml.namespace.QName; import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder; import org.codehaus.stax2.ri.typed.ValueDecoderFactory; import org.codehaus.stax2.typed.Base64Variant; import org.codehaus.stax2.typed.TypedArrayDecoder; import org.codehaus.stax2.typed.TypedValueDecoder; import org.codehaus.stax2.typed.TypedXMLStreamException; import org.codehaus.stax2.validation.XMLValidator; import com.ctc.wstx.api.ReaderConfig; import com.ctc.wstx.cfg.ErrorConsts; import com.ctc.wstx.sw.XmlWriter; import com.ctc.wstx.util.*; /** * Shared base class that defines API stream reader uses to communicate * with the attribute collector implementation, independent of whether it's * operating in namespace-aware or non-namespace modes. * Collector class is used to build up attribute lists; for the most part * will just hold references to few specialized {@link TextBuilder}s that * are used to create efficient semi-shared value Strings. */ public final class AttributeCollector { final static int INT_SPACE = 0x0020; /** * Threshold value that indicates minimum length for lists instances * that need a Map structure, for fast attribute access by fully-qualified * name. */ protected final static int LONG_ATTR_LIST_LEN = 4; /** * Expected typical maximum number of attributes for any element; * chosen to minimize need to resize, while trying not to waste space. * Dynamically grown; better not to set too high to avoid excessive * overhead for small attribute-less documents. 
*/ protected final static int EXP_ATTR_COUNT = 12; protected final static int EXP_NS_COUNT = 6; /** * This value is used to indicate that we shouldn't keep track * of index of xml:id attribute -- generally done when Xml:id * support is disabled */ protected final static int XMLID_IX_DISABLED = -2; protected final static int XMLID_IX_NONE = -1; protected final static InternCache sInternCache = InternCache.getInstance(); /* /////////////////////////////////////////////////////////// // Configuration /////////////////////////////////////////////////////////// */ // // Settings for matching Xml:id attribute final String mXmlIdPrefix; final String mXmlIdLocalName; /* /////////////////////////////////////////////////////////// // Collected attribute (incl namespace attrs) information: /////////////////////////////////////////////////////////// */ /** * Array of attributes collected for this element. */ protected Attribute[] mAttributes; /** * Actual number of attributes collected, including attributes * added via default values. */ protected int mAttrCount; /** * Number of attribute values actually parsed, not including * ones created via default value expansion. Equal to or less than * {@link #mAttrCount}. */ protected int mNonDefCount; /** * Array of namespace declaration attributes collected for this element; * not used in non-namespace-aware mode */ protected Attribute[] mNamespaces; /** * Number of valid namespace declarations in {@link #mNamespaces}. */ protected int mNsCount; /** * Flag to indicate whether the default namespace has already been declared * for the current element. */ protected boolean mDefaultNsDeclared = false; /** * Index of "xml:id" attribute, if one exists for the current * element; {@link #XMLID_IX_NONE} if none. 
*/ protected int mXmlIdAttrIndex; /* /////////////////////////////////////////////////////////// // Attribute (and ns) value builders /////////////////////////////////////////////////////////// */ /** * TextBuilder into which values of all attributes are appended * to, including default valued ones (defaults are added after * explicit ones). * Constructed lazily, if and when needed (not needed * for short attribute-less docs) */ protected TextBuilder mValueBuilder = null; /** * TextBuilder into which values of namespace URIs are added (including * URI for the default namespace, if one defined). */ private final TextBuilder mNamespaceBuilder = new TextBuilder(EXP_NS_COUNT); /* ////////////////////////////////////////////////////////////// // Information that defines "Map-like" data structure used for // quick access to attribute values by fully-qualified name ////////////////////////////////////////////////////////////// */ /** * Encoding of a data structure that contains mapping from * attribute names to attribute index in main attribute name arrays. *
* Data structure contains two separate areas; main hash area (with
* size {@code mAttrHashSize}), and remaining spillover area
* that follows hash area up until (but not including)
* {@code mAttrSpillEnd} index.
* Main hash area only contains indexes (index+1; 0 signifying empty slot)
* to actual attributes; spillover area has both hash and index for
* any spilled entry. Spilled entries are simply stored in order
* added, and need to be searched using linear search. In case of both
* primary hash hits and spills, eventual comparison with the local
* name needs to be done with actual name array.
*/
protected int[] mAttrMap = null;
/**
* Size of hash area in {@code mAttrMap}; generally at least 20%
* more than number of attributes ({@code mAttrCount}).
*/
protected int mAttrHashSize;
/**
* Pointer to int slot right after last spill entry, in
* {@code mAttrMap} array.
*/
protected int mAttrSpillEnd;
/*
///////////////////////////////////////////////
// Life-cycle:
///////////////////////////////////////////////
*/
/**
 * Constructs a collector, setting up how the xml:id attribute is to be
 * recognized, and whether its index is to be tracked at all.
 *
 * @param cfg Configuration that determines (via
 *   <code>willDoXmlIdTyping()</code>) whether index of the xml:id
 *   attribute is tracked
 * @param nsAware If true, xml:id is matched as prefix "xml" and local
 *   name "id"; if false, as local name "xml:id" with no prefix
 */
protected AttributeCollector(ReaderConfig cfg, boolean nsAware)
{
    if (cfg.willDoXmlIdTyping()) {
        mXmlIdAttrIndex = XMLID_IX_NONE;
    } else {
        mXmlIdAttrIndex = XMLID_IX_DISABLED;
    }
    mXmlIdPrefix = nsAware ? "xml" : null;
    mXmlIdLocalName = nsAware ? "id" : "xml:id";
}
/**
* Method called to allow reusing of collector, usually right before
* starting collecting attributes for a new start tag.
*
* Note: public only so that it can be called by unit tests.
*/
public void reset()
{
    // Namespace declaration state only needs clearing if any were collected
    if (mNsCount > 0) {
        mNamespaceBuilder.reset();
        mDefaultNsDeclared = false;
        mNsCount = 0;
    }

    /* No need to clear attr name, or NS prefix Strings; they are
     * canonicalized and will be referenced by symbol table in any
     * case... so we can save trouble of cleaning them up. This Object
     * will get GC'ed soon enough, after parser itself gets disposed of.
     */
    if (mAttrCount > 0) {
        mValueBuilder.reset();
        mAttrCount = 0;
        // Negative values (none found, or tracking disabled) are left as is
        if (mXmlIdAttrIndex >= 0) {
            mXmlIdAttrIndex = XMLID_IX_NONE;
        }
    }
    /* Note: attribute values will be cleared later on, when validating
     * namespaces. This so that we know how much to clean up; and
     * occasionally can also just avoid clean up (when resizing)
     */
}

/**
 * Method that can be called to force space normalization (remove
 * leading/trailing spaces, replace non-spaces white space with
 * spaces, collapse spaces to one) on specified attribute.
 * Currently called by {@link InputElementStack} to force
 * normalization of Xml:id attribute
 */
public void normalizeSpacesInValue(int index)
{
    // StringUtil has a method, but it works on char arrays...
    char[] attrCB = mValueBuilder.getCharBuffer();
    String normValue = StringUtil.normalizeSpaces
        (attrCB, getValueStartOffset(index), getValueStartOffset(index+1));
    // Value is only replaced if a non-null result was returned
    if (normValue != null) {
        mAttributes[index].setValue(normValue);
    }
}

/*
///////////////////////////////////////////////
// Public accessors (for stream reader)
///////////////////////////////////////////////
 */

/**
 * @return Number of namespace declarations collected, including
 * possible default namespace declaration
 */
protected int getNsCount() {
    return mNsCount;
}

/**
 * @return Current value of the flag that indicates whether the default
 * namespace has been declared for the current element
 */
public boolean hasDefaultNs() {
    return mDefaultNsDeclared;
}

// // // Direct access to attribute/NS prefixes/localnames/URI

public final int getCount() {
    return mAttrCount;
}

/**
 * @return Number of attributes that were explicitly specified; may
 * be less than the total count due to attributes created using
 * attribute default values
 */
public int getSpecifiedCount() {
    return mNonDefCount;
}

/**
 * @param index Index of the namespace declaration; has to be between 0
 *   (inclusive) and number of declarations (exclusive), otherwise
 *   reported via <code>throwIndex()</code>
 */
public String getNsPrefix(int index) {
    if (index < 0 || index >= mNsCount) {
        throwIndex(index);
    }
    // for NS decls, local name is stored in prefix
    return mNamespaces[index].mLocalName;
}

public String getNsURI(int index) {
    if (index < 0 || index >= mNsCount) {
        throwIndex(index);
    }
    return mNamespaces[index].mNamespaceURI;
}

// // // Direct access to attribute/NS prefixes/localnames/URI

/**
 * @param index Index of the attribute; has to be between 0 (inclusive)
 *   and number of attributes (exclusive), otherwise reported via
 *   <code>throwIndex()</code>
 */
public String getPrefix(int index) {
    if (index < 0 || index >= mAttrCount) {
        throwIndex(index);
    }
    return mAttributes[index].mPrefix;
}

public String getLocalName(int index) {
    if (index < 0 || index >= mAttrCount) {
        throwIndex(index);
    }
    return mAttributes[index].mLocalName;
}

public String getURI(int index) {
    if (index < 0 || index >= mAttrCount) {
        throwIndex(index);
    }
    return mAttributes[index].mNamespaceURI;
}

public QName getQName(int index) {
    if (index < 0 || index >= mAttrCount) {
        throwIndex(index);
    }
    return mAttributes[index].getQName();
}

/**
*
* Note: the main reason this method is defined at this level, and * made final, is performance. JIT may be able to fully inline this * method, even when reference is via this base class. This is important * since this is likely to be the most often called method of the * collector instances. */ public final String getValue(int index) { if (index < 0 || index >= mAttrCount) { throwIndex(index); } String full = mValueBuilder.getAllValues(); Attribute attr = mAttributes[index]; ++index; if (index < mAttrCount) { // not last int endOffset = mAttributes[index].mValueStartOffset; return attr.getValue(full, endOffset); } // last can be optimized bit more: return attr.getValue(full); } public String getValue(String nsURI, String localName) { // Primary hit? int hashSize = mAttrHashSize; if (hashSize == 0) { // sanity check, for 'no attributes' return null; } int hash = localName.hashCode(); if (nsURI != null) { if (nsURI.length() == 0) { nsURI = null; } else { hash ^= nsURI.hashCode(); } } int ix = mAttrMap[hash & (hashSize-1)]; if (ix == 0) { // nothing in here; no spills either return null; } --ix; // Is primary candidate match? if (mAttributes[ix].hasQName(nsURI, localName)) { return getValue(ix); } /* Nope, need to traverse spill list, which has 2 entries for * each spilled attribute id; first for hash value, second index. */ for (int i = hashSize, len = mAttrSpillEnd; i < len; i += 2) { if (mAttrMap[i] != hash) { continue; } /* Note: spill indexes are not off-by-one, since there's no need * to mask 0 */ ix = mAttrMap[i+1]; if (mAttributes[ix].hasQName(nsURI, localName)) { return getValue(ix); } } return null; } public int findIndex(String localName) { return findIndex(null, localName); } public int findIndex(String nsURI, String localName) { /* Note: most of the code is from getValue().. could refactor * code, performance is bit of concern (one more method call * if index access was separate). * See comments on that method, for logics. */ // Primary hit? 
int hashSize = mAttrHashSize; if (hashSize == 0) { // sanity check, for 'no attributes' return -1; } int hash = localName.hashCode(); if (nsURI != null) { if (nsURI.length() == 0) { nsURI = null; } else { hash ^= nsURI.hashCode(); } } int ix = mAttrMap[hash & (hashSize-1)]; if (ix == 0) { // nothing in here; no spills either return -1; } --ix; // Is primary candidate match? if (mAttributes[ix].hasQName(nsURI, localName)) { return ix; } /* Nope, need to traverse spill list, which has 2 entries for * each spilled attribute id; first for hash value, second index. */ for (int i = hashSize, len = mAttrSpillEnd; i < len; i += 2) { if (mAttrMap[i] != hash) { continue; } /* Note: spill indexes are not off-by-one, since there's no need * to mask 0 */ ix = mAttrMap[i+1]; if (mAttributes[ix].hasQName(nsURI, localName)) { return ix; } } return -1; } public final boolean isSpecified(int index) { return (index < mNonDefCount); } public final int getXmlIdAttrIndex() { return mXmlIdAttrIndex; } /* ////////////////////////////////////////////////////// // Type-safe accessors to support TypedXMLStreamReader ////////////////////////////////////////////////////// */ /** * Method called to decode the whole attribute value as a single * typed value. * Decoding is done using the decoder provided. */ public final void decodeValue(int index, TypedValueDecoder tvd) throws IllegalArgumentException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } /* Should be faster to pass the char array even if we might * have a String */ // Either way, need to trim before passing: char[] buf = mValueBuilder.getCharBuffer(); int start = mAttributes[index].mValueStartOffset; int end = getValueStartOffset(index+1); while (true) { if (start >= end) { tvd.handleEmptyValue(); return; } if (!StringUtil.isSpace(buf[start])) { break; } ++start; } // Trailing space? 
while (--end > start && StringUtil.isSpace(buf[end])) { } tvd.decode(buf, start, end+1); } /** * Method called to decode the attribute value that consists of * zero or more space-separated tokens. * Decoding is done using the decoder provided. * @return Number of tokens decoded */ public final int decodeValues(int index, TypedArrayDecoder tad, InputProblemReporter rep) throws XMLStreamException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } // Char[] faster than String... and no need to trim here: return decodeValues(tad, rep, mValueBuilder.getCharBuffer(), mAttributes[index].mValueStartOffset, getValueStartOffset(index+1)); } public final byte[] decodeBinary(int index, Base64Variant v, CharArrayBase64Decoder dec, InputProblemReporter rep) throws XMLStreamException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } /* No point in trying to use String representation, even if one * available, faster to process from char[] */ Attribute attr = mAttributes[index]; char[] cbuf = mValueBuilder.getCharBuffer(); int start = attr.mValueStartOffset; int end = getValueStartOffset(index+1); int len = end-start; dec.init(v, true, cbuf, start, len, null); try { return dec.decodeCompletely(); } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception String lexical = new String(cbuf, start, len); throw new TypedXMLStreamException(lexical, iae.getMessage(), rep.getLocation(), iae); } } private final static int decodeValues(TypedArrayDecoder tad, InputProblemReporter rep, final char[] buf, int ptr, final int end) throws XMLStreamException { int start = ptr; int count = 0; try { decode_loop: while (ptr < end) { // First, any space to skip? while (buf[ptr] <= INT_SPACE) { if (++ptr >= end) { break decode_loop; } } // Then let's figure out non-space char (token) start = ptr; ++ptr; while (ptr < end && buf[ptr] > INT_SPACE) { ++ptr; } int tokenEnd = ptr; ++ptr; // to skip trailing space (or, beyond end) // Ok, decode... 
any more room? ++count; if (tad.decodeValue(buf, start, tokenEnd)) { if (!checkExpand(tad)) { break; } } } } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception Location loc = rep.getLocation(); String lexical = new String(buf, start, (ptr-start)); throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae); } return count; } /** * Internal method used to see if we can expand the buffer that * the array decoder has. Bit messy, but simpler than having * separately typed instances; and called rarely so that performance * downside of instanceof is irrelevant. */ private final static boolean checkExpand(TypedArrayDecoder tad) { if (tad instanceof ValueDecoderFactory.BaseArrayDecoder) { ((ValueDecoderFactory.BaseArrayDecoder) tad).expand(); return true; } return false; } /* /////////////////////////////////////////////// // Accessors for accessing helper objects /////////////////////////////////////////////// */ /** * Method for getting start pointer within shared value buffer * for given attribute. It is also the same as end pointer * for preceding attribute, if any. 
*/ protected int getValueStartOffset(int index) { if (index < mAttrCount) { return mAttributes[index].mValueStartOffset; } return mValueBuilder.getCharSize(); } protected char[] getSharedValueBuffer() { return mValueBuilder.getCharBuffer(); } /** * Method called to resolve and initialize specified collected * namespace declaration * * @return Attribute that contains specified namespace declaration */ protected Attribute resolveNamespaceDecl(int index, boolean internURI) { Attribute ns = mNamespaces[index]; String full = mNamespaceBuilder.getAllValues(); String uri; if (mNsCount == 0) { uri = full; } else { ++index; if (index < mNsCount) { // not last int endOffset = mNamespaces[index].mValueStartOffset; uri = ns.getValue(full, endOffset); } else { // is last uri = ns.getValue(full); } } if (internURI && uri.length() > 0) { uri = sInternCache.intern(uri); } ns.mNamespaceURI = uri; return ns; } /** * Method needed by event creating code, to build a non-transient * attribute container, to use with XMLEvent objects (specifically * implementation of StartElement event). */ public ElemAttrs buildAttrOb() { int count = mAttrCount; if (count == 0) { return null; } /* If we have actual attributes, let's first just create the * raw array that has all attribute information: */ String[] raw = new String[count << 2]; for (int i = 0; i < count; ++i) { Attribute attr = mAttributes[i]; int ix = (i << 2); raw[ix] = attr.mLocalName; raw[ix+1] = attr.mNamespaceURI; raw[ix+2] = attr.mPrefix; raw[ix+3] = getValue(i); } // Do we have a "short" list? if (count < LONG_ATTR_LIST_LEN) { return new ElemAttrs(raw, mNonDefCount); } // Ok, nope; we need to also pass the Map information... /* 02-Feb-2009, TSa: Must make a copy of the Map array now, * otherwise could get overwritten. 
*/ int amapLen = mAttrMap.length; int[] amap = new int[amapLen]; // TODO: JDK 1.6 has Arrays.copyOf(), should use with Woodstox 6 System.arraycopy(mAttrMap, 0, amap, 0, amapLen); return new ElemAttrs(raw, mNonDefCount, amap, mAttrHashSize, mAttrSpillEnd); } protected void validateAttribute(int index, XMLValidator vld) throws XMLStreamException { Attribute attr = mAttributes[index]; String normValue = vld.validateAttribute (attr.mLocalName, attr.mNamespaceURI, attr.mPrefix, mValueBuilder.getCharBuffer(), getValueStartOffset(index), getValueStartOffset(index+1)); if (normValue != null) { attr.setValue(normValue); } } /* /////////////////////////////////////////////// // Attribute, namespace decl building /////////////////////////////////////////////// */ /** * Low-level accessor method that attribute validation code may call * for certain types of attributes; generally only for id and idref/idrefs * attributes. It returns the underlying 'raw' attribute value buffer * for direct access. */ public final TextBuilder getAttrBuilder(String attrPrefix, String attrLocalName) { /* Ok: we have parsed prefixed-name of a regular * attribute. So let's initialize the instance... */ if (mAttrCount == 0) { if (mAttributes == null) { allocBuffers(); } mAttributes[0] = new Attribute(attrPrefix, attrLocalName, 0); } else { int valueStart = mValueBuilder.getCharSize(); if (mAttrCount >= mAttributes.length) { mAttributes = (Attribute[]) DataUtil.growArrayBy50Pct(mAttributes); } Attribute curr = mAttributes[mAttrCount]; if (curr == null) { mAttributes[mAttrCount] = new Attribute(attrPrefix, attrLocalName, valueStart); } else { curr.reset(attrPrefix, attrLocalName, valueStart); } } ++mAttrCount; // 25-Sep-2006, TSa: Need to keep track of xml:id attribute? 
if (attrLocalName == mXmlIdLocalName) { if (attrPrefix == mXmlIdPrefix) { if (mXmlIdAttrIndex != XMLID_IX_DISABLED) { mXmlIdAttrIndex = mAttrCount - 1; } } } /* Can't yet create attribute map by name, since we only know * name prefix, not necessarily matching URI. */ return mValueBuilder; } /** * Method called by validator to insert an attribute that has a default * value and wasn't yet included in collector's attribute set. * * @return Index of the newly added attribute, if added; -1 to indicate * this was a duplicate */ public int addDefaultAttribute(String localName, String uri, String prefix, String value) { int attrIndex = mAttrCount; if (attrIndex < 1) { /* had no explicit attributes... better initialize now, then. * Let's just use hash area of 4, and */ initHashArea(); } /* Ok, first, since we do want to verify that we can not accidentally * add duplicates, let's first try to add entry to Map, since that * will catch dups. */ int hash = localName.hashCode(); if (uri != null && uri.length() > 0) { hash ^= uri.hashCode(); } int index = hash & (mAttrHashSize - 1); int[] map = mAttrMap; if (map[index] == 0) { // whoa, have room... map[index] = attrIndex+1; // add 1 to get 1-based index (0 is empty marker) } else { // nah, collision... int currIndex = map[index]-1; // Index of primary collision entry int spillIndex = mAttrSpillEnd; map = spillAttr(uri, localName, map, currIndex, spillIndex, attrIndex, hash, mAttrHashSize); if (map == null) { // dup! return -1; // could return negation (-(index+1)) of the prev index? 
} map[++spillIndex] = attrIndex; // no need to specifically avoid 0 mAttrMap = map; mAttrSpillEnd = ++spillIndex; } /* Can reuse code; while we don't really need the builder, * we need to add/reset attribute */ getAttrBuilder(prefix, localName); Attribute attr = mAttributes[mAttrCount-1]; attr.mNamespaceURI = uri; attr.setValue(value); // attribute count has been updated; index is one less than count return (mAttrCount-1); } /** * Low-level mutator method that attribute validation code may call * for certain types of attributes, when it wants to handle the whole * validation and normalization process by itself. It is generally * only called for id and idref/idrefs attributes, as those values * are usually normalized. */ public final void setNormalizedValue(int index, String value) { mAttributes[index].setValue(value); } /** * @return null if the default namespace URI has been already declared * for the current element; TextBuilder to add URI to if not. */ public TextBuilder getDefaultNsBuilder() { if (mDefaultNsDeclared) { return null; } mDefaultNsDeclared = true; return getNsBuilder(null); } /** * @return null if prefix has been already declared; TextBuilder to * add value to if not. */ public TextBuilder getNsBuilder(String prefix) { // first: must verify that it's not a dup if (mNsCount == 0) { if (mNamespaces == null) { mNamespaces = new Attribute[EXP_NS_COUNT]; } mNamespaces[0] = new Attribute(null, prefix, 0); } else { int len = mNsCount; /* Ok: must ensure that there are no duplicate namespace * declarations (ie. 
decls with same prefix being bound) */ if (prefix != null) { // null == default ns for (int i = 0; i < len; ++i) { // note: for ns decls, bound prefix is in 'local name' if (prefix == mNamespaces[i].mLocalName) { return null; } } } if (len >= mNamespaces.length) { mNamespaces = (Attribute[]) DataUtil.growArrayBy50Pct(mNamespaces); } int uriStart = mNamespaceBuilder.getCharSize(); Attribute curr = mNamespaces[len]; if (curr == null) { mNamespaces[len] = new Attribute(null, prefix, uriStart); } else { curr.reset(null, prefix, uriStart); } } ++mNsCount; return mNamespaceBuilder; } /** * Method called to resolve namespace URIs from attribute prefixes. *
* Note: public only so that it can be called by unit tests. * * @param rep Reporter to use for reporting well-formedness problems * @param ns Namespace prefix/URI mappings active for this element * * @return Index of xml:id attribute, if any, -1 if not */ public int resolveNamespaces(InputProblemReporter rep, StringVector ns) throws XMLStreamException { int attrCount = mAttrCount; /* Let's now set number of 'real' attributes, to allow figuring * out number of attributes created via default value expansion */ mNonDefCount = attrCount; if (attrCount < 1) { // Checked if doing access by FQN: mAttrHashSize = mAttrSpillEnd = 0; // And let's just bail out, too... return mXmlIdAttrIndex; } for (int i = 0; i < attrCount; ++i) { Attribute attr = mAttributes[i]; String prefix = attr.mPrefix; // Attributes' ns URI is null after reset, so can skip setting "no namespace" if (prefix != null) { if (prefix == "xml") { attr.mNamespaceURI = XMLConstants.XML_NS_URI; } else { String uri = ns.findLastFromMap(prefix); if (uri == null) { rep.throwParseError(ErrorConsts.ERR_NS_UNDECLARED_FOR_ATTR, prefix, attr.mLocalName); } attr.mNamespaceURI = uri; } } } /* Ok, finally, let's create attribute map, to allow efficient * access by prefix+localname combination. Could do it on-demand, * but this way we can check for duplicates right away. */ int[] map = mAttrMap; /* What's minimum size to contain at most 80% full hash area, * plus 1/8 spill area (12.5% spilled entries, two ints each)? */ int hashCount = 4; { int min = attrCount + (attrCount >> 2); // == 80% fill rate /* Need to get 2^N size that can contain all elements, with * 80% fill rate */ while (hashCount < min) { hashCount += hashCount; // 2x } // And then add the spill area mAttrHashSize = hashCount; min = hashCount + (hashCount >> 4); // 12.5 x 2 ints if (map == null || map.length < min) { map = new int[min]; } else { /* Need to clear old hash entries (if any). 
But note that * spilled entries we can leave alone -- they are just ints, * and get overwritten if and as needed */ Arrays.fill(map, 0, hashCount, 0); } } { int mask = hashCount-1; int spillIndex = hashCount; // Ok, array's fine, let's hash 'em in! for (int i = 0; i < attrCount; ++i) { Attribute attr = mAttributes[i]; String name = attr.mLocalName; int hash = name.hashCode(); String uri = attr.mNamespaceURI; if (uri != null) { hash ^= uri.hashCode(); } int index = hash & mask; // Hash slot available? if (map[index] == 0) { map[index] = i+1; // since 0 is marker } else { int currIndex = map[index]-1; /* nope, need to spill; let's extract most of that code to * a separate method for clarity (and maybe it'll be * easier to inline by JVM too) */ map = spillAttr(uri, name, map, currIndex, spillIndex, attrCount, hash, hashCount); if (map == null) { throwDupAttr(rep, currIndex); // never returns here... } else { // let's use else to keep FindBugs happy map[++spillIndex] = i; // no need to specifically avoid 0 ++spillIndex; } } } mAttrSpillEnd = spillIndex; } mAttrMap = map; return mXmlIdAttrIndex; } /* /////////////////////////////////////////////// // Package/core methods: /////////////////////////////////////////////// */ protected void throwIndex(int index) { throw new IllegalArgumentException("Invalid index "+index+"; current element has only "+getCount()+" attributes"); } /** * Method that basically serializes the specified (read-in) attribute * using Writers provided. Serialization is done by * writing out (fully-qualified) name * of the attribute, followed by the equals sign and quoted value. 
*/ public void writeAttribute(int index, XmlWriter xw) throws IOException, XMLStreamException { // Note: here we assume index checks have been done by caller Attribute attr = mAttributes[index]; String ln = attr.mLocalName; String prefix = attr.mPrefix; if (prefix == null || prefix.length() == 0) { xw.writeAttribute(ln, getValue(index)); } else { xw.writeAttribute(prefix, ln, getValue(index)); } } /** * Method called to initialize buffers that need not be immediately * initialized */ protected final void allocBuffers() { if (mAttributes == null) { mAttributes = new Attribute[8]; } if (mValueBuilder == null) { mValueBuilder = new TextBuilder(EXP_ATTR_COUNT); } } /* /////////////////////////////////////////////// // Internal methods: /////////////////////////////////////////////// */ /** * @return Null, if attribute is a duplicate (to indicate error); * map itself, or resized version, otherwise. */ private int[] spillAttr(String uri, String name, int[] map, int currIndex, int spillIndex, int attrCount, int hash, int hashCount) { // Do we have a dup with primary entry? /* Can do equality comp for local name, as they * are always canonicalized: */ Attribute oldAttr = mAttributes[currIndex]; if (oldAttr.mLocalName == name) { // URIs may or may not be interned though: String currURI = oldAttr.mNamespaceURI; if (currURI == uri || (currURI != null && currURI.equals(uri))) { return null; } } /* Is there room to spill into? (need to 2 int spaces; one for hash, * the other for index) */ if ((spillIndex + 1)>= map.length) { // Let's just add room for 4 spills... 
map = DataUtil.growArrayBy(map, 8); } // Let's first ensure we aren't adding a dup: for (int j = hashCount; j < spillIndex; j += 2) { if (map[j] == hash) { currIndex = map[j+1]; Attribute attr = mAttributes[currIndex]; if (oldAttr.mLocalName == name) { String currURI = attr.mNamespaceURI; if (currURI == uri || (currURI != null && currURI.equals(uri))) { return null; } } } } map[spillIndex] = hash; return map; } /** * Method called to ensure hash area will be properly set up in * cases where initially no room was needed, but default attribute(s) * is being added. */ private void initHashArea() { /* Let's use small hash area of size 4, and one spill; don't * want too big (need to clear up room), nor too small (only * collisions) */ mAttrHashSize = mAttrSpillEnd = 4; if (mAttrMap == null || mAttrMap.length < mAttrHashSize) { mAttrMap = new int[mAttrHashSize+1]; } mAttrMap[0] = mAttrMap[1] = mAttrMap[2] = mAttrMap[3] = 0; allocBuffers(); } /** * Method that can be used to get the specified attribute value, * by getting it written using Writer passed in. Can potentially * save one String allocation, since no (temporary) Strings need * to be created. */ /* protected final void writeValue(int index, Writer w) throws IOException { mValueBuilder.getEntry(index, w); } */ protected void throwDupAttr(InputProblemReporter rep, int index) throws XMLStreamException { rep.throwParseError("Duplicate attribute '"+getQName(index)+"'."); } } woodstox-4.1.3/src/java/com/ctc/wstx/sr/ElemAttrs.java 0000644 0001750 0001750 00000017046 11745427074 023071 0 ustar giovanni giovanni package com.ctc.wstx.sr; import javax.xml.namespace.QName; /** * Container class that is constructed with enough raw attribute information, * to be able to lazily construct full attribute objects, to be accessed * via Iterator, or fully-qualified name. *
* Implementation note: code for using Map-like structure is unfortunately * cut'n pasted from {@link AttributeCollector}. Problem * with refactoring is that it's 90% the same code, but not 100%. *
* Although instances of this class are constructed by stream readers,
* it is actually used by element event objects.
*/
public final class ElemAttrs
{
    //private final static int OFFSET_LOCAL_NAME = 0;
    private final static int OFFSET_NS_URI = 1;
    //private final static int OFFSET_NS_PREFIX = 2;
    //private final static int OFFSET_VALUE = 3;

    /**
     * Array that contains 4 Strings for each attribute;
     * localName, URI, prefix, value. Can be used to lazily construct
     * structure(s) needed to return Iterator for accessing all
     * attributes.
     */
    private final String[] mRawAttrs;

    /**
     * Raw offset (in {@code mRawAttrs}) of the first attribute
     * instance that was created through default value expansion.
     */
    private final int mDefaultOffset;

    /*
    //////////////////////////////////////////////////////////////
    // Information that defines "Map-like" data structure used for
    // quick access to attribute values by fully-qualified name
    // (only used for "long" lists)
    //////////////////////////////////////////////////////////////
     */

    // // // For full explanation, see source for AttributeCollector

    private final int[] mAttrMap;

    private final int mAttrHashSize;

    private final int mAttrSpillEnd;

    /**
     * Method called to create "short" attribute list; list that has
     * only few entries, and can thus be searched for attributes using
     * linear search, without using any kind of Map structure.
     *
     * Currently the limit is 4 attributes; 1, 2 or 3 attribute lists are
     * considered short, 4 or more 'long'.
     *
     * @param rawAttrs Array that contains 4 Strings for each attribute;
     *    localName, URI, prefix, value. Can be used to lazily construct
     *    structure(s) needed to return Iterator for accessing all
     *    attributes.
     * @param defOffset Index of the first default attribute, if any;
     *    number of all attributes if none
     */
    public ElemAttrs(String[] rawAttrs, int defOffset)
    {
        mRawAttrs = rawAttrs;
        mAttrMap = null;
        mAttrHashSize = 0;
        mAttrSpillEnd = 0;
        // stored as raw offset: 4 Strings per attribute
        mDefaultOffset = (defOffset << 2);
    }

    /**
     * Method called to create "long" attribute list; list that has
     * more than a few entries, so that access by fully-qualified name
     * should not be done by linear search.
     *
     * @param rawAttrs Array that contains 4 Strings for each attribute;
     *    localName, URI, prefix, value. Can be used to lazily construct
     *    structure(s) needed to return Iterator for accessing all
     *    attributes.
     * @param defOffset Index of the first default attribute, if any;
     *    number of all attributes if none
     * @param attrMap Array that contains the primary hash area (entries
     *    are 1-based attribute indexes, 0 marking an empty slot) followed
     *    by the spill area (pairs of hash code and 0-based attribute index)
     * @param hashSize Size of the primary hash area; slot is calculated
     *    as <code>hash &amp; (hashSize - 1)</code>
     * @param spillEnd Offset in <code>attrMap</code> that follows the
     *    last used spill entry
     */
    public ElemAttrs(String[] rawAttrs, int defOffset,
                     int[] attrMap, int hashSize, int spillEnd)
    {
        mRawAttrs = rawAttrs;
        mDefaultOffset = (defOffset << 2);
        mAttrMap = attrMap;
        mAttrHashSize = hashSize;
        mAttrSpillEnd = spillEnd;
    }

    /*
    ////////////////////////////////////////////////////
    // Public API
    ////////////////////////////////////////////////////
     */

    /**
     * @return The underlying raw array (not a copy), 4 Strings per
     *   attribute
     */
    public String[] getRawAttrs() {
        return mRawAttrs;
    }

    /**
     * Method for finding the attribute that has the specified
     * qualified name.
     *
     * @return Raw offset (attribute index times 4) of the first String
     *   of the matching attribute within the raw array; -1 if there is
     *   no such attribute
     */
    public int findIndex(QName name)
    {
        // Do we have a Map to do lookup against?
        if (mAttrMap != null) { // yup
            return findMapIndex(name.getNamespaceURI(), name.getLocalPart());
        }

        // Nope, linear search:
        String ln = name.getLocalPart();
        String uri = name.getNamespaceURI();
        boolean defaultNs = (uri == null || uri.length() == 0);
        String[] raw = mRawAttrs;

        for (int i = 0, len = raw.length; i < len; i += 4) {
            if (!ln.equals(raw[i])) {
                continue;
            }
            String thisUri = raw[i+OFFSET_NS_URI];
            if (defaultNs) {
                if (thisUri == null || thisUri.length() == 0) {
                    return i;
                }
            } else { // non-default NS
                if (thisUri != null &&
                    (thisUri == uri || thisUri.equals(uri))) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * @return Raw offset of the first attribute created via default
     *   value expansion (4 times the index passed to constructor)
     */
    public int getFirstDefaultOffset() {
        return mDefaultOffset;
    }

    /**
     * @param ix Raw offset of an attribute, as returned by
     *   {@link #findIndex}
     *
     * @return True if the offset is at or past the first default
     *   attribute offset
     */
    public boolean isDefault(int ix) {
        return (ix >= mDefaultOffset);
    }

    /*
    ////////////////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////////////////
     */

    /**
     * Hash-based lookup used for "long" lists.
     *
     * Note: this method is very similar to the lookup code of
     * <code>AttributeCollector</code>; basically
     * most of it was cut'n pasted. Would be nice to refactor, but it's
     * bit hard to do that since data structures are not 100% identical
     * (mostly attribute storage, not Map structure itself).
     *
     * @return Raw offset of the matching attribute; -1 if none matches
     */
    private final int findMapIndex(String nsURI, String localName)
    {
        // Primary hit?
        int hash = localName.hashCode();
        if (nsURI == null) {
            nsURI = ""; // just to simplify comparisons -- array contains nulls
        } else if (nsURI.length() > 0) {
            hash ^= nsURI.hashCode();
        }
        int ix = mAttrMap[hash & (mAttrHashSize - 1)];
        if (ix == 0) { // nothing in here; no spills either
            return -1;
        }
        // Index is "one off" (since 0 indicates 'null), 4 Strings per attr
        ix = (ix - 1) << 2;

        // Is primary candidate match?
        String[] raw = mRawAttrs;
        if (matchesAt(raw, ix, nsURI, localName)) {
            return ix;
        }

        /* Nope, need to traverse spill list, which has 2 entries for
         * each spilled attribute id; first for hash value, second index.
         */
        for (int i = mAttrHashSize, len = mAttrSpillEnd; i < len; i += 2) {
            if (mAttrMap[i] != hash) {
                continue;
            }
            /* Note: spill indexes are not off-by-one, since there's no need
             * to mask 0
             */
            ix = mAttrMap[i+1] << 2; // ... but there are 4 Strings for each attr
            if (matchesAt(raw, ix, nsURI, localName)) {
                return ix;
            }
        }
        return -1;
    }

    /**
     * Checks whether the attribute stored at the given raw offset has
     * the given local name and namespace URI.
     *
     * @param nsURI Namespace URI to match; must not be null (empty
     *   String is used for "no namespace", and matches both null and
     *   empty stored URI)
     */
    private static boolean matchesAt(String[] raw, int offset,
                                     String nsURI, String localName)
    {
        String thisName = raw[offset];
        /* Equality first, since although equals() checks that too, it's
         * very likely to match (if interning Strings), and we can save
         * a method call.
         */
        if (thisName != localName && !thisName.equals(localName)) {
            return false;
        }
        String thisURI = raw[offset+OFFSET_NS_URI];
        if (thisURI == nsURI) {
            return true;
        }
        if (thisURI == null) {
            return nsURI.length() == 0;
        }
        return thisURI.equals(nsURI);
    }
}
// woodstox-4.1.3/src/java/com/ctc/wstx/sr/package.html 0000644 0001750 0001750 00000000225 11745427074 022576 0 ustar giovanni giovanni
This package contains supporting code for handling namespace information; element stacks that keep track of elements parsed and such. woodstox-4.1.3/src/java/com/ctc/wstx/sr/Attribute.java 0000644 0001750 0001750 00000010542 11745427074 023126 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.sr; import javax.xml.namespace.QName; import com.ctc.wstx.compat.QNameCreator; /** * Container for information collected regarding a single element * attribute instance. Used for both regular explicit attributes * and values added via attribute value defaulting. ** This class is not exposed outside of the package and is considered * part of internal implementation. * * @since 4.1 */ final class Attribute { // // // Name information protected String mLocalName; protected String mPrefix; protected String mNamespaceURI; // // // Value information /** * Numeric offset within text builder that denotes pointer * to the first character of the value for this attribute * (or namespace). End offset is derived by looking at * start pointer of the following attribute; or total * length for the last entry */ protected int mValueStartOffset; /** * Value as a String iff it has been requested once; stored * here in case it will be accessed again. 
*/ protected String mReusableValue; /* ////////////////////////////////////////////////// // Life-cycle ////////////////////////////////////////////////// */ public Attribute(String prefix, String localName, int valueStart) { mLocalName = localName; mPrefix = prefix; mValueStartOffset = valueStart; } public void reset(String prefix, String localName, int valueStart) { mLocalName = localName; mPrefix = prefix; mValueStartOffset = valueStart; mNamespaceURI = null; mReusableValue = null; } /** * Method called to inject specific value for this attribute. */ public void setValue(String value) { mReusableValue = value; } /* ////////////////////////////////////////////////// // Accessors ////////////////////////////////////////////////// */ /** * @param uri Namespace URI of the attribute, if any; MUST be * given as null if no namespace * @param localName Local name to match. Note: is NOT guaranteed * to have been interned * * @return True if qualified name of this attribute is the same * as what arguments describe */ protected boolean hasQName(String uri, String localName) { if (localName != mLocalName && !localName.equals(mLocalName)) { return false; } if (mNamespaceURI == uri) { return true; } if (uri == null) { return (mNamespaceURI == null) || mNamespaceURI.length() == 0; } return (mNamespaceURI != null && uri.equals(mNamespaceURI)); } public QName getQName() { if (mPrefix == null) { if (mNamespaceURI == null) { return new QName(mLocalName); } return new QName(mNamespaceURI, mLocalName); } String uri = mNamespaceURI; if (uri == null) { // Some QName impls (older JDKs) don't like nulls uri = ""; } // For [WSTX-174] need to use indirection: return QNameCreator.create(uri, mLocalName, mPrefix); } /** * Method called if this attribute is the last one with value * in the buffer. If so, end value is implied */ public String getValue(String allValues) { if (mReusableValue == null) { mReusableValue = (mValueStartOffset == 0) ? 
allValues : allValues.substring(mValueStartOffset); } return mReusableValue; } public String getValue(String allValues, int endOffset) { if (mReusableValue == null) { mReusableValue = allValues.substring(mValueStartOffset, endOffset); } return mReusableValue; } } woodstox-4.1.3/src/java/com/ctc/wstx/sr/ValidatingStreamReader.java 0000644 0001750 0001750 00000060242 11745427074 025546 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in the file LICENSE which is * included with the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.sr; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.*; import javax.xml.stream.XMLStreamException; import org.codehaus.stax2.XMLInputFactory2; import org.codehaus.stax2.validation.*; import com.ctc.wstx.api.ReaderConfig; import com.ctc.wstx.cfg.ErrorConsts; import com.ctc.wstx.cfg.XmlConsts; import com.ctc.wstx.io.*; import com.ctc.wstx.dtd.DTDId; import com.ctc.wstx.dtd.DTDSubset; import com.ctc.wstx.dtd.DTDValidatorBase; import com.ctc.wstx.dtd.FullDTDReader; import com.ctc.wstx.util.URLUtil; /** * Implementation of {@link org.codehaus.stax2.XMLStreamReader2} * that builds on {@link TypedStreamReader} and adds full DTD-handling * including DTD validation * * @author Tatu Saloranta * @author Benson Margulies */ public class ValidatingStreamReader extends TypedStreamReader { /* /////////////////////////////////////////////////////////////////////// // Constants 
for standard StAX properties: /////////////////////////////////////////////////////////////////////// */ final static String STAX_PROP_ENTITIES = "javax.xml.stream.entities"; final static String STAX_PROP_NOTATIONS = "javax.xml.stream.notations"; /* /////////////////////////////////////////////////////////////////////// // Validation (DTD) information (entities, ...) /////////////////////////////////////////////////////////////////////// */ // // // Note: some members that logically belong here, are actually // // // part of superclass /** * Combined DTD set, constructed from parsed internal and external * entities (which may have been set via override DTD functionality). */ DTDValidationSchema mDTD = null; /** * Validating reader keeps of automatically created DTD-based * validator, since its handling may differ from that of application * managed validators. */ XMLValidator mAutoDtdValidator = null; /** * Flag that indicates whether a DTD validator has been automatically * set (as per DOCTYPE declaration or override) */ boolean mDtdValidatorSet = false; /** * Custom validation problem handler, if any. */ protected ValidationProblemHandler mVldProbHandler = null; /* /////////////////////////////////////////////////////////////////////// // Life-cycle (ctors) /////////////////////////////////////////////////////////////////////// */ private ValidatingStreamReader(InputBootstrapper bs, BranchingReaderSource input, ReaderCreator owner, ReaderConfig cfg, InputElementStack elemStack, boolean forER) throws XMLStreamException { super(bs, input, owner, cfg, elemStack, forER); } /** * Factory method for constructing readers. * * @param owner "Owner" of this reader, factory that created the reader; * needed for returning updated symbol table information after parsing. * @param input Input source used to read the XML document. * @param cfg Object that contains reader configuration info. * @param bs Bootstrapper to use, for reading xml declaration etc. 
* @param forER True if this reader is to be (configured to be) used by * an event reader. Will cause some changes to default settings, as * required by contracts Woodstox XMLEventReader implementation has * (with respect to lazy parsing, short text segments etc) */ public static ValidatingStreamReader createValidatingStreamReader (BranchingReaderSource input, ReaderCreator owner, ReaderConfig cfg, InputBootstrapper bs, boolean forER) throws XMLStreamException { ValidatingStreamReader sr = new ValidatingStreamReader (bs, input, owner, cfg, createElementStack(cfg), forER); return sr; } /* /////////////////////////////////////////////////////////////////////// // Public API, configuration /////////////////////////////////////////////////////////////////////// */ public Object getProperty(String name) { // DTD-specific properties... if (name.equals(STAX_PROP_ENTITIES)) { safeEnsureFinishToken(); if (mDTD == null || !(mDTD instanceof DTDSubset)) { return null; } List l = ((DTDSubset) mDTD).getGeneralEntityList(); /* Let's make a copy, so that caller can not modify * DTD's internal list instance */ return new ArrayList(l); } if (name.equals(STAX_PROP_NOTATIONS)) { safeEnsureFinishToken(); if (mDTD == null || !(mDTD instanceof DTDSubset)) { return null; } /* Let's make a copy, so that caller can not modify * DTD's internal list instance */ List l = ((DTDSubset) mDTD).getNotationList(); return new ArrayList(l); } return super.getProperty(name); } /* /////////////////////////////////////////////////////////////////////// // XMLStreamReader2 (StAX2) implementation /////////////////////////////////////////////////////////////////////// */ // // // StAX2, per-reader configuration // no additional readable features //public Object getFeature(String name) public void setFeature(String name, Object value) { // Referring to DTD-related features? if (name.equals(FEATURE_DTD_OVERRIDE)) { /* !!! 
06-Feb-2007, TSa: Null with 4.0 will actually mean * 'remove any overrides'; which is different from earlier * meaning (which was use a dummy/empty override). * Should we throw an exception, or warn, or something...? */ if (value != null && !(value instanceof DTDValidationSchema)) { throw new IllegalArgumentException("Value to set for feature "+name+" not of type DTDValidationSchema"); } mConfig.setProperty(XMLInputFactory2.P_DTD_OVERRIDE, (DTDValidationSchema) value); } else { super.setFeature(name, value); } } /* /////////////////////////////////////////////////////////////////////// // DTDInfo implementation (StAX 2) /////////////////////////////////////////////////////////////////////// */ public Object getProcessedDTD() { return getProcessedDTDSchema(); } public DTDValidationSchema getProcessedDTDSchema() { DTDValidationSchema dtd = mConfig.getDTDOverride(); if (dtd == null) { dtd = mDTD; } return mDTD; } /* /////////////////////////////////////////////////////////////////////// // Stax2 validation /////////////////////////////////////////////////////////////////////// */ // @Override public XMLValidator validateAgainst(XMLValidationSchema schema) throws XMLStreamException { return mElementStack.validateAgainst(schema); } // @Override public XMLValidator stopValidatingAgainst(XMLValidationSchema schema) throws XMLStreamException { return mElementStack.stopValidatingAgainst(schema); } // @Override public XMLValidator stopValidatingAgainst(XMLValidator validator) throws XMLStreamException { return mElementStack.stopValidatingAgainst(validator); } // @Override public ValidationProblemHandler setValidationProblemHandler(ValidationProblemHandler h) { ValidationProblemHandler oldH = mVldProbHandler; mVldProbHandler = h; return oldH; } /* /////////////////////////////////////////////////////////////////////// // Private methods, DOCTYPE handling /////////////////////////////////////////////////////////////////////// */ /** * This method gets called to handle 
remainder of DOCTYPE declaration, * essentially the optional internal subset. Internal subset, if such * exists, is always read, but whether its contents are added to the * read buffer depend on passed-in argument. *
* NOTE: Since this method overrides the default implementation, make
* sure you do NOT change the method signature.
*
* @param copyContents If true, will copy contents of the internal
* subset of DOCTYPE declaration
* in the text buffer (in addition to parsing it for actual use); if
* false, will only do parsing.
*/
protected void finishDTD(boolean copyContents)
throws XMLStreamException
{
// DTD support switched off: the superclass handles the declaration
if (!hasConfigFlags(CFG_SUPPORT_DTD)) {
super.finishDTD(copyContents);
return;
}
/* We know there are no spaces, as this char was read and pushed
* back earlier...
*/
char c = getNextChar(SUFFIX_IN_DTD);
DTDSubset intSubset = null;
/* Do we have an internal subset? Note that we have earlier checked
* that it has to be either '[' or closing '>'.
*/
if (c == '[') {
// Do we need to copy the contents of int. subset in the buffer?
if (copyContents) {
((BranchingReaderSource) mInput).startBranch(mTextBuffer, mInputPtr, mNormalizeLFs);
}
try {
intSubset = FullDTDReader.readInternalSubset(this, mInput, mConfig,
hasConfigFlags(CFG_VALIDATE_AGAINST_DTD),
mDocXmlVersion);
} finally {
/* Let's close branching in any and every case (may allow
* graceful recovery in error cases in future)
*/
if (copyContents) {
/* Need to "push back" ']' got in the successful case
* (that's -1 part below);
* in error case it'll just be whatever last char was.
*/
((BranchingReaderSource) mInput).endBranch(mInputPtr-1);
}
}
// And then we need closing '>'
c = getNextCharAfterWS(SUFFIX_IN_DTD_INTERNAL);
}
// With or without an internal subset, the declaration must end here
if (c != '>') {
throwUnexpectedChar(c, "; expected '>' to finish DOCTYPE declaration.");
}
/* But, then, we also may need to read the external subset, if
* one was defined:
*/
/* 19-Sep-2004, TSa: That does not need to be done, however, if
* there's a DTD override set.
*/
mDTD = mConfig.getDTDOverride();
if (mDTD != null) {
// We have earlier override that's already parsed
} else { // Nope, no override
DTDSubset extSubset = null;
/* 05-Mar-2006, TSa: If standalone was specified as "yes", we
* should not rely on any external declarations, so shouldn't
* we really just skip the external subset?
*/
/* Alas: SAX (Xerces) still tries to read it... should we
* do the Right Thing, or follow the leader? For now, let's
* just follow the wrong example.
*/
//if (mDocStandalone != DOC_STANDALONE_YES) {
// 'if (true)' stands in for the disabled standalone check above, so
// the external subset is looked up whenever an identifier exists
if (true) {
if (mDtdPublicId != null || mDtdSystemId != null) {
extSubset = findDtdExtSubset(mDtdPublicId, mDtdSystemId, intSubset);
}
}
// Use whichever subset exists; combine them only when both do
if (intSubset == null) {
mDTD = extSubset;
} else if (extSubset == null) {
mDTD = intSubset;
} else {
mDTD = intSubset.combineWithExternalSubset(this, extSubset);
}
}
if (mDTD == null) { // only if specifically overridden not to have any
mGeneralEntities = null;
} else {
if (mDTD instanceof DTDSubset) {
mGeneralEntities = ((DTDSubset) mDTD).getGeneralEntityMap();
} else {
/* Also, let's warn if using non-native DTD implementation,
* since entities and notations can not be accessed
*/
_reportProblem(mConfig.getXMLReporter(), ErrorConsts.WT_DT_DECL,
"Value to set for feature "+FEATURE_DTD_OVERRIDE+" not a native Woodstox DTD implementation (but "+mDTD.getClass()+"): can not access full entity or notation information", null);
}
/* 16-Jan-2006, TSa: Actually, we have both fully-validating mode,
* and non-validating-but-DTD-aware mode. In latter case, we'll
* still need to add a validator, but just to get type info
* and to add attribute default values if necessary.
*/
mAutoDtdValidator = mDTD.createValidator(/*(ValidationContext)*/ mElementStack);
// initValidation() checks this flag before warning about a missing DTD
mDtdValidatorSet = true; // so we won't get nags
// Stays null unless the validator is a DTDValidatorBase with ns defaults
NsDefaultProvider nsDefs = null;
if (mAutoDtdValidator instanceof DTDValidatorBase) {
DTDValidatorBase dtdv = (DTDValidatorBase) mAutoDtdValidator;
dtdv.setAttrValueNormalization(true);
// Do we have any attribute defaults for 'xmlns' or 'xmlns:*'?
if (dtdv.hasNsDefaults()) {
nsDefs = dtdv;
}
}
mElementStack.setAutomaticDTDValidator(mAutoDtdValidator, nsDefs);
}
}
/**
* If there is an error handler established, call it.
*/
// @Override
public void reportValidationProblem(XMLValidationProblem prob)
throws XMLStreamException
{
// Without a custom handler, reporting is left to the superclass
if (mVldProbHandler == null) {
super.reportValidationProblem(prob);
return;
}
// Fix for [WSTX-209]: a registered handler receives the problem instead
mVldProbHandler.reportProblem(prob);
}
/**
* Method called right before handling the root element, by the base
* class. This allows for some initialization and checks to be done
* (not including ones that need access to actual element name)
*/
protected void initValidation()
throws XMLStreamException
{
final boolean dtdValidationRequested = hasConfigFlags(CFG_VALIDATE_AGAINST_DTD);
if (!dtdValidationRequested || mDtdValidatorSet) {
// Validation not requested, or a DTD validator is already in place
return;
}
/* A missing DTD is not an error as such, but the caller asked for
* validation, so pass the information on and carry on.
*/
reportProblem(null, ErrorConsts.WT_DT_DECL, ErrorConsts.W_MISSING_DTD, null, null);
}
/*
///////////////////////////////////////////////////////////////////////
// Private methods, external subset access
///////////////////////////////////////////////////////////////////////
*/
/**
* Method called by <code>finishDTD</code>, to locate the specified
* external DTD subset. Subset may be obtained from a cache, if cached
* copy exists and is compatible; if not, it will be read from the
* source identified by the public and/or system identifier passed.
*/
private DTDSubset findDtdExtSubset(String pubId, String sysId,
DTDSubset intSubset)
throws XMLStreamException
{
final boolean useCache = hasConfigFlags(CFG_CACHE_DTDS);
// The key is built up front: it serves both the lookup and the later store
DTDId cacheKey;
try {
cacheKey = constructDtdId(pubId, sysId);
} catch (IOException ioe) {
throw constructFromIOE(ioe);
}
if (useCache) {
DTDSubset cached = findCachedSubset(cacheKey, intSubset);
if (cached != null) {
return cached;
}
}
/* Nothing usable in the cache, so the subset has to be read. A system
* identifier is required for that; there is no way here to resolve
* a DTD from its public id alone.
*/
if (sysId == null) {
throwParseError("Can not resolve DTD with public id \"{0}\"; missing system identifier", mDtdPublicId, null);
}
WstxInputSource extSource = null;
try {
// An xml version that is not explicitly known is treated as 1.0
final int docVersion = mDocXmlVersion;
final int resolveVersion = (docVersion == XmlConsts.XML_V_UNKNOWN)
? XmlConsts.XML_V_10 : docVersion;
/* First null: no explicit path context, parent's is used.
* Second null: not an entity expansion, so no name.
* The resolver passed is the DTD resolver from the configuration,
* not mEntityResolver, since the latter is for general entities.
*/
extSource = DefaultInputResolver.resolveEntity
(mInput, null, null, pubId, sysId, mConfig.getDtdResolver(),
mConfig, resolveVersion);
} catch (FileNotFoundException fex) {
// Rethrown as a parse error so the message carries input position etc.
throwParseError("(was {0}) {1}", fex.getClass().getName(), fex.getMessage());
} catch (IOException ioe) {
throwFromIOE(ioe);
}
DTDSubset loaded = FullDTDReader.readExternalSubset(extSource, mConfig, intSubset,
hasConfigFlags(CFG_VALIDATE_AGAINST_DTD),
mDocXmlVersion);
/* Store for reuse only when the subset itself says it is cachable
* (it is not if it refers to parameter entities from the internal subset).
*/
if (useCache && loaded.isCachable()) {
mOwner.addCachedDTD(cacheKey, loaded);
}
return loaded;
}
/**
* Looks up a cached external subset and returns it only if it may be
* used together with the given internal subset.
*
* @param id Key to look the subset up with
* @param intSubset Internal subset of the current document, or null
* @return The cached subset, or null if there is none or it is not
* reusable with <code>intSubset</code>
*/
private DTDSubset findCachedSubset(DTDId id, DTDSubset intSubset)
throws XMLStreamException
{
DTDSubset candidate = mOwner.findCachedDTD(id);
if (candidate == null) {
return null;
}
// With no internal subset there is nothing the cached copy could clash with
boolean usable = (intSubset == null) || candidate.isReusableWith(intSubset);
return usable ? candidate : null;
}
/**
* Method called to resolve path to external DTD subset, given
* system identifier.
*/
private URI resolveExtSubsetPath(String systemId) throws IOException
{
// Do we have a context to use for resolving?
URL ctxt = (mInput == null) ? null : mInput.getSource();
/* Ok, either got a context or not; let's create the URL based on
* the id, and optional context:
*/
if (ctxt == null) {
/* Call will try to figure out if system id has the protocol
* in it; if not, create a relative file, if it does, try to
* resolve it.
*/
return URLUtil.uriFromSystemId(systemId);
}
URL url = URLUtil.urlFromSystemId(systemId, ctxt);
try {
return new URI(url.toExternalForm());
} catch (URISyntaxException e) { // should never occur...
IOException ioe = new IOException("Failed to construct URI for external subset, URL = "+url.toExternalForm()+": "+e.getMessage());
// Keep the original exception reachable for whoever diagnoses this
ioe.initCause(e);
throw ioe;
}
}
/**
* Builds the cache key for an external DTD subset from its public
* and/or system identifier.
*
* @param pubId Public identifier; may be null
* @param sysId System identifier; may be null or empty
* @return The key, or null when the public id is not used for matching
* and no system identifier is available
* @throws IOException if the system identifier can not be resolved
*/
protected DTDId constructDtdId(String pubId, String sysId)
throws IOException
{
/* Settings that change what gets stored as DTD, and therefore have to
* keep cached instances apart: namespace awareness; DTD validation
* (less info is needed when not validating); dtd++ support; and
* xml:id typing (the attribute type is verified only if enabled).
*/
final int cacheRelevantMask = CFG_NAMESPACE_AWARE
| CFG_VALIDATE_AGAINST_DTD
| CFG_SUPPORT_DTDPP
| CFG_XMLID_TYPING;
final int significantFlags = mConfigFlags & cacheRelevantMask;
URI sysRef = null;
if (sysId != null && sysId.length() > 0) {
sysRef = resolveExtSubsetPath(sysId);
}
/* 29-Mar-2006, TSa: public ids are not always unique and can be
* mismatched with system ids, so they are used for matching only
* when explicitly enabled.
*/
final boolean matchByPublicId = (mConfigFlags & CFG_CACHE_DTDS_BY_PUBLIC_ID) != 0
&& pubId != null && pubId.length() > 0;
if (matchByPublicId) {
return DTDId.construct(pubId, sysRef, significantFlags, mXml11);
}
return (sysRef == null)
? null
: DTDId.constructFromSystemId(sysRef, significantFlags, mXml11);
}
/**
* Builds the cache key for an external DTD subset from an already
* resolved system identifier.
*
* @param sysId Resolved system identifier
* @return Key constructed from the identifier and the significant flags
*/
protected DTDId constructDtdId(URI sysId)
throws IOException
{
/* Flags that keep cached instances apart: namespace awareness, DTD
* validation (less info is needed when not validating) and dtd++
* support.
*/
// NOTE(review): unlike the (String, String) overload, this mask leaves
// out CFG_XMLID_TYPING -- confirm whether that difference is intended
final int cacheRelevantMask = CFG_NAMESPACE_AWARE
| CFG_VALIDATE_AGAINST_DTD
| CFG_SUPPORT_DTDPP;
final int significantFlags = mConfigFlags & cacheRelevantMask;
return DTDId.constructFromSystemId(sysId, significantFlags, mXml11);
}
/*
///////////////////////////////////////////////////////////////////////
// Private methods, DTD validation support
///////////////////////////////////////////////////////////////////////
*/
/**
* Method called by lower-level parsing code when invalid content
* (anything inside element with 'empty' content spec; text inside
* non-mixed element etc) is found during basic scanning. Note
* that actual DTD element structure problems are not reported
* through this method.
*/
protected void reportInvalidContent(int evtType)
throws XMLStreamException
{
final int contentMode = mVldContent;
if (contentMode == XMLValidator.CONTENT_ALLOW_NONE) {
// Element is declared empty, yet something was found inside it
reportValidationProblem(ErrorConsts.ERR_VLD_EMPTY,
mElementStack.getTopElementDesc(),
ErrorConsts.tokenTypeDesc(evtType));
} else if (contentMode == XMLValidator.CONTENT_ALLOW_WS
|| contentMode == XMLValidator.CONTENT_ALLOW_WS_NONSTRICT) { // should this ever occur?
reportValidationProblem(ErrorConsts.ERR_VLD_NON_MIXED,
mElementStack.getTopElementDesc(), null);
} else if (contentMode == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT
|| contentMode == XMLValidator.CONTENT_ALLOW_ANY_TEXT) {
/* Not 100% sure if this should ever happen... depends on
* interpretation of 'any' content model?
*/
reportValidationProblem(ErrorConsts.ERR_VLD_ANY,
mElementStack.getTopElementDesc(),
ErrorConsts.tokenTypeDesc(evtType));
} else { // should never occur:
throwParseError("Internal error: trying to report invalid content for "+evtType);
}
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/sr/StreamReaderImpl.java 0000644 0001750 0001750 00000001466 11745427074 024370 0 ustar giovanni giovanni package com.ctc.wstx.sr;
import javax.xml.stream.*;
import org.codehaus.stax2.XMLStreamReader2;
import com.ctc.wstx.ent.EntityDecl;
/**
* Interface that defines "internal Woodstox API". It is used to decouple
* parts of the Woodstox that need to know something more about woodstox
* stream reader implementation, but not about implementation details.
* Specifically, there are some simple dependencies from the stream
* writer; they should only need to refer to this interface.
*/
public interface StreamReaderImpl
extends XMLStreamReader2
{
/**
* Accessor for an entity declaration tied to the reader's current state.
* NOTE(review): presumably the entity being expanded -- confirm against
* the implementing readers.
*/
EntityDecl getCurrentEntityDecl();
/**
* Passes the given callback and location to the reader.
* NOTE(review): the meaning of the returned Object is defined by the
* implementation and {@link ElemCallback} -- confirm there.
*/
Object withStartElement(ElemCallback cb, Location loc);
/**
* @return Presumably true when the reader is namespace-aware (per the
* method name; the contract is set by implementations -- confirm)
*/
boolean isNamespaceAware();
/**
* @return An attribute collector; presumably the one the reader uses
* (confirm against implementations)
*/
AttributeCollector getAttributeCollector();
/**
* @return An element stack; presumably the one the reader uses
* (confirm against implementations)
*/
InputElementStack getInputElementStack();
}
woodstox-4.1.3/src/java/com/ctc/wstx/sr/InputElementStack.java 0000644 0001750 0001750 00000106443 11745427074 024570 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sr;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.AttributeInfo;
import org.codehaus.stax2.ri.EmptyIterator;
import org.codehaus.stax2.ri.SingletonIterator;
import org.codehaus.stax2.validation.ValidationContext;
import org.codehaus.stax2.validation.XMLValidator;
import org.codehaus.stax2.validation.XMLValidationProblem;
import org.codehaus.stax2.validation.XMLValidationSchema;
import org.codehaus.stax2.validation.ValidatorPair;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.compat.QNameCreator;
import com.ctc.wstx.dtd.DTDValidatorBase; // unfortunate dependency
import com.ctc.wstx.util.*;
/**
* Shared base class that defines API stream reader uses to communicate
* with the element stack implementation, independent of whether it's
* operating in namespace-aware or non-namespace modes.
* Element stack class is used for storing nesting information about open
* elements, and for namespace-aware mode, also information about
* namespaces active (including default namespace), during parsing of
* XML input.
*
* This class also implements {@link NamespaceContext}, since it has all
* the information necessary, so parser can just return element stack
* instance as necessary.
*/
public final class InputElementStack
implements AttributeInfo, NamespaceContext, ValidationContext
{
/** Initial value of {@link #mIdAttrIndex}, marking that no ID attribute index is known. */
final static int ID_ATTR_NONE = -1;
/*
//////////////////////////////////////////////////
// Configuration
//////////////////////////////////////////////////
*/
/** Namespace-awareness setting, fixed by the constructor. */
protected final boolean mNsAware;
/** Attribute collector; created by the constructor. */
protected final AttributeCollector mAttrCollector;
/** Reader configuration handed to the constructor. */
protected final ReaderConfig mConfig;
/** Problem reporter; null until set via <code>connectReporter</code>. */
protected InputProblemReporter mReporter = null;
/**
* Object that will need to be consulted about namespace bindings,
* since it has some knowledge about default namespace declarations
* (has default attribute value expansion).
*/
protected NsDefaultProvider mNsDefaultProvider;
/*
//////////////////////////////////////////////////
// Element, namespace information
//////////////////////////////////////////////////
*/
// Presumably the nesting depth of open elements, starting at zero
// (maintained by code outside this section -- confirm)
protected int mDepth = 0;
/**
* Vector that contains all currently active namespaces; one String for
* prefix, another for matching URI. Does also include default name
* spaces (at most one per level).
*/
protected final StringVector mNamespaces = new StringVector(64);
/**
* Currently open element, if any; null outside root element.
*/
protected Element mCurrElement;
// NOTE(review): presumably tells whether namespace defaults from
// mNsDefaultProvider may apply -- confirm where this flag is set
protected boolean mMayHaveNsDefaults = false;
/*
//////////////////////////////////////////////////
// Element validation (optional), attribute typing
//////////////////////////////////////////////////
*/
/**
* Optional validator object that will get called if set,
* and that can validate xml content. Note that it is possible
* that this is set to a proxy object that calls multiple
* validators in sequence.
*/
protected XMLValidator mValidator = null;
/**
* Index of the attribute with type of ID, if known (most likely
* due to Xml:id support); -1 if not available, or no ID attribute
* for current element.
*/
protected int mIdAttrIndex = ID_ATTR_NONE;
/*
//////////////////////////////////////////////////
// Simple 1-slot QName cache; used for improving
// efficiency of code that uses QNames extensively
// (like StAX Event API implementation)
//////////////////////////////////////////////////
*/
// The four fields below together form the single cache slot
protected String mLastLocalName = null;
protected String mLastPrefix = null;
protected String mLastNsURI = null;
protected QName mLastName = null;
/*
/////////////////////////////////////////////////////
// Other simple caching
/////////////////////////////////////////////////////
*/
// Non-transient NamespaceContext caching; mostly for event API
/**
* Last potentially shareable NamespaceContext created by
* this stack. This reference is cleared each time bindings
* change (either due to a start element with new bindings, or due
* to the matching end element that closes scope of such binding(s)).
*/
protected BaseNsContext mLastNsContext = null;
// Chain of reusable Element instances
protected Element mFreeElement = null;
/*
//////////////////////////////////////////////////
// Life-cycle (create, update state)
//////////////////////////////////////////////////
*/
/**
* Creates an empty stack.
*
* @param cfg Reader configuration; stored, and passed on to the
* attribute collector
* @param nsAware Namespace-awareness setting; stored, and passed on to
* the attribute collector
*/
protected InputElementStack(ReaderConfig cfg, boolean nsAware)
{
this.mConfig = cfg;
this.mNsAware = nsAware;
this.mAttrCollector = new AttributeCollector(cfg, nsAware);
}
/**
* Sets the problem reporter this stack holds.
*
* @param rep Reporter to store; replaces any earlier one
*/
protected void connectReporter(InputProblemReporter rep)
{
this.mReporter = rep;
}
/**
* Adds a validator: it becomes the only validator if none was set,
* and is otherwise paired with the existing one.
*
* @param vld Validator to add
* @return The validator passed in
*/
protected XMLValidator addValidator(XMLValidator vld)
{
mValidator = (mValidator == null)
? vld
: new ValidatorPair(mValidator, vld);
return vld;
}
/**
* Method called to connect the automatically handled DTD validator
* (one detected from DOCTYPE, loaded and completely handled by
* the stream reader without application calling validation methods).
* Handled separately, since its behaviour is potentially different
* from that of explicitly added validators.
*/
protected void setAutomaticDTDValidator(XMLValidator validator, NsDefaultProvider nsDefs)
{
// Record the provider of namespace defaults (may be null) first...
this.mNsDefaultProvider = nsDefs;
// ...then register the validator the same way as any other
this.addValidator(validator);
}
/*
//////////////////////////////////////////////////
// Start/stop validation
//////////////////////////////////////////////////
*/
/**
* Creates a validator for the given schema, with this stack as its
* validation context, and adds it to the validators in use.
*
* @param schema Schema to start validating against
* @return The newly created validator
*/
public XMLValidator validateAgainst(XMLValidationSchema schema)
throws XMLStreamException
{
/* No check is made for an already existing validator for the
* same schema; perhaps there should be one?
*/
XMLValidator created = schema.createValidator(this);
return addValidator(created);
}
/**
* Removes the validator that was created for the given schema, if any,
* and tells it that validation is over.
*
* @param schema Schema whose validator should be removed
* @return The removed validator, or null if none matched (including
* the case where no validator is installed at all)
*/
public XMLValidator stopValidatingAgainst(XMLValidationSchema schema)
throws XMLStreamException
{
// Nothing installed, nothing to remove; also keeps a null root away
// from the removal helper
if (mValidator == null) {
return null;
}
XMLValidator[] results = new XMLValidator[2];
if (ValidatorPair.removeValidator(mValidator, schema, results)) { // found
// results[0] is the removed validator, results[1] what is left
XMLValidator found = results[0];
mValidator = results[1];
found.validationCompleted(false);
return found;
}
return null;
}
/**
* Removes the given validator, if it is in use, and tells it that
* validation is over.
*
* @param validator Validator to remove
* @return The removed validator, or null if it was not found (including
* the case where no validator is installed at all)
*/
public XMLValidator stopValidatingAgainst(XMLValidator validator)
throws XMLStreamException
{
// Nothing installed, nothing to remove; this also prevents a null
// argument from "matching" a null root
if (mValidator == null) {
return null;
}
XMLValidator[] results = new XMLValidator[2];
if (ValidatorPair.removeValidator(mValidator, validator, results)) { // found
// results[0] is the removed validator, results[1] what is left
XMLValidator found = results[0];
mValidator = results[1];
found.validationCompleted(false);
return found;
}
return null;
}
/*
//////////////////////////////////////////////////
// Accessors:
//////////////////////////////////////////////////
*/
/**
* This is a method called by the reader to ensure that we have at
* least one 'real' validator. This is only needed to ensure that
* validation problems that the reader can detect (illegal textual
* content) can be reported as validity errors. Since the validator
* API does not have a good way to cleanly deal with such a possibility,
* the check is rather fragile, but should work for now: essentially
* we need at least one validator object that either is not a sub-class
* of <code>DTDValidatorBase</code> or returns true for
* <code>reallyValidating</code>.
*
* !!! TODO: remove need for this method (and method itself) with * Woodstox 4.0, by adding necessary support in Stax2 XMLValidator * interface. */ protected boolean reallyValidating() { if (mValidator == null) { // no validators, no validation // (although, should never get called if no validators) return false; } if (!(mValidator instanceof DTDValidatorBase)) { // note: happens for validator pair, for one return true; } return ((DTDValidatorBase) mValidator).reallyValidating(); } /** * Method called by {@link BasicStreamReader}, to retrieve the * attribute collector it needs for some direct access. */ public final AttributeCollector getAttrCollector() { return mAttrCollector; } /** * Method called to construct a non-transient NamespaceContext instance; * generally needed when creating events to return from event-based * iterators. */ public BaseNsContext createNonTransientNsContext(Location loc) { // Have an instance we can reuse? Great! if (mLastNsContext != null) { return mLastNsContext; } // No namespaces declared at this point? Easy, as well: int totalNsSize = mNamespaces.size(); if (totalNsSize < 1) { return (mLastNsContext = EmptyNamespaceContext.getInstance()); } // Otherwise, we need to create a new non-empty context: int localCount = getCurrentNsCount() << 1; BaseNsContext nsCtxt = new CompactNsContext (loc, getDefaultNsURI(), mNamespaces.asArray(), totalNsSize, totalNsSize - localCount); /* And it can be shared if there are no new ('local', ie. included * within this start element) bindings -- if there are, underlying * array might be shareable, but offsets wouldn't be) */ if (localCount == 0) { mLastNsContext = nsCtxt; } return nsCtxt; } /** * Method called by the stream reader to add new (start) element * into the stack in namespace-aware mode; called when a start element * is encountered during parsing, but only in ns-aware mode. */ public final void push(String prefix, String localName) { ++mDepth; String defaultNs = (mCurrElement == null) ? 
XmlConsts.DEFAULT_NAMESPACE_URI : mCurrElement.mDefaultNsURI; if (mFreeElement == null) { mCurrElement = new Element(mCurrElement, mNamespaces.size(), prefix, localName); } else { Element newElem = mFreeElement; mFreeElement = newElem.mParent; newElem.reset(mCurrElement, mNamespaces.size(), prefix, localName); mCurrElement = newElem; } mCurrElement.mDefaultNsURI = defaultNs; mAttrCollector.reset(); /* 20-Feb-2006, TSa: Hmmh. Namespace default provider unfortunately * needs an advance warning... */ if (mNsDefaultProvider != null) { mMayHaveNsDefaults = mNsDefaultProvider.mayHaveNsDefaults(prefix, localName); } } /** * Method called by the stream reader to remove the topmost (start) * element from the stack; * called when an end element is encountered during parsing. * * @return True if stack has more elements; false if not (that is, * root element closed) */ public final boolean pop() throws XMLStreamException { if (mCurrElement == null) { throw new IllegalStateException("Popping from empty stack"); } --mDepth; Element child = mCurrElement; Element parent = child.mParent; mCurrElement = parent; // Let's do simple recycling of Element instances... child.relink(mFreeElement); mFreeElement = child; // Need to purge namespaces? int nsCount = mNamespaces.size() - child.mNsOffset; if (nsCount > 0) { // 2 entries for each NS mapping: mLastNsContext = null; // let's invalidate ns ctxt too, if we had one mNamespaces.removeLast(nsCount); } return (parent != null); } /** * Method called to resolve element and attribute namespaces (in * namespace-aware mode), and do optional validation using pluggable * validator object. 
* * @return Text content validation state that should be effective * for the fully resolved element context */ public int resolveAndValidateElement() throws XMLStreamException { if (mDepth == 0) { // just a simple sanity check throw new IllegalStateException("Calling validate() on empty stack."); } AttributeCollector ac = mAttrCollector; // Any namespace declarations? { int nsCount = ac.getNsCount(); if (nsCount > 0) { /* let's first invalidate old (possibly) shared ns ctxt too, * if we had one; new one can be created at a later point */ mLastNsContext = null; boolean internNsUris = mConfig.willInternNsURIs(); for (int i = 0; i < nsCount; ++i) { Attribute ns = ac.resolveNamespaceDecl(i, internNsUris); String nsUri = ns.mNamespaceURI; // note: for namespaces, prefix is stored as local name String prefix = ns.mLocalName; /* 18-Jul-2004, TSa: Need to check that 'xml' and 'xmlns' * prefixes are not re-defined (and 'xmlns' not even * defined to its correct ns). */ if (prefix == "xmlns") { // xmlns can never be declared, even to its correct URI mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XMLNS); } else if (prefix == "xml") { // whereas xml is ok, as long as it's same URI: if (!nsUri.equals(XMLConstants.XML_NS_URI)) { mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XML, nsUri, null); } /* 09-Feb-2006, TSa: Hmmh. Now, should this explicit * xml declaration be visible to the app? SAX API * seem to ignore it. 
*/ //mNamespaces.addStrings(prefix, nsUri); } else { // ok, valid prefix, so far /* 17-Mar-2006, TSa: Unbinding default NS needs to * result in null being added: */ if (nsUri == null || nsUri.length() == 0) { nsUri = XmlConsts.DEFAULT_NAMESPACE_URI; } // The default ns binding needs special handling: if (prefix == null) { mCurrElement.mDefaultNsURI = nsUri; } /* But then let's ensure that URIs matching xml * and xmlns are not being bound to anything else */ if (internNsUris) { // identity comparison is ok: if (nsUri == XMLConstants.XML_NS_URI) { mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XML_URI, prefix, null); } else if (nsUri == XMLConstants.XMLNS_ATTRIBUTE_NS_URI) { mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XMLNS_URI); } } else { // need to check equals() if (nsUri.equals(XMLConstants.XML_NS_URI)) { mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XML_URI, prefix, null); } else if (nsUri.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) { mReporter.throwParseError(ErrorConsts.ERR_NS_REDECL_XMLNS_URI); } } /* and at any rate, binding needs to be added, to * be visible to the app (including def ns): */ mNamespaces.addStrings(prefix, nsUri); } } } } /* 20-Feb-2006, TSa: Any attribute defaults for namespace declaration * pseudo-attributes? 
*/ if (mMayHaveNsDefaults) { mNsDefaultProvider.checkNsDefaults(this); } // Then, let's set element's namespace, if any: String prefix = mCurrElement.mPrefix; String ns; if (prefix == null) { // use default NS, if any ns = mCurrElement.mDefaultNsURI; } else if (prefix == "xml") { ns = XMLConstants.XML_NS_URI; } else { // Need to find namespace with the prefix: ns = mNamespaces.findLastFromMap(prefix); /* 07-Sep-2007, TSa: "no namespace" should now be indicated * by an empty string, however, due to historical reasons * let's be bit defensive and allow nulls for the same too */ if (ns == null || ns.length() == 0) { mReporter.throwParseError(ErrorConsts.ERR_NS_UNDECLARED, prefix, null); } } mCurrElement.mNamespaceURI = ns; // And finally, resolve attributes' namespaces too: int xmlidIx = ac.resolveNamespaces(mReporter, mNamespaces); mIdAttrIndex = xmlidIx; XMLValidator vld = mValidator; /* If we have no validator(s), nothing more to do, * except perhaps little bit of Xml:id handling: */ if (vld == null) { // no validator in use if (xmlidIx >= 0) { // need to normalize xml:id, still? ac.normalizeSpacesInValue(xmlidIx); } return XMLValidator.CONTENT_ALLOW_ANY_TEXT; } // Otherwise need to call relevant validation methods. /* First, a call to check if the element itself may be acceptable * within structure: */ vld.validateElementStart (mCurrElement.mLocalName, mCurrElement.mNamespaceURI, mCurrElement.mPrefix); // Then attributes, if any: int attrLen = ac.getCount(); if (attrLen > 0) { for (int i = 0; i < attrLen; ++i) { ac.validateAttribute(i, mValidator); } } /* And finally let's wrap things up to see what textual content * is allowed as child content, if any: */ return mValidator.validateElementAndAttributes(); } /** * Method called after parsing (but before returning) end element, * to allow for pluggable validators to verify correctness of * the content model for the closing element. 
* * @return Validation state that should be effective for the parent * element state */ public int validateEndElement() throws XMLStreamException { if (mValidator == null) { // should never be null if we get here return XMLValidator.CONTENT_ALLOW_ANY_TEXT; } int result = mValidator.validateElementEnd (mCurrElement.mLocalName, mCurrElement.mNamespaceURI, mCurrElement.mPrefix); if (mDepth == 1) { // root closing mValidator.validationCompleted(true); } return result; } /* /////////////////////////////////////////////////// // AttributeInfo methods (StAX2) /////////////////////////////////////////////////// */ public final int getAttributeCount() { return mAttrCollector.getCount(); } public final int findAttributeIndex(String nsURI, String localName) { return mAttrCollector.findIndex(nsURI, localName); } /** * Default implementation just indicates it does not know of such * attributes; this because that requires DTD information that only * some implementations have. */ public final int getIdAttributeIndex() { if (mIdAttrIndex >= 0) { return mIdAttrIndex; } return (mValidator == null) ? -1 : mValidator.getIdAttrIndex(); } /** * Default implementation just indicates it does not know of such * attributes; this because that requires DTD information that only * some implementations have. */ public final int getNotationAttributeIndex() { return (mValidator == null) ? -1 : mValidator.getNotationAttrIndex(); } /* /////////////////////////////////////////////////// // Implementation of NamespaceContext: /////////////////////////////////////////////////// */ public final String getNamespaceURI(String prefix) { if (prefix == null) { throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG); } if (prefix.length() == 0) { if (mDepth == 0) { // unexpected... but let's not err at this point /* 07-Sep-2007, TSa: Default/"no namespace" does map to * "URI" of empty String. 
*/ return XmlConsts.DEFAULT_NAMESPACE_URI; } return mCurrElement.mDefaultNsURI; } if (prefix.equals(XMLConstants.XML_NS_PREFIX)) { return XMLConstants.XML_NS_URI; } if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; } /* Ok, need to find the match, if any; starting from end of the * list of active namespaces. Note that we can not count on prefix * being interned/canonicalized. */ return mNamespaces.findLastNonInterned(prefix); } public final String getPrefix(String nsURI) { if (nsURI == null || nsURI.length() == 0) { throw new IllegalArgumentException("Illegal to pass null/empty prefix as argument."); } if (nsURI.equals(XMLConstants.XML_NS_URI)) { return XMLConstants.XML_NS_PREFIX; } if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) { return XMLConstants.XMLNS_ATTRIBUTE; } /* Ok, need to find the match, if any; starting from end of the * list of active namespaces. Note that we can not count on prefix * being interned/canonicalized. */ String prefix = null; // 29-Sep-2004, TSa: Need to check for namespace masking, too... String[] strs = mNamespaces.getInternalArray(); int len = mNamespaces.size(); main_loop: for (int index = len-1; index > 0; index -= 2) { if (nsURI.equals(strs[index])) { // Ok, is prefix masked? prefix = strs[index-1]; for (int j = index+1; j < len; j += 2) { if (strs[j] == prefix) { // masked! prefix = null; continue main_loop; } } // nah, it's good // 17-Mar-2006, TSa: ... but default NS has prefix null... 
if (prefix == null) { prefix = ""; } break main_loop; } } return prefix; } public final Iterator getPrefixes(String nsURI) { if (nsURI == null || nsURI.length() == 0) { throw new IllegalArgumentException("Illegal to pass null/empty prefix as argument."); } if (nsURI.equals(XMLConstants.XML_NS_URI)) { return new SingletonIterator(XMLConstants.XML_NS_PREFIX); } if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) { return new SingletonIterator(XMLConstants.XMLNS_ATTRIBUTE); } // 29-Sep-2004, TSa: Need to check for namespace masking, too... String[] strs = mNamespaces.getInternalArray(); int len = mNamespaces.size(); ArrayList l = null; main_loop: for (int index = len-1; index > 0; index -= 2) { if (nsURI.equals(strs[index])) { // Ok, is prefix masked? String prefix = strs[index-1]; for (int j = index+1; j < len; j += 2) { if (strs[j] == prefix) { // masked! continue main_loop; } } // nah, it's good! if (l == null) { l = new ArrayList(); } l.add(prefix); } } return (l == null) ? EmptyIterator.getInstance() : l.iterator(); } /* /////////////////////////////////////////////////// // ValidationContext /////////////////////////////////////////////////// */ public final String getXmlVersion() { return mConfig.isXml11() ? XmlConsts.XML_V_11_STR : XmlConsts.XML_V_10_STR; } // Part of Stax2, see above: //public int getAttributeCount(); public String getAttributeLocalName(int index) { return getAttrCollector().getLocalName(index); } public String getAttributeNamespace(int index) { return getAttrCollector().getURI(index); } public String getAttributePrefix(int index) { return getAttrCollector().getPrefix(index); } public String getAttributeValue(int index) { return getAttrCollector().getValue(index); } public String getAttributeValue(String nsURI, String localName) { int ix = findAttributeIndex(nsURI, localName); return (ix < 0) ? 
null : getAttributeValue(ix); } // Part of Stax2, see above: //public int findAttributeIndex(String nsURI, String localName); public boolean isNotationDeclared(String name) { // !!! TBI return false; } public boolean isUnparsedEntityDeclared(String name) { // !!! TBI return false; } public String getBaseUri() { // !!! TBI return null; } public final QName getCurrentElementName() { if (mDepth == 0) { return null; } String prefix = mCurrElement.mPrefix; /* 17-Mar-2006, TSa: We only map prefix to empty String because * some QName impls barf on nulls. Otherwise we will always * use null to indicate missing prefixes. */ if (prefix == null) { prefix = ""; } /* 03-Dec-2004, TSa: Maybe we can just reuse the last QName * object created, if we have same data? (happens if * state hasn't changed, or we got end element for a leaf * element, or repeating leaf elements) */ String nsURI = mCurrElement.mNamespaceURI; String ln = mCurrElement.mLocalName; /* Since we generally intern most Strings, can do identity * comparisons here: */ if (ln != mLastLocalName) { mLastLocalName = ln; mLastPrefix = prefix; mLastNsURI = nsURI; } else if (prefix != mLastPrefix) { mLastPrefix = prefix; mLastNsURI = nsURI; } else if (nsURI != mLastNsURI) { mLastNsURI = nsURI; } else { return mLastName; } QName n = QNameCreator.create(nsURI, ln, prefix); mLastName = n; return n; } // This was defined above for NamespaceContext //public String getNamespaceURI(String prefix); public Location getValidationLocation() { return mReporter.getLocation(); } public void reportProblem(XMLValidationProblem problem) throws XMLStreamException { mReporter.reportValidationProblem(problem); } /** * Method called by actual validator instances when attributes with * default values have no explicit values for the element; if so, * default value needs to be added as if it was parsed from the * element. 
*/ public int addDefaultAttribute(String localName, String uri, String prefix, String value) { return mAttrCollector.addDefaultAttribute(localName, uri, prefix, value); } /* /////////////////////////////////////////////////// // Support for NsDefaultProvider /////////////////////////////////////////////////// */ public boolean isPrefixLocallyDeclared(String internedPrefix) { if (internedPrefix != null && internedPrefix.length() == 0) { // default ns internedPrefix = null; } int offset = mCurrElement.mNsOffset; for (int len = mNamespaces.size(); offset < len; offset += 2) { // both interned, can use identity comparison String thisPrefix = mNamespaces.getString(offset); if (thisPrefix == internedPrefix) { return true; } } return false; } /** * Callback method called by the namespace default provider. At * this point we can trust it to only call this method with somewhat * valid arguments (no dups etc). */ public void addNsBinding(String prefix, String uri) { // Unbind? (xml 1.1...) if ((uri == null) || (uri.length() == 0)) { uri = null; } // Default ns declaration? 
if ((prefix == null) || (prefix.length() == 0)) { prefix = null; mCurrElement.mDefaultNsURI = uri; } mNamespaces.addStrings(prefix, uri); } /* /////////////////////////////////////////////////// // Support for validation: /////////////////////////////////////////////////// */ public final void validateText(TextBuffer tb, boolean lastTextSegment) throws XMLStreamException { tb.validateText(mValidator, lastTextSegment); } public final void validateText(String contents, boolean lastTextSegment) throws XMLStreamException { mValidator.validateText(contents, lastTextSegment); } /* /////////////////////////////////////////////////// // Accessors: /////////////////////////////////////////////////// */ // // // Generic stack information: public final boolean isNamespaceAware() { return mNsAware; } // // // Generic stack information: public final boolean isEmpty() { return mDepth == 0; } /** * @return Number of open elements in the stack; 0 when parser is in * prolog/epilog, 1 inside root element and so on. 
*/ public final int getDepth() { return mDepth; } // // // Information about element at top of stack: public final String getDefaultNsURI() { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } return mCurrElement.mDefaultNsURI; } public final String getNsURI() { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } return mCurrElement.mNamespaceURI; } public final String getPrefix() { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } return mCurrElement.mPrefix; } public final String getLocalName() { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } return mCurrElement.mLocalName; } public final boolean matches(String prefix, String localName) { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } String thisPrefix = mCurrElement.mPrefix; if (prefix == null || prefix.length() == 0) { // no name space if (thisPrefix != null && thisPrefix.length() > 0) { return false; } } else { if (thisPrefix != prefix && !thisPrefix.equals(prefix)) { return false; } } String thisName = mCurrElement.mLocalName; return (thisName == localName) || thisName.equals(localName); } public final String getTopElementDesc() { if (mDepth == 0) { throw new IllegalStateException("Illegal access, empty stack."); } String name = mCurrElement.mLocalName; String prefix = mCurrElement.mPrefix; if (prefix == null) { // no name space return name; } return prefix + ":" + name; } // // // Namespace information: /** * @return Number of active prefix/namespace mappings for current scope, * including mappings from enclosing elements. */ public final int getTotalNsCount() { return mNamespaces.size() >> 1; } /** * @return Number of active prefix/namespace mappings for current scope, * NOT including mappings from enclosing elements. 
*/ public final int getCurrentNsCount() { // Need not check for empty stack; should return 0 properly return (mNamespaces.size() - mCurrElement.mNsOffset) >> 1; } public final String getLocalNsPrefix(int index) { int offset = mCurrElement.mNsOffset; int localCount = (mNamespaces.size() - offset); index <<= 1; // 2 entries, prefix/URI for each NS if (index < 0 || index >= localCount) { throwIllegalIndex(index >> 1, localCount >> 1); } return mNamespaces.getString(offset + index); } public final String getLocalNsURI(int index) { int offset = mCurrElement.mNsOffset; int localCount = (mNamespaces.size() - offset); index <<= 1; // 2 entries, prefix/URI for each NS if (index < 0 || index >= localCount) { throwIllegalIndex(index >> 1, localCount >> 1); } return mNamespaces.getString(offset + index + 1); } private void throwIllegalIndex(int index, int localCount) { throw new IllegalArgumentException("Illegal namespace index " +(index >> 1) +"; current scope only has " +(localCount >> 1) +" namespace declarations."); } // // // DTD-derived attribute information: /** * @return Schema (DTD, RNG, W3C Schema) based type of the attribute * in specified index */ public final String getAttributeType(int index) { if (index == mIdAttrIndex && index >= 0) { // second check to ensure -1 is not passed return "ID"; } return (mValidator == null) ? WstxInputProperties.UNKNOWN_ATTR_TYPE : mValidator.getAttributeType(index); } } woodstox-4.1.3/src/java/com/ctc/wstx/sr/InputProblemReporter.java 0000644 0001750 0001750 00000003275 11745427074 025333 0 ustar giovanni giovanni package com.ctc.wstx.sr; import javax.xml.stream.Location; import javax.xml.stream.XMLStreamException; import org.codehaus.stax2.validation.XMLValidationProblem; /** * Interface implemented by input reader, and used by other components to * report problem that are related to current input position. 
*/ public interface InputProblemReporter { /* //////////////////////////////////////////////////// // Methods for reporting "hard" errors: //////////////////////////////////////////////////// */ public void throwParseError(String msg) throws XMLStreamException; public void throwParseError(String msg, Object arg, Object arg2) throws XMLStreamException; /* /////////////////////////////////////////////////////// // Reporting validation problems /////////////////////////////////////////////////////// */ public void reportValidationProblem(XMLValidationProblem prob) throws XMLStreamException; public void reportValidationProblem(String msg) throws XMLStreamException; public void reportValidationProblem(String msg, Object arg, Object arg2) throws XMLStreamException; /* /////////////////////////////////////////////////////// // Methods for reporting other "soft" (recoverable) problems /////////////////////////////////////////////////////// */ public void reportProblem(Location loc, String probType, String format, Object arg, Object arg2) throws XMLStreamException; /* //////////////////////////////////////////////////// // Supporting methods needed by reporting //////////////////////////////////////////////////// */ public Location getLocation(); } woodstox-4.1.3/src/java/com/ctc/wstx/sr/TypedStreamReader.java 0000644 0001750 0001750 00000066566 11745427074 024570 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.ctc.wstx.sr; import java.math.BigDecimal; import java.math.BigInteger; import javax.xml.namespace.QName; import javax.xml.stream.XMLStreamException; import org.codehaus.stax2.typed.Base64Variant; import org.codehaus.stax2.typed.Base64Variants; import org.codehaus.stax2.typed.TypedArrayDecoder; import org.codehaus.stax2.typed.TypedValueDecoder; import org.codehaus.stax2.typed.TypedXMLStreamException; import org.codehaus.stax2.ri.Stax2Util; import org.codehaus.stax2.ri.typed.ValueDecoderFactory; import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder; import com.ctc.wstx.api.ReaderConfig; import com.ctc.wstx.cfg.ErrorConsts; import com.ctc.wstx.io.BranchingReaderSource; import com.ctc.wstx.io.InputBootstrapper; import com.ctc.wstx.io.WstxInputData; /** * Complete implementation of {@link org.codehaus.stax2.XMLStreamReader2}, * including Typed Access API (Stax2 v3.0) implementation. * Only functionality missing is DTD validation, which is provided by a * specialized sub-class. */ public class TypedStreamReader extends BasicStreamReader { /** * Mask of event types that are legal (starting) states * to call Typed Access API from. * */ final protected static int MASK_TYPED_ACCESS_ARRAY = (1 << START_ELEMENT) | (1 << END_ELEMENT) // for convenience | (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE) // Not ok for PI or COMMENT? Let's assume so ; final protected static int MASK_TYPED_ACCESS_BINARY = (1 << START_ELEMENT) // note: END_ELEMENT handled separately | (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE) ; /** * Minimum length of text chunks to parse before base64 decoding. * Will try to limit it to fit within regular result buffers. */ final static int MIN_BINARY_CHUNK = 2000; /** * Factory used for constructing decoders we need for typed access */ protected ValueDecoderFactory _decoderFactory; /** * Lazily-constructed decoder object for decoding base64 encoded * element binary content. 
*/ protected CharArrayBase64Decoder _base64Decoder = null; /* //////////////////////////////////////////////////// // Instance construction //////////////////////////////////////////////////// */ protected TypedStreamReader(InputBootstrapper bs, BranchingReaderSource input, ReaderCreator owner, ReaderConfig cfg, InputElementStack elemStack, boolean forER) throws XMLStreamException { super(bs, input, owner, cfg, elemStack, forER); } /** * Factory method for constructing readers. * * @param owner "Owner" of this reader, factory that created the reader; * needed for returning updated symbol table information after parsing. * @param input Input source used to read the XML document. * @param cfg Object that contains reader configuration info. */ public static TypedStreamReader createStreamReader (BranchingReaderSource input, ReaderCreator owner, ReaderConfig cfg, InputBootstrapper bs, boolean forER) throws XMLStreamException { TypedStreamReader sr = new TypedStreamReader (bs, input, owner, cfg, createElementStack(cfg), forER); return sr; } /* //////////////////////////////////////////////////////// // TypedXMLStreamReader2 implementation, scalar elements //////////////////////////////////////////////////////// */ public boolean getElementAsBoolean() throws XMLStreamException { ValueDecoderFactory.BooleanDecoder dec = _decoderFactory().getBooleanDecoder(); getElementAs(dec); return dec.getValue(); } public int getElementAsInt() throws XMLStreamException { ValueDecoderFactory.IntDecoder dec = _decoderFactory().getIntDecoder(); getElementAs(dec); return dec.getValue(); } public long getElementAsLong() throws XMLStreamException { ValueDecoderFactory.LongDecoder dec = _decoderFactory().getLongDecoder(); getElementAs(dec); return dec.getValue(); } public float getElementAsFloat() throws XMLStreamException { ValueDecoderFactory.FloatDecoder dec = _decoderFactory().getFloatDecoder(); getElementAs(dec); return dec.getValue(); } public double getElementAsDouble() throws 
XMLStreamException { ValueDecoderFactory.DoubleDecoder dec = _decoderFactory().getDoubleDecoder(); getElementAs(dec); return dec.getValue(); } public BigInteger getElementAsInteger() throws XMLStreamException { ValueDecoderFactory.IntegerDecoder dec = _decoderFactory().getIntegerDecoder(); getElementAs(dec); return dec.getValue(); } public BigDecimal getElementAsDecimal() throws XMLStreamException { ValueDecoderFactory.DecimalDecoder dec = _decoderFactory().getDecimalDecoder(); getElementAs(dec); return dec.getValue(); } public QName getElementAsQName() throws XMLStreamException { ValueDecoderFactory.QNameDecoder dec = _decoderFactory().getQNameDecoder(getNamespaceContext()); getElementAs(dec); return _verifyQName(dec.getValue()); } public final byte[] getElementAsBinary() throws XMLStreamException { return getElementAsBinary(Base64Variants.getDefaultVariant()); } public byte[] getElementAsBinary(Base64Variant v) throws XMLStreamException { // note: code here is similar to Base64DecoderBase.aggregateAll(), see comments there Stax2Util.ByteAggregator aggr = _base64Decoder().getByteAggregator(); byte[] buffer = aggr.startAggregation(); while (true) { int offset = 0; int len = buffer.length; do { int readCount = readElementAsBinary(buffer, offset, len, v); if (readCount < 1) { // all done! return aggr.aggregateAll(buffer, offset); } offset += readCount; len -= readCount; } while (len > 0); buffer = aggr.addFullBlock(buffer); } } public void getElementAs(TypedValueDecoder tvd) throws XMLStreamException { if (mCurrToken != START_ELEMENT) { throwParseError(ErrorConsts.ERR_STATE_NOT_STELEM); } /* Ok, now: with START_ELEMENT we know that it's not partially * processed; that we are in-tree (not prolog or epilog). 
* The only possible complication would be: */ if (mStEmptyElem) { /* And if so, we'll then get 'virtual' close tag; things * are simple as location info was set when dealing with * empty start element; and likewise, validation (if any) * has been taken care of */ mStEmptyElem = false; mCurrToken = END_ELEMENT; _handleEmptyValue(tvd); return; } // First need to find a textual event while (true) { int type = next(); if (type == END_ELEMENT) { _handleEmptyValue(tvd); return; } if (type == COMMENT || type == PROCESSING_INSTRUCTION) { continue; } if (((1 << type) & MASK_GET_ELEMENT_TEXT) == 0) { throwParseError("Expected a text token, got "+tokenTypeDesc(type)+"."); } break; } if (mTokenState < TOKEN_FULL_SINGLE) { readCoalescedText(mCurrToken, false); } /* Ok: then a quick check; if it looks like we are directly * followed by the end tag, we need not construct String * quite yet. */ if ((mInputPtr + 1) < mInputEnd && mInputBuffer[mInputPtr] == '<' && mInputBuffer[mInputPtr+1] == '/') { // Note: next() has validated text, no need for more validation mInputPtr += 2; mCurrToken = END_ELEMENT; /* Can by-pass next(), nextFromTree(), in this case. 
* However, must do decoding first, and only then call
             * readEndElem(), since this latter call may invalidate
             * underlying input buffer (when end tag is at buffer
             * boundary)
             */
            try { // buffer now has all the data
                mTextBuffer.decode(tvd);
            } catch (IllegalArgumentException iae) {
                throw _constructTypeException(iae, mTextBuffer.contentsAsString());
            }
            readEndElem();
            return;
        }

        // Otherwise, we'll need to do slower processing
        int extra = 1 + (mTextBuffer.size() >> 1); // let's add 50% space
        StringBuffer sb = mTextBuffer.contentsAsStringBuffer(extra);
        int type;

        /* Keep appending text segments until the end tag is seen; comments
         * and processing instructions are skipped, and any other event
         * type is reported as a parse error.
         */
        while ((type = next()) != END_ELEMENT) {
            if (((1 << type) & MASK_GET_ELEMENT_TEXT) != 0) {
                if (mTokenState < TOKEN_FULL_SINGLE) {
                    readCoalescedText(type, false);
                }
                mTextBuffer.contentsToStringBuffer(sb);
                continue;
            }
            if (type != COMMENT && type != PROCESSING_INSTRUCTION) {
                throwParseError("Expected a text token, got "+tokenTypeDesc(type)+".");
            }
        }
        // Note: calls next() have validated text, no need for more validation
        String str = sb.toString();
        String tstr = Stax2Util.trimSpaces(str);
        // A null result from trimSpaces() is handled as an empty value
        if (tstr == null) {
            _handleEmptyValue(tvd);
        } else {
            try {
                tvd.decode(tstr);
            } catch (IllegalArgumentException iae) {
                // Failure is reported with the untrimmed text as lexical value
                throw _constructTypeException(iae, str);
            }
        }
    }

    /*
    ////////////////////////////////////////////////////////
    // TypedXMLStreamReader2 implementation, array elements
    ////////////////////////////////////////////////////////
     */

    /**
     * Decodes element content as int values by delegating to
     * {@link #readElementAsArray} with the int array decoder that the
     * decoder factory builds from the given arguments.
     *
     * @param value Array handed to the decoder (presumably the target the
     *   decoded values are stored in; confirm against the decoder)
     * @param from Handed to the decoder together with <code>value</code>
     *   (presumably index of the first slot to fill)
     * @param length Handed to the decoder (presumably maximum number of
     *   values to store)
     *
     * @return Whatever {@link #readElementAsArray} returns: number of
     *   elements decoded, or -1 if no more values can be decoded
     */
    public int readElementAsIntArray(int[] value, int from, int length) throws XMLStreamException
    {
        return readElementAsArray(_decoderFactory().getIntArrayDecoder(value, from, length));
    }

    /**
     * Decodes element content as long values by delegating to
     * {@link #readElementAsArray} with the long array decoder that the
     * decoder factory builds from the given arguments.
     *
     * @param value Array handed to the decoder (presumably the target the
     *   decoded values are stored in; confirm against the decoder)
     * @param from Handed to the decoder together with <code>value</code>
     *   (presumably index of the first slot to fill)
     * @param length Handed to the decoder (presumably maximum number of
     *   values to store)
     *
     * @return Whatever {@link #readElementAsArray} returns: number of
     *   elements decoded, or -1 if no more values can be decoded
     */
    public int readElementAsLongArray(long[] value, int from, int length) throws XMLStreamException
    {
        return readElementAsArray(_decoderFactory().getLongArrayDecoder(value, from, length));
    }

    /**
     * Decodes element content as float values by delegating to
     * {@link #readElementAsArray} with the float array decoder that the
     * decoder factory builds from the given arguments.
     *
     * @param value Array handed to the decoder (presumably the target the
     *   decoded values are stored in; confirm against the decoder)
     * @param from Handed to the decoder together with <code>value</code>
     *   (presumably index of the first slot to fill)
     * @param length Handed to the decoder (presumably maximum number of
     *   values to store)
     *
     * @return Whatever {@link #readElementAsArray} returns: number of
     *   elements decoded, or -1 if no more values can be decoded
     */
    public int readElementAsFloatArray(float[] value, int from, int length) throws XMLStreamException
    {
        return readElementAsArray(_decoderFactory().getFloatArrayDecoder(value, from, length));
    }

    /**
     * Decodes element content as double values by delegating to
     * {@link #readElementAsArray} with the double array decoder that the
     * decoder factory builds from the given arguments.
     *
     * @param value Array handed to the decoder (presumably the target the
     *   decoded values are stored in; confirm against the decoder)
     * @param from Handed to the decoder together with <code>value</code>
     *   (presumably index of the first slot to fill)
     * @param length Handed to the decoder (presumably maximum number of
     *   values to store)
     *
     * @return Whatever {@link #readElementAsArray} returns: number of
     *   elements decoded, or -1 if no more values can be decoded
     */
    public int readElementAsDoubleArray(double[] value, int from, int length) throws XMLStreamException
    {
        return readElementAsArray(_decoderFactory().getDoubleArrayDecoder(value, from, length));
    }

    /**
     * Method called to parse array of primitives.
     *
* !!! 05-Sep-2008, tatu: Current implementation is not optimal * either performance-wise, or from getting accurate Location * for decoding problems. But it works otherwise, and we need * to get Woodstox 4.0 out by the end of the year... so it'll * do, for now. * * @return Number of elements decoded (if any were decoded), or * -1 to indicate that no more values can be decoded. */ public final int readElementAsArray(TypedArrayDecoder dec) throws XMLStreamException { int type = mCurrToken; // First things first: must be acceptable start state: if (((1 << type) & MASK_TYPED_ACCESS_ARRAY) == 0) { throwNotTextualOrElem(type); } // Are we just starting (START_ELEMENT)? if (type == START_ELEMENT) { // Empty? Not common, but can short-cut handling if occurs if (mStEmptyElem) { mStEmptyElem = false; mCurrToken = END_ELEMENT; return -1; } // Otherwise let's just find the first text segment while (true) { type = next(); if (type == END_ELEMENT) { // Simple... no textul content return -1; } if (type == COMMENT || type == PROCESSING_INSTRUCTION) { continue; } if (type == CHARACTERS || type == CDATA) { break; } // otherwise just not legal (how about SPACE, unexpanded entities?) throw _constructUnexpectedInTyped(type); } } int count = 0; while (type != END_ELEMENT) { /* Ok then: we will have a valid textual type. Just need to * ensure current segment is completed. Plus, for current impl, * also need to coalesce to prevent artificial CDATA/text * boundary from splitting tokens */ if (type == CHARACTERS || type == CDATA || type == SPACE) { if (mTokenState < TOKEN_FULL_SINGLE) { readCoalescedText(type, false); } } else if (type == COMMENT || type == PROCESSING_INSTRUCTION) { type = next(); continue; } else { throw _constructUnexpectedInTyped(type); } count += mTextBuffer.decodeElements(dec, this); if (!dec.hasRoom()) { break; } type = next(); } // If nothing was found, needs to be indicated via -1, not 0 return (count > 0) ? 
count : -1; } /* //////////////////////////////////////////////////////// // TypedXMLStreamReader2 implementation, binary data //////////////////////////////////////////////////////// */ public final int readElementAsBinary(byte[] resultBuffer, int offset, int maxLength) throws XMLStreamException { return readElementAsBinary(resultBuffer, offset, maxLength, Base64Variants.getDefaultVariant()); } public int readElementAsBinary(byte[] resultBuffer, int offset, int maxLength, Base64Variant v) throws XMLStreamException { if (resultBuffer == null) { throw new IllegalArgumentException("resultBuffer is null"); } if (offset < 0) { throw new IllegalArgumentException("Illegal offset ("+offset+"), must be [0, "+resultBuffer.length+"["); } if (maxLength < 1 || (offset + maxLength) > resultBuffer.length) { if (maxLength == 0) { // special case, allowed, but won't do anything return 0; } throw new IllegalArgumentException("Illegal maxLength ("+maxLength+"), has to be positive number, and offset+maxLength can not exceed"+resultBuffer.length); } final CharArrayBase64Decoder dec = _base64Decoder(); int type = mCurrToken; // First things first: must be acceptable start state: if (((1 << type) & MASK_TYPED_ACCESS_BINARY) == 0) { if (type == END_ELEMENT) { // Minor complication: may have unflushed stuff (non-padded versions) if (!dec.hasData()) { return -1; } } else { throwNotTextualOrElem(type); } } else if (type == START_ELEMENT) { // just starting (START_ELEMENT)? if (mStEmptyElem) { // empty element? simple... mStEmptyElem = false; mCurrToken = END_ELEMENT; return -1; } // Otherwise let's just find the first text segment while (true) { type = next(); if (type == END_ELEMENT) { // Simple... 
no textual content return -1; } if (type == COMMENT || type == PROCESSING_INSTRUCTION) { continue; } /* 12-Dec-2009, tatu: Important: in coalescing mode we may * have incomplete segment that needs to be completed */ if (mTokenState < mStTextThreshold) { finishToken(false); } _initBinaryChunks(v, dec, type, true); break; } } int totalCount = 0; main_loop: while (true) { // Ok, decode: int count; try { count = dec.decode(resultBuffer, offset, maxLength); } catch (IllegalArgumentException iae) { // !!! 26-Sep-2008, tatus: should try to figure out which char (etc) triggered problem to pass with typed exception throw _constructTypeException(iae.getMessage(), ""); } offset += count; totalCount += count; maxLength -= count; /* And if we filled the buffer we are done. Or, an edge * case: reached END_ELEMENT (for non-padded variant) */ if (maxLength < 1 || mCurrToken == END_ELEMENT) { break; } // Otherwise need to advance to the next event while (true) { type = next(); if (type == COMMENT || type == PROCESSING_INSTRUCTION || type == SPACE) { // space is ignorable too continue; } if (type == END_ELEMENT) { /* Just need to verify we don't have partial stuff * (missing one to three characters of a full quartet * that encodes 1 - 3 bytes). Also: non-padding * variants can be in incomplete state, from which * data may need to be flushed... */ int left = dec.endOfContent(); if (left < 0) { // incomplete, error throw _constructTypeException("Incomplete base64 triplet at the end of decoded content", ""); } else if (left > 0) { // 1 or 2 more bytes of data, loop some more continue main_loop; } // Otherwise, no more data, we are done break main_loop; } /* 12-Dec-2009, tatu: Important: in coalescing mode we may * have incomplete segment that needs to be completed */ if (mTokenState < mStTextThreshold) { finishToken(false); } _initBinaryChunks(v, dec, type, false); break; } } // If nothing was found, needs to be indicated via -1, not 0 return (totalCount > 0) ? 
totalCount : -1; } private final void _initBinaryChunks(Base64Variant v, CharArrayBase64Decoder dec, int type, boolean isFirst) throws XMLStreamException { if (type == CHARACTERS) { if (mTokenState < mStTextThreshold) { mTokenState = readTextSecondary(MIN_BINARY_CHUNK, false) ? TOKEN_FULL_SINGLE : TOKEN_PARTIAL_SINGLE; } } else if (type == CDATA) { if (mTokenState < mStTextThreshold) { mTokenState = readCDataSecondary(MIN_BINARY_CHUNK) ? TOKEN_FULL_SINGLE : TOKEN_PARTIAL_SINGLE; } } else { throw _constructUnexpectedInTyped(type); } mTextBuffer.initBinaryChunks(v, dec, isFirst); } /* /////////////////////////////////////////////////////////// // TypedXMLStreamReader2 implementation, scalar attributes /////////////////////////////////////////////////////////// */ public int getAttributeIndex(String namespaceURI, String localName) { // Note: cut'n pasted from "getAttributeInfo()" if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } return mElementStack.findAttributeIndex(namespaceURI, localName); } public boolean getAttributeAsBoolean(int index) throws XMLStreamException { ValueDecoderFactory.BooleanDecoder dec = _decoderFactory().getBooleanDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public int getAttributeAsInt(int index) throws XMLStreamException { ValueDecoderFactory.IntDecoder dec = _decoderFactory().getIntDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public long getAttributeAsLong(int index) throws XMLStreamException { ValueDecoderFactory.LongDecoder dec = _decoderFactory().getLongDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public float getAttributeAsFloat(int index) throws XMLStreamException { ValueDecoderFactory.FloatDecoder dec = _decoderFactory().getFloatDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public double getAttributeAsDouble(int index) throws XMLStreamException { ValueDecoderFactory.DoubleDecoder dec = 
_decoderFactory().getDoubleDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public BigInteger getAttributeAsInteger(int index) throws XMLStreamException { ValueDecoderFactory.IntegerDecoder dec = _decoderFactory().getIntegerDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public BigDecimal getAttributeAsDecimal(int index) throws XMLStreamException { ValueDecoderFactory.DecimalDecoder dec = _decoderFactory().getDecimalDecoder(); getAttributeAs(index, dec); return dec.getValue(); } public QName getAttributeAsQName(int index) throws XMLStreamException { ValueDecoderFactory.QNameDecoder dec = _decoderFactory().getQNameDecoder(getNamespaceContext()); getAttributeAs(index, dec); return _verifyQName(dec.getValue()); } public void getAttributeAs(int index, TypedValueDecoder tvd) throws XMLStreamException { if (mCurrToken != START_ELEMENT) { throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM); } try { mAttrCollector.decodeValue(index, tvd); } catch (IllegalArgumentException iae) { throw _constructTypeException(iae, mAttrCollector.getValue(index)); } } public int[] getAttributeAsIntArray(int index) throws XMLStreamException { ValueDecoderFactory.IntArrayDecoder dec = _decoderFactory().getIntArrayDecoder(); getAttributeAsArray(index, dec); return dec.getValues(); } public long[] getAttributeAsLongArray(int index) throws XMLStreamException { ValueDecoderFactory.LongArrayDecoder dec = _decoderFactory().getLongArrayDecoder(); getAttributeAsArray(index, dec); return dec.getValues(); } public float[] getAttributeAsFloatArray(int index) throws XMLStreamException { ValueDecoderFactory.FloatArrayDecoder dec = _decoderFactory().getFloatArrayDecoder(); getAttributeAsArray(index, dec); return dec.getValues(); } public double[] getAttributeAsDoubleArray(int index) throws XMLStreamException { ValueDecoderFactory.DoubleArrayDecoder dec = _decoderFactory().getDoubleArrayDecoder(); getAttributeAsArray(index, dec); return dec.getValues(); } /** * 
Method that allows reading contents of an attribute as an array
     * of whitespace-separated tokens, decoded using specified decoder.
     *
     * @param index Index of the attribute to decode
     * @param tad Decoder that is given the tokens
     *
     * @return Number of tokens decoded, 0 if none found
     *
     * @throws IllegalStateException if current event is not START_ELEMENT
     */
    public int getAttributeAsArray(int index, TypedArrayDecoder tad) throws XMLStreamException
    {
        if (mCurrToken != START_ELEMENT) {
            throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
        }
        return mAttrCollector.decodeValues(index, tad, this);
    }

    /**
     * Convenience overload of
     * {@link #getAttributeAsBinary(int, Base64Variant)} that uses the
     * variant returned by <code>Base64Variants.getDefaultVariant()</code>.
     */
    public byte[] getAttributeAsBinary(int index) throws XMLStreamException
    {
        return getAttributeAsBinary(index, Base64Variants.getDefaultVariant());
    }

    /**
     * Decodes base64 encoded value of the attribute at given index.
     *
     * @param index Index of the attribute to decode
     * @param v Base64 variant to decode with
     *
     * @return Decoded bytes, as produced by the attribute collector
     *
     * @throws IllegalStateException if current event is not START_ELEMENT
     */
    public byte[] getAttributeAsBinary(int index, Base64Variant v) throws XMLStreamException
    {
        /* Attribute values are only meaningful while positioned on a
         * start element; fail the same way getAttributeAsArray() does
         * instead of decoding whatever the collector happens to hold.
         */
        if (mCurrToken != START_ELEMENT) {
            throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);
        }
        return mAttrCollector.decodeBinary(index, v, _base64Decoder(), this);
    }

    /*
    /////////////////////////////////////////////////////
    // Internal helper methods
    /////////////////////////////////////////////////////
     */

    /**
     * Method called to verify validity of the parsed QName element
     * or attribute value. At this point binding of a prefixed name
     * (if qname has a prefix) has been verified, and thereby prefix
     * also must be valid (since there must have been a preceding
     * declaration). But local name might still not be a legal
     * well-formed xml name, so let's verify that.
     *
     * @param n Name to verify
     *
     * @return The name passed in, if its local part is valid
     *
     * @throws TypedXMLStreamException if the local part contains a
     *   character that is not legal in a name
     */
    protected QName _verifyQName(QName n)
        throws TypedXMLStreamException
    {
        String ln = n.getLocalPart();
        int ix = WstxInputData.findIllegalNameChar(ln, mCfgNsEnabled, mXml11);
        if (ix >= 0) {
            String prefix = n.getPrefix();
            // Lexical value reported with the problem includes prefix, if any
            String pname = (prefix != null && prefix.length() > 0) ?
                (prefix + ":" +ln) : ln;
            throw _constructTypeException("Invalid local name \""+ln+"\" (character at #"+ix+" is invalid)", pname);
        }
        return n;
    }

    /**
     * Accessor for the value decoder factory; the factory is constructed
     * on first call and the same instance is returned from then on.
     */
    protected ValueDecoderFactory _decoderFactory()
    {
        if (_decoderFactory == null) {
            _decoderFactory = new ValueDecoderFactory();
        }
        return _decoderFactory;
    }

    /**
     * Accessor for the base64 decoder; the decoder is constructed on
     * first call and the same instance is returned from then on.
     */
    protected CharArrayBase64Decoder _base64Decoder()
    {
        if (_base64Decoder == null) {
            _base64Decoder = new CharArrayBase64Decoder();
        }
        return _base64Decoder;
    }

    /**
     * Method called to handle value that has empty String
     * as representation. This will usually either lead to an
     * exception, or parsing to the default value for the
     * type in question (null for nullable types and so on).
     *
     * @param dec Decoder that gets to decide how empty value is handled
     *
     * @throws XMLStreamException (typed) if the decoder rejects the empty
     *   value with an {@link IllegalArgumentException}
     */
    private void _handleEmptyValue(TypedValueDecoder dec)
        throws XMLStreamException
    {
        try { // default action is to throw an exception
            dec.handleEmptyValue();
        } catch (IllegalArgumentException iae) {
            throw _constructTypeException(iae, "");
        }
    }

    /**
     * Method called to wrap or convert given conversion-fail exception
     * into a full {@link TypedXMLStreamException}. The result carries
     * the message of the original exception, the start location of the
     * current event, and the original exception as its cause.
     *
     * @param iae Problem as reported by converter
     * @param lexicalValue Lexical value (element content, attribute value)
     *    that could not be converted successfully.
     */
    protected TypedXMLStreamException _constructTypeException(IllegalArgumentException iae, String lexicalValue)
    {
        return new TypedXMLStreamException(lexicalValue, iae.getMessage(), getStartLocation(), iae);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/sr/CompactNsContext.java 0000644 0001750 0001750 00000020550 11745427074 024417 0 ustar giovanni giovanni
package com.ctc.wstx.sr;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;

import javax.xml.XMLConstants;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import org.codehaus.stax2.ri.EmptyIterator;
import org.codehaus.stax2.ri.SingletonIterator;
// This is unfortunate dependency, but...
import org.codehaus.stax2.ri.evt.NamespaceEventImpl; import com.ctc.wstx.util.BaseNsContext; /** * Simple implementation of separate non-transient namespace context * object. Created for start-element event by transient namespace * instance updated by stream reader. *
* Note about implementation: Location information is only needed (and * only needs to passed) if access is made via extended interface; one * that can return information about actual Namespace event objects. */ public final class CompactNsContext extends BaseNsContext { final Location mLocation; /** * Array that contains 2 Strings for each declared default namespace * (including default namespace declarations); first is the prefix, * second URI. */ final String[] mNamespaces; /** * Number of entries in {@link #mNamespaces} (which is twice the number * of bindings) */ final int mNsLength; /** * Index of first namespace pair in mNamespaces that is declared * in scope of element for which this context was constructed. May be * equal to {@link #mNsLength} (which indicates there are no local * bindings). */ final int mFirstLocalNs; /** * List only needed to support List accessor from start-element event; * created lazily if/as needed. */ transient ArrayList mNsList; public CompactNsContext(Location loc, String defaultNsURI, String[] namespaces, int nsLen, int firstLocal) { mLocation = loc; mNamespaces = namespaces; mNsLength = nsLen; mFirstLocalNs = firstLocal; } /** * @param prefix Non-null, non-empty prefix (base-class verifies these * constraints) to find namespace URI for. */ public String doGetNamespaceURI(String prefix) { /* Let's search from beginning towards end; this way we'll first * find the innermost (or, in case of same-level declaration, last) * declaration for prefix. 
*/ // (note: default namespace will be there too) String[] ns = mNamespaces; if (prefix.length() == 0) { for (int i = mNsLength-2; i >= 0; i -= 2) { if (ns[i] == null) { return ns[i+1]; } } return null; // default ns not bound } for (int i = mNsLength-2; i >= 0; i -= 2) { if (prefix.equals(ns[i])) { return ns[i+1]; } } return null; } public String doGetPrefix(String nsURI) { // Note: base class checks for 'known' problems and prefixes: String[] ns = mNamespaces; int len = mNsLength; main_loop: for (int i = len-1; i > 0; i -= 2) { if (nsURI.equals(ns[i])) { /* 29-Sep-2004, TSa: Actually, need to make sure that this * declaration is not masked by a later declaration. * This happens when same prefix is declared on a later * entry (ie. for child element) */ String prefix = ns[i-1]; for (int j = i+1; j < len; j += 2) { // Prefixes are interned, can do straight equality check if (ns[j] == prefix) { continue main_loop; // was masked! } } String uri = ns[i-1]; /* 19-Mar-2006, TSa: Empty namespaces are represented by * null prefixes; but need to be represented as empty * strings (to distinguish from unbound URIs). */ return (uri == null) ? "" : uri; } } return null; } public Iterator doGetPrefixes(String nsURI) { // Note: base class checks for 'known' problems and prefixes: String[] ns = mNamespaces; int len = mNsLength; String first = null; ArrayList all = null; main_loop: for (int i = len-1; i > 0; i -= 2) { String currNS = ns[i]; if (currNS == nsURI || currNS.equals(nsURI)) { /* 29-Sep-2004, TSa: Need to ensure it's not masked by * a later ns declaration in a child element. */ String prefix = ns[i-1]; for (int j = i+1; j < len; j += 2) { // Prefixes are interned, can do straight equality check if (ns[j] == prefix) { continue main_loop; // was masked, need to ignore } } /* 19-Mar-2006, TSa: Empty namespaces are represented by * null prefixes; but need to be represented as empty * strings (to distinguish from unbound URIs). 
*/ if (prefix == null) { prefix = ""; } if (first == null) { first = prefix; } else { if (all == null) { all = new ArrayList(); all.add(first); } all.add(prefix); } } } if (all != null) { return all.iterator(); } if (first != null) { return new SingletonIterator(first); } return EmptyIterator.getInstance(); } /* /////////////////////////////////////////////////////// // Extended API, needed by Wstx classes /////////////////////////////////////////////////////// */ public Iterator getNamespaces() { if (mNsList == null) { int firstLocal = mFirstLocalNs; int len = mNsLength - firstLocal; if (len == 0) { // can this happen? return EmptyIterator.getInstance(); } if (len == 2) { // only one NS return new SingletonIterator(NamespaceEventImpl.constructNamespace (mLocation, mNamespaces[firstLocal], mNamespaces[firstLocal+1])); } ArrayList l = new ArrayList(len >> 1); String[] ns = mNamespaces; for (len = mNsLength; firstLocal < len; firstLocal += 2) { l.add(NamespaceEventImpl.constructNamespace(mLocation, ns[firstLocal], ns[firstLocal+1])); } mNsList = l; } return mNsList.iterator(); } /** * Method called by {@link com.ctc.wstx.evt.CompactStartElement} * to output all 'local' namespace declarations active in current * namespace scope, if any. Local means that declaration was done in * scope of current element, not in a parent element. 
*/ public void outputNamespaceDeclarations(Writer w) throws IOException { String[] ns = mNamespaces; for (int i = mFirstLocalNs, len = mNsLength; i < len; i += 2) { w.write(' '); w.write(XMLConstants.XMLNS_ATTRIBUTE); String prefix = ns[i]; if (prefix != null && prefix.length() > 0) { w.write(':'); w.write(prefix); } w.write("=\""); w.write(ns[i+1]); w.write('"'); } } public void outputNamespaceDeclarations(XMLStreamWriter w) throws XMLStreamException { String[] ns = mNamespaces; for (int i = mFirstLocalNs, len = mNsLength; i < len; i += 2) { String nsURI = ns[i+1]; String prefix = ns[i]; if (prefix != null && prefix.length() > 0) { w.writeNamespace(prefix, nsURI); } else { w.writeDefaultNamespace(nsURI); } } } } woodstox-4.1.3/src/java/com/ctc/wstx/sr/ElemCallback.java 0000644 0001750 0001750 00000001103 11745427074 023453 0 ustar giovanni giovanni package com.ctc.wstx.sr; import javax.xml.stream.Location; import javax.xml.namespace.QName; import com.ctc.wstx.util.BaseNsContext; /** * Abstract base class that defines set of simple callbacks to be * called by the stream reader, passing information about element * that the stream currently points to, if any. */ public abstract class ElemCallback { public abstract Object withStartElement(Location loc, QName name, BaseNsContext nsCtxt, ElemAttrs attrs, boolean wasEmpty); } woodstox-4.1.3/src/java/com/ctc/wstx/sr/NsDefaultProvider.java 0000644 0001750 0001750 00000003452 11745427074 024565 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.sr;

import javax.xml.stream.XMLStreamException;

/**
 * Interface only used by Woodstox core. The main reason for the interface
 * is to reduce coupling with the input element stack and dtd validator
 * instances: while dtd validator needs to be able to inject namespace
 * declarations based on attribute default values, it should not have to
 * know too much about element stack implementation, and vice versa.
 * As a result, this interface defines API input element stack calls
 * on the dtd validator instance. Validator instance then refers to the
 * input element stack base class to do callbacks if and as necessary.
 */
public interface NsDefaultProvider
{
    /**
     * NOTE(review): presumably tells whether the element with given
     * prefix and local name may get namespace declarations from
     * attribute default values (inferred from the name and the interface
     * description above); confirm against implementations.
     *
     * @param elemPrefix Prefix of the element name
     * @param elemLN Local name of the element
     */
    public boolean mayHaveNsDefaults(String elemPrefix, String elemLN);

    /**
     * Method called by the input element stack to indicate that
     * it has just added local namespace declarations from the
     * current element, and is about to start resolving element
     * and attribute namespace bindings. This provider instance is
     * to add namespace declarations from attribute defaults, if
     * any, using callbacks to the input element stack.
     *
     * @param nsStack Input element stack to use for callbacks
     */
    public void checkNsDefaults(InputElementStack nsStack)
        throws XMLStreamException;
}
woodstox-4.1.3/src/java/com/ctc/wstx/cfg/ 0000755 0001750 0001750 00000000000 11756143457 020433 5 ustar giovanni giovanni
woodstox-4.1.3/src/java/com/ctc/wstx/cfg/ParsingErrorMsgs.java 0000644 0001750 0001750 00000002443 11745427074 024546 0 ustar giovanni giovanni
package com.ctc.wstx.cfg;

/**
 * Constant interface that contains message fragments: each
 * <code>SUFFIX_IN_xxx</code> constant names a context (" in ..."),
 * and per section comment below they are used for end-of-input problems.
 */
public interface ParsingErrorMsgs
{
    // // // EOF problems:

    final static String SUFFIX_IN_ATTR_VALUE = " in attribute value";
    final static String SUFFIX_IN_DEF_ATTR_VALUE = " in attribute default value";
    final static String SUFFIX_IN_CDATA = " in CDATA section";
    final static String SUFFIX_IN_CLOSE_ELEMENT = " in end tag";
    final static String SUFFIX_IN_COMMENT = " in comment";
    final static String SUFFIX_IN_DTD = " in DOCTYPE declaration";
    final static String SUFFIX_IN_DTD_EXTERNAL = " in external DTD subset";
    final static String SUFFIX_IN_DTD_INTERNAL = " in internal DTD subset";
    final static String SUFFIX_IN_DOC = " in main document content";
    final static String SUFFIX_IN_ELEMENT = " in start tag";
    final static String SUFFIX_IN_ENTITY_REF = " in entity reference";
    final static String SUFFIX_IN_EPILOG = " in epilog";
    final static String SUFFIX_IN_NAME = " in name token";
    final static String SUFFIX_IN_PROC_INSTR = " in processing instruction";
    final static String SUFFIX_IN_PROLOG = " in prolog";
    final static String SUFFIX_IN_TEXT = " in document text content";
    final static String SUFFIX_IN_XML_DECL = " in xml declaration";

    // Unlike others, this one describes what was expected, not the context
    final static String SUFFIX_EOF_EXP_NAME = "; expected an identifier";
}
woodstox-4.1.3/src/java/com/ctc/wstx/cfg/package.html 0000644 0001750 0001750 00000000123 11745427074 022706 0 ustar giovanni giovanni
Package that contains internal configuration settings for Woodstox.
woodstox-4.1.3/src/java/com/ctc/wstx/cfg/OutputConfigFlags.java 0000644 0001750 0001750 00000007603 11745427074 024705 0 ustar giovanni giovanni
package com.ctc.wstx.cfg;

/**
 * Constant interface that contains configuration flags used by output
 * classes internally, for presenting on/off configuration options.
 * Each flag is a distinct single bit, so flags can be combined into
 * one int value.
 */
public interface OutputConfigFlags
{
    /**
     * Flag that indicates whether writer is namespace-aware or not; if not,
     * only local part is ever used.
     */
    final static int CFG_ENABLE_NS = 0x0001;

    /// Flag that indicates that output class should auto-generate namespace prefixes as necessary.
    final static int CFG_AUTOMATIC_NS = 0x0002;

    /// Flag that indicates we can output 'automatic' empty elements.
    final static int CFG_AUTOMATIC_EMPTY_ELEMENTS = 0x0004;

    /**
     * Whether writer should just automatically convert all calls that
     * would normally produce CDATA to produce (quoted) text.
     */
    final static int CFG_OUTPUT_CDATA_AS_TEXT = 0x0008;

    /**
     * Flag that indicates whether attributes expanded from default attribute
     * values should be copied to output, when using copy methods.
     */
    final static int CFG_COPY_DEFAULT_ATTRS = 0x0010;

    /**
     * Flag that indicates whether CR (\r, ascii 13) characters occurring
     * in text (CHARACTERS) and attribute values should be escaped using
     * character entities or not. Escaping is needed to enable seamless
     * round-tripping (preserving CR characters).
     */
    final static int CFG_ESCAPE_CR = 0x0020;

    /**
     * Flag that indicates
     * whether writer is to add a single white space before closing "/>"
     * of the empty element or not. It is sometimes useful to add to
     * increase compatibility with HTML browsers, or to increase
     * readability.
     */
    final static int CFG_ADD_SPACE_AFTER_EMPTY_ELEM = 0x0040;

    /**
     * Flag that indicates we can automatically output the
     * end elements needed to close the logical output tree when
     * stream writer is closed (by closing it explicitly, or by writing
     * end-document event)
     *
     * @since 3.2.8
     */
    final static int CFG_AUTOMATIC_END_ELEMENTS = 0x0080;

    /// Flag that indicates we should check validity of output XML structure.
    final static int CFG_VALIDATE_STRUCTURE = 0x0100;

    /**
     * Flag that indicates we should check validity of textual content of
     * nodes that have constraints.
     *<p>
     * Specifically: comments can not have '--', CDATA sections can not
     * have ']]>' and processing instruction can not have '?>' character
     * combinations in content passed in.
     */
    final static int CFG_VALIDATE_CONTENT = 0x0200;

    /**
     * Flag that indicates we should check validity of names (element and
     * attribute names and prefixes; processing instruction names), that they
     * contain only legal identifier characters.
     */
    final static int CFG_VALIDATE_NAMES = 0x0400;

    /**
     * Flag that indicates we should check uniqueness of attribute names,
     * to prevent accidental output of duplicate attributes.
     */
    final static int CFG_VALIDATE_ATTR = 0x0800;

    /**
     * Flag that will enable writer that checks for validity of content
     * to try to fix the problem, by splitting output segments as
     * necessary. If disabled, validation will throw an exception; and
     * without validation no problem is noticed by writer (but instead
     * invalid output is created).
     */
    final static int CFG_FIX_CONTENT = 0x1000;

    /**
     * Property that enables/disables stream writer to close the underlying
     * output target, either when it is asked to (.close() is called), or
     * when it doesn't need it any more (reaching EOF, hitting an
     * unrecoverable exception).
     * As per Stax 1.0 specification, automatic closing is NOT enabled by
     * default; except if the caller has no access to the target (i.e.
     * when factory created it)
     */
    final static int CFG_AUTO_CLOSE_OUTPUT = 0x2000;
}
woodstox-4.1.3/src/java/com/ctc/wstx/cfg/XmlConsts.java 0000644 0001750 0001750 00000004102 11745427074 023223 0 ustar giovanni giovanni
package com.ctc.wstx.cfg;

/**
 * Simple constant container interface, shared by input and output
 * sides.
 */
public interface XmlConsts
{
    // // // Constants for XML declaration

    public final static String XML_DECL_KW_ENCODING = "encoding";
    public final static String XML_DECL_KW_VERSION = "version";
    public final static String XML_DECL_KW_STANDALONE = "standalone";

    public final static String XML_V_10_STR = "1.0";
    public final static String XML_V_11_STR = "1.1";

    /**
     * This constant refers to cases where the version has not been
     * declared explicitly; and needs to be considered to be 1.0.
     */
    public final static int XML_V_UNKNOWN = 0x0000;

    public final static int XML_V_10 = 0x0100;
    public final static int XML_V_11 = 0x0110;

    public final static String XML_SA_YES = "yes";
    public final static String XML_SA_NO = "no";

    // // // Stax specs mandates some settings: but since exact
    // // // definitions have been re-interpreted a few times,
    // // // let's isolate them in a single place

    /* 13-Mar-2008, TSa: As per latest reading of Stax specs,
     *   all of these are expected to be "", not null.
     */

    public final static String ELEM_NO_NS_URI = "";

    public final static String ATTR_NO_NS_URI = "";

    public final static String ELEM_NO_PREFIX = "";

    public final static String ATTR_NO_PREFIX = "";

    /**
     * Top-most namespace URI assigned for root element, if not specifically
     * defined (default namespace unbound).
     *<p>
* As per Stax specs, related clarifying discussion on * the mailing list, and especially JDK 1.6 definitions * in {@link javax.xml.XMLConstants} constants, empty String * should be used instead of null. */ public final static String DEFAULT_NAMESPACE_URI = ELEM_NO_NS_URI; // // // Well, these are not strictly xml constants, but for // // // now can live here /** * This constant defines the highest Unicode character allowed * in XML content. */ final static int MAX_UNICODE_CHAR = 0x10FFFF; } woodstox-4.1.3/src/java/com/ctc/wstx/cfg/ErrorConsts.java 0000644 0001750 0001750 00000021202 11745427074 023554 0 ustar giovanni giovanni package com.ctc.wstx.cfg; import javax.xml.XMLConstants; import javax.xml.stream.XMLStreamConstants; /** * "Static" class that contains error message constants. Note that the * error message constants are NOT made final; reason is that doing so * would make compiler inline them in other classes. Doing so would increase * class size (although not mem usage -- Strings do get interned), with * minimal performance impact. */ public class ErrorConsts implements XMLStreamConstants { // // // Types of warnings we issue via XMLReporter public static String WT_ENT_DECL = "entity declaration"; public static String WT_ELEM_DECL = "element declaration"; public static String WT_ATTR_DECL = "attribute declaration"; public static String WT_XML_DECL = "xml declaration"; public static String WT_DT_DECL = "doctype declaration"; public static String WT_NS_DECL = "namespace declaration"; /** * This is the generic type for warnings based on XMLValidationProblem * objects. 
*/ public static String WT_VALIDATION = "schema validation"; // // And then warning strings public static String W_UNDEFINED_ELEM = "Undefined element \"{0}\"; referred to by attribute(s)"; public static String W_MIXED_ENCODINGS = "Inconsistent text encoding; declared as \"{0}\" in xml declaration, application had passed \"{1}\""; public static String W_MISSING_DTD = "Missing DOCTYPE declaration in validating mode; can not validate elements or attributes"; public static String W_DTD_DUP_ATTR = "Attribute \"{0}\" (for element <{1}>) declared multiple times"; public static String W_DTD_ATTR_REDECL = "Attribute \"{0}\" already declared for element <{1}>; ignoring re-declaration"; // // // Generic errors: public static String ERR_INTERNAL = "Internal error"; public static String ERR_NULL_ARG = "Illegal to pass null as argument"; public static String ERR_UNKNOWN_FEATURE = "Unrecognized feature \"{0}\""; // // // Wrong reader state: public static String ERR_STATE_NOT_STELEM = "Current event not START_ELEMENT"; public static String ERR_STATE_NOT_ELEM = "Current event not START_ELEMENT or END_ELEMENT"; public static String ERR_STATE_NOT_PI = "Current event not PROCESSING_INSTRUCTION"; public static String ERR_STATE_NOT_ELEM_OR_TEXT = "Current event ({0}) not START_ELEMENT, END_ELEMENT, CHARACTERS or CDATA"; // // // XML declaration related problems public static String ERR_XML_10_VS_11 = "XML 1.0 document can not refer to XML 1.1 parsed external entities"; // // // Structural problems, prolog/epilog: public static String ERR_DTD_IN_EPILOG = "Can not have DOCTYPE declaration in epilog"; public static String ERR_DTD_DUP = "Duplicate DOCTYPE declaration"; public static String ERR_CDATA_IN_EPILOG = " (CDATA not allowed in prolog/epilog)"; // // // Illegal input: public static String ERR_HYPHENS_IN_COMMENT = "String '--' not allowed in comment (missing '>'?)"; public static String ERR_BRACKET_IN_TEXT = "String ']]>' not allowed in textual content, except as the end marker of 
CDATA section"; // // // Generic parsing errors: public static String ERR_UNEXP_KEYWORD = "Unexpected keyword \"{0}\"; expected \"{1}\""; public static String ERR_WF_PI_MISSING_TARGET = "Missing processing instruction target"; public static String ERR_WF_PI_XML_TARGET = "Illegal processing instruction target (\"{0}\"); 'xml' (case insensitive) is reserved by the specs."; public static String ERR_WF_PI_XML_MISSING_SPACE = "excepted either space or \"?>\" after PI target"; // // // Entity problems: public static String ERR_WF_ENTITY_EXT_DECLARED = "Entity \"{0}\" declared externally, but referenced from a document declared 'standalone=\"yes\"'"; public static String ERR_WF_GE_UNDECLARED = "Undeclared general entity \"{0}\""; public static String ERR_WF_GE_UNDECLARED_SA = "Undeclared general entity \"{0}\" (document in stand-alone mode; perhaps declared externally?)"; // // // Namespace problems: public static String ERR_NS_UNDECLARED = "Undeclared namespace prefix \"{0}\""; public static String ERR_NS_UNDECLARED_FOR_ATTR = "Undeclared namespace prefix \"{0}\" (for attribute \"{1}\")"; public static String ERR_NS_REDECL_XML = "Trying to redeclare prefix 'xml' from its default URI '" +XMLConstants.XML_NS_URI +"' to \"{0}\""; public static String ERR_NS_REDECL_XMLNS = "Trying to declare prefix 'xmlns' (illegal as per NS 1.1 #4)"; public static String ERR_NS_REDECL_XML_URI = "Trying to bind URI '" +XMLConstants.XML_NS_URI+" to prefix \"{0}\" (can only bind to 'xml')"; public static String ERR_NS_REDECL_XMLNS_URI = "Trying to bind URI '" +XMLConstants.XMLNS_ATTRIBUTE_NS_URI+" to prefix \"{0}\" (can not be explicitly bound)"; public static String ERR_NS_EMPTY = "Non-default namespace can not map to empty URI (as per Namespace 1.0 # 2) in XML 1.0 documents"; // // // DTD-specific: public static String ERR_DTD_MAINLEVEL_KEYWORD = "; expected a keyword (ATTLIST, ELEMENT, ENTITY, NOTATION), comment, or conditional section"; public static String ERR_DTD_ATTR_TYPE = "; expected 
one of type (CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES NOTATION, NMTOKEN or NMTOKENS)"; public static String ERR_DTD_DEFAULT_TYPE = "; expected #REQUIRED, #IMPLIED or #FIXED"; public static String ERR_DTD_ELEM_REDEFD = "Trying to redefine element \"{0}\" (originally defined at {1})"; public static String ERR_DTD_NOTATION_REDEFD = "Trying to redefine notation \"{0}\" (originally defined at {1})"; public static String ERR_DTD_UNDECLARED_ENTITY = "Undeclared {0} entity \"{1}\""; public static String ERR_DTD_XML_SPACE = "Attribute xml:space has to be defined of type enumerated, and have 1 or 2 values, 'default' and/or 'preserve'"; public static String ERR_DTD_XML_ID = "Attribute xml:id has to have attribute type of ID, as per Xml:id specification"; // // // DTD-validation: public static String ERR_VLD_UNKNOWN_ELEM = "Undefined element <{0}> encountered"; public static String ERR_VLD_EMPTY = "Element <{0}> has EMPTY content specification; can not contain {1}"; public static String ERR_VLD_NON_MIXED = "Element <{0}> has non-mixed content specification; can not contain non-white space text, or any CDATA sections"; public static String ERR_VLD_ANY = "Element <{0}> has ANY content specification; can not contain {1}"; public static String ERR_VLD_UNKNOWN_ATTR = "Element <{0}> has no attribute \"{1}\""; public static String ERR_VLD_WRONG_ROOT = "Unexpected root element <{0}>; expected <{0}> as per DOCTYPE declaration"; // // // Output problems: public static String WERR_PROLOG_CDATA = "Trying to output a CDATA block outside main element tree (in prolog or epilog)"; public static String WERR_PROLOG_NONWS_TEXT = "Trying to output non-whitespace characters outside main element tree (in prolog or epilog)"; public static String WERR_PROLOG_SECOND_ROOT = "Trying to output second root, <{0}>"; public static String WERR_CDATA_CONTENT = "Illegal input: CDATA block has embedded ']]>' in it (index {0})"; public static String WERR_COMMENT_CONTENT = "Illegal input: comment content has 
embedded '--' in it (index {0})"; public static String WERR_ATTR_NO_ELEM = "Trying to write an attribute when there is no open start element."; public static String WERR_NAME_EMPTY = "Illegal to pass empty name"; public static String WERR_NAME_ILLEGAL_FIRST_CHAR = "Illegal first name character {0}"; public static String WERR_NAME_ILLEGAL_CHAR = "Illegal name character {0}"; /* //////////////////////////////////////////////////// // Utility methods //////////////////////////////////////////////////// */ public static String tokenTypeDesc(int type) { switch (type) { case START_ELEMENT: return "START_ELEMENT"; case END_ELEMENT: return "END_ELEMENT"; case START_DOCUMENT: return "START_DOCUMENT"; case END_DOCUMENT: return "END_DOCUMENT"; case CHARACTERS: return "CHARACTERS"; case CDATA: return "CDATA"; case SPACE: return "SPACE"; case COMMENT: return "COMMENT"; case PROCESSING_INSTRUCTION: return "PROCESSING_INSTRUCTION"; case DTD: return "DTD"; case ENTITY_REFERENCE: return "ENTITY_REFERENCE"; } return "["+type+"]"; } } woodstox-4.1.3/src/java/com/ctc/wstx/cfg/InputConfigFlags.java 0000644 0001750 0001750 00000015154 11745427074 024504 0 ustar giovanni giovanni package com.ctc.wstx.cfg; /** * Constant interface that contains configuration flag used by parser * and parser factory, as well as some other input constants. */ public interface InputConfigFlags { /* ////////////////////////////////////////////////////// // Flags for standard StAX features: ////////////////////////////////////////////////////// */ // // // Namespace handling: /** * If true, parser will handle namespaces according to XML specs; if * false, will only pass them as part of element/attribute name value * information. */ final static int CFG_NAMESPACE_AWARE = 0x0001; // // // Text normalization /// Flag that indicates iterator should coalesce all text segments. 
final static int CFG_COALESCE_TEXT = 0x0002;

    // // // Entity handling

    /**
     * Flag that enables automatic replacement of internal entities
     */
    final static int CFG_REPLACE_ENTITY_REFS = 0x0004;

    /**
     * Flag that enables support for expanding external entities. Woodstox
     * pretty much ignores the setting, since effectively it is irrelevant,
     * as {@link #CFG_REPLACE_ENTITY_REFS} and {@link #CFG_SUPPORT_DTD}
     * both need to be enabled for external entities to be supported.
     */
    final static int CFG_SUPPORT_EXTERNAL_ENTITIES = 0x0008;

    // // // DTD handling

    /**
     * Whether DTD handling is enabled or disabled; disabling means both
     * internal and external subsets will just be skipped unprocessed.
     */
    final static int CFG_SUPPORT_DTD = 0x0010;

    /**
     * Not yet (fully) supported; added as the placeholder
     */
    final static int CFG_VALIDATE_AGAINST_DTD = 0x0020;

    // // Note: can add 2 more 'standard' flags here...

    /*
    //////////////////////////////////////////////////////
    // Flags for StAX2 features
    //////////////////////////////////////////////////////
     */

    /**
     * If true, parser will report (ignorable) white space events in prolog
     * and epilog; if false, it will silently ignore them.
     */
    final static int CFG_REPORT_PROLOG_WS = 0x0100;

    // // // Type conversions:

    /**
     * If true, parser will accurately report CDATA event as such (unless
     * coalescing); otherwise will always report them as CHARACTERS
     * independent of coalescing settings.
     */
    final static int CFG_REPORT_CDATA = 0x0200;

    // // // String interning:

    /**
     * If true, will guarantee that all names (attribute/element local names)
     * have been intern()ed. If false, this is not guaranteed although
     * implementation may still choose to do it.
     */
    final static int CFG_INTERN_NAMES = 0x0400;

    /**
     * If true, will call intern() on all namespace URIs parsed; otherwise
     * will just use 'regular' Strings created from parsed contents. Interning
     * makes namespace-based access faster, but has initial overhead of
     * intern() call.
     */
    final static int CFG_INTERN_NS_URIS = 0x0800;

    // // // Lazy/incomplete parsing

    /**
     * Property that determines whether Event objects created will
     * contain (accurate) {@link javax.xml.stream.Location} information or not. If not,
     * Location may be null or a fixed location (beginning of main
     * XML file).
     *<p>
     * Note, however, that the underlying parser will still keep track
     * of location information for error reporting purposes; it's only
     * Event objects that are affected.
     */
    final static int CFG_PRESERVE_LOCATION = 0x1000;

    // // // Input source handling

    /**
     * Property that enables/disables stream reader to close the underlying
     * input source, either when it is asked to (.close() is called), or
     * when it doesn't need it any more (reaching EOF, hitting an
     * unrecoverable exception).
     * As per Stax 1.0 specification, automatic closing is NOT enabled by
     * default; except if the caller has no access to the target (i.e.
     * when factory created it)
     */
    final static int CFG_AUTO_CLOSE_INPUT = 0x2000;

    /*
    //////////////////////////////////////////////////////
    // Flags for Woodstox-specific features
    //////////////////////////////////////////////////////
     */

    // // // Content normalization

    // 20-Jan-2007, TSa: These properties removed from 4.0, deprecated:
    final static int CFG_NORMALIZE_LFS = 0x4000;
    //final static int CFG_NORMALIZE_ATTR_VALUES = 0x8000;

    // // // Caching

    /**
     * If true, input factory is allowed to cache parsed external DTD subsets,
     * potentially speeding up things for which DTDs are needed for: entity
     * substitution, attribute defaulting, and of course DTD-based validation.
     */
    final static int CFG_CACHE_DTDS = 0x00010000;

    /**
     * If true, key used for matching DTD subsets can be the public id,
     * if false, only system id can be used.
     */
    final static int CFG_CACHE_DTDS_BY_PUBLIC_ID = 0x00020000;

    // // // Lazy/incomplete parsing

    /**
     * If true, input factory can defer parsing of nodes until data is
     * actually needed; if false, it has to read all the data in right
     * away when next type is requested. Setting it to true is good for
     * performance, in the cases where some of the nodes (like comments,
     * processing instructions, or whole subtrees) are ignored. Otherwise
     * setting will not make much of a difference.
     * Downside is that error
     * reporting is also done 'lazily'; not right away when getting the next
     * event type but when either accessing data, or skipping it.
     */
    final static int CFG_LAZY_PARSING = 0x00040000;

    // // // Validation support

    // DTD++ support

    /**
     * If true, DTD-parser will recognize DTD++ features, and the validator
     * will also use any such information found from DTD when DTD validation
     * is enabled.
     */
    final static int CFG_SUPPORT_DTDPP = 0x00080000;

    // Automatic W3C Schema support?
    //final static int CFG_AUTOMATIC_W3C_SCHEMA = 0x00100000;

    // // // Xml:id support

    /**
     * If true, xml:id attribute type assignment and matching checks will
     * be done as per Xml:id specification. Needs to be enabled for xml:id
     * uniqueness checks to function properly
     */
    final static int CFG_XMLID_TYPING = 0x00200000;

    /**
     * If true, xml:id attribute uniqueness constraints are enforced, even
     * if not validating against DTD otherwise.
     */
    final static int CFG_XMLID_UNIQ_CHECKS = 0x00400000;

    /**
     * If true, the XML parser will treat character references as entities.
     */
    final static int CFG_TREAT_CHAR_REFS_AS_ENTS = 0x00800000;
}
woodstox-4.1.3/src/java/com/ctc/wstx/api/ 0000755 0001750 0001750 00000000000 11756143457 020445 5 ustar giovanni giovanni
woodstox-4.1.3/src/java/com/ctc/wstx/api/WstxOutputProperties.java 0000644 0001750 0001750 00000017020 11745427074 025551 0 ustar giovanni giovanni
package com.ctc.wstx.api;

/**
 * Class that contains constants for property names used to configure
 * cursor and event writers produced by Wstx implementation of
 * {@link javax.xml.stream.XMLOutputFactory}.
 *<p>
*/
public final class WstxOutputProperties
{
    /**
     * Default xml version number output, if none was specified by
     * application. Version 1.0 is used
     * to try to maximize compatibility (some older parsers
     * may barf on 1.1 and later...)
     */
    public final static String DEFAULT_XML_VERSION = "1.0";

    /**
     * If no encoding is passed, we should just default to what xml
     * in general expects (and can determine), UTF-8.
     *<p>
     * Note: you can check out bug entry [WSTX-18] for more details
     */
    public final static String DEFAULT_OUTPUT_ENCODING = "UTF-8";

    // // // Output options, simple on/off settings:

    /**
     * Whether writer should just automatically convert all calls that
     * would normally produce CDATA to produce (quoted) text.
     */
    public final static String P_OUTPUT_CDATA_AS_TEXT = "com.ctc.wstx.outputCDataAsText";

    /**
     * Whether writer should copy attributes that were initially expanded
     * using default settings ("implicit" attributes) or not.
     */
    public final static String P_COPY_DEFAULT_ATTRS = "com.ctc.wstx.copyDefaultAttrs";

    /**
     * Whether writer is to add a single white space before closing "/>"
     * of the empty element or not. It is sometimes useful to add to
     * increase compatibility with HTML browsers, or to increase
     * readability.
     *<p>
     * The default value is 'false', up to Woodstox 4.x.
     *<p>
     * NOTE: JavaDocs for versions 4.0.0 - 4.0.7 incorrectly state that
     * default is 'true': this is NOT the case.
     *<p>
     * Note: added to resolve Jira entry WSTX-125.
     */
    public final static String P_ADD_SPACE_AFTER_EMPTY_ELEM = "com.ctc.wstx.addSpaceAfterEmptyElem";

    /**
     * Whether stream writer is to automatically add end elements that are
     * needed to properly close the output tree, when the stream is closed
     * (either explicitly by a call to <code>close</code> or
     * <code>closeCompletely</code>, or implicitly by a call
     * to <code>writeEndDocument</code>).
     *<p>
     * The default value is 'true' as of Woodstox 4.x.
     * (Prior to 4.0, this feature was always enabled and there was no
     * way to disable it)
     *
     * @since 3.2.8
     */
    public final static String P_AUTOMATIC_END_ELEMENTS = "com.ctc.wstx.automaticEndElements";

    // // // Validation options:

    /**
     * Whether output classes should do basic verification that the output
     * structure is well-formed (start and end elements match); that
     * there is one and only one root, and that there is no textual content
     * in prolog/epilog. If false, won't do any checking regarding structure.
     */
    public final static String P_OUTPUT_VALIDATE_STRUCTURE = "com.ctc.wstx.outputValidateStructure";

    /**
     * Whether output classes should verify that the textual
     * content output as part of nodes is valid,
     * if there's a possibility of invalid content. Nodes that include
     * such constraints are: comment/'--', cdata/']]>',
     * proc. instr/'?>'.
     */
    public final static String P_OUTPUT_VALIDATE_CONTENT = "com.ctc.wstx.outputValidateContent";

    /**
     * Whether output classes should check uniqueness of attribute names,
     * to prevent accidental output of duplicate attributes.
     */
    public final static String P_OUTPUT_VALIDATE_ATTR = "com.ctc.wstx.outputValidateAttr";

    /**
     * Whether output classes should check validity of names, ie that they
     * only contain legal XML identifier characters.
     */
    public final static String P_OUTPUT_VALIDATE_NAMES = "com.ctc.wstx.outputValidateNames";

    /**
     * Property that further modifies handling of invalid content so
     * that if {@link #P_OUTPUT_VALIDATE_CONTENT} is enabled, instead of
     * reporting an error, writer will try to fix the problem.
     * Invalid content in this context refers to comment
     * content with "--", CDATA with "]]>" and proc. instr data with "?>".
     * This can
     * be done for some content (CDATA, possibly comment), by splitting
     * content into separate
     * segments; but not for others (proc. instr, since that might
     * change the semantics in unintended ways).
*/ public final static String P_OUTPUT_FIX_CONTENT = "com.ctc.wstx.outputFixContent"; /** * Property that determines whether Carriage Return (\r) characters are * to be escaped when output or not. If enabled, all instances of * of character \r are escaped using a character entity (where possible, * that is, within CHARACTERS events, and attribute values). Otherwise * they are output as is. The main reason to enable this property is * to ensure that carriage returns are preserved as is through parsing, * since otherwise they will be converted to canonical xml linefeeds * (\n), when occuring along or as part of \r\n pair. */ public final static String P_OUTPUT_ESCAPE_CR = "com.ctc.wstx.outputEscapeCr"; /** * Property that defines a {@link InvalidCharHandler} used to determine * what to do with a Java character that app tries to output but which * is not a valid xml character. Alternatives are converting it to * another character or throw an exception: default implementations * exist for both behaviors. */ public final static String P_OUTPUT_INVALID_CHAR_HANDLER = "com.ctc.wstx.outputInvalidCharHandler"; /** * Property that defines an {@link EmptyElementHandler} used to determine * if the end tag for an empty element should be written or not. * * If specified {@link org.codehaus.stax2.XMLOutputFactory2#P_AUTOMATIC_EMPTY_ELEMENTS} is ignored. */ public final static String P_OUTPUT_EMPTY_ELEMENT_HANDLER = "com.ctc.wstx.outputEmptyElementHandler"; // // // Per-instance access to underlying output objects /** * Property that can be used to find out the underlying * {@link java.io.OutputStream} that an * {@link javax.xml.stream.XMLStreamWriter} instance is using, * if known (not known if constructed with a {@link java.io.Writer}, * or other non-stream destination). Null is returned, if not * known. *
* Note: in general it is dangerous to operate on returned stream * (if any), due to buffering stream writer can do. As such, caller * has to take care to know what he is doing, including properly * flushing output. */ public final static String P_OUTPUT_UNDERLYING_STREAM = "com.ctc.wstx.outputUnderlyingStream"; /** * Property that can be used to find out the underlying * {@link java.io.Writer} that an * {@link javax.xml.stream.XMLStreamWriter} instance is using, * if known (may not be known if constructed with a {@link java.io.OutputStream}, * or other non-Writer destination). Null is returned, if not * known. Note that the Writer may be an internal wrapper over * an output stream. *
* Note: in general it is dangerous to operate on returned Writer * (if any), due to buffering stream writer can do. As such, caller * has to take care to know what he is doing, including properly * flushing output. */ public final static String P_OUTPUT_UNDERLYING_WRITER = "com.ctc.wstx.outputUnderlyingWriter"; } woodstox-4.1.3/src/java/com/ctc/wstx/api/ValidatorConfig.java 0000644 0001750 0001750 00000001567 11745427074 024372 0 ustar giovanni giovanni package com.ctc.wstx.api; public final class ValidatorConfig extends CommonConfig { /** * For now, since there are no mutable properties, we can share * a singleton instance. */ final static ValidatorConfig sInstance = new ValidatorConfig(); private ValidatorConfig() { } public static ValidatorConfig createDefaults() { /* For now, since there are no mutable properties, we can share * a singleton instance. */ return sInstance; } protected int findPropertyId(String propName) { // Nothing above and beyond default settings... return -1; } protected Object getProperty(int id) { // nothing to get: return null; } protected boolean setProperty(String propName, int id, Object value) { // nothing to set: return false; } } woodstox-4.1.3/src/java/com/ctc/wstx/api/EmptyElementHandler.java 0000644 0001750 0001750 00000005601 11745427074 025216 0 ustar giovanni giovanni package com.ctc.wstx.api; import java.util.Set; import java.util.TreeSet; /** * Optional handler used to determine if a specific empty element (by name) should * be allowed to use the self-closing syntax instead of having a separate end tag. * * @since 4.1 */ public interface EmptyElementHandler { /** * @param prefix The element's namespace prefix, null if not set * @param localName The element's local name * @param nsURI The elements's namespace URI, null if not set * @param allowEmpty The allow empty setting specified by the caller. * @return True if the empty element can be self-closing. False if a separate end tag should be written. 
*/ public boolean allowEmptyElement(String prefix, String localName, String nsURI, boolean allowEmpty); /** * Handler that uses a Set of Strings. If the local part of the element's QName is contained * in the Set the element is allowed to be empty. *
* Users of this class are encouraged to use a {@link TreeSet} with the {@link String#CASE_INSENSITIVE_ORDER} * comparator if case-insensitive comparison is needed (like when dealing with HTML tags). */ public static class SetEmptyElementHandler implements EmptyElementHandler { final protected Set mEmptyElements; public SetEmptyElementHandler(Set emptyElements) { mEmptyElements = emptyElements; } public boolean allowEmptyElement(String prefix, String localName, String nsURI, boolean allowEmpty) { return mEmptyElements.contains(localName); } } /** * HTML specific empty element handler. * Extends the {@link SetEmptyElementHandler} and configures * the HTML elements that must be self-closing according to the W3C: * http://www.w3.org/TR/html4/index/elements.html *
* Note that element name comparison is case-insensitive as required * by HTML specification. */ public static class HtmlEmptyElementHandler extends SetEmptyElementHandler { private final static HtmlEmptyElementHandler sInstance = new HtmlEmptyElementHandler(); public static HtmlEmptyElementHandler getInstance() { return sInstance; } protected HtmlEmptyElementHandler() { super(new TreeSet(String.CASE_INSENSITIVE_ORDER)); mEmptyElements.add("area"); mEmptyElements.add("base"); mEmptyElements.add("basefont"); mEmptyElements.add("br"); mEmptyElements.add("col"); mEmptyElements.add("frame"); mEmptyElements.add("hr"); mEmptyElements.add("input"); mEmptyElements.add("img"); mEmptyElements.add("isindex"); mEmptyElements.add("link"); mEmptyElements.add("meta"); mEmptyElements.add("param"); } } } woodstox-4.1.3/src/java/com/ctc/wstx/api/ReaderConfig.java 0000644 0001750 0001750 00000146011 11745427074 023641 0 ustar giovanni giovanni package com.ctc.wstx.api; import java.lang.ref.SoftReference; import java.net.URL; import java.util.*; import javax.xml.stream.*; import org.codehaus.stax2.XMLInputFactory2; // for property consts import org.codehaus.stax2.XMLStreamProperties; // for property consts import org.codehaus.stax2.validation.DTDValidationSchema; import com.ctc.wstx.api.WstxInputProperties; import com.ctc.wstx.cfg.InputConfigFlags; import com.ctc.wstx.dtd.DTDEventListener; import com.ctc.wstx.ent.IntEntity; import com.ctc.wstx.ent.EntityDecl; import com.ctc.wstx.io.BufferRecycler; import com.ctc.wstx.util.ArgUtil; import com.ctc.wstx.util.DataUtil; import com.ctc.wstx.util.SymbolTable; /** * Simple configuration container class; passed by reader factory to reader * instance created. *
* In addition to its main task as a configuration container, this class
* also acts as a wrapper around simple buffer recycling functionality.
* The reason is that while conceptually this is a separate concern,
* there are enough commonalities with the life-cycle of this object to
* make this a very convenient place to add that functionality...
* (that is: conceptually this is not right, but from pragmatic viewpoint
* it just makes sense)
*/
public final class ReaderConfig
extends CommonConfig
implements InputConfigFlags
{
/*
///////////////////////////////////////////////////////////////////////
// Constants for reader properties:
///////////////////////////////////////////////////////////////////////
*/
// // First, standard StAX properties:
// The integer ids in this section are the values that sProperties maps
// String property names to, so that later code can dispatch on an int.
// Simple flags:
final static int PROP_COALESCE_TEXT = 1;
final static int PROP_NAMESPACE_AWARE = 2;
final static int PROP_REPLACE_ENTITY_REFS = 3;
final static int PROP_SUPPORT_EXTERNAL_ENTITIES = 4;
final static int PROP_VALIDATE_AGAINST_DTD = 5;
final static int PROP_SUPPORT_DTD = 6;
// Object type properties
// (note: the event allocator id is the only id declared public)
public final static int PROP_EVENT_ALLOCATOR = 7;
final static int PROP_WARNING_REPORTER = 8;
final static int PROP_XML_RESOLVER = 9;
// // Then StAX2 standard properties:
// Simple flags:
final static int PROP_INTERN_NS_URIS = 20;
final static int PROP_INTERN_NAMES = 21;
final static int PROP_REPORT_CDATA = 22;
final static int PROP_REPORT_PROLOG_WS = 23;
final static int PROP_PRESERVE_LOCATION = 24;
final static int PROP_AUTO_CLOSE_INPUT = 25;
// Enum / Object type properties:
// NOTE(review): values 26 and 27 are also assigned to
// PROP_INTERN_NAMES_EXPLICIT and PROP_INTERN_NS_URIS_EXPLICIT further down
// in this class; confirm the two pairs are never used in the same switch.
final static int PROP_SUPPORT_XMLID = 26; // shared with WriterConfig
final static int PROP_DTD_OVERRIDE = 27;
// // // Constants for additional Wstx properties:
// Ids 41 and 51 are intentionally left unassigned: they belonged to
// properties that are now only present as commented-out declarations.
// Simple flags:
/**
* Note: this entry was deprecated for 4.0 versions up until
* and including 4.0.7; was brought back for 4.0.8 (and will
* be retained for 4.1)
*/
final static int PROP_NORMALIZE_LFS = 40;
/* This entry was deprecated for 3.2 and removed in 4.0
* version. There are no plans to bring it back.
*/
//final static int PROP_NORMALIZE_ATTR_VALUES = 41;
final static int PROP_CACHE_DTDS = 42;
final static int PROP_CACHE_DTDS_BY_PUBLIC_ID = 43;
final static int PROP_LAZY_PARSING = 44;
final static int PROP_SUPPORT_DTDPP = 45;
final static int PROP_TREAT_CHAR_REFS_AS_ENTS = 46;
// Object type properties:
final static int PROP_INPUT_BUFFER_LENGTH = 50;
// (id 51 was the text buffer length property; no longer declared)
//final static int PROP_TEXT_BUFFER_LENGTH = 51;
final static int PROP_MIN_TEXT_SEGMENT = 52;
final static int PROP_CUSTOM_INTERNAL_ENTITIES = 53;
final static int PROP_DTD_RESOLVER = 54;
final static int PROP_ENTITY_RESOLVER = 55;
final static int PROP_UNDECLARED_ENTITY_RESOLVER = 56;
final static int PROP_BASE_URL = 57;
final static int PROP_INPUT_PARSING_MODE = 58;
/*
////////////////////////////////////////////////
// Limits for numeric properties
////////////////////////////////////////////////
*/
/**
* Need to set a minimum size, since there are some limitations to
* smallest consecutive block that can be used. This is the lower bound
* that {@link #setInputBufferLength} clamps its argument to.
*/
final static int MIN_INPUT_BUFFER_LENGTH = 8; // 8 chars, i.e. 16 bytes
/**
* Let's allow caching of just a dozen DTDs... shouldn't really
* matter, how many DTDs does one really use?
* Value returned by {@link #getDtdCacheSize} for non-J2ME configurations.
*/
final static int DTD_CACHE_SIZE_J2SE = 12;
/**
* DTD cache size returned by {@link #getDtdCacheSize} when the
* configuration was created for the J2ME subset.
*/
final static int DTD_CACHE_SIZE_J2ME = 5;
/*
///////////////////////////////////////////////////////////////////////
// Default values for custom properties:
///////////////////////////////////////////////////////////////////////
*/
/**
* By default, let's require minimum of 64 chars to be delivered
* as shortest partial (piece of) text (CDATA, text) segment;
* same for both J2ME subset and full readers. Prevents tiniest
* runts from getting passed.
*/
final static int DEFAULT_SHORTEST_TEXT_SEGMENT = 64;
/**
* Default config flags are converted from individual settings,
* to conform to StAX 1.0 specifications. Flags that are deliberately
* left off by default are kept below as commented-out terms, next to
* the reasoning for leaving them disabled.
*/
final static int DEFAULT_FLAGS_FULL =
0
// First, default settings StAX specs dictate:
| CFG_NAMESPACE_AWARE
// Coalescing to be disabled
//| CFG_COALESCE_TEXT
| CFG_REPLACE_ENTITY_REFS
| CFG_SUPPORT_EXTERNAL_ENTITIES
| CFG_SUPPORT_DTD
// and then custom setting defaults:
// name and namespace URI interning are both enabled
| CFG_INTERN_NAMES
| CFG_INTERN_NS_URIS
// we will also accurately report CDATA, by default
| CFG_REPORT_CDATA
/* 20-Jan-2006, TSa: As per discussions on stax-builders list
* (and input from xml experts), 4.0 will revert to "do not
* report SPACE events outside root element by default"
* settings. Conceptually this is what xml specification
* implies should be done: there is no content outside of
* the element tree, including any ignorable content, just
* processing instructions and comments.
*/
//| CFG_REPORT_PROLOG_WS
/* but enable DTD caching (if they are handled):
* (... maybe J2ME subset shouldn't do it?)
*/
| CFG_CACHE_DTDS
/* 29-Mar-2006, TSa: But note, no caching by public-id, due
* to problems with cases where public-id/system-id were
* inconsistently used, leading to problems.
* (hence CFG_CACHE_DTDS_BY_PUBLIC_ID is not part of the defaults)
*/
/* by default, let's also allow lazy parsing, since it tends
* to improve performance
*/
| CFG_LAZY_PARSING
/* and also make Event objects preserve location info...
* can be turned off for maximum performance
*/
| CFG_PRESERVE_LOCATION
// As per Stax 1.0 specs, we can not enable this by default:
//| CFG_AUTO_CLOSE_INPUT
/* Also, let's enable dtd++ support (shouldn't hurt with non-dtd++
* dtds)
*/
| CFG_SUPPORT_DTDPP
/*
* Set this as a default, as this is required in xml;
*/
| CFG_NORMALIZE_LFS
/* Regarding Xml:id, let's enable typing by default, but not
* uniqueness validity checks: latter will be taken care of
* by DTD validation if enabled, otherwise needs to be explicitly
* enabled
*/
| CFG_XMLID_TYPING
// | CFG_XMLID_UNIQ_CHECKS
;
/**
* For now defaults for J2ME flags can be identical to 'full' set;
* differences are in buffer sizes.
*/
final static int DEFAULT_FLAGS_J2ME = DEFAULT_FLAGS_FULL;
// // //
/**
* Map to use for converting from String property ids to ints
* described above; useful to allow use of switch later on.
* Keys are property name Strings, values are Integer wrappers of the
* PROP_ constants; the map is only populated by the static initializer
* below and is read by {@link #findPropertyId}.
*/
final static HashMap sProperties = new HashMap(64); // 31 entries are registered below
static {
// Standard ones; support for features
sProperties.put(XMLInputFactory.IS_COALESCING,
DataUtil.Integer(PROP_COALESCE_TEXT));
sProperties.put(XMLInputFactory.IS_NAMESPACE_AWARE,
DataUtil.Integer(PROP_NAMESPACE_AWARE));
sProperties.put(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES,
DataUtil.Integer(PROP_REPLACE_ENTITY_REFS));
sProperties.put(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES,
DataUtil.Integer(PROP_SUPPORT_EXTERNAL_ENTITIES));
sProperties.put(XMLInputFactory.IS_VALIDATING,
DataUtil.Integer(PROP_VALIDATE_AGAINST_DTD));
sProperties.put(XMLInputFactory.SUPPORT_DTD,
DataUtil.Integer(PROP_SUPPORT_DTD));
// Standard ones; pluggable components
sProperties.put(XMLInputFactory.ALLOCATOR,
DataUtil.Integer(PROP_EVENT_ALLOCATOR));
sProperties.put(XMLInputFactory.REPORTER,
DataUtil.Integer(PROP_WARNING_REPORTER));
sProperties.put(XMLInputFactory.RESOLVER,
DataUtil.Integer(PROP_XML_RESOLVER));
// StAX2-introduced flags:
sProperties.put(XMLInputFactory2.P_INTERN_NAMES,
DataUtil.Integer(PROP_INTERN_NAMES));
sProperties.put(XMLInputFactory2.P_INTERN_NS_URIS,
DataUtil.Integer(PROP_INTERN_NS_URIS));
sProperties.put(XMLInputFactory2.P_REPORT_CDATA,
DataUtil.Integer(PROP_REPORT_CDATA));
sProperties.put(XMLInputFactory2.P_REPORT_PROLOG_WHITESPACE,
DataUtil.Integer(PROP_REPORT_PROLOG_WS));
sProperties.put(XMLInputFactory2.P_PRESERVE_LOCATION,
DataUtil.Integer(PROP_PRESERVE_LOCATION));
sProperties.put(XMLInputFactory2.P_AUTO_CLOSE_INPUT,
DataUtil.Integer(PROP_AUTO_CLOSE_INPUT));
sProperties.put(XMLInputFactory2.XSP_SUPPORT_XMLID,
DataUtil.Integer(PROP_SUPPORT_XMLID));
sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE,
DataUtil.Integer(PROP_DTD_OVERRIDE));
// Non-standard ones, flags:
// (lazy parsing is keyed by the XMLInputFactory2 constant even though
// its id is grouped with the Woodstox-specific flags)
sProperties.put(WstxInputProperties.P_CACHE_DTDS,
DataUtil.Integer(PROP_CACHE_DTDS));
sProperties.put(WstxInputProperties.P_CACHE_DTDS_BY_PUBLIC_ID,
DataUtil.Integer(PROP_CACHE_DTDS_BY_PUBLIC_ID));
sProperties.put(XMLInputFactory2.P_LAZY_PARSING,
DataUtil.Integer(PROP_LAZY_PARSING));
sProperties.put(WstxInputProperties.P_SUPPORT_DTDPP,
DataUtil.Integer(PROP_SUPPORT_DTDPP));
sProperties.put(WstxInputProperties.P_TREAT_CHAR_REFS_AS_ENTS,
DataUtil.Integer(PROP_TREAT_CHAR_REFS_AS_ENTS));
sProperties.put(WstxInputProperties.P_NORMALIZE_LFS,
DataUtil.Integer(PROP_NORMALIZE_LFS));
// Non-standard ones, non-flags:
sProperties.put(WstxInputProperties.P_INPUT_BUFFER_LENGTH,
DataUtil.Integer(PROP_INPUT_BUFFER_LENGTH));
sProperties.put(WstxInputProperties.P_MIN_TEXT_SEGMENT,
DataUtil.Integer(PROP_MIN_TEXT_SEGMENT));
sProperties.put(WstxInputProperties.P_CUSTOM_INTERNAL_ENTITIES,
DataUtil.Integer(PROP_CUSTOM_INTERNAL_ENTITIES));
sProperties.put(WstxInputProperties.P_DTD_RESOLVER,
DataUtil.Integer(PROP_DTD_RESOLVER));
sProperties.put(WstxInputProperties.P_ENTITY_RESOLVER,
DataUtil.Integer(PROP_ENTITY_RESOLVER));
sProperties.put(WstxInputProperties.P_UNDECLARED_ENTITY_RESOLVER,
DataUtil.Integer(PROP_UNDECLARED_ENTITY_RESOLVER));
sProperties.put(WstxInputProperties.P_BASE_URL,
DataUtil.Integer(PROP_BASE_URL));
sProperties.put(WstxInputProperties.P_INPUT_PARSING_MODE,
DataUtil.Integer(PROP_INPUT_PARSING_MODE));
}
/*
///////////////////////////////////////////////////////////////////////
// Current config state:
///////////////////////////////////////////////////////////////////////
*/
/**
* Whether this configuration was created for the J2ME subset; selects
* which DTD cache size {@link #getDtdCacheSize} returns.
*/
final boolean mIsJ2MESubset;
/**
* Symbol table handed to the constructor. The createXxxDefaults()
* factory methods pass null; {@link #createNonShared} passes the table
* given by its caller.
*/
final SymbolTable mSymbols;
/**
* Bitset that contains state of on/off properties; initialized
* to defaults, but can be set/cleared.
*/
int mConfigFlags;
/**
* Bitset that indicates explicit changes to {@link #mConfigFlags}
* through calls; empty bit means that the corresponding property
* has its default value, set bit that an explicit call has been
* made.
*/
int mConfigFlagMods;
/**
* 13-Nov-2008, tatus: Need to be able to keep track of whether
* name-interning has been explicitly enabled/disable or not
* (not if it's whatever defaults we have)
*<p>
* NOTE(review): the values 26 and 27 are identical to those of
* PROP_SUPPORT_XMLID and PROP_DTD_OVERRIDE declared earlier in this
* class, and neither constant is referenced in the part of the class
* reviewed here. Confirm whether they are still needed and, if so,
* whether they should get ids of their own.
*/
final static int PROP_INTERN_NAMES_EXPLICIT = 26;
final static int PROP_INTERN_NS_URIS_EXPLICIT = 27;
/**
* Input buffer length; {@link #setInputBufferLength} keeps it at or
* above MIN_INPUT_BUFFER_LENGTH.
*/
int mInputBufferLen;
/**
* Value reported by {@link #getShortestReportedTextSegment}.
*/
int mMinTextSegmentLen;
/**
* Base URL to use as the resolution context for relative entity
* references
*/
URL mBaseURL = null;
/**
* Parsing mode can be changed from the default xml compliant
* behavior to one of alternate modes (fragment processing,
* multiple document processing).
*/
WstxInputProperties.ParsingMode mParsingMode =
WstxInputProperties.PARSING_MODE_DOCUMENT;
/**
* This boolean flag is set if the input document requires
* xml 1.1 (or above) compliant processing: default is xml 1.0
* compliant. Note that unlike most other properties, this
* does not come from configuration settings, but from processed
* document itself. It is cleared again by {@link #resetState}.
*/
boolean mXml11 = false;
/*
///////////////////////////////////////////////////////////////////////
// Common configuration objects
///////////////////////////////////////////////////////////////////////
*/
/** Reporter set via {@link #setXMLReporter}; no default is assigned here. */
XMLReporter mReporter;
/** Resolver returned by {@link #getDtdResolver}. */
XMLResolver mDtdResolver = null;
/**
* Resolver returned by both {@link #getEntityResolver} and
* {@link #getXMLResolver}.
*/
XMLResolver mEntityResolver = null;
/*
///////////////////////////////////////////////////////////////////////
// More special(ized) configuration objects
///////////////////////////////////////////////////////////////////////
*/
// The rarely used objects below are not given fields of their own;
// they are kept in slots of mSpecialProperties, indexed by the SP_IX_
// constants, and accessed through _getSpecialProperty and
// _setSpecialProperty (both defined outside this part of the class).
//Map mCustomEntities;
//XMLResolver mUndeclaredEntityResolver;
//DTDEventListener mDTDEventListener;
Object[] mSpecialProperties = null;
// Number of slots; presumably the length used when the array is first
// allocated -- confirm against _setSpecialProperty.
private final static int SPEC_PROC_COUNT = 4;
private final static int SP_IX_CUSTOM_ENTITIES = 0;
private final static int SP_IX_UNDECL_ENT_RESOLVER = 1;
private final static int SP_IX_DTD_EVENT_LISTENER = 2;
private final static int SP_IX_DTD_OVERRIDE = 3;
/*
///////////////////////////////////////////////////////////////////////
// Buffer recycling:
///////////////////////////////////////////////////////////////////////
*/
/**
* This <code>ThreadLocal</code> contains a {@link SoftReference}
* to a {@link BufferRecycler} used to provide a low-cost
* buffer recycling between Reader instances.
*/
final static ThreadLocal mRecyclerRef = new ThreadLocal();
/**
* This is the actual container of the recyclable buffers. It
* is obtained via ThreadLocal/SoftReference combination, if one
* exists, when Config instance is created. If one does not
* exist, it will be created first time a buffer is returned.
*/
BufferRecycler mCurrRecycler = null;
/*
///////////////////////////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////////////////////////
*/
/**
* Sole constructor; instances are obtained through the static
* createXxxDefaults() factory methods or {@link #createNonShared}.
*
* @param j2meSubset Whether the configuration is for the J2ME subset
* @param symbols Symbol table to expose via {@link #getSymbols}; may be null
* @param configFlags Initial state of the on/off settings
* @param configFlagMods Bits marking which settings were explicitly changed
* @param inputBufLen Initial input buffer length
* @param minTextSegmentLen Initial shortest reportable text segment length
*/
private ReaderConfig(boolean j2meSubset, SymbolTable symbols,
        int configFlags, int configFlagMods,
        int inputBufLen,
        int minTextSegmentLen)
{
    this.mIsJ2MESubset = j2meSubset;
    this.mSymbols = symbols;
    this.mConfigFlags = configFlags;
    this.mConfigFlagMods = configFlagMods;
    this.mInputBufferLen = inputBufLen;
    this.mMinTextSegmentLen = minTextSegmentLen;

    /* Try to pick up this thread's buffer recycler. Recyclers are built
     * lazily and are only softly referenced, so either the thread-local
     * slot or the reference itself may be empty; in that case the
     * recycler field keeps its null default and one can be constructed
     * later, if and when buffers are returned.
     */
    Object softRef = mRecyclerRef.get();
    if (softRef != null) {
        this.mCurrRecycler = (BufferRecycler) ((SoftReference) softRef).get();
    }
}
/**
* Factory method for a configuration flagged as J2ME subset: no symbol
* table, the J2ME default flags, no explicitly modified flags and the
* default shortest text segment length.
*
* @return Newly constructed configuration instance
*/
public static ReaderConfig createJ2MEDefaults()
{
    // J2ME gets a smaller input buffer than the full version, on the
    // assumption that lower memory usage is desirable:
    // 2000 chars, i.e. about 4k.
    final int inputBufferChars = 2000;
    return new ReaderConfig(true, null, DEFAULT_FLAGS_J2ME, 0,
            inputBufferChars, DEFAULT_SHORTEST_TEXT_SEGMENT);
}
/**
* Factory method for a configuration for the full (non-J2ME) version:
* no symbol table, the full default flags, no explicitly modified flags
* and the default shortest text segment length.
*
* @return Newly constructed configuration instance
*/
public static ReaderConfig createFullDefaults()
{
    // The full version can afford a somewhat larger input buffer for
    // better overall performance: 4000 chars, i.e. about 8k.
    final int inputBufferChars = 4000;
    return new ReaderConfig(false, null, DEFAULT_FLAGS_FULL, 0,
            inputBufferChars, DEFAULT_SHORTEST_TEXT_SEGMENT);
}
/**
* Creates a copy of this configuration that uses the given symbol
* table. Flags, buffer sizes, reporter, resolvers, base URL and parsing
* mode are carried over; the special-property array is duplicated
* (shallowly) so that the copy has an array of its own. The xml 1.1
* state flag is not copied.
*
* @param sym Symbol table for the new instance; not checked for null
* @return Newly constructed configuration instance
*/
public ReaderConfig createNonShared(SymbolTable sym)
{
    // No null check on the symbol table (open question: should there be?)
    ReaderConfig copy = new ReaderConfig(mIsJ2MESubset, sym,
            mConfigFlags, mConfigFlagMods,
            mInputBufferLen, mMinTextSegmentLen);

    copy.mReporter = mReporter;
    copy.mDtdResolver = mDtdResolver;
    copy.mEntityResolver = mEntityResolver;
    copy.mBaseURL = mBaseURL;
    copy.mParsingMode = mParsingMode;

    Object[] special = mSpecialProperties;
    if (special != null) {
        Object[] duplicate = new Object[special.length];
        System.arraycopy(special, 0, duplicate, 0, duplicate.length);
        copy.mSpecialProperties = duplicate;
    }
    return copy;
}
/**
* Unlike name suggests there is also some limited state information
* associated with the config object. If these objects are reused,
* that state needs to be reset between reuses, to avoid carrying
* over incorrect state.
*<p>
* Resets the xml 1.1 flag back to false (xml 1.0 processing).
*/
public void resetState()
{
// Currently, only xml 1.0 vs 1.1 state is stored here:
mXml11 = false;
}
/*
///////////////////////////////////////////////////////////////////////
// Implementation of abstract methods
///////////////////////////////////////////////////////////////////////
*/
/**
* Looks up the integer id registered for the given property name.
*
* @param propName Name of the property to look up
* @return Id registered in the property map for the name, or -1 if the
*   name has no entry
*/
protected int findPropertyId(String propName)
{
    Object id = sProperties.get(propName);
    if (id == null) {
        return -1;
    }
    return ((Integer) id).intValue();
}
/*
///////////////////////////////////////////////////////////////////////
// Public API, accessors
///////////////////////////////////////////////////////////////////////
*/
// // // Accessors for immutable configuration:
/**
* @return Symbol table this configuration was constructed with; may be
*   null (the default-creating factory methods pass null)
*/
public SymbolTable getSymbols() { return mSymbols; }
/**
* In future this property could/should be made configurable?
*
* @return Fixed DTD cache size: the J2ME constant for J2ME subset
*   configurations, the J2SE constant otherwise
*/
public int getDtdCacheSize() {
    if (mIsJ2MESubset) {
        return DTD_CACHE_SIZE_J2ME;
    }
    return DTD_CACHE_SIZE_J2SE;
}
// // // "Raw" accessors for on/off properties:
/**
* @return Current bitset of on/off settings, as a combination of the
*   CFG_ constants
*/
public int getConfigFlags() { return mConfigFlags; }
// // // Standard StAX on/off property accessors
// Each accessor in this group passes a single CFG_ constant to
// _hasConfigFlag, which is defined outside this part of the class
// (presumably it tests the bit against mConfigFlags -- confirm).
/** @return Result of checking the CFG_COALESCE_TEXT flag */
public boolean willCoalesceText() {
return _hasConfigFlag(CFG_COALESCE_TEXT);
}
/** @return Result of checking the CFG_NAMESPACE_AWARE flag */
public boolean willSupportNamespaces() {
return _hasConfigFlag(CFG_NAMESPACE_AWARE);
}
/** @return Result of checking the CFG_REPLACE_ENTITY_REFS flag */
public boolean willReplaceEntityRefs() {
return _hasConfigFlag(CFG_REPLACE_ENTITY_REFS);
}
/** @return Result of checking the CFG_SUPPORT_EXTERNAL_ENTITIES flag */
public boolean willSupportExternalEntities() {
return _hasConfigFlag(CFG_SUPPORT_EXTERNAL_ENTITIES);
}
/** @return Result of checking the CFG_SUPPORT_DTD flag */
public boolean willSupportDTDs() {
return _hasConfigFlag(CFG_SUPPORT_DTD);
}
/** @return Result of checking the CFG_VALIDATE_AGAINST_DTD flag */
public boolean willValidateWithDTD() {
return _hasConfigFlag(CFG_VALIDATE_AGAINST_DTD);
}
// // // Stax2 on/off property accessors
// As with the other willXxx accessors, each method forwards one CFG_
// constant to _hasConfigFlag (defined outside this part of the class).
/** @return Result of checking the CFG_REPORT_CDATA flag */
public boolean willReportCData() {
return _hasConfigFlag(CFG_REPORT_CDATA);
}
/** @return Result of checking the CFG_LAZY_PARSING flag */
public boolean willParseLazily() {
return _hasConfigFlag(CFG_LAZY_PARSING);
}
/** @return Result of checking the CFG_INTERN_NAMES flag */
public boolean willInternNames() {
return _hasConfigFlag(CFG_INTERN_NAMES);
}
/** @return Result of checking the CFG_INTERN_NS_URIS flag */
public boolean willInternNsURIs() {
return _hasConfigFlag(CFG_INTERN_NS_URIS);
}
/** @return Result of checking the CFG_PRESERVE_LOCATION flag */
public boolean willPreserveLocation() {
return _hasConfigFlag(CFG_PRESERVE_LOCATION);
}
/** @return Result of checking the CFG_AUTO_CLOSE_INPUT flag */
public boolean willAutoCloseInput() {
return _hasConfigFlag(CFG_AUTO_CLOSE_INPUT);
}
// // // Woodstox on/off property accessors
// As with the other willXxx accessors, each method forwards one CFG_
// constant to _hasConfigFlag (defined outside this part of the class).
/** @return Result of checking the CFG_REPORT_PROLOG_WS flag */
public boolean willReportPrologWhitespace() {
return _hasConfigFlag(CFG_REPORT_PROLOG_WS);
}
/** @return Result of checking the CFG_CACHE_DTDS flag */
public boolean willCacheDTDs() {
return _hasConfigFlag(CFG_CACHE_DTDS);
}
/** @return Result of checking the CFG_CACHE_DTDS_BY_PUBLIC_ID flag */
public boolean willCacheDTDsByPublicId() {
return _hasConfigFlag(CFG_CACHE_DTDS_BY_PUBLIC_ID);
}
/** @return Result of checking the CFG_XMLID_TYPING flag */
public boolean willDoXmlIdTyping() {
return _hasConfigFlag(CFG_XMLID_TYPING);
}
/** @return Result of checking the CFG_XMLID_UNIQ_CHECKS flag */
public boolean willDoXmlIdUniqChecks() {
return _hasConfigFlag(CFG_XMLID_UNIQ_CHECKS);
}
/** @return Result of checking the CFG_SUPPORT_DTDPP flag */
public boolean willSupportDTDPP() {
return _hasConfigFlag(CFG_SUPPORT_DTDPP);
}
/** @return Result of checking the CFG_NORMALIZE_LFS flag */
public boolean willNormalizeLFs() {
return _hasConfigFlag(CFG_NORMALIZE_LFS);
}
/** @return Result of checking the CFG_TREAT_CHAR_REFS_AS_ENTS flag */
public boolean willTreatCharRefsAsEnts() {
return _hasConfigFlag(CFG_TREAT_CHAR_REFS_AS_ENTS);
}
/** @return Currently configured input buffer length */
public int getInputBufferLength() { return mInputBufferLen; }
/** @return Currently configured shortest reportable text segment length */
public int getShortestReportedTextSegment() { return mMinTextSegmentLen; }
/**
* Accessor for the custom internal entity definitions.
*
* @return The shared immutable empty map if no custom entities have
*   been stored; otherwise a freshly built HashMap holding the same
*   key-to-{@link EntityDecl} entries as the stored map
*/
public Map getCustomInternalEntities()
{
    final Map stored = (Map) _getSpecialProperty(SP_IX_CUSTOM_ENTITIES);
    if (stored == null) {
        return Collections.EMPTY_MAP;
    }
    // Defensive: hand out a copy rather than the internal map
    final int count = stored.size();
    final HashMap copy = new HashMap(count + (count >> 2), 0.81f);
    for (Iterator entries = stored.entrySet().iterator(); entries.hasNext(); ) {
        Map.Entry entry = (Map.Entry) entries.next();
        Object key = entry.getKey();
        // The cast acts as an assertion and documents the value type
        EntityDecl decl = (EntityDecl) entry.getValue();
        copy.put(key, decl);
    }
    return copy;
}
/**
* Looks up a single custom internal entity without copying the map.
*
* @param id Key to look the entity up with
* @return Entity stored under the key; null if no custom entities have
*   been stored or the key has no entry
*/
public EntityDecl findCustomInternalEntity(String id)
{
    Map stored = (Map) _getSpecialProperty(SP_IX_CUSTOM_ENTITIES);
    return (stored == null) ? null : (EntityDecl) stored.get(id);
}
/** @return Currently configured reporter; may be null */
public XMLReporter getXMLReporter() { return mReporter; }
/**
* @return Same resolver as {@link #getEntityResolver}; the DTD resolver
*   is only available through {@link #getDtdResolver}
*/
public XMLResolver getXMLResolver() { return mEntityResolver; }
/** @return Currently configured DTD resolver; may be null */
public XMLResolver getDtdResolver() { return mDtdResolver; }
/** @return Currently configured entity resolver; may be null */
public XMLResolver getEntityResolver() { return mEntityResolver; }
/**
* @return Resolver stored in the special-property slot for undeclared
*   entities, as returned by _getSpecialProperty
*/
public XMLResolver getUndeclaredEntityResolver() {
return (XMLResolver) _getSpecialProperty(SP_IX_UNDECL_ENT_RESOLVER);
}
/** @return Currently configured base URL; null by default */
public URL getBaseURL() { return mBaseURL; }
/** @return Currently configured parsing mode */
public WstxInputProperties.ParsingMode getInputParsingMode() {
return mParsingMode;
}
/**
* @return True if the configured parsing mode is the very instance
*   WstxInputProperties.PARSING_MODE_DOCUMENTS (identity comparison)
*/
public boolean inputParsingModeDocuments() {
return mParsingMode == WstxInputProperties.PARSING_MODE_DOCUMENTS;
}
/**
* @return True if the configured parsing mode is the very instance
*   WstxInputProperties.PARSING_MODE_FRAGMENT (identity comparison)
*/
public boolean inputParsingModeFragment() {
return mParsingMode == WstxInputProperties.PARSING_MODE_FRAGMENT;
}
/**
* Accessor for the xml 1.1 state flag, which is changed by
* {@link #enableXml11} and cleared by {@link #resetState}.
*
* @return True if the input well-formedness and validation checks
* should be done according to xml 1.1 specification; false if
* xml 1.0 specification.
*/
public boolean isXml11() {
return mXml11;
}
/**
* @return Listener stored in the special-property slot for the DTD
*   event listener, as returned by _getSpecialProperty
*/
public DTDEventListener getDTDEventListener() {
return (DTDEventListener) _getSpecialProperty(SP_IX_DTD_EVENT_LISTENER);
}
/**
* @return Schema stored in the special-property slot for the DTD
*   override, as returned by _getSpecialProperty
*/
public DTDValidationSchema getDTDOverride() {
return (DTDValidationSchema) _getSpecialProperty(SP_IX_DTD_OVERRIDE);
}
/**
* Special accessor to use to verify whether name interning has
* explicitly been enabled; true if a call has been made to set
* it to true; false otherwise (default, or set to false).
* The check itself is done by _hasExplicitConfigFlag, which is defined
* outside this part of the class.
*/
public boolean hasInternNamesBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NAMES);
}
/**
* Same kind of check as {@link #hasInternNamesBeenEnabled}, but for the
* CFG_INTERN_NS_URIS flag.
*/
public boolean hasInternNsURIsBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NS_URIS);
}
/*
///////////////////////////////////////////////////////////////////////
// Simple mutators
///////////////////////////////////////////////////////////////////////
*/
/**
* Turns on the given flag bit(s) and records them as explicitly
* modified.
*
* @param flag One or more CFG_ bits to enable
*/
public void setConfigFlag(int flag) {
mConfigFlags |= flag;
mConfigFlagMods |= flag;
}
/**
* Turns off the given flag bit(s) and records them as explicitly
* modified.
*
* @param flag One or more CFG_ bits to disable
*/
public void clearConfigFlag(int flag) {
mConfigFlags &= ~flag;
// clearing also counts as an explicit modification
mConfigFlagMods |= flag;
}
// // // Mutators for standard StAX properties
// Each mutator in this group forwards one CFG_ constant plus the
// requested state to the two-argument setConfigFlag overload, which is
// defined outside this part of the class.
/** @param state Requested state for the CFG_COALESCE_TEXT flag */
public void doCoalesceText(boolean state) {
setConfigFlag(CFG_COALESCE_TEXT, state);
}
/** @param state Requested state for the CFG_NAMESPACE_AWARE flag */
public void doSupportNamespaces(boolean state) {
setConfigFlag(CFG_NAMESPACE_AWARE, state);
}
/** @param state Requested state for the CFG_REPLACE_ENTITY_REFS flag */
public void doReplaceEntityRefs(boolean state) {
setConfigFlag(CFG_REPLACE_ENTITY_REFS, state);
}
/** @param state Requested state for the CFG_SUPPORT_EXTERNAL_ENTITIES flag */
public void doSupportExternalEntities(boolean state) {
setConfigFlag(CFG_SUPPORT_EXTERNAL_ENTITIES, state);
}
/** @param state Requested state for the CFG_SUPPORT_DTD flag */
public void doSupportDTDs(boolean state) {
setConfigFlag(CFG_SUPPORT_DTD, state);
}
/** @param state Requested state for the CFG_VALIDATE_AGAINST_DTD flag */
public void doValidateWithDTD(boolean state) {
setConfigFlag(CFG_VALIDATE_AGAINST_DTD, state);
}
// // // Mutators for Woodstox-specific properties
// Each mutator in this group forwards one CFG_ constant plus the
// requested state to the two-argument setConfigFlag overload, which is
// defined outside this part of the class.
/** @param state Requested state for the CFG_INTERN_NAMES flag */
public void doInternNames(boolean state) {
setConfigFlag(CFG_INTERN_NAMES, state);
}
/** @param state Requested state for the CFG_INTERN_NS_URIS flag */
public void doInternNsURIs(boolean state) {
setConfigFlag(CFG_INTERN_NS_URIS, state);
}
/** @param state Requested state for the CFG_REPORT_PROLOG_WS flag */
public void doReportPrologWhitespace(boolean state) {
setConfigFlag(CFG_REPORT_PROLOG_WS, state);
}
/** @param state Requested state for the CFG_REPORT_CDATA flag */
public void doReportCData(boolean state) {
setConfigFlag(CFG_REPORT_CDATA, state);
}
/** @param state Requested state for the CFG_CACHE_DTDS flag */
public void doCacheDTDs(boolean state) {
setConfigFlag(CFG_CACHE_DTDS, state);
}
/** @param state Requested state for the CFG_CACHE_DTDS_BY_PUBLIC_ID flag */
public void doCacheDTDsByPublicId(boolean state) {
setConfigFlag(CFG_CACHE_DTDS_BY_PUBLIC_ID, state);
}
/** @param state Requested state for the CFG_LAZY_PARSING flag */
public void doParseLazily(boolean state) {
setConfigFlag(CFG_LAZY_PARSING, state);
}
/** @param state Requested state for the CFG_XMLID_TYPING flag */
public void doXmlIdTyping(boolean state) {
setConfigFlag(CFG_XMLID_TYPING, state);
}
/** @param state Requested state for the CFG_XMLID_UNIQ_CHECKS flag */
public void doXmlIdUniqChecks(boolean state) {
setConfigFlag(CFG_XMLID_UNIQ_CHECKS, state);
}
/** @param state Requested state for the CFG_PRESERVE_LOCATION flag */
public void doPreserveLocation(boolean state) {
setConfigFlag(CFG_PRESERVE_LOCATION, state);
}
/** @param state Requested state for the CFG_AUTO_CLOSE_INPUT flag */
public void doAutoCloseInput(boolean state) {
setConfigFlag(CFG_AUTO_CLOSE_INPUT, state);
}
/** @param state Requested state for the CFG_SUPPORT_DTDPP flag */
public void doSupportDTDPP(boolean state) {
setConfigFlag(CFG_SUPPORT_DTDPP, state);
}
/** @param state Requested state for the CFG_TREAT_CHAR_REFS_AS_ENTS flag */
public void doTreatCharRefsAsEnts(final boolean state) {
setConfigFlag(CFG_TREAT_CHAR_REFS_AS_ENTS, state);
}
/** @param state Requested state for the CFG_NORMALIZE_LFS flag */
public void doNormalizeLFs(final boolean state) {
setConfigFlag(CFG_NORMALIZE_LFS, state);
}
/**
* Sets the input buffer length, silently raising values below the
* enforced minimum up to that minimum.
*
* @param value Requested buffer length
*/
public void setInputBufferLength(int value)
{
    /* A floor is needed so that the longest consecutive text span
     * (xml declaration etc) can always be made available.
     */
    mInputBufferLen = (value < MIN_INPUT_BUFFER_LENGTH)
            ? MIN_INPUT_BUFFER_LENGTH : value;
}
/**
* Stores {@code value} in {@code mMinTextSegmentLen}. No range check is
* applied here.
*
* @param value New minimum text segment length setting
*/
public void setShortestReportedTextSegment(int value) {
mMinTextSegmentLen = value;
}
/**
 * Builds the custom internal entity map from the given definitions and
 * stores it as the {@code SP_IX_CUSTOM_ENTITIES} special property.
 * A null or empty argument results in {@link Collections#EMPTY_MAP}
 * being stored. Otherwise each entry is converted to an entity created
 * with {@code IntEntity.create}, keyed by the entry's name.
 *
 * @param m Map from entity name (must be a String) to replacement text.
 *   A null value means empty text, a {@code char[]} is used as is, and
 *   any other object contributes its {@code toString()} value.
 */
public void setCustomInternalEntities(Map m)
{
    final Map entities;
    if (m != null && m.size() >= 1) {
        final int count = m.size();
        // Sized at 1.5x the entry count, with the usual load factor
        entities = new HashMap(count + (count >> 1), 0.75f);
        for (Iterator entries = m.entrySet().iterator(); entries.hasNext(); ) {
            final Map.Entry entry = (Map.Entry) entries.next();
            final Object raw = entry.getValue();
            final char[] text;
            if (raw == null) {
                text = DataUtil.getEmptyCharArray();
            } else if (raw instanceof char[]) {
                text = (char[]) raw;
            } else {
                // Most likely a String; toString() covers anything else too
                text = raw.toString().toCharArray();
            }
            final String entityName = (String) entry.getKey();
            entities.put(entityName, IntEntity.create(entityName, text));
        }
    } else {
        entities = Collections.EMPTY_MAP;
    }
    _setSpecialProperty(SP_IX_CUSTOM_ENTITIES, entities);
}
/**
* Stores the given reporter in {@code mReporter}.
*
* @param r Reporter to store; null is stored as is
*/
public void setXMLReporter(XMLReporter r) {
mReporter = r;
}
/**
* Installs the given resolver as both the entity resolver and the
* DTD resolver.
*<p>
* Note: for better granularity, you should call {@link #setEntityResolver}
* and {@link #setDtdResolver} instead.
*
* @param r Resolver stored in both {@code mEntityResolver} and
*   {@code mDtdResolver}
*/
public void setXMLResolver(XMLResolver r) {
mEntityResolver = r;
mDtdResolver = r;
}
/**
* Stores the given resolver in {@code mDtdResolver} only; the entity
* resolver is left untouched.
*/
public void setDtdResolver(XMLResolver r) {
mDtdResolver = r;
}
/**
* Stores the given resolver in {@code mEntityResolver} only; the DTD
* resolver is left untouched.
*/
public void setEntityResolver(XMLResolver r) {
mEntityResolver = r;
}
/**
* Stores the given resolver as the {@code SP_IX_UNDECL_ENT_RESOLVER}
* special property.
*/
public void setUndeclaredEntityResolver(XMLResolver r) {
_setSpecialProperty(SP_IX_UNDECL_ENT_RESOLVER, r);
}
/** Stores the given URL in {@code mBaseURL}. */
public void setBaseURL(URL baseURL) { mBaseURL = baseURL; }
/** Stores the given parsing mode in {@code mParsingMode}. */
public void setInputParsingMode(WstxInputProperties.ParsingMode mode) {
mParsingMode = mode;
}
/**
* Method called to enable or disable 1.1 compliant processing; if
* disabled, defaults to xml 1.0 compliant processing.
*
* @param state Value stored in {@code mXml11}; true to enable xml 1.1
*   processing
*/
public void enableXml11(boolean state) {
mXml11 = state;
}
/**
* Stores the given listener as the {@code SP_IX_DTD_EVENT_LISTENER}
* special property.
*/
public void setDTDEventListener(DTDEventListener l) {
_setSpecialProperty(SP_IX_DTD_EVENT_LISTENER, l);
}
/**
* Stores the given schema as the {@code SP_IX_DTD_OVERRIDE} special
* property.
*/
public void setDTDOverride(DTDValidationSchema schema) {
_setSpecialProperty(SP_IX_DTD_OVERRIDE, schema);
}
/*
///////////////////////////////////////////////////////////////////////
// Profile mutators:
///////////////////////////////////////////////////////////////////////
*/
/**
* Method to call to make Reader created conform as closely to XML
* standard as possible, doing all checks and transformations mandated
* (linefeed conversions, attr value normalizations).
* See {@link XMLInputFactory2#configureForXmlConformance} for
* required settings for standard StAX/StAX properties.
*
* In addition to the standard settings, following Woodstox-specific * settings are also done: *
* Notes: Does NOT change 'performance' settings (buffer sizes, * DTD caching, coalescing, interning, accurate location info). */ public void configureForXmlConformance() { // // StAX 1.0 settings doSupportNamespaces(true); doSupportDTDs(true); doSupportExternalEntities(true); doReplaceEntityRefs(true); // // Stax2 additional settings // Better enable full xml:id checks: doXmlIdTyping(true); doXmlIdUniqChecks(true); // Woodstox-specific ones: } /** * Method to call to make Reader created be as "convenient" to use * as possible; ie try to avoid having to deal with some of things * like segmented text chunks. This may incur some slight performance * penalties, but should not affect XML conformance. * See {@link XMLInputFactory2#configureForConvenience} for * required settings for standard StAX/StAX properties. *
* In addition to the standard settings, following Woodstox-specific * settings are also done: *
XMLStreamFactory2.P_LAZY_PARSING
(to allow for synchronous
* error notification by forcing full XML events to be completely
* parsed when reader's next() is called)
*
* In addition to the standard settings, following Woodstox-specific * settings are also done: *
P_CACHE_DTDS
.
* XMLStreamFactory2.P_LAZY_PARSING
(can improve performance
* especially when skipping text segments)
* P_MIN_TEXT_SEGMENT
, to allow
* reader to optimize segment length it uses (and possibly avoids
* one copy operation in the process)
* P_INPUT_BUFFER_LENGTH
a bit from default,
* to allow for longer consecutive read operations; also reduces cases
* where partial text segments are on input buffer boundaries.
* * See {@link XMLInputFactory2#configureForLowMemUsage} for * required settings for standard StAX/StAX properties. *
* In addition to the standard settings, following Woodstox-specific * settings are also done: *
P_CACHE_DTDS
* P_PARSE_LAZILY
* P_MIN_TEXT_SEGMENT
to the (somewhat low)
* default value.
* P_INPUT_BUFFER_LENGTH
a bit from the default
* * See {@link XMLInputFactory2#configureForLowMemUsage} for * required settings for standard StAX/StAX properties. *
* In addition to the standard settings, following Woodstox-specific * settings are also done: *
P_MIN_TEXT_SEGMENT
to the maximum value so
* that all original text segment chunks are reported without
* segmentation (but without coalescing with adjacent CDATA segments)
* P_TREAT_CHAR_REFS_AS_ENTS
to true, so the all the
* original character references are reported with their position,
* original text, and the replacement text.
*
* Note about exceptions: choice of only allowing throwing of
* {@link IOException}s is due to the way Woodstox stream writer
* backend works; XmlWriter
can only throw IOExceptions.
*/
/**
 * Callback invoked for a character that can not be written as is.
 * An implementation either returns a substitute character or signals
 * failure by throwing an {@link IOException}.
 */
public interface InvalidCharHandler
{
    /**
     * @param invalidChar Code of the offending character
     * @return Character to use in place of the invalid one
     * @throws IOException If no replacement is produced
     */
    public char convertInvalidChar(int invalidChar) throws IOException;

    /**
     * This handler implementation never produces a replacement: every
     * call ends in an {@link IOException} describing the kind of
     * invalid character that was passed. It is the default handler
     * used if nothing else has been specified.
     */
    public static class FailingHandler
        implements InvalidCharHandler
    {
        public final static int SURR1_FIRST = 0xD800;
        public final static int SURR1_LAST = 0xDBFF;
        public final static int SURR2_FIRST = 0xDC00;
        public final static int SURR2_LAST = 0xDFFF;

        private final static FailingHandler sInstance = new FailingHandler();

        protected FailingHandler() { }

        public static FailingHandler getInstance() { return sInstance; }

        /**
         * Always throws; the message depends on which category the
         * character falls into (null, control character, beyond the
         * Unicode range, surrogate, or anything else).
         */
        public char convertInvalidChar(int c) throws IOException
        {
            /* 17-May-2006, TSa: XMLStreamExceptions would be more useful
             *   (esp. for indicating the output location), but callers
             *   invoked via the Writer proxy can only propagate
             *   IOExceptions. Still needs to be resolved.
             */
            final String hex = Integer.toHexString(c);
            final String problem;

            if (c == 0) {
                problem = "Invalid null character in text to output";
            } else if (c < ' ' || (c >= 0x7F && c <= 0x9F)) {
                problem = "Invalid white space character (0x"+hex+") in text to output (in xml 1.1, could output as a character entity)";
            } else if (c > 0x10FFFF) {
                problem = "Illegal unicode character point (0x"+hex+") to output; max is 0x10FFFF as per RFC 3629";
            } else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
                /* Surrogate pair in non-quotable (not text or attribute
                 * value) content, and non-unicode encoding?
                 */
                problem = "Illegal surrogate pair -- can only be output via character entities, which are not allowed in this content";
            } else {
                problem = "Invalid XML character (0x"+hex+") in text to output";
            }
            throw new IOException(problem);
        }
    }

    /**
     * Alternative to the failing handler: maps every invalid character
     * to the single replacement character given at construction. The
     * replacement itself is returned without any checks.
     */
    public static class ReplacingHandler
        implements InvalidCharHandler
    {
        final char mReplacementChar;

        public ReplacingHandler(char c)
        {
            mReplacementChar = c;
        }

        public char convertInvalidChar(int c) throws IOException
        {
            return mReplacementChar;
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/api/WriterConfig.java 0000644 0001750 0001750 00000071165 11745427074 023722 0 ustar giovanni giovanni package com.ctc.wstx.api;
import java.lang.ref.SoftReference;
import java.util.HashMap;
import javax.xml.stream.*;
import org.codehaus.stax2.XMLOutputFactory2; // for property consts
import org.codehaus.stax2.XMLStreamProperties;
import org.codehaus.stax2.io.EscapingWriterFactory;
import com.ctc.wstx.api.WstxOutputProperties;
import com.ctc.wstx.cfg.OutputConfigFlags;
import com.ctc.wstx.io.BufferRecycler;
import com.ctc.wstx.util.ArgUtil;
import com.ctc.wstx.util.DataUtil;
/**
* Simple configuration container class; passed by the writer factory to
* each writer instance it creates.
*/
public final class WriterConfig
extends CommonConfig
implements OutputConfigFlags
{
// // // Constants for standard Stax properties:
// Prefix that getAutomaticNsPrefix() falls back to when none has been set
protected final static String DEFAULT_AUTOMATIC_NS_PREFIX = "wstxns";
/* Internal integer ids of the configurable properties. Public property
* names are mapped to these ids via sProperties, and getProperty() /
* setProperty() switch on them, so every id has to be distinct.
*/
// // // First, standard Stax writer properties
final static int PROP_AUTOMATIC_NS = 1; // standard property ("repairing")
// // // And then additional Stax2 properties:
// General output settings
final static int PROP_AUTOMATIC_EMPTY_ELEMENTS = 2;
final static int PROP_AUTO_CLOSE_OUTPUT = 3;
// Namespace settings:
final static int PROP_ENABLE_NS = 4;
final static int PROP_AUTOMATIC_NS_PREFIX = 5;
// Escaping text content/attr values:
final static int PROP_TEXT_ESCAPER = 6;
final static int PROP_ATTR_VALUE_ESCAPER = 7;
// Problem checking/reporting options
final static int PROP_PROBLEM_REPORTER = 8;
// // // And then custom Wstx properties:
// Output settings:
final static int PROP_OUTPUT_CDATA_AS_TEXT = 11;
final static int PROP_COPY_DEFAULT_ATTRS = 12;
final static int PROP_ESCAPE_CR = 13;
final static int PROP_ADD_SPACE_AFTER_EMPTY_ELEM = 14;
final static int PROP_AUTOMATIC_END_ELEMENTS = 15;
// Validation flags:
final static int PROP_VALIDATE_STRUCTURE = 16;
final static int PROP_VALIDATE_CONTENT = 17;
final static int PROP_VALIDATE_ATTR = 18;
final static int PROP_VALIDATE_NAMES = 19;
final static int PROP_FIX_CONTENT = 20;
// Other:
final static int PROP_OUTPUT_INVALID_CHAR_HANDLER = 21;
final static int PROP_OUTPUT_EMPTY_ELEMENT_HANDLER = 22;
// Per-writer instance information (rejected by getProperty/setProperty here)
final static int PROP_UNDERLYING_STREAM = 30;
final static int PROP_UNDERLYING_WRITER = 31;
// // // Default settings for additional properties:
// (each of these decides whether the matching CFG_ bit is included in
// DEFAULT_FLAGS_J2ME)
final static boolean DEFAULT_OUTPUT_CDATA_AS_TEXT = false;
final static boolean DEFAULT_COPY_DEFAULT_ATTRS = false;
/* 26-Dec-2006, TSa: Since CRs have been auto-escaped so far, let's
* retain the defaults when adding new properties/features.
*/
final static boolean DEFAULT_ESCAPE_CR = true;
/**
* 09-Aug-2007, TSa: Space has always been added after empty
* element (before closing "/>"), but now it is configurable.
* 31-Dec-2009, TSa: Intention was to leave it enabled for backwards
* compatibility: but due to a bug this was NOT the case... ugh.
*/
final static boolean DEFAULT_ADD_SPACE_AFTER_EMPTY_ELEM = false;
/* How about validation? Let's turn them mostly off by default, since
* there are some performance hits when enabling them.
*/
// Structural checks are easy, cheap and useful...
final static boolean DEFAULT_VALIDATE_STRUCTURE = true;
/* 17-May-2006, TSa: Since content validation is now much cheaper
* (due to integrated transcoders) than it used to be, let's
* just enable content validation too.
*/
final static boolean DEFAULT_VALIDATE_CONTENT = true;
final static boolean DEFAULT_VALIDATE_ATTR = false;
final static boolean DEFAULT_VALIDATE_NAMES = false;
// This only matters if content validation is enabled...
/**
* As per [WSTX-120], default was changed to false,
* from true (default prior to wstx 4.0)
*/
//final static boolean DEFAULT_FIX_CONTENT = true;
final static boolean DEFAULT_FIX_CONTENT = false;
/**
* Default config flags are converted from individual settings,
* to conform to Stax 1.0 specifications. Namespace support, automatic
* empty elements and automatic end elements are always included; the
* remaining bits follow the DEFAULT_ constants.
*/
final static int DEFAULT_FLAGS_J2ME =
0
// Stax 1.0 mandated:
// namespace-awareness assumed; repairing disabled by default:
// | CFG_AUTOMATIC_NS
| CFG_ENABLE_NS
// Usually it's good to allow writer to produce empty elems
// (note: default for woodstox 1.x was false)
| CFG_AUTOMATIC_EMPTY_ELEMENTS
| (DEFAULT_OUTPUT_CDATA_AS_TEXT ? CFG_OUTPUT_CDATA_AS_TEXT : 0)
| (DEFAULT_COPY_DEFAULT_ATTRS ? CFG_COPY_DEFAULT_ATTRS : 0)
| (DEFAULT_ESCAPE_CR ? CFG_ESCAPE_CR : 0)
| (DEFAULT_ADD_SPACE_AFTER_EMPTY_ELEM ? CFG_ADD_SPACE_AFTER_EMPTY_ELEM : 0)
| CFG_AUTOMATIC_END_ELEMENTS
| (DEFAULT_VALIDATE_STRUCTURE ? CFG_VALIDATE_STRUCTURE : 0)
| (DEFAULT_VALIDATE_CONTENT ? CFG_VALIDATE_CONTENT : 0)
| (DEFAULT_VALIDATE_ATTR ? CFG_VALIDATE_ATTR : 0)
| (DEFAULT_VALIDATE_NAMES ? CFG_VALIDATE_NAMES : 0)
| (DEFAULT_FIX_CONTENT ? CFG_FIX_CONTENT : 0)
// As per Stax 1.0 specs, we can not enable auto-closing of output
// by default:
//| CFG_AUTO_CLOSE_OUTPUT
;
/**
* For now, full instances start with same settings as J2ME subset
*/
final static int DEFAULT_FLAGS_FULL = DEFAULT_FLAGS_J2ME;
// // //
/**
 * Map to use for converting from String property ids to ints
 * described above; useful to allow use of switch later on.
 * Keys are the public property names, values the matching
 * PROP_ ids wrapped as Integers.
 */
final static HashMap sProperties = new HashMap(8);
static {
    // // Stax (1.0) standard ones:
    sProperties.put(XMLOutputFactory.IS_REPAIRING_NAMESPACES,
                    DataUtil.Integer(PROP_AUTOMATIC_NS));

    // // Stax2 standard ones:

    // Namespace support
    sProperties.put(XMLStreamProperties.XSP_NAMESPACE_AWARE,
                    DataUtil.Integer(PROP_ENABLE_NS));
    // Generic output
    sProperties.put(XMLOutputFactory2.P_AUTOMATIC_EMPTY_ELEMENTS,
                    DataUtil.Integer(PROP_AUTOMATIC_EMPTY_ELEMENTS));
    sProperties.put(XMLOutputFactory2.P_AUTO_CLOSE_OUTPUT,
                    DataUtil.Integer(PROP_AUTO_CLOSE_OUTPUT));
    // Namespace support
    sProperties.put(XMLOutputFactory2.P_AUTOMATIC_NS_PREFIX,
                    DataUtil.Integer(PROP_AUTOMATIC_NS_PREFIX));
    // Text/attr value escaping (customized escapers)
    sProperties.put(XMLOutputFactory2.P_TEXT_ESCAPER,
                    DataUtil.Integer(PROP_TEXT_ESCAPER));
    sProperties.put(XMLOutputFactory2.P_ATTR_VALUE_ESCAPER,
                    DataUtil.Integer(PROP_ATTR_VALUE_ESCAPER));
    // Problem checking/reporting options
    sProperties.put(XMLStreamProperties.XSP_PROBLEM_REPORTER,
                    DataUtil.Integer(PROP_PROBLEM_REPORTER));

    // // Woodstox-specifics:

    // Output conversions
    sProperties.put(WstxOutputProperties.P_OUTPUT_CDATA_AS_TEXT,
                    DataUtil.Integer(PROP_OUTPUT_CDATA_AS_TEXT));
    sProperties.put(WstxOutputProperties.P_COPY_DEFAULT_ATTRS,
                    DataUtil.Integer(PROP_COPY_DEFAULT_ATTRS));
    sProperties.put(WstxOutputProperties.P_OUTPUT_ESCAPE_CR,
                    DataUtil.Integer(PROP_ESCAPE_CR));
    sProperties.put(WstxOutputProperties.P_ADD_SPACE_AFTER_EMPTY_ELEM,
                    DataUtil.Integer(PROP_ADD_SPACE_AFTER_EMPTY_ELEM));
    sProperties.put(WstxOutputProperties.P_AUTOMATIC_END_ELEMENTS,
                    DataUtil.Integer(PROP_AUTOMATIC_END_ELEMENTS));
    sProperties.put(WstxOutputProperties.P_OUTPUT_INVALID_CHAR_HANDLER,
                    DataUtil.Integer(PROP_OUTPUT_INVALID_CHAR_HANDLER));
    sProperties.put(WstxOutputProperties.P_OUTPUT_EMPTY_ELEMENT_HANDLER,
                    DataUtil.Integer(PROP_OUTPUT_EMPTY_ELEMENT_HANDLER));

    // Validation settings:
    sProperties.put(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE,
                    DataUtil.Integer(PROP_VALIDATE_STRUCTURE));
    sProperties.put(WstxOutputProperties.P_OUTPUT_VALIDATE_CONTENT,
                    DataUtil.Integer(PROP_VALIDATE_CONTENT));
    sProperties.put(WstxOutputProperties.P_OUTPUT_VALIDATE_ATTR,
                    DataUtil.Integer(PROP_VALIDATE_ATTR));
    sProperties.put(WstxOutputProperties.P_OUTPUT_VALIDATE_NAMES,
                    DataUtil.Integer(PROP_VALIDATE_NAMES));
    sProperties.put(WstxOutputProperties.P_OUTPUT_FIX_CONTENT,
                    DataUtil.Integer(PROP_FIX_CONTENT));

    // Underlying stream/writer access. The writer property used to be
    // missing: the stream property was registered twice instead, which
    // left PROP_UNDERLYING_WRITER unreachable by name.
    sProperties.put(WstxOutputProperties.P_OUTPUT_UNDERLYING_STREAM,
                    DataUtil.Integer(PROP_UNDERLYING_STREAM));
    sProperties.put(WstxOutputProperties.P_OUTPUT_UNDERLYING_WRITER,
                    DataUtil.Integer(PROP_UNDERLYING_WRITER));
}
/*
//////////////////////////////////////////////////////////
// Current config state:
//////////////////////////////////////////////////////////
*/
// Set once by the constructor from its j2meSubset argument
final boolean mIsJ2MESubset;
// Bit set of CFG_ flags; read via hasConfigFlag(), changed via setConfigFlag()
protected int mConfigFlags;
/*
//////////////////////////////////////////////////////////
// More special(ized) configuration objects
//////////////////////////////////////////////////////////
*/
// Individual fields formerly considered for these settings; the values
// now live in slots of mSpecialProperties instead:
//protected String mAutoNsPrefix;
//protected EscapingWriterFactory mTextEscaperFactory = null;
//protected EscapingWriterFactory mAttrValueEscaperFactory = null;
//protected XMLReporter mProblemReporter = null;
//protected InvalidCharHandler mInvalidCharHandler = null;
// Holder of object-valued settings, indexed by the SP_IX_ constants below;
// stays null until setSpecialProperty() is first called (or a non-null
// array is handed to the constructor)
Object[] mSpecialProperties = null;
// Length of mSpecialProperties when allocated by setSpecialProperty()
private final static int SPEC_PROC_COUNT = 6;
private final static int SP_IX_AUTO_NS_PREFIX = 0;
private final static int SP_IX_TEXT_ESCAPER_FACTORY = 1;
private final static int SP_IX_ATTR_VALUE_ESCAPER_FACTORY = 2;
private final static int SP_IX_PROBLEM_REPORTER = 3;
private final static int SP_IX_INVALID_CHAR_HANDLER = 4;
private final static int SP_IX_EMPTY_ELEMENT_HANDLER = 5;
/*
//////////////////////////////////////////////////////////
// Buffer recycling:
//////////////////////////////////////////////////////////
*/
/**
* This {@code ThreadLocal} contains a {@link SoftReference}
* to a {@link BufferRecycler} used to provide low-cost
* buffer recycling between config instances used on the same thread.
*/
final static ThreadLocal mRecyclerRef = new ThreadLocal();
/**
* This is the actual container of the recyclable buffers. It
* is obtained via ThreadLocal/SoftReference combination, if one
* exists, when Config instance is created. If one does not
* exist, it will be created the first time a buffer is returned.
*/
BufferRecycler mCurrRecycler = null;
/*
//////////////////////////////////////////////////////////
// Life-cycle:
//////////////////////////////////////////////////////////
*/
/**
 * @param j2meSubset Value for {@code mIsJ2MESubset}
 * @param flags Initial set of CFG_ flag bits
 * @param specProps Array of object-valued settings to use as is
 *   (not copied here); may be null
 */
private WriterConfig(boolean j2meSubset, int flags, Object[] specProps)
{
    mIsJ2MESubset = j2meSubset;
    mConfigFlags = flags;
    mSpecialProperties = specProps;

    /* See whether this thread already has a buffer recycler. They are
     * constructed lazily, and GC may clear the soft reference at any
     * time, so there may not be one. That is fine: one gets created
     * if and when a buffer is handed back.
     */
    final Object cached = mRecyclerRef.get();
    if (cached != null) {
        mCurrRecycler = (BufferRecycler) ((SoftReference) cached).get();
    }
}
/**
* @return New instance marked as J2ME subset, with flags set to
*   {@code DEFAULT_FLAGS_J2ME} and no special properties
*/
public static WriterConfig createJ2MEDefaults()
{
return new WriterConfig(true, DEFAULT_FLAGS_J2ME, null);
}
/**
 * @return New instance for the full (non-J2ME) feature set, with flags
 *   set to {@code DEFAULT_FLAGS_FULL} and no special properties
 */
public static WriterConfig createFullDefaults()
{
    // Was 'true', which marked full instances as J2ME subset as well
    return new WriterConfig(false, DEFAULT_FLAGS_FULL, null);
}
/**
 * Creates a copy of this configuration that has its own special
 * property array (shallow copy; the contained objects are shared),
 * along with the same subset marker and flags.
 *
 * @return New, independently modifiable config instance
 */
public WriterConfig createNonShared()
{
    Object[] copy = null;
    if (mSpecialProperties != null) {
        copy = new Object[mSpecialProperties.length];
        System.arraycopy(mSpecialProperties, 0, copy, 0, copy.length);
    }
    return new WriterConfig(mIsJ2MESubset, mConfigFlags, copy);
}
/*
//////////////////////////////////////////////////////////
// Implementation of abstract methods
//////////////////////////////////////////////////////////
*/
/**
 * Looks up the internal id of a property from {@code sProperties}.
 *
 * @param propName Public name of the property
 * @return Internal id of the property, or -1 if the name is not known
 */
protected int findPropertyId(String propName)
{
    final Object id = sProperties.get(propName);
    if (id == null) {
        return -1;
    }
    return ((Integer) id).intValue();
}
/*
//////////////////////////////////////////////////////////
// Public API
//////////////////////////////////////////////////////////
*/
/**
 * Returns the current value of the property with the given internal id.
 * On/off settings come back as {@link Boolean}; other settings as
 * whatever object their accessor returns.
 *
 * @param id Internal property id (one of the PROP_ constants)
 * @return Current value of the property
 * @throws IllegalStateException For the per-writer properties (which
 *   can not be accessed through the config object), and for ids
 *   that have no handler
 */
public Object getProperty(int id)
{
    switch (id) {

    // Stax 1.0 property:
    case PROP_AUTOMATIC_NS:
        return Boolean.valueOf(automaticNamespacesEnabled());

    // Stax2 properties shared by input and output factories:
    case PROP_ENABLE_NS:
        return Boolean.valueOf(willSupportNamespaces());
    case PROP_PROBLEM_REPORTER:
        return getProblemReporter();

    // Stax2 output-specific properties:
    case PROP_AUTOMATIC_EMPTY_ELEMENTS:
        return Boolean.valueOf(automaticEmptyElementsEnabled());
    case PROP_AUTO_CLOSE_OUTPUT:
        return Boolean.valueOf(willAutoCloseOutput());
    case PROP_AUTOMATIC_NS_PREFIX:
        return getAutomaticNsPrefix();
    case PROP_TEXT_ESCAPER:
        return getTextEscaperFactory();
    case PROP_ATTR_VALUE_ESCAPER:
        return getAttrValueEscaperFactory();

    // Woodstox-specific properties:
    case PROP_OUTPUT_CDATA_AS_TEXT:
        return Boolean.valueOf(willOutputCDataAsText());
    case PROP_COPY_DEFAULT_ATTRS:
        return Boolean.valueOf(willCopyDefaultAttrs());
    case PROP_ESCAPE_CR:
        return Boolean.valueOf(willEscapeCr());
    case PROP_ADD_SPACE_AFTER_EMPTY_ELEM:
        return Boolean.valueOf(willAddSpaceAfterEmptyElem());
    case PROP_AUTOMATIC_END_ELEMENTS:
        return Boolean.valueOf(automaticEndElementsEnabled());
    case PROP_VALIDATE_STRUCTURE:
        return Boolean.valueOf(willValidateStructure());
    case PROP_VALIDATE_CONTENT:
        return Boolean.valueOf(willValidateContent());
    case PROP_VALIDATE_ATTR:
        return Boolean.valueOf(willValidateAttributes());
    case PROP_VALIDATE_NAMES:
        return Boolean.valueOf(willValidateNames());
    case PROP_FIX_CONTENT:
        return Boolean.valueOf(willFixContent());
    case PROP_OUTPUT_INVALID_CHAR_HANDLER:
        return getInvalidCharHandler();
    case PROP_OUTPUT_EMPTY_ELEMENT_HANDLER:
        return getEmptyElementHandler();

    // Per-instance properties: not valid via config object
    case PROP_UNDERLYING_STREAM:
    case PROP_UNDERLYING_WRITER:
        throw new IllegalStateException("Can not access per-stream-writer properties via factory");

    default:
        throw new IllegalStateException("Internal error: no handler for property with internal id "+id+".");
    }
}
/**
 * Changes the value of the property with the given internal id.
 *
 * @param name Public name of the property; handed to
 *   {@code ArgUtil.convertToBoolean} for on/off properties
 * @param id Internal property id (one of the PROP_ constants)
 * @param value New value. For the automatic namespace prefix, null
 *   clears the configured prefix, so that the default prefix is used
 *   again.
 * @return Always true: every recognized property is set, and anything
 *   else results in an exception
 * @throws IllegalStateException For the per-writer properties (which
 *   can not be modified through the config object), and for ids
 *   that have no handler
 */
public boolean setProperty(String name, int id, Object value)
{
    switch (id) {
    // First, Stax 1.0 properties:
    case PROP_AUTOMATIC_NS:
        enableAutomaticNamespaces(ArgUtil.convertToBoolean(name, value));
        break;

    // // // Then Stax2 ones:
    case PROP_ENABLE_NS:
        doSupportNamespaces(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_PROBLEM_REPORTER:
        setProblemReporter((XMLReporter) value);
        break;
    case PROP_AUTOMATIC_EMPTY_ELEMENTS:
        enableAutomaticEmptyElements(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_AUTO_CLOSE_OUTPUT:
        doAutoCloseOutput(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_AUTOMATIC_NS_PREFIX:
        /* Value should be a String, but toString() covers other types.
         * Null is passed through (instead of failing with a
         * NullPointerException), like for the other object-valued
         * properties; the getter then falls back to the default.
         */
        setAutomaticNsPrefix((value == null) ? null : value.toString());
        break;
    case PROP_TEXT_ESCAPER:
        setTextEscaperFactory((EscapingWriterFactory) value);
        break;
    case PROP_ATTR_VALUE_ESCAPER:
        setAttrValueEscaperFactory((EscapingWriterFactory) value);
        break;

    // // // Then Woodstox-specific ones:
    case PROP_OUTPUT_CDATA_AS_TEXT:
        doOutputCDataAsText(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_COPY_DEFAULT_ATTRS:
        doCopyDefaultAttrs(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_ESCAPE_CR:
        doEscapeCr(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_ADD_SPACE_AFTER_EMPTY_ELEM:
        doAddSpaceAfterEmptyElem(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_AUTOMATIC_END_ELEMENTS:
        enableAutomaticEndElements(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_VALIDATE_STRUCTURE:
        doValidateStructure(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_VALIDATE_CONTENT:
        doValidateContent(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_VALIDATE_ATTR:
        doValidateAttributes(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_VALIDATE_NAMES:
        doValidateNames(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_FIX_CONTENT:
        doFixContent(ArgUtil.convertToBoolean(name, value));
        break;
    case PROP_OUTPUT_INVALID_CHAR_HANDLER:
        setInvalidCharHandler((InvalidCharHandler) value);
        break;
    case PROP_OUTPUT_EMPTY_ELEMENT_HANDLER:
        setEmptyElementHandler((EmptyElementHandler) value);
        break;

    // Per-instance properties: not valid via config object
    case PROP_UNDERLYING_STREAM:
    case PROP_UNDERLYING_WRITER:
        throw new IllegalStateException("Can not modify per-stream-writer properties via factory");

    default:
        throw new IllegalStateException("Internal error: no handler for property with internal id "+id+".");
    }
    return true;
}
/*
//////////////////////////////////////////////////////////
// Extended Woodstox API, accessors/modifiers
//////////////////////////////////////////////////////////
*/
// // // "Raw" accessors for on/off properties:
/** @return The raw bit set of CFG_ flags currently in effect */
public int getConfigFlags() { return mConfigFlags; }
// // // Accessors, standard properties:
/** @return True if {@code CFG_AUTOMATIC_NS} is set in the config flags */
public boolean automaticNamespacesEnabled() {
return hasConfigFlag(CFG_AUTOMATIC_NS);
}
// // // Accessors, Woodstox properties:
/** @return True if {@code CFG_AUTOMATIC_EMPTY_ELEMENTS} is set in the config flags */
public boolean automaticEmptyElementsEnabled() {
return hasConfigFlag(CFG_AUTOMATIC_EMPTY_ELEMENTS);
}
/** @return True if {@code CFG_AUTO_CLOSE_OUTPUT} is set in the config flags */
public boolean willAutoCloseOutput() {
return hasConfigFlag(CFG_AUTO_CLOSE_OUTPUT);
}
/** @return True if {@code CFG_ENABLE_NS} is set in the config flags */
public boolean willSupportNamespaces() {
return hasConfigFlag(CFG_ENABLE_NS);
}
/** @return True if {@code CFG_OUTPUT_CDATA_AS_TEXT} is set in the config flags */
public boolean willOutputCDataAsText() {
return hasConfigFlag(CFG_OUTPUT_CDATA_AS_TEXT);
}
/** @return True if {@code CFG_COPY_DEFAULT_ATTRS} is set in the config flags */
public boolean willCopyDefaultAttrs() {
return hasConfigFlag(CFG_COPY_DEFAULT_ATTRS);
}
/** @return True if {@code CFG_ESCAPE_CR} is set in the config flags */
public boolean willEscapeCr() {
return hasConfigFlag(CFG_ESCAPE_CR);
}
/** @return True if {@code CFG_ADD_SPACE_AFTER_EMPTY_ELEM} is set in the config flags */
public boolean willAddSpaceAfterEmptyElem() {
return hasConfigFlag(CFG_ADD_SPACE_AFTER_EMPTY_ELEM);
}
/** @return True if {@code CFG_AUTOMATIC_END_ELEMENTS} is set in the config flags */
public boolean automaticEndElementsEnabled() {
return hasConfigFlag(CFG_AUTOMATIC_END_ELEMENTS);
}
/** @return True if {@code CFG_VALIDATE_STRUCTURE} is set in the config flags */
public boolean willValidateStructure() {
return hasConfigFlag(CFG_VALIDATE_STRUCTURE);
}
/** @return True if {@code CFG_VALIDATE_CONTENT} is set in the config flags */
public boolean willValidateContent() {
return hasConfigFlag(CFG_VALIDATE_CONTENT);
}
/** @return True if {@code CFG_VALIDATE_ATTR} is set in the config flags */
public boolean willValidateAttributes() {
return hasConfigFlag(CFG_VALIDATE_ATTR);
}
/** @return True if {@code CFG_VALIDATE_NAMES} is set in the config flags */
public boolean willValidateNames() {
return hasConfigFlag(CFG_VALIDATE_NAMES);
}
/** @return True if {@code CFG_FIX_CONTENT} is set in the config flags */
public boolean willFixContent() {
return hasConfigFlag(CFG_FIX_CONTENT);
}
/**
 * @return Prefix to use as the base for automatically generated
 *   namespace prefixes ("namespace prefix prefix", so to speak):
 *   the configured value if there is one, otherwise the default,
 *   "wstxns".
 */
public String getAutomaticNsPrefix() {
    final String configured = (String) getSpecialProperty(SP_IX_AUTO_NS_PREFIX);
    return (configured != null) ? configured : DEFAULT_AUTOMATIC_NS_PREFIX;
}
/** @return Value of the {@code SP_IX_TEXT_ESCAPER_FACTORY} special property; null if not set */
public EscapingWriterFactory getTextEscaperFactory() {
return (EscapingWriterFactory) getSpecialProperty(SP_IX_TEXT_ESCAPER_FACTORY);
}
/** @return Value of the {@code SP_IX_ATTR_VALUE_ESCAPER_FACTORY} special property; null if not set */
public EscapingWriterFactory getAttrValueEscaperFactory() {
return (EscapingWriterFactory) getSpecialProperty(SP_IX_ATTR_VALUE_ESCAPER_FACTORY);
}
/** @return Value of the {@code SP_IX_PROBLEM_REPORTER} special property; null if not set */
public XMLReporter getProblemReporter() {
return (XMLReporter) getSpecialProperty(SP_IX_PROBLEM_REPORTER);
}
/** @return Value of the {@code SP_IX_INVALID_CHAR_HANDLER} special property; null if not set */
public InvalidCharHandler getInvalidCharHandler() {
return (InvalidCharHandler) getSpecialProperty(SP_IX_INVALID_CHAR_HANDLER);
}
/** @return Value of the {@code SP_IX_EMPTY_ELEMENT_HANDLER} special property; null if not set */
public EmptyElementHandler getEmptyElementHandler() {
return (EmptyElementHandler) getSpecialProperty(SP_IX_EMPTY_ELEMENT_HANDLER);
}
// // // Mutators:
// Standard properties:
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_AUTOMATIC_NS} flag. */
public void enableAutomaticNamespaces(boolean state) {
setConfigFlag(CFG_AUTOMATIC_NS, state);
}
// Wstx properties:
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_AUTOMATIC_EMPTY_ELEMENTS} flag. */
public void enableAutomaticEmptyElements(boolean state) {
setConfigFlag(CFG_AUTOMATIC_EMPTY_ELEMENTS, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_AUTO_CLOSE_OUTPUT} flag. */
public void doAutoCloseOutput(boolean state) {
setConfigFlag(CFG_AUTO_CLOSE_OUTPUT, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_ENABLE_NS} flag. */
public void doSupportNamespaces(boolean state) {
setConfigFlag(CFG_ENABLE_NS, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_OUTPUT_CDATA_AS_TEXT} flag. */
public void doOutputCDataAsText(boolean state) {
setConfigFlag(CFG_OUTPUT_CDATA_AS_TEXT, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_COPY_DEFAULT_ATTRS} flag. */
public void doCopyDefaultAttrs(boolean state) {
setConfigFlag(CFG_COPY_DEFAULT_ATTRS, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_ESCAPE_CR} flag. */
public void doEscapeCr(boolean state) {
setConfigFlag(CFG_ESCAPE_CR, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_ADD_SPACE_AFTER_EMPTY_ELEM} flag. */
public void doAddSpaceAfterEmptyElem(boolean state) {
setConfigFlag(CFG_ADD_SPACE_AFTER_EMPTY_ELEM, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_AUTOMATIC_END_ELEMENTS} flag. */
public void enableAutomaticEndElements(boolean state) {
setConfigFlag(CFG_AUTOMATIC_END_ELEMENTS, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_VALIDATE_STRUCTURE} flag. */
public void doValidateStructure(boolean state) {
setConfigFlag(CFG_VALIDATE_STRUCTURE, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_VALIDATE_CONTENT} flag. */
public void doValidateContent(boolean state) {
setConfigFlag(CFG_VALIDATE_CONTENT, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_VALIDATE_ATTR} flag. */
public void doValidateAttributes(boolean state) {
setConfigFlag(CFG_VALIDATE_ATTR, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_VALIDATE_NAMES} flag. */
public void doValidateNames(boolean state) {
setConfigFlag(CFG_VALIDATE_NAMES, state);
}
/** Sets ({@code true}) or clears ({@code false}) the {@code CFG_FIX_CONTENT} flag. */
public void doFixContent(boolean state) {
setConfigFlag(CFG_FIX_CONTENT, state);
}
/**
* Stores the prefix as the {@code SP_IX_AUTO_NS_PREFIX} special property.
*
* @param prefix Prefix to use as the base for automatically generated
* namespace prefixes ("namespace prefix prefix", so to speak).
*/
public void setAutomaticNsPrefix(String prefix) {
setSpecialProperty(SP_IX_AUTO_NS_PREFIX, prefix);
}
/** Stores the factory as the {@code SP_IX_TEXT_ESCAPER_FACTORY} special property. */
public void setTextEscaperFactory(EscapingWriterFactory f) {
setSpecialProperty(SP_IX_TEXT_ESCAPER_FACTORY, f);
}
/** Stores the factory as the {@code SP_IX_ATTR_VALUE_ESCAPER_FACTORY} special property. */
public void setAttrValueEscaperFactory(EscapingWriterFactory f) {
setSpecialProperty(SP_IX_ATTR_VALUE_ESCAPER_FACTORY, f);
}
/** Stores the reporter as the {@code SP_IX_PROBLEM_REPORTER} special property. */
public void setProblemReporter(XMLReporter rep) {
setSpecialProperty(SP_IX_PROBLEM_REPORTER, rep);
}
/** Stores the handler as the {@code SP_IX_INVALID_CHAR_HANDLER} special property. */
public void setInvalidCharHandler(InvalidCharHandler h) {
setSpecialProperty(SP_IX_INVALID_CHAR_HANDLER, h);
}
/** Stores the handler as the {@code SP_IX_EMPTY_ELEMENT_HANDLER} special property. */
public void setEmptyElementHandler(EmptyElementHandler h) {
setSpecialProperty(SP_IX_EMPTY_ELEMENT_HANDLER, h);
}
/*
//////////////////////////////////////////////////////////
// Extended Woodstox API, profiles
//////////////////////////////////////////////////////////
*/
/**
 * Profile that turns on all four validation settings: structure,
 * content, attributes and names. For Woodstox this means all basic
 * well-formedness checks, including checking for name validity.
 * Content fixing is not touched.
 */
public void configureForXmlConformance()
{
    doValidateStructure(true);
    doValidateContent(true);
    doValidateAttributes(true);
    doValidateNames(true);
}
/**
 * Profile that turns on all four validation settings, like the
 * conformance profile, and additionally enables the matching "fix-me"
 * property (currently only content fixing exists).
 */
public void configureForRobustness()
{
    doValidateStructure(true);
    doValidateAttributes(true);
    doValidateNames(true);

    /* The part that sets this profile apart: content is not only
     * checked, but also fixed when it is not ok and a fix exists.
     */
    doValidateContent(true);
    doFixContent(true);
}
/**
 * Profile that turns off the validation settings that can have a
 * measurable performance impact: content, attribute and name checks.
 * Structural validation is cheap, and is therefore left as it was.
 */
public void configureForSpeed()
{
    doValidateContent(false);
    doValidateAttributes(false);
    doValidateNames(false);
    // Intentionally no doValidateStructure(false): the current setting is kept
}
/*
/////////////////////////////////////////////////////
// Buffer recycling:
/////////////////////////////////////////////////////
*/
/**
 * Method called to allocate intermediate recyclable copy buffers.
 * A buffer from the current recycler is used when there is one;
 * otherwise a new array is allocated.
 *
 * @param minSize Size requested from the recycler; also the length of
 *   a newly allocated array
 */
public char[] allocMediumCBuffer(int minSize)
{
    final char[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getMediumCBuffer(minSize);
    return (recycled != null) ? recycled : new char[minSize];
}
/**
 * Hands a medium-sized char buffer back for reuse, creating the
 * recycler first if this config does not have one yet.
 */
public void freeMediumCBuffer(char[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // No recycler yet: create one, and keep it for later calls
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnMediumCBuffer(buffer);
}
/**
 * Returns a full-sized char buffer: one from the current recycler when
 * it has one to offer, a newly allocated array otherwise.
 *
 * @param minSize Size requested from the recycler; also the length of
 *   a newly allocated array
 */
public char[] allocFullCBuffer(int minSize)
{
    final char[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getFullCBuffer(minSize);
    return (recycled != null) ? recycled : new char[minSize];
}
/**
 * Hands a full-sized char buffer back for reuse, creating the
 * recycler first if this config does not have one yet.
 */
public void freeFullCBuffer(char[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // No recycler yet: create one, and keep it for later calls
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnFullCBuffer(buffer);
}
/**
 * Returns a full-sized byte buffer: one from the current recycler when
 * it has one to offer, a newly allocated array otherwise.
 *
 * @param minSize Size requested from the recycler; also the length of
 *   a newly allocated array
 */
public byte[] allocFullBBuffer(int minSize)
{
    final byte[] recycled = (mCurrRecycler == null)
        ? null : mCurrRecycler.getFullBBuffer(minSize);
    return (recycled != null) ? recycled : new byte[minSize];
}
/**
 * Hands a full-sized byte buffer back for reuse, creating the
 * recycler first if this config does not have one yet.
 */
public void freeFullBBuffer(byte[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // No recycler yet: create one, and keep it for later calls
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnFullBBuffer(buffer);
}
// NOTE(review): not referenced anywhere in this class; looks like leftover
// debugging state. Confirm nothing else in the package uses it before removing.
static int Counter = 0;
/**
 * Creates a new buffer recycler and registers it, wrapped in a
 * {@link SoftReference}, in the {@code mRecyclerRef} thread-local.
 *
 * @return The newly created recycler
 */
private BufferRecycler createRecycler()
{
    final BufferRecycler fresh = new BufferRecycler();
    // A SoftReference can not be reused or reset, so a new one is needed
    final SoftReference holder = new SoftReference(fresh);
    mRecyclerRef.set(holder);
    return fresh;
}
/*
//////////////////////////////////////////////////////////
// Internal methods
//////////////////////////////////////////////////////////
*/
/**
 * Turns the given bit(s) of {@code mConfigFlags} on or off.
 *
 * @param flag Bit mask of the flag(s) to change
 * @param state True to set the bits, false to clear them
 */
private void setConfigFlag(int flag, boolean state) {
    mConfigFlags = state ? (mConfigFlags | flag) : (mConfigFlags & ~flag);
}
/**
 * @param flag Bit mask to check
 * @return True if every bit of the mask is set in {@code mConfigFlags}
 */
private final boolean hasConfigFlag(int flag) {
    final int masked = mConfigFlags & flag;
    return masked == flag;
}
/**
 * @param ix Slot index (one of the SP_IX_ constants)
 * @return Object stored in the slot; null if the slot array has not
 *   been allocated yet
 */
private final Object getSpecialProperty(int ix)
{
    return (mSpecialProperties != null) ? mSpecialProperties[ix] : null;
}
/**
 * Stores an object-valued setting, allocating the slot array (with
 * {@code SPEC_PROC_COUNT} entries) on first use.
 *
 * @param ix Slot index (one of the SP_IX_ constants)
 * @param value Value to store; may be null
 */
private final void setSpecialProperty(int ix, Object value)
{
    Object[] slots = mSpecialProperties;
    if (slots == null) {
        slots = new Object[SPEC_PROC_COUNT];
        mSpecialProperties = slots;
    }
    slots[ix] = value;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/api/CommonConfig.java 0000644 0001750 0001750 00000020177 11745427074 023673 0 ustar giovanni giovanni package com.ctc.wstx.api;
import java.util.*;
import org.codehaus.stax2.XMLStreamProperties;
import com.ctc.wstx.util.DataUtil;
/**
* Shared common base class for various configuration container implementations
* for public factories Woodstox uses: implementations of
* {@link javax.xml.stream.XMLInputFactory},
* {@link javax.xml.stream.XMLOutputFactory} and
* {@link org.codehaus.stax2.validation.XMLValidationSchemaFactory}.
* Implements basic settings for some shared settings, defined by the
* shared property interface {@link XMLStreamProperties}.
*/
abstract class CommonConfig
implements XMLStreamProperties
{
/*
///////////////////////////////////////////////////////////////////////
// Implementation info
///////////////////////////////////////////////////////////////////////
*/
protected final static String IMPL_NAME = "woodstox";
/* !!! TBI: get from props file or so? Or build as part of Ant
* build process?
*/
/**
* This is "major.minor" version used for purposes of determining
* the feature set. Patch level is not included, since those should
* not affect API or feature set. Using applications should be
* prepared to take additional levels, however, just not depend
* on those being available.
*/
protected final static String IMPL_VERSION = "4.1";
/*
///////////////////////////////////////////////////////////////////////
// Internal constants
///////////////////////////////////////////////////////////////////////
*/
final static int PROP_IMPL_NAME = 1;
final static int PROP_IMPL_VERSION = 2;
final static int PROP_SUPPORTS_XML11 = 3;
final static int PROP_SUPPORT_XMLID = 4;
final static int PROP_RETURN_NULL_FOR_DEFAULT_NAMESPACE = 5;
/**
* Map to use for converting from String property ids to enumeration
* (ints). Used for faster dispatching.
*<p>
* Keys are property name Strings; values are the internal
* <code>PROP_xxx</code> ids, wrapped using <code>DataUtil.Integer()</code>.
*/
final static HashMap sStdProperties = new HashMap(16);
// Populated once, at class initialization time
static {
// Basic information about the implementation:
sStdProperties.put(XMLStreamProperties.XSP_IMPLEMENTATION_NAME,
DataUtil.Integer(PROP_IMPL_NAME));
sStdProperties.put(XMLStreamProperties.XSP_IMPLEMENTATION_VERSION,
DataUtil.Integer(PROP_IMPL_VERSION));
// XML version support:
sStdProperties.put(XMLStreamProperties.XSP_SUPPORTS_XML11,
DataUtil.Integer(PROP_SUPPORTS_XML11));
// Xml:id support:
sStdProperties.put(XMLStreamProperties.XSP_SUPPORT_XMLID,
DataUtil.Integer(PROP_SUPPORT_XMLID));
// Woodstox-specific, but handled here along with the shared ones:
sStdProperties.put(WstxInputProperties.P_RETURN_NULL_FOR_DEFAULT_NAMESPACE,
DataUtil.Integer(PROP_RETURN_NULL_FOR_DEFAULT_NAMESPACE));
/* 23-Apr-2008, tatus: Additional interoperability property,
* one that Sun implementation uses. Can map to the Stax2
* property quite easily (it is simply an alias for the
* implementation name).
*/
sStdProperties.put("http://java.sun.com/xml/stream/properties/implementation-name",
DataUtil.Integer(PROP_IMPL_NAME));
}
// Class is abstract, so only sub-classes get here; there is no state to initialize
protected CommonConfig() { }
/*
///////////////////////////////////////////////////////////////////////
// Public API, generic StAX config methods
///////////////////////////////////////////////////////////////////////
*/
/**
 * Looks up the value of a property by name. Implementation-specific
 * properties (as resolved by {@link #findPropertyId}) take precedence
 * over the shared standard ones.
 *
 * @param propName Name of the property to access
 * @return Value of the property; null if the name was not recognized
 *   and {@link #reportUnknownProperty} returned normally
 */
public Object getProperty(String propName)
{
    /* Related to [WSTX-243]; would be nice to not to have to throw an
     * exception; but Stax spec suggests that we do need to indicate
     * unrecognized property by exception.
     */
    final int implId = findPropertyId(propName);
    if (implId >= 0) {
        return getProperty(implId);
    }
    final int stdId = findStdPropertyId(propName);
    if (stdId >= 0) {
        return getStdProperty(stdId);
    }
    // Neither kind matched: report (default implementation throws)
    reportUnknownProperty(propName);
    return null;
}
/**
 * Checks whether the given name maps to either an
 * implementation-specific or a shared standard property.
 *
 * @param propName Name of the property to check
 * @return True if the name is recognized; false otherwise
 */
public boolean isPropertySupported(String propName)
{
    if (findPropertyId(propName) >= 0) {
        return true;
    }
    return findStdPropertyId(propName) >= 0;
}
/**
 * Changes the value of a property identified by name.
 * Implementation-specific properties (as resolved by
 * {@link #findPropertyId}) take precedence over the shared standard
 * ones.
 *
 * @param propName Name of the property to modify
 * @param value Value to assign
 * @return True, if the specified property was successfully
 *   set to specified value; false if its value was not changed
 */
public boolean setProperty(String propName, Object value)
{
    final int implId = findPropertyId(propName);
    if (implId >= 0) {
        return setProperty(propName, implId, value);
    }
    final int stdId = findStdPropertyId(propName);
    if (stdId >= 0) {
        return setStdProperty(propName, stdId, value);
    }
    // Neither kind matched: report (default implementation throws)
    reportUnknownProperty(propName);
    return false;
}
/**
 * Invoked when a property name matches neither an
 * implementation-specific nor a shared standard property.
 * This implementation always throws.
 *
 * @param propName The unrecognized property name
 * @throws IllegalArgumentException Always
 */
protected void reportUnknownProperty(String propName)
{
    // see [WSTX-243] for discussion on whether to throw...
    final String msg = "Unrecognized property '"+propName+"'";
    throw new IllegalArgumentException(msg);
}
/*
///////////////////////////////////////////////////////////////////////
// Additional methods used by Woodstox core
///////////////////////////////////////////////////////////////////////
*/
/**
 * Variant of {@link #getProperty(String)} that never reports an
 * unrecognized name: null is returned for those instead.
 *
 * @param propName Name of the property to access
 * @return Value of the property; or null if the name is not recognized
 */
public final Object safeGetProperty(String propName)
{
    final int implId = findPropertyId(propName);
    if (implId >= 0) {
        return getProperty(implId);
    }
    final int stdId = findStdPropertyId(propName);
    return (stdId >= 0) ? getStdProperty(stdId) : null;
}
/**
* Method used to figure out the official implementation name
* for input/output/validation factories.
*
* @return Value of the <code>IMPL_NAME</code> constant
*/
public static String getImplName() { return IMPL_NAME; }
/**
* Method used to figure out the official implementation version
* for input/output/validation factories.
*
* @return Value of the <code>IMPL_VERSION</code> constant
* ("major.minor", without patch level)
*/
public static String getImplVersion() { return IMPL_VERSION; }
/*
///////////////////////////////////////////////////////////////////////
// Interface sub-classes have to implement / can override
///////////////////////////////////////////////////////////////////////
*/
/**
* Maps the name of an implementation-specific property to its
* internal id. Consulted before the shared standard properties by
* all name-based accessors of this class.
*
* @param propName Name of the property to look up
*
* @return Internal enumerated int matching the String name
* of the property, if one found: -1 to indicate no match
* was found.
*/
protected abstract int findPropertyId(String propName);
/**
* Backs the value reported for the <code>PROP_SUPPORTS_XML11</code>
* standard property.
*
* @return True in this base implementation
*/
protected boolean doesSupportXml11() {
/* Woodstox does support xml 1.1 ... but sub-classes can
* override it if/as necessary (validator factories might not
* support it?)
*/
return true;
}
/**
* Backs the value reported for the <code>PROP_SUPPORT_XMLID</code>
* standard property.
*
* @return True in this base implementation
*/
protected boolean doesSupportXmlId() {
/* Woodstox does support Xml:id ... but sub-classes can
* override it if/as necessary.
*/
return true;
}
/**
* Backs the value reported for the
* <code>PROP_RETURN_NULL_FOR_DEFAULT_NAMESPACE</code> property.
*<p>
* Note that this base implementation consults a JVM <b>system
* property</b> (via <code>Boolean.getBoolean()</code>) that has the same
* name as the Woodstox property; it does not look at any per-instance
* configuration.
*
* @return True if the system property exists and equals "true"
* (ignoring case); false otherwise
*/
protected boolean returnNullForDefaultNamespace() {
return Boolean.getBoolean(WstxInputProperties.P_RETURN_NULL_FOR_DEFAULT_NAMESPACE);
}
/**
* Accessor for the value of an implementation-specific property.
* Within this class it is only called with a non-negative id that
* {@link #findPropertyId} returned.
*
* @param id Internal id of the property
* @return Current value of the property
*/
protected abstract Object getProperty(int id);
/**
* Mutator for an implementation-specific property. Within this class
* it is only called with a non-negative id that
* {@link #findPropertyId} returned, and its result is passed back
* as-is by the public name-based <code>setProperty</code>.
*
* @param propName Name the property was looked up with
* @param id Internal id of the property
* @param value Value to assign
* @return Presumably true if the value was changed, false if not
* (mirrors the contract documented for the public method) --
* confirm against sub-class implementations
*/
protected abstract boolean setProperty(String propName, int id, Object value);
/*
///////////////////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////////////////
*/
/**
 * Maps the name of a shared standard property to its internal id,
 * using {@link #sStdProperties}.
 *
 * @param propName Name of the property to look up
 * @return Internal id of the property; -1 if the name is not one of
 *   the shared standard properties
 */
protected int findStdPropertyId(String propName)
{
    final Object mapped = sStdProperties.get(propName);
    if (mapped == null) {
        return -1;
    }
    return ((Integer) mapped).intValue();
}
/**
* Mutator for shared standard properties. This base implementation
* ignores all of its arguments and changes nothing.
*
* @param propName Name the property was looked up with (unused here)
* @param id Internal id of the property (unused here)
* @param value Value to assign (unused here)
* @return Always false, to indicate the value was not changed
*/
protected boolean setStdProperty(String propName, int id, Object value)
{
// None of the current shared properties are settable...
return false;
}
/**
 * Accessor for values of the shared standard properties.
 *
 * @param id Internal id of the property, as returned by
 *   {@link #findStdPropertyId}
 * @return Implementation name or version (Strings), or a Boolean
 *   for the capability properties
 * @throws IllegalStateException If the id is not one of the known
 *   standard property ids
 */
protected Object getStdProperty(int id)
{
    if (id == PROP_IMPL_NAME) {
        return IMPL_NAME;
    }
    if (id == PROP_IMPL_VERSION) {
        return IMPL_VERSION;
    }
    // Capability flags: answers come from overridable methods
    if (id == PROP_SUPPORTS_XML11) {
        return Boolean.valueOf(doesSupportXml11());
    }
    if (id == PROP_SUPPORT_XMLID) {
        return Boolean.valueOf(doesSupportXmlId());
    }
    if (id == PROP_RETURN_NULL_FOR_DEFAULT_NAMESPACE) {
        return Boolean.valueOf(returnNullForDefaultNamespace());
    }
    // sanity check, should never happen
    throw new IllegalStateException("Internal error: no handler for property with internal id "+id+".");
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/api/WstxInputProperties.java 0000644 0001750 0001750 00000023665 11745427074 025364 0 ustar giovanni giovanni package com.ctc.wstx.api;
import javax.xml.stream.XMLResolver;
import org.codehaus.stax2.XMLInputFactory2;
/**
* Class that contains constant for property names used to configure
* cursor and event readers produced by Wstx implementation of
* {@link javax.xml.stream.XMLInputFactory}.
*
* TODO: * * - CHECK_CHAR_VALIDITY (separate for white spaces?) * - CATALOG_RESOLVER? (or at least, ENABLE_CATALOGS) */ public final class WstxInputProperties { /** * Constants used when no DTD handling is done, and we do not know the * 'real' type of an attribute. Seems like CDATA is the safe choice. */ public final static String UNKNOWN_ATTR_TYPE = "CDATA"; /* /////////////////////////////////////////////////////// // Simple on/off settings: /////////////////////////////////////////////////////// */ // // // Normalization: /** * Feature that controls whether linefeeds are normalized into * canonical linefeed as mandated by xml specification. *
* Note that disabling this property (from its default enabled * state) will result in non-conforming XML processing. It may * be useful for use cases where changes to input content should * be minimized. *
* Note: this property was initially removed from Woodstox 4.0, * but was reintroduced in 4.0.8 due to user request. */ public final static String P_NORMALIZE_LFS = "com.ctc.wstx.normalizeLFs"; //public final static String P_NORMALIZE_ATTR_VALUES = "com.ctc.wstx.normalizeAttrValues"; // // // XML character validation: /** * Whether readers will verify that characters in text content are fully * valid XML characters (not just Unicode). If true, will check * that they are valid (including white space); if false, will not * check. *
* Turning this option off may improve parsing performance; leaving * it on guarantees compatibility with XML 1.0 specs regarding character * validity rules. */ public final static String P_VALIDATE_TEXT_CHARS = "com.ctc.wstx.validateTextChars"; // // // Caching: /** * Whether readers will try to cache parsed external DTD subsets or not. */ public final static String P_CACHE_DTDS = "com.ctc.wstx.cacheDTDs"; /** * Whether reader is to cache DTDs (when caching enabled) based on public id * or not: if not, system id will be primarily used. Although theoretically * public IDs should be unique, and should be good caching keys, sometimes * broken documents use 'wrong' public IDs, and such by default caching keys * are based on system id only. */ public final static String P_CACHE_DTDS_BY_PUBLIC_ID = "com.ctc.wstx.cacheDTDsByPublicId"; // // // Enabling/disabling lazy/incomplete parsing /** * Whether stream readers are allowed to do lazy parsing, meaning * to parse minimal part of the event when * {@link javax.xml.stream.XMLStreamReader#next} is called, and only parse the rest * as needed (or skip remainder of no extra information is needed). * Alternative to lazy parsing is called "eager parsing", and is * what most xml parsers use by default. *
* Enabling lazy parsing can improve performance for tasks where * number of textual events are skipped. The downside is that * not all well-formedness problems are reported when * {@link javax.xml.stream.XMLStreamReader#next} is called, but only when the * rest of event are read or skipped. *
* Default value for Woodstox is such that lazy parsing is * enabled. * * @deprecated As of Woodstox 4.0 use * {@link XMLInputFactory2#P_LAZY_PARSING} instead (from * Stax2 extension API, v3.0) */ public final static String P_LAZY_PARSING = XMLInputFactory2.P_LAZY_PARSING; // // // API behavior (for backwards compatibility) /** * This read-only property indicates whether null is returned for default name space prefix; * Boolean.TRUE indicates it does, Boolean.FALSE that it does not. *
* Default value for 4.1 is 'false'; this will most likely change for 5.0 since * Stax API actually specifies null to be used. * * @since 4.1.2 */ public final static String P_RETURN_NULL_FOR_DEFAULT_NAMESPACE = "com.ctc.wstx.returnNullForDefaultNamespace"; // // // Enabling/disabling support for dtd++ /** * Whether the Reader will recognized DTD++ extensions when parsing * DTD subsets. *
* Note: not implemented as of 2.0.x */ public final static String P_SUPPORT_DTDPP = "com.ctc.wstx.supportDTDPP"; /** * Whether the Reader will treat character references as entities while parsing * XML documents. */ public static final String P_TREAT_CHAR_REFS_AS_ENTS = "com.ctc.wstx.treatCharRefsAsEnts"; // // // Enabling alternate mode for parsing XML fragments instead // // // of full documents // Automatic W3C Schema support? /* * Whether W3C Schema hint attributes are recognized within document, * and used to locate Schema to use for validation. */ //public final static String P_AUTOMATIC_W3C_SCHEMA = 0x00100000; /* /////////////////////////////////////////////////////// // More complex settings: /////////////////////////////////////////////////////// */ // // // Buffer sizes; /** * Size of input buffer (in chars), to use for reading XML content * from input stream/reader. */ public final static String P_INPUT_BUFFER_LENGTH = "com.ctc.wstx.inputBufferLength"; // // // Constraints on sizes of text segments parsed: /** * Property to specify shortest non-complete text segment (part of * CDATA section or text content) that parser is allowed to return, * if not required to coalesce text. */ public final static String P_MIN_TEXT_SEGMENT = "com.ctc.wstx.minTextSegment"; // // // Entity handling /** * Property of type {@link java.util.Map}, that defines explicit set of * internal (generic) entities that will define of override any entities * defined in internal or external subsets; except for the 5 pre-defined * entities (lt, gt, amp, apos, quot). Can be used to explicitly define * entites that would normally come from a DTD. *
* @deprecated This feature may be removed from future versions of
* Woodstox, since the same functionality can be achieved by using
* custom entity resolvers.
*/
public final static String P_CUSTOM_INTERNAL_ENTITIES = "com.ctc.wstx.customInternalEntities";
/**
* Property of type {@link XMLResolver}, that
* will allow overriding of default DTD and external parameter entity
* resolution.
*/
public final static String P_DTD_RESOLVER = "com.ctc.wstx.dtdResolver";
/**
* Property of type {@link XMLResolver}, that
* will allow overriding of default external general entity
* resolution. Note that using this property overrides settings done
* using {@link javax.xml.stream.XMLInputFactory#RESOLVER} (and vice versa).
*/
public final static String P_ENTITY_RESOLVER = "com.ctc.wstx.entityResolver";
/**
* Property of type {@link XMLResolver}, that
* will allow graceful handling of references to undeclared (general)
* entities.
*/
public final static String P_UNDECLARED_ENTITY_RESOLVER = "com.ctc.wstx.undeclaredEntityResolver";
/**
* Property of type {@link java.net.URL}, that will allow specifying
* context URL to use when resolving relative references, for the
* main-level entities (external DTD subset, references from the internal
* DTD subset).
*/
public final static String P_BASE_URL = "com.ctc.wstx.baseURL";
// // // Alternate parsing modes
/**
* Three-valued property (one of
* {@link #PARSING_MODE_DOCUMENT},
* {@link #PARSING_MODE_FRAGMENT} or
* {@link #PARSING_MODE_DOCUMENTS}; default being the document mode)
* that can be used to handle "non-standard" XML content. The default
* mode (<code>PARSING_MODE_DOCUMENT</code>) allows parsing of only
* well-formed XML documents, but the other two modes allow more lenient
* parsing. Fragment mode allows parsing of XML content that does not
* have a single root element (can have zero or more), nor can have
* XML or DOCTYPE declarations: this may be useful if parsing a subset
* of a full XML document. Multi-document
* (<code>PARSING_MODE_DOCUMENTS</code>) mode on the other hand allows
* parsing of a stream that contains multiple consecutive well-formed
* documents, with possibly multiple XML and DOCTYPE declarations.
*
* The main difference from the API perspective is that in first two * modes, START_DOCUMENT and END_DOCUMENT are used as usual (as the first * and last events returned), whereas the multi-document mode can return * multiple pairs of these events: although it is still true that the * first event (one cursor points to when reader is instantiated or * returned by the event reader), there may be intervening pairs that * signal boundary between two adjacent enclosed documents. */ public final static String P_INPUT_PARSING_MODE = "com.ctc.wstx.fragmentMode"; // // // DTD defaulting, overriding /* //////////////////////////////////////////////////////////////////// // Helper classes, values enumerations //////////////////////////////////////////////////////////////////// */ public final static ParsingMode PARSING_MODE_DOCUMENT = new ParsingMode(); public final static ParsingMode PARSING_MODE_FRAGMENT = new ParsingMode(); public final static ParsingMode PARSING_MODE_DOCUMENTS = new ParsingMode(); /** * Inner class used for creating type-safe enumerations (prior to JDK 1.5). */ public final static class ParsingMode { ParsingMode() { } } } woodstox-4.1.3/src/java/com/ctc/wstx/sax/ 0000755 0001750 0001750 00000000000 11756143457 020467 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/sax/SAXProperty.java 0000644 0001750 0001750 00000003675 11745427074 023543 0 ustar giovanni giovanni /* * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.ctc.wstx.sax; import java.util.HashMap; /** * Type-safe (pre-Java5) enumeration of all currently (SAX 2.0.2) defined * standard properties. */ public final class SAXProperty { public final static String STD_PROPERTY_PREFIX = "http://xml.org/sax/properties/"; final static HashMap sInstances = new HashMap(); // // // "Enum" values: public final static SAXProperty DECLARATION_HANDLER = new SAXProperty("declaration-handler"); public final static SAXProperty DOCUMENT_XML_VERSION = new SAXProperty("document-xml-version"); public final static SAXProperty DOM_NODE = new SAXProperty("dom-node"); public final static SAXProperty LEXICAL_HANDLER = new SAXProperty("lexical-handler"); final static SAXProperty XML_STRING = new SAXProperty("xml-string"); private final String mSuffix; private SAXProperty(String suffix) { mSuffix = suffix; sInstances.put(suffix, this); } public static SAXProperty findByUri(String uri) { if (uri.startsWith(STD_PROPERTY_PREFIX)) { return findBySuffix(uri.substring(STD_PROPERTY_PREFIX.length())); } return null; } public static SAXProperty findBySuffix(String suffix) { return (SAXProperty) sInstances.get(suffix); } public String getSuffix() { return mSuffix; } public String toString() { return STD_PROPERTY_PREFIX + mSuffix; } } woodstox-4.1.3/src/java/com/ctc/wstx/sax/package.html 0000644 0001750 0001750 00000000066 11745427074 022750 0 ustar giovanni giovanni
Contains Woodstox SAX implementation. woodstox-4.1.3/src/java/com/ctc/wstx/sax/WrappedSaxException.java 0000644 0001750 0001750 00000002233 11745427074 025265 0 ustar giovanni giovanni /* * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.sax; import org.xml.sax.SAXException; /** * Simple type-safe wrapper used for "tunneling" SAX exceptions * through interfaces that do not allow them to be thrown. This * is done by extending {@link RuntimeException}. */ public final class WrappedSaxException extends RuntimeException { private static final long serialVersionUID = 1L; protected final SAXException mCause; public WrappedSaxException(SAXException cause) { mCause = cause; } public SAXException getSaxException() { return mCause; } public String toString() { return mCause.toString(); } } woodstox-4.1.3/src/java/com/ctc/wstx/sax/WstxSAXParserFactory.java 0000644 0001750 0001750 00000014137 11745427074 025364 0 ustar giovanni giovanni /* * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.ctc.wstx.sax; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import com.ctc.wstx.stax.WstxInputFactory; /** * This is implementation of the main JAXP SAX factory, and as such * acts as the entry point from JAXP. ** Note: most of the SAX features are not configurable as of yet. * However, effort is made to recognize all existing standard features * and properties, to allow using code to figure out existing * capabilities automatically. */ public class WstxSAXParserFactory extends SAXParserFactory { protected final WstxInputFactory mStaxFactory; /** * Sax feature that determines whether namespace declarations need * to be also reported as attributes or not. */ protected boolean mFeatNsPrefixes = false; public WstxSAXParserFactory() { this(new WstxInputFactory()); } /** * @since 4.0.8 */ public WstxSAXParserFactory(WstxInputFactory f) { mStaxFactory = f; /* defaults should be fine... except that for some weird * reason, by default namespace support is defined to be off */ setNamespaceAware(true); } public boolean getFeature(String name) throws SAXNotRecognizedException, SAXNotSupportedException { SAXFeature stdFeat = SAXFeature.findByUri(name); if (stdFeat == SAXFeature.EXTERNAL_GENERAL_ENTITIES) { return mStaxFactory.getConfig().willSupportExternalEntities(); } else if (stdFeat == SAXFeature.EXTERNAL_PARAMETER_ENTITIES) { return mStaxFactory.getConfig().willSupportExternalEntities(); } else if (stdFeat == SAXFeature.IS_STANDALONE) { // Not known at this point... return false; } else if (stdFeat == SAXFeature.LEXICAL_HANDLER_PARAMETER_ENTITIES) { // !!! TODO: return false; } else if (stdFeat == SAXFeature.NAMESPACES) { return mStaxFactory.getConfig().willSupportNamespaces(); } else if (stdFeat == SAXFeature.NAMESPACE_PREFIXES) { return mFeatNsPrefixes; } else if (stdFeat == SAXFeature.RESOLVE_DTD_URIS) { // !!! 
TODO: return false; } else if (stdFeat == SAXFeature.STRING_INTERNING) { return mStaxFactory.getConfig().willInternNames(); } else if (stdFeat == SAXFeature.UNICODE_NORMALIZATION_CHECKING) { return false; } else if (stdFeat == SAXFeature.USE_ATTRIBUTES2) { return true; } else if (stdFeat == SAXFeature.USE_LOCATOR2) { return true; } else if (stdFeat == SAXFeature.USE_ENTITY_RESOLVER2) { return true; } else if (stdFeat == SAXFeature.VALIDATION) { return mStaxFactory.getConfig().willValidateWithDTD(); } else if (stdFeat == SAXFeature.XMLNS_URIS) { /* !!! TODO: default value should be false... but not sure * if implementing that mode makes sense */ return true; } else if (stdFeat == SAXFeature.XML_1_1) { return true; } else { throw new SAXNotRecognizedException("Feature '"+name+"' not recognized"); } } public SAXParser newSAXParser() { return new WstxSAXParser(mStaxFactory, mFeatNsPrefixes); } public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException { boolean invalidValue = false; boolean readOnly = false; SAXFeature stdFeat = SAXFeature.findByUri(name); if (stdFeat == SAXFeature.EXTERNAL_GENERAL_ENTITIES) { mStaxFactory.getConfig().doSupportExternalEntities(value); } else if (stdFeat == SAXFeature.EXTERNAL_PARAMETER_ENTITIES) { // !!! TODO } else if (stdFeat == SAXFeature.IS_STANDALONE) { readOnly = true; } else if (stdFeat == SAXFeature.LEXICAL_HANDLER_PARAMETER_ENTITIES) { // !!! TODO } else if (stdFeat == SAXFeature.NAMESPACES) { mStaxFactory.getConfig().doSupportNamespaces(value); } else if (stdFeat == SAXFeature.NAMESPACE_PREFIXES) { mFeatNsPrefixes = value; } else if (stdFeat == SAXFeature.RESOLVE_DTD_URIS) { // !!! 
TODO } else if (stdFeat == SAXFeature.STRING_INTERNING) { invalidValue = !value; } else if (stdFeat == SAXFeature.UNICODE_NORMALIZATION_CHECKING) { invalidValue = value; } else if (stdFeat == SAXFeature.USE_ATTRIBUTES2) { readOnly = true; } else if (stdFeat == SAXFeature.USE_LOCATOR2) { readOnly = true; } else if (stdFeat == SAXFeature.USE_ENTITY_RESOLVER2) { readOnly = true; } else if (stdFeat == SAXFeature.VALIDATION) { mStaxFactory.getConfig().doValidateWithDTD(value); } else if (stdFeat == SAXFeature.XMLNS_URIS) { invalidValue = !value; } else if (stdFeat == SAXFeature.XML_1_1) { readOnly = true; } else { throw new SAXNotRecognizedException("Feature '"+name+"' not recognized"); } // Trying to modify read-only properties? if (readOnly) { throw new SAXNotSupportedException("Feature '"+name+"' is read-only, can not be modified"); } if (invalidValue) { throw new SAXNotSupportedException("Trying to set invalid value for feature '"+name+"', '"+value+"'"); } } } woodstox-4.1.3/src/java/com/ctc/wstx/sax/WstxSAXParser.java 0000644 0001750 0001750 00000127225 11745427074 024037 0 ustar giovanni giovanni /* * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.ctc.wstx.sax; import java.io.*; import java.net.URL; import javax.xml.XMLConstants; import javax.xml.parsers.SAXParser; import javax.xml.stream.Location; import javax.xml.stream.XMLResolver; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.ext.Attributes2; import org.xml.sax.ext.DeclHandler; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.ext.Locator2; //import org.codehaus.stax2.DTDInfo; import com.ctc.wstx.api.ReaderConfig; import com.ctc.wstx.dtd.DTDEventListener; import com.ctc.wstx.exc.WstxIOException; import com.ctc.wstx.io.DefaultInputResolver; import com.ctc.wstx.io.InputBootstrapper; import com.ctc.wstx.io.ReaderBootstrapper; import com.ctc.wstx.io.StreamBootstrapper; import com.ctc.wstx.sr.*; import com.ctc.wstx.stax.WstxInputFactory; import com.ctc.wstx.util.ExceptionUtil; import com.ctc.wstx.util.URLUtil; /** * This class implements parser part of JAXP and SAX interfaces; and * effectively offers an alternative to using Stax input factory / * stream reader combination. */ public class WstxSAXParser extends SAXParser implements Parser // SAX1 ,XMLReader // SAX2 ,Attributes2 // SAX2 ,Locator2 // SAX2 ,DTDEventListener // Woodstox-internal { final static boolean FEAT_DEFAULT_NS_PREFIXES = false; /** * We will need the factory reference mostly for constructing * underlying stream reader we use. */ protected final WstxInputFactory mStaxFactory; protected final ReaderConfig mConfig; protected boolean mFeatNsPrefixes; /** * Since the stream reader would mostly be just a wrapper around * the underlying scanner (its main job is to implement Stax * interface), we can and should just use the scanner. In effect, * this class is then a replacement of BasicStreamReader, when * using SAX interfaces. 
*/ protected BasicStreamReader mScanner; protected AttributeCollector mAttrCollector; protected InputElementStack mElemStack; // // // Info from xml declaration protected String mEncoding; protected String mXmlVersion; protected boolean mStandalone; // // // Listeners attached: protected ContentHandler mContentHandler; protected DTDHandler mDTDHandler; private EntityResolver mEntityResolver; private ErrorHandler mErrorHandler; private LexicalHandler mLexicalHandler; private DeclHandler mDeclHandler; // // // State: /** * Number of attributes accessible via {@link Attributes} and * {@link Attributes2} interfaces, for the current start element. *
* Note: does not include namespace declarations, even they are to * be reported as attributes. */ protected int mAttrCount; /** * Need to keep track of number of namespaces, if namespace declarations * are to be reported along with attributes (see * {@link #mFeatNsPrefixes}). */ protected int mNsCount = 0; /* /////////////////////////////////////////////////////////////////////// // Life-cycle /////////////////////////////////////////////////////////////////////// */ /** *
* NOTE: this was a protected constructor for versions 4.0
* and 3.2; changed to public in 4.1
*/
public WstxSAXParser(WstxInputFactory sf, boolean nsPrefixes)
{
    mStaxFactory = sf;
    mFeatNsPrefixes = nsPrefixes;
    // Parser gets its own private configuration, obtained from the factory
    final ReaderConfig cfg = sf.createPrivateConfig();
    mConfig = cfg;
    cfg.doSupportDTDs(true);
    /* Regarding lazy parsing: no explicit default is forced here.
     * While mostly useless with SAX, it is actually necessary for
     * things like properly modeling internal DTD subsets, so
     * there is no easy way to determine a good default.
     */

    /* SAX has just a single resolver concept, with no distinction
     * between DTD resolution (ext. subset, PEs) and resolution of
     * external general entities; hence the same proxy serves as both:
     */
    final ResolverProxy resolverProxy = new ResolverProxy();
    cfg.setDtdResolver(resolverProxy);
    cfg.setEntityResolver(resolverProxy);
    // DTD events are delivered to this parser itself
    cfg.setDTDEventListener(this);
    /* The settings below make no sense as generic defaults, but
     * can be helpful with some test frameworks. Specifically,
     * - DTD caching may remove calls to resolvers, changing
     *   observed behavior
     * - Using min. segment length of 1 will force flushing of
     *   all content before entity expansion, which will
     *   completely serialize entity resolution calls wrt.
     *   CHARACTERS events.
     */
    // !!! TEST
    //mConfig.setShortestReportedTextSegment(1);
    //mConfig.doCacheDTDs(false);
}
/**
* This constructor is provided for two main use cases: testing,
* and introspection via SAX classes (as opposed to JAXP-based
* introspection).
*<p>
* Uses a newly constructed {@link WstxInputFactory}, and the default
* setting ({@link #FEAT_DEFAULT_NS_PREFIXES}) for the namespace-prefixes
* feature.
*/
public WstxSAXParser()
{
this(new WstxInputFactory(), FEAT_DEFAULT_NS_PREFIXES);
}
/**
* Accessor for the SAX1 view of this parser.
*
* @return This instance, since the class itself implements
* the SAX1 <code>Parser</code> interface
*/
public final Parser getParser()
{
return this;
}
/**
* Accessor for the SAX2 view of this parser.
*
* @return This instance, since the class itself implements
* the SAX2 <code>XMLReader</code> interface
*/
public final XMLReader getXMLReader()
{
return this;
}
/**
* Accessor used to allow configuring all standard Stax configuration
* settings that the underlying reader uses.
*
* @return The private configuration object of this parser (the live
* instance, not a copy)
*
* @since 4.0.8
*/
public final ReaderConfig getStaxConfig() {
return mConfig;
}
/*
///////////////////////////////////////////////////////////////////////
// Configuration, SAXParser
///////////////////////////////////////////////////////////////////////
*/
/**
* @return Whatever the private reader configuration reports
* for namespace support
*/
public boolean isNamespaceAware() {
return mConfig.willSupportNamespaces();
}
/**
* @return Whatever the private reader configuration reports
* for DTD-based validation
*/
public boolean isValidating() {
return mConfig.willValidateWithDTD();
}
/**
 * Accessor for values of the standard SAX properties.
 *
 * @param name URI of the property to access
 * @return Currently assigned handler for the declaration-handler and
 *   lexical-handler properties; xml version for
 *   document-xml-version; null for the properties that are
 *   recognized but have no value (dom-node, xml-string)
 * @throws SAXNotRecognizedException If the URI does not denote one of
 *   the standard properties
 */
public Object getProperty(String name)
    throws SAXNotRecognizedException, SAXNotSupportedException
{
    final SAXProperty which = SAXProperty.findByUri(name);
    // First, the two handler properties
    if (which == SAXProperty.DECLARATION_HANDLER) {
        return mDeclHandler;
    }
    if (which == SAXProperty.LEXICAL_HANDLER) {
        return mLexicalHandler;
    }
    // Then information about the document
    if (which == SAXProperty.DOCUMENT_XML_VERSION) {
        return mXmlVersion;
    }
    // And finally ones that are known, but never have a value
    if (which == SAXProperty.DOM_NODE || which == SAXProperty.XML_STRING) {
        return null;
    }
    throw new SAXNotRecognizedException("Property '"+name+"' not recognized");
}
/**
 * Mutator for values of the standard SAX properties. Only the two
 * handler properties (declaration-handler, lexical-handler) can be
 * modified; the other standard properties are read-only.
 *
 * @param name URI of the property to modify
 * @param value Value to assign; null is accepted for the handler
 *   properties and removes the handler
 * @throws SAXNotRecognizedException If the URI does not denote one of
 *   the standard properties
 * @throws SAXNotSupportedException If the property is read-only, or
 *   if the value is not of the type the property requires (as per
 *   the <code>XMLReader</code> contract, instead of letting a
 *   <code>ClassCastException</code> escape)
 */
public void setProperty(String name, Object value)
    throws SAXNotRecognizedException, SAXNotSupportedException
{
    SAXProperty prop = SAXProperty.findByUri(name);
    if (prop == SAXProperty.DECLARATION_HANDLER) {
        // Wrong type must be reported as "not supported", not as a cast failure
        if (value != null && !(value instanceof DeclHandler)) {
            throw new SAXNotSupportedException("Property '"+name+"' requires a value of type org.xml.sax.ext.DeclHandler; got "+value.getClass().getName());
        }
        mDeclHandler = (DeclHandler) value;
        return;
    } else if (prop == SAXProperty.DOCUMENT_XML_VERSION) {
        ; // read-only
    } else if (prop == SAXProperty.DOM_NODE) {
        ; // read-only
    } else if (prop == SAXProperty.LEXICAL_HANDLER) {
        // Wrong type must be reported as "not supported", not as a cast failure
        if (value != null && !(value instanceof LexicalHandler)) {
            throw new SAXNotSupportedException("Property '"+name+"' requires a value of type org.xml.sax.ext.LexicalHandler; got "+value.getClass().getName());
        }
        mLexicalHandler = (LexicalHandler) value;
        return;
    } else if (prop == SAXProperty.XML_STRING) {
        ; // read-only
    } else {
        throw new SAXNotRecognizedException("Property '"+name+"' not recognized");
    }
    // Trying to modify read-only properties?
    throw new SAXNotSupportedException("Property '"+name+"' is read-only, can not be modified");
}
/*
///////////////////////////////////////////////////////////////////////
// Overrides, SAXParser
///////////////////////////////////////////////////////////////////////
*/
/* Some SAXParser methods need to be overridden here: the JDK
 * base implementation tries to override many settings it really
 * should leave alone.
 */

/**
 * Parses given input using the SAX1 handler base as the fallback
 * handler: it is only installed for those roles (document handler,
 * entity resolver, error handler, DTD handler) that have no
 * explicitly set handler yet.
 *
 * @param is input to parse
 * @param hb fallback handler; may be null, in which case no handlers
 *   are changed
 */
public void parse(InputSource is, HandlerBase hb)
    throws SAXException, IOException
{
    /* Only fill in the blanks: JDK would always overwrite handlers,
     * so this is damage control.
     */
    final boolean haveFallback = (hb != null);

    if (haveFallback && mContentHandler == null) {
        setDocumentHandler(hb);
    }
    if (haveFallback && mEntityResolver == null) {
        setEntityResolver(hb);
    }
    if (haveFallback && mErrorHandler == null) {
        setErrorHandler(hb);
    }
    if (haveFallback && mDTDHandler == null) {
        setDTDHandler(hb);
    }
    parse(is);
}
/**
 * Parses given input using the SAX2 default handler as the fallback
 * handler: it is only installed for those roles (content handler,
 * entity resolver, error handler, DTD handler) that have no
 * explicitly set handler yet.
 *
 * @param is input to parse
 * @param dh fallback handler; may be null, in which case no handlers
 *   are changed
 */
public void parse(InputSource is, DefaultHandler dh)
    throws SAXException, IOException
{
    /* Only fill in the blanks: JDK would always overwrite handlers,
     * so this is damage control.
     */
    final boolean haveFallback = (dh != null);

    if (haveFallback && mContentHandler == null) {
        setContentHandler(dh);
    }
    if (haveFallback && mEntityResolver == null) {
        setEntityResolver(dh);
    }
    if (haveFallback && mErrorHandler == null) {
        setErrorHandler(dh);
    }
    if (haveFallback && mDTDHandler == null) {
        setDTDHandler(dh);
    }
    parse(is);
}
/*
///////////////////////////////////////////////////////////////////////
// XMLReader (SAX2) implementation: cfg access
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return the currently registered content handler, or null if none
 *   has been set
 */
public ContentHandler getContentHandler() {
    return mContentHandler;
}
/**
 * @return the currently registered DTD handler, or null if none
 *   has been set
 */
public DTDHandler getDTDHandler() {
    return mDTDHandler;
}
/**
 * @return the currently registered entity resolver, or null if none
 *   has been set
 */
public EntityResolver getEntityResolver() {
    return mEntityResolver;
}
/**
 * @return the currently registered error handler, or null if none
 *   has been set
 */
public ErrorHandler getErrorHandler() {
    return mErrorHandler;
}
/**
 * Reports the current state of one of the standard SAX features.
 * Some values come from the reader configuration, some from state
 * of this parser, and the rest are fixed.
 *
 * @param name URI of the feature to check
 *
 * @return current state of the feature
 *
 * @throws SAXNotRecognizedException if the name does not map to any
 *   of the standard features handled here
 */
public boolean getFeature(String name)
    throws SAXNotRecognizedException
{
    SAXFeature stdFeat = SAXFeature.findByUri(name);
    if (stdFeat == SAXFeature.EXTERNAL_GENERAL_ENTITIES) {
        return mConfig.willSupportExternalEntities();
    } else if (stdFeat == SAXFeature.EXTERNAL_PARAMETER_ENTITIES) {
        return mConfig.willSupportExternalEntities();
    } else if (stdFeat == SAXFeature.IS_STANDALONE) {
        return mStandalone;
    } else if (stdFeat == SAXFeature.LEXICAL_HANDLER_PARAMETER_ENTITIES) {
        // !!! TODO:
        return false;
    } else if (stdFeat == SAXFeature.NAMESPACES) {
        return mConfig.willSupportNamespaces();
    } else if (stdFeat == SAXFeature.NAMESPACE_PREFIXES) {
        /* Must report the same flag that setFeature() modifies and
         * that start-element handling consults; it is independent
         * of whether namespace support itself is enabled.
         */
        return mFeatNsPrefixes;
    } else if (stdFeat == SAXFeature.RESOLVE_DTD_URIS) {
        // !!! TODO:
        return false;
    } else if (stdFeat == SAXFeature.STRING_INTERNING) {
        return true;
    } else if (stdFeat == SAXFeature.UNICODE_NORMALIZATION_CHECKING) {
        return false;
    } else if (stdFeat == SAXFeature.USE_ATTRIBUTES2) {
        return true;
    } else if (stdFeat == SAXFeature.USE_LOCATOR2) {
        return true;
    } else if (stdFeat == SAXFeature.USE_ENTITY_RESOLVER2) {
        return true;
    } else if (stdFeat == SAXFeature.VALIDATION) {
        return mConfig.willValidateWithDTD();
    } else if (stdFeat == SAXFeature.XMLNS_URIS) {
        /* !!! TODO: default value should be false... but not sure
         *   if implementing that mode makes sense
         */
        return true;
    } else if (stdFeat == SAXFeature.XML_1_1) {
        return true;
    }
    throw new SAXNotRecognizedException("Feature '"+name+"' not recognized");
}
// Already implemented for SAXParser
//public Object getProperty(String name)
/*
///////////////////////////////////////////////////////////////////////
// XMLReader (SAX2) implementation: cfg changing
///////////////////////////////////////////////////////////////////////
*/
/**
 * Registers the handler that receives document content events.
 *
 * @param handler handler to use; stored as is
 */
public void setContentHandler(ContentHandler handler) {
    mContentHandler = handler;
}
/**
 * Registers the handler that receives notation and unparsed entity
 * declarations.
 *
 * @param handler handler to use; stored as is
 */
public void setDTDHandler(DTDHandler handler) {
    mDTDHandler = handler;
}
/**
 * Registers the resolver to consult for external entities.
 *
 * @param resolver resolver to use; stored as is
 */
public void setEntityResolver(EntityResolver resolver) {
    mEntityResolver = resolver;
}
/**
 * Registers the handler that gets notified of fatal errors before
 * they are thrown.
 *
 * @param handler handler to use; stored as is
 */
public void setErrorHandler(ErrorHandler handler) {
    mErrorHandler = handler;
}
/**
 * Tries to change the state of one of the standard SAX features.
 * Features fall into four groups: read-only ones, ones that map to
 * a setting that is actually changed, ones that only accept a single
 * fixed value, and ones that are recognized but silently ignored.
 *
 * @param name URI of the feature to change
 * @param value state to set
 *
 * @throws SAXNotRecognizedException if the name does not map to any
 *   of the standard features handled here
 * @throws SAXNotSupportedException if the feature is read-only, or
 *   the value is not one this parser can honor
 */
public void setFeature(String name, boolean value)
    throws SAXNotRecognizedException, SAXNotSupportedException
{
    final SAXFeature feat = SAXFeature.findByUri(name);

    // Group 1: read-only features, can never be modified
    if (feat == SAXFeature.IS_STANDALONE
        || feat == SAXFeature.USE_ATTRIBUTES2
        || feat == SAXFeature.USE_LOCATOR2
        || feat == SAXFeature.USE_ENTITY_RESOLVER2
        || feat == SAXFeature.XML_1_1) {
        throw new SAXNotSupportedException("Feature '"+name+"' is read-only, can not be modified");
    }

    // Group 2: features backed by an actual setting
    if (feat == SAXFeature.EXTERNAL_GENERAL_ENTITIES) {
        mConfig.doSupportExternalEntities(value);
        return;
    }
    if (feat == SAXFeature.NAMESPACES) {
        mConfig.doSupportNamespaces(value);
        return;
    }
    if (feat == SAXFeature.NAMESPACE_PREFIXES) {
        mFeatNsPrefixes = value;
        return;
    }
    if (feat == SAXFeature.VALIDATION) {
        mConfig.doValidateWithDTD(value);
        return;
    }

    // Group 3: features for which only one value is acceptable
    final boolean rejected;
    if (feat == SAXFeature.STRING_INTERNING || feat == SAXFeature.XMLNS_URIS) {
        rejected = !value; // can only be 'true'
    } else if (feat == SAXFeature.UNICODE_NORMALIZATION_CHECKING) {
        rejected = value; // can only be 'false'
    } else if (feat == SAXFeature.EXTERNAL_PARAMETER_ENTITIES
               || feat == SAXFeature.LEXICAL_HANDLER_PARAMETER_ENTITIES
               || feat == SAXFeature.RESOLVE_DTD_URIS) {
        // Group 4: !!! TODO -- recognized, but nothing is changed
        rejected = false;
    } else {
        throw new SAXNotRecognizedException("Feature '"+name+"' not recognized");
    }
    if (rejected) {
        throw new SAXNotSupportedException("Trying to set invalid value for feature '"+name+"', '"+value+"'");
    }
}
// Already implemented for SAXParser
//public void setProperty(String name, Object value)
/*
///////////////////////////////////////////////////////////////////////
// XMLReader (SAX2) implementation: parsing
///////////////////////////////////////////////////////////////////////
*/
/**
 * Parses the document the given input source refers to, sending
 * events to the registered handlers.
 *<p>
 * Input is taken from the character stream if one is given; otherwise
 * from the byte stream; and failing that, from a stream opened using
 * the system id. Start-document event is sent once the input has been
 * located; end-document event is sent after processing ends, whether
 * it succeeded or not. The scanner and the input used are always
 * closed before this method returns (once the try block has been
 * entered), even if the end-document callback itself fails.
 *
 * @param input source to read the document from
 *
 * @throws SAXException if the source contains no usable input, if
 *   input can not be opened, or if parsing fails (I/O and stream
 *   problems are converted via {@link #throwSaxException(Exception)})
 */
public void parse(InputSource input)
    throws SAXException
{
    mScanner = null;
    String systemId = input.getSystemId();
    ReaderConfig cfg = mConfig;

    // Let's figure out input, first, before sending start-doc event
    InputStream is = null;
    Reader r = input.getCharacterStream();
    if (r == null) {
        is = input.getByteStream();
        if (is == null) {
            if (systemId == null) {
                throw new SAXException("Invalid InputSource passed: neither character or byte stream passed, nor system id specified");
            }
            try {
                URL srcUrl = URLUtil.urlFromSystemId(systemId);
                is = URLUtil.inputStreamFromURL(srcUrl);
            } catch (IOException ioe) {
                SAXException saxe = new SAXException(ioe);
                ExceptionUtil.setInitCause(saxe, ioe);
                throw saxe;
            }
        }
    }

    if (mContentHandler != null) {
        mContentHandler.setDocumentLocator(this);
        mContentHandler.startDocument();
    }

    /* Note: since we are reusing the same config instance, need to
     * make sure state is not carried forward. Thus:
     */
    cfg.resetState();

    try {
        String inputEnc = input.getEncoding();
        String publicId = input.getPublicId();

        // Got an InputStream and encoding? Can create a Reader:
        if (r == null && (inputEnc != null && inputEnc.length() > 0)) {
            r = DefaultInputResolver.constructOptimizedReader(cfg, is, false, inputEnc);
        }
        InputBootstrapper bs;
        if (r != null) {
            bs = ReaderBootstrapper.getInstance(publicId, systemId, r, inputEnc);
        } else {
            bs = StreamBootstrapper.getInstance(publicId, systemId, is);
        }
        // false -> not for event reader; false -> no auto-closing
        mScanner = (BasicStreamReader) mStaxFactory.createSR(cfg, systemId, bs, false, false);

        // Need to get xml declaration stuff out now:
        {
            String enc2 = mScanner.getEncoding();
            if (enc2 == null) {
                enc2 = mScanner.getCharacterEncodingScheme();
            }
            mEncoding = enc2;
        }
        mXmlVersion = mScanner.getVersion();
        mStandalone = mScanner.standaloneSet();
        mAttrCollector = mScanner.getAttributeCollector();
        mElemStack = mScanner.getInputElementStack();
        fireEvents();
    } catch (IOException io) {
        throwSaxException(io);
    } catch (XMLStreamException strex) {
        throwSaxException(strex);
    } finally {
        try {
            if (mContentHandler != null) {
                mContentHandler.endDocument();
            }
        } finally {
            /* Clean up must happen even if end-document callback
             * throws; otherwise scanner and input would be leaked.
             */
            // Could try holding onto the buffers, too... but
            // maybe it's better to allow them to be reclaimed, if
            // needed by GC
            if (mScanner != null) {
                BasicStreamReader sr = mScanner;
                mScanner = null;
                try {
                    sr.close();
                } catch (XMLStreamException ignored) {
                    // best-effort close, nothing useful to do on failure
                }
            }
            if (r != null) {
                try {
                    r.close();
                } catch (IOException ignored) {
                    // best-effort close, nothing useful to do on failure
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // best-effort close, nothing useful to do on failure
                }
            }
        }
    }
}
/**
 * Convenience variant that wraps the system id in an
 * {@link InputSource} and parses that.
 *
 * @param systemId system identifier of the document to parse
 */
public void parse(String systemId)
    throws SAXException
{
    parse(new InputSource(systemId));
}
/*
///////////////////////////////////////////////////////////////////////
// Parsing loop, helper methods
///////////////////////////////////////////////////////////////////////
*/
/**
 * Main event loop: pulls events from the scanner and forwards them
 * to SAX handlers, covering everything between the start-document
 * and end-document events. Processing has three phases: prolog
 * (everything up to the root start element), the element tree
 * (until the root element closes), and epilog (until end of document).
 */
private final void fireEvents()
    throws IOException, SAXException, XMLStreamException
{
    /* NOTE(review): the comment that originally accompanied this call
     * said lazy parsing needs to be ENABLED (to get DTD start events
     * before its content events, and for efficient skipping), yet the
     * flag passed is 'false' -- confirm which one is intended.
     */
    mConfig.doParseLazily(false);

    // Phase 1, prolog: anything before the root element
    int type = mScanner.next();
    while (type != XMLStreamConstants.START_ELEMENT) {
        fireAuxEvent(type, false);
        type = mScanner.next();
    }

    // Phase 2, tree: the root start element has already been read
    fireStartTag();
    for (int depth = 1; depth > 0; ) {
        type = mScanner.next();
        switch (type) {
        case XMLStreamConstants.START_ELEMENT:
            fireStartTag();
            ++depth;
            break;
        case XMLStreamConstants.END_ELEMENT:
            mScanner.fireSaxEndElement(mContentHandler);
            --depth; // reaching zero means root was closed
            break;
        case XMLStreamConstants.CHARACTERS:
            mScanner.fireSaxCharacterEvents(mContentHandler);
            break;
        default:
            fireAuxEvent(type, true);
        }
    }

    // Phase 3, epilog: white space here is not reported via SAX
    // (which may or may not differ from Stax)
    while ((type = mScanner.next()) != XMLStreamConstants.END_DOCUMENT) {
        if (type != XMLStreamConstants.SPACE) {
            fireAuxEvent(type, false);
        }
    }
}
/**
 * Forwards an event other than start/end element or regular
 * character data to the appropriate SAX handler.
 *
 * @param type Stax event type of the current scanner event
 * @param inTree true if the event occurs within the element tree;
 *   false for prolog and epilog
 *
 * @throws SAXException for an unexpected end of document, or if
 *   a handler callback fails
 * @throws RuntimeException if the event type is not one this method
 *   knows how to handle
 */
private final void fireAuxEvent(int type, boolean inTree)
    throws IOException, SAXException, XMLStreamException
{
    switch (type) {
    case XMLStreamConstants.PROCESSING_INSTRUCTION:
        mScanner.fireSaxPIEvent(mContentHandler);
        return;

    case XMLStreamConstants.COMMENT:
        mScanner.fireSaxCommentEvent(mLexicalHandler);
        return;

    case XMLStreamConstants.SPACE:
        // With SAX, white space is only an event when inside the
        // tree; never from within prolog/epilog
        if (inTree) {
            mScanner.fireSaxSpaceEvents(mContentHandler);
        }
        return;

    case XMLStreamConstants.CDATA:
        // Contents go out as characters; section boundaries only
        // if there is a lexical handler to tell about them
        if (mLexicalHandler == null) {
            mScanner.fireSaxCharacterEvents(mContentHandler);
        } else {
            mLexicalHandler.startCDATA();
            mScanner.fireSaxCharacterEvents(mContentHandler);
            mLexicalHandler.endCDATA();
        }
        return;

    case XMLStreamConstants.ENTITY_REFERENCE:
        /* Only occurs in non-entity-expanding mode; so effectively
         * we are skipping the entity?
         */
        if (mContentHandler != null) {
            mContentHandler.skippedEntity(mScanner.getLocalName());
        }
        return;

    case XMLStreamConstants.DTD:
        if (mLexicalHandler == null) {
            return;
        }
        /* Bit tricky: calling getDTDInfo() triggers full reading of
         * the subsets, but the basic identifiers are needed first so
         * that dtd-start event can be sent before anything from
         * within the DTD. Hence the separate accessors:
         */
        {
            final String root = mScanner.getDTDRootName();
            final String systemId = mScanner.getDTDSystemId();
            final String publicId = mScanner.getDTDPublicId();
            mLexicalHandler.startDTD(root, publicId, systemId);
        }
        // And then the rest (if any) gets read:
        try {
            mScanner.getDTDInfo();
        } catch (WrappedSaxException wse) {
            throw wse.getSaxException();
        }
        mLexicalHandler.endDTD();
        return;

    default:
        if (type == XMLStreamConstants.END_DOCUMENT) {
            throwSaxException("Unexpected end-of-input in "+(inTree ? "tree" : "prolog"));
        }
        throw new RuntimeException("Internal error: unexpected type, "+type);
    }
}
/**
 * Refreshes the cached attribute (and, when namespace prefixes are
 * to be reported, namespace declaration) counts for the current
 * start element, and then has the scanner send the start element
 * event, with this object passed along as the second argument.
 */
private final void fireStartTag()
    throws SAXException
{
    mAttrCount = mAttrCollector.getCount();

    /* 15-Dec-2006, TSa: namespace bindings added via defaulting are
     *   apparently only visible via the element stack. So the count
     *   MUST come from the element stack, not from the attribute
     *   collector (i.e. not mAttrCollector.getNsCount()), even
     *   though latter looks like the more direct route. See
     *   {@link NsInputElementStack#addNsBinding} for the method that
     *   injects such special bindings (a hack, an afterthought).
     */
    if (mFeatNsPrefixes) {
        mNsCount = mElemStack.getCurrentNsCount();
    }
    mScanner.fireSaxStartElement(mContentHandler, this);
}
/*
///////////////////////////////////////////////////////////////////////
// Parser (SAX1) implementation
///////////////////////////////////////////////////////////////////////
*/
// Already implemented for XMLReader:
//public void parse(InputSource source)
//public void parse(String systemId)
//public void setEntityResolver(EntityResolver resolver)
//public void setErrorHandler(ErrorHandler handler)
/**
 * Registers a SAX1 document handler, by wrapping it in an adapter
 * and installing the adapter as the content handler.
 *
 * @param handler SAX1 handler to wrap
 */
public void setDocumentHandler(DocumentHandler handler)
{
    ContentHandler adapter = new DocHandlerWrapper(handler);
    setContentHandler(adapter);
}
/**
 * Locale changes are not supported; the call is silently ignored.
 *
 * @param locale ignored
 */
public void setLocale(java.util.Locale locale) {
    // intentionally empty: unsupported, ignored
}
/*
///////////////////////////////////////////////////////////////////////
// Attributes (SAX2) implementation
///////////////////////////////////////////////////////////////////////
*/
/**
 * Finds the index of the attribute with given name, by asking the
 * element stack (with null as the first lookup argument).
 *
 * @param qName name of the attribute to find
 *
 * @return index reported by the element stack; -1 if there is no
 *   element stack
 */
public int getIndex(String qName)
{
    // !!! In ns-as-attrs mode, should also match ns decls?
    return (mElemStack == null) ? -1
        : mElemStack.findAttributeIndex(null, qName);
}
/**
 * Finds the index of the attribute with given namespace URI and
 * local name, by asking the element stack.
 *
 * @param uri namespace URI of the attribute to find
 * @param localName local name of the attribute to find
 *
 * @return index reported by the element stack; -1 if there is no
 *   element stack
 */
public int getIndex(String uri, String localName)
{
    // !!! In ns-as-attrs mode, should also match ns decls?
    return (mElemStack == null) ? -1
        : mElemStack.findAttributeIndex(uri, localName);
}
/**
 * @return sum of the cached attribute count and the cached
 *   namespace declaration count
 */
public int getLength() {
    return mAttrCount + mNsCount;
}
/**
 * Returns local name of the attribute at given index. Indexes
 * below the attribute count refer to regular attributes; the ones
 * following refer to namespace declarations, for which the declared
 * prefix (or "xmlns" for one without a prefix) is returned.
 *
 * @param index index of the attribute
 *
 * @return local name; null if index is out of range
 */
public String getLocalName(int index)
{
    if (index < mAttrCount) {
        if (index < 0) {
            return null;
        }
        return mAttrCollector.getLocalName(index);
    }
    final int nsIndex = index - mAttrCount;
    if (nsIndex >= mNsCount) {
        return null;
    }
    /* As discussed in fireStartTag, element stack must be used
     * here, not attribute collector.
     */
    final String nsPrefix = mElemStack.getLocalNsPrefix(nsIndex);
    if (nsPrefix == null || nsPrefix.length() == 0) {
        return "xmlns";
    }
    return nsPrefix;
}
/**
 * Returns qualified name of the attribute at given index. Indexes
 * below the attribute count refer to regular attributes, for which
 * "prefix:localName" (or just local name, if no prefix) is returned;
 * the ones following refer to namespace declarations, for which
 * "xmlns:prefix" (or just "xmlns") is returned.
 *
 * @param index index of the attribute
 *
 * @return qualified name; null if index is out of range
 */
public String getQName(int index)
{
    if (index < 0 && index < mAttrCount) {
        return null;
    }
    if (index < mAttrCount) {
        final String attrPrefix = mAttrCollector.getPrefix(index);
        final String localName = mAttrCollector.getLocalName(index);
        if (attrPrefix == null || attrPrefix.length() == 0) {
            return localName;
        }
        return attrPrefix + ":" + localName;
    }
    final int nsIndex = index - mAttrCount;
    if (nsIndex >= mNsCount) {
        return null;
    }
    /* As discussed in fireStartTag, element stack must be used
     * here, not attribute collector.
     */
    final String nsPrefix = mElemStack.getLocalNsPrefix(nsIndex);
    final boolean isDefault = (nsPrefix == null || nsPrefix.length() == 0);
    return isDefault ? "xmlns" : ("xmlns:"+nsPrefix);
}
/**
 * Returns type of the attribute at given index. Indexes below the
 * attribute count refer to regular attributes, whose type is asked
 * from the element stack; the ones following refer to namespace
 * declarations, which are all reported as "CDATA".
 *
 * @param index index of the attribute
 *
 * @return type name; null if index is out of range
 */
public String getType(int index)
{
    if (index < mAttrCount) {
        if (index < 0) {
            return null;
        }
        /* Note: Woodstox will have separate type for enumerated values;
         * SAX considers these NMTOKENs, so may need to convert (but
         * note: some SAX impls also use "ENUMERATED")
         */
        String type = mElemStack.getAttributeType(index);
        // Compare by value: must not depend on the String being interned
        if ("ENUMERATED".equals(type)) {
            type = "NMTOKEN";
        }
        return type;
    }
    // But how about namespace declarations... let's just call them CDATA?
    index -= mAttrCount;
    if (index < mNsCount) {
        return "CDATA";
    }
    return null;
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based accessor.
 *
 * @param qName name of the attribute
 *
 * @return type name; null if no index can be found for the name
 */
public String getType(String qName)
{
    final int index = getIndex(qName);
    return getType(index);
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based accessor.
 *
 * @param uri namespace URI of the attribute
 * @param localName local name of the attribute
 *
 * @return type name; null if no index can be found for the name
 */
public String getType(String uri, String localName)
{
    final int index = getIndex(uri, localName);
    return getType(index);
}
/**
 * Returns namespace URI of the attribute at given index. Regular
 * attributes without a namespace yield an empty String; namespace
 * declarations yield the URI reserved for "xmlns" attributes.
 *
 * @param index index of the attribute
 *
 * @return namespace URI; null if index is out of range
 */
public String getURI(int index)
{
    if (index >= mAttrCount) {
        final boolean isNsDecl = (index - mAttrCount) < mNsCount;
        return isNsDecl ? XMLConstants.XMLNS_ATTRIBUTE_NS_URI : null;
    }
    if (index < 0) {
        return null;
    }
    final String nsUri = mAttrCollector.getURI(index);
    if (nsUri == null) {
        return "";
    }
    return nsUri;
}
/**
 * Returns value of the attribute at given index. Indexes below the
 * attribute count refer to regular attributes; the ones following
 * refer to namespace declarations, for which the declared URI (or
 * empty String, if none) is returned.
 *
 * @param index index of the attribute
 *
 * @return attribute value; null if index is out of range
 */
public String getValue(int index)
{
    if (index < mAttrCount) {
        if (index < 0) {
            return null;
        }
        return mAttrCollector.getValue(index);
    }
    final int nsIndex = index - mAttrCount;
    if (nsIndex >= mNsCount) {
        return null;
    }
    /* As discussed in fireStartTag, element stack must be used
     * here, not attribute collector.
     */
    final String nsUri = mElemStack.getLocalNsURI(nsIndex);
    if (nsUri == null) {
        return "";
    }
    return nsUri;
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based accessor.
 *
 * @param qName name of the attribute
 *
 * @return attribute value; null if no index can be found for the name
 */
public String getValue(String qName)
{
    final int index = getIndex(qName);
    return getValue(index);
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based accessor.
 *
 * @param uri namespace URI of the attribute
 * @param localName local name of the attribute
 *
 * @return attribute value; null if no index can be found for the name
 */
public String getValue(String uri, String localName)
{
    final int index = getIndex(uri, localName);
    return getValue(index);
}
/*
///////////////////////////////////////////////////////////////////////
// Attributes2 (SAX2) implementation
///////////////////////////////////////////////////////////////////////
*/
/**
 * Checks whether the attribute at given index is considered to have
 * been declared. Currently true is returned for every valid index,
 * both for regular attributes and for namespace declarations.
 *
 * @param index index of the attribute
 *
 * @return true for any valid index
 *
 * @throws IllegalArgumentException if index is out of range; the
 *   message contains the index as passed by the caller
 */
public boolean isDeclared(int index)
{
    if (index < mAttrCount) {
        if (index >= 0) {
            // !!! TODO: implement properly
            return true;
        }
    } else {
        // Separate variable, so that error below reports caller's index
        int nsIndex = index - mAttrCount;
        if (nsIndex < mNsCount) {
            /* DTD and namespaces don't really play nicely together;
             * and in general xmlns: pseudo-attributes are not declared...
             * so not quite sure what to return here. For now, let's
             * return true, to indicate they ought to be valid
             */
            return true;
        }
    }
    throwNoSuchAttribute(index);
    return false; // never gets here
}
/**
 * Name-based variant; currently a stub that does no lookup.
 *<p>
 * NOTE(review): always returns false, whereas the index-based
 * variant returns true for every valid index -- confirm which
 * behavior is intended.
 *
 * @param qName name of the attribute (not used)
 *
 * @return always false
 */
public boolean isDeclared(String qName) {
    return false;
}
/**
 * Name-based variant; currently a stub that does no lookup.
 *<p>
 * NOTE(review): always returns false, whereas the index-based
 * variant returns true for every valid index -- confirm which
 * behavior is intended.
 *
 * @param uri namespace URI of the attribute (not used)
 * @param localName local name of the attribute (not used)
 *
 * @return always false
 */
public boolean isDeclared(String uri, String localName) {
    return false;
}
/**
 * Checks whether the attribute at given index was explicitly
 * specified. For regular attributes the attribute collector is
 * asked; namespace declarations are always reported as specified.
 *
 * @param index index of the attribute
 *
 * @return true if attribute is considered to have been specified
 *
 * @throws IllegalArgumentException if index is out of range; the
 *   message contains the index as passed by the caller
 */
public boolean isSpecified(int index)
{
    if (index < mAttrCount) {
        if (index >= 0) {
            return mAttrCollector.isSpecified(index);
        }
    } else {
        // Separate variable, so that error below reports caller's index
        int nsIndex = index - mAttrCount;
        if (nsIndex < mNsCount) {
            /* Determining default-attr - based namespace declarations
             * would need new accessors on Woodstox... but they are
             * extremely rare, too
             */
            return true;
        }
    }
    throwNoSuchAttribute(index);
    return false; // never gets here
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based check.
 *
 * @param qName name of the attribute
 *
 * @return true if attribute is considered to have been specified
 *
 * @throws IllegalArgumentException if no index can be found for
 *   the name
 */
public boolean isSpecified(String qName)
{
    final int index = getIndex(qName);
    if (index >= 0) {
        return isSpecified(index);
    }
    throw new IllegalArgumentException("No attribute with qName '"+qName+"'");
}
/**
 * Name-based variant: resolves the index first, then delegates to
 * the index-based check.
 *
 * @param uri namespace URI of the attribute
 * @param localName local name of the attribute
 *
 * @return true if attribute is considered to have been specified
 *
 * @throws IllegalArgumentException if no index can be found for
 *   the name
 */
public boolean isSpecified(String uri, String localName)
{
    final int index = getIndex(uri, localName);
    if (index >= 0) {
        return isSpecified(index);
    }
    throw new IllegalArgumentException("No attribute with uri "+uri+", local name '"+localName+"'");
}
/*
///////////////////////////////////////////////////////////////////////
// Locator (SAX1) implementation
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return column number of the scanner's current location; -1 if
 *   there is no active scanner
 */
public int getColumnNumber()
{
    if (mScanner == null) {
        return -1;
    }
    return mScanner.getLocation().getColumnNumber();
}
/**
 * @return line number of the scanner's current location; -1 if
 *   there is no active scanner
 */
public int getLineNumber()
{
    if (mScanner == null) {
        return -1;
    }
    return mScanner.getLocation().getLineNumber();
}
/**
 * @return public id of the scanner's current location; null if
 *   there is no active scanner
 */
public String getPublicId()
{
    if (mScanner == null) {
        return null;
    }
    return mScanner.getLocation().getPublicId();
}
/**
 * @return system id of the scanner's current location; null if
 *   there is no active scanner
 */
public String getSystemId()
{
    if (mScanner == null) {
        return null;
    }
    return mScanner.getLocation().getSystemId();
}
/*
///////////////////////////////////////////////////////////////////////
// Locator2 (SAX2) implementation
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return the encoding value stored by this parser (possibly null)
 */
public String getEncoding() {
    return mEncoding;
}
/**
 * @return the xml version value stored by this parser (possibly null)
 */
public String getXMLVersion() {
    return mXmlVersion;
}
/*
///////////////////////////////////////////////////////////////////////
// DTDEventListener (woodstox internal API) impl
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return true if a lexical handler is registered (comments are
 *   only ever forwarded to that handler); false otherwise
 */
public boolean dtdReportComments() {
    return (mLexicalHandler != null);
}
/**
 * Forwards a comment to the lexical handler, if one is registered.
 * A SAXException thrown by the handler is rethrown wrapped in a
 * WrappedSaxException.
 *
 * @param data buffer that contains the comment text
 * @param offset offset of the first character of the comment
 * @param len length of the comment, in characters
 */
public void dtdComment(char[] data, int offset, int len)
{
    if (mLexicalHandler == null) {
        return;
    }
    try {
        mLexicalHandler.comment(data, offset, len);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards a processing instruction to the content handler, if one
 * is registered. A SAXException thrown by the handler is rethrown
 * wrapped in a WrappedSaxException.
 *
 * @param target target of the processing instruction
 * @param data data of the processing instruction
 */
public void dtdProcessingInstruction(String target, String data)
{
    if (mContentHandler == null) {
        return;
    }
    try {
        mContentHandler.processingInstruction(target, data);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards notification of a skipped entity to the content handler,
 * if one is registered. A SAXException thrown by the handler is
 * rethrown wrapped in a WrappedSaxException.
 *
 * @param name name of the entity that was skipped
 */
public void dtdSkippedEntity(String name)
{
    if (mContentHandler == null) {
        return;
    }
    try {
        mContentHandler.skippedEntity(name);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
// DTD declarations that must be exposed
/**
 * Forwards a notation declaration to the DTD handler, if one is
 * registered. A system id that contains no colon is first resolved
 * against the base URL.
 *
 * @param name name of the notation
 * @param publicId public id of the notation
 * @param systemId system id of the notation; may be null
 * @param baseURL context to resolve a relative system id against
 *
 * @throws XMLStreamException (a WstxIOException) if resolving the
 *   system id fails
 */
public void dtdNotationDecl(String name, String publicId, String systemId, URL baseURL)
    throws XMLStreamException
{
    if (mDTDHandler == null) {
        return;
    }
    /* 24-Nov-2006, TSa: Note: SAX expects system identifiers to
     *   be fully resolved when reported...
     */
    final boolean needsResolving = (systemId != null) && (systemId.indexOf(':') < 0);
    if (needsResolving) {
        try {
            systemId = URLUtil.urlFromSystemId(systemId, baseURL).toExternalForm();
        } catch (IOException e) {
            throw new WstxIOException(e);
        }
    }
    try {
        mDTDHandler.notationDecl(name, publicId, systemId);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards an unparsed entity declaration to the DTD handler, if
 * one is registered. A system id that contains no colon is first
 * resolved against the base URL; a null system id is passed through
 * as is (same as with notation declarations).
 *
 * @param name name of the entity
 * @param publicId public id of the entity
 * @param systemId system id of the entity
 * @param notationName name of the notation associated with the entity
 * @param baseURL context to resolve a relative system id against
 *
 * @throws XMLStreamException (a WstxIOException) if resolving the
 *   system id fails
 */
public void dtdUnparsedEntityDecl(String name, String publicId, String systemId, String notationName, URL baseURL)
    throws XMLStreamException
{
    if (mDTDHandler != null) {
        // SAX expects system id to be fully resolved?
        if (systemId != null && systemId.indexOf(':') < 0) { // relative path...
            try {
                systemId = URLUtil.urlFromSystemId(systemId, baseURL).toExternalForm();
            } catch (IOException ioe) {
                throw new WstxIOException(ioe);
            }
        }
        try {
            mDTDHandler.unparsedEntityDecl(name, publicId, systemId, notationName);
        } catch (SAXException sex) {
            throw new WrappedSaxException(sex);
        }
    }
}
// DTD declarations that can be exposed
/**
 * Forwards an attribute declaration to the declaration handler, if
 * one is registered. A SAXException thrown by the handler is
 * rethrown wrapped in a WrappedSaxException.
 * Arguments are passed to the handler unmodified.
 */
public void attributeDecl(String eName, String aName, String type, String mode, String value)
{
    if (mDeclHandler == null) {
        return;
    }
    try {
        mDeclHandler.attributeDecl(eName, aName, type, mode, value);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards an element declaration to the declaration handler, if
 * one is registered. A SAXException thrown by the handler is
 * rethrown wrapped in a WrappedSaxException.
 * Arguments are passed to the handler unmodified.
 */
public void dtdElementDecl(String name, String model)
{
    if (mDeclHandler == null) {
        return;
    }
    try {
        mDeclHandler.elementDecl(name, model);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards an external entity declaration to the declaration
 * handler, if one is registered. A SAXException thrown by the
 * handler is rethrown wrapped in a WrappedSaxException.
 * Arguments are passed to the handler unmodified.
 */
public void dtdExternalEntityDecl(String name, String publicId, String systemId)
{
    if (mDeclHandler == null) {
        return;
    }
    try {
        mDeclHandler.externalEntityDecl(name, publicId, systemId);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/**
 * Forwards an internal entity declaration to the declaration
 * handler, if one is registered. A SAXException thrown by the
 * handler is rethrown wrapped in a WrappedSaxException.
 * Arguments are passed to the handler unmodified.
 */
public void dtdInternalEntityDecl(String name, String value)
{
    if (mDeclHandler == null) {
        return;
    }
    try {
        mDeclHandler.internalEntityDecl(name, value);
    } catch (SAXException e) {
        throw new WrappedSaxException(e);
    }
}
/*
///////////////////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////////////////
*/
/**
 * Converts given exception into a SAXParseException (with this
 * object as the locator and the original as the cause), reports it
 * to the error handler, if any, as a fatal error, and throws it.
 * Never returns normally.
 *
 * @param src exception to convert
 */
private void throwSaxException(Exception src)
    throws SAXException
{
    final SAXParseException wrapped =
        new SAXParseException(src.getMessage(), /*(Locator)*/ this, src);
    ExceptionUtil.setInitCause(wrapped, src);

    final boolean haveHandler = (mErrorHandler != null);
    if (haveHandler) {
        mErrorHandler.fatalError(wrapped);
    }
    throw wrapped;
}
/**
 * Constructs a SAXParseException with given message (and this
 * object as the locator), reports it to the error handler, if any,
 * as a fatal error, and throws it. Never returns normally.
 *
 * @param msg message of the exception
 */
private void throwSaxException(String msg)
    throws SAXException
{
    final SAXParseException problem =
        new SAXParseException(msg, /*(Locator)*/ this);

    final boolean haveHandler = (mErrorHandler != null);
    if (haveHandler) {
        mErrorHandler.fatalError(problem);
    }
    throw problem;
}
/**
 * Always throws an IllegalArgumentException that describes the
 * invalid attribute index, along with the number of attributes
 * (regular attributes plus namespace declarations) available.
 *
 * @param index index to include in the message
 */
private void throwNoSuchAttribute(int index)
{
    final int available = mAttrCount+mNsCount;
    throw new IllegalArgumentException("No attribute with index "+index+" (have "+available+" attributes)");
}
/*
/////////////////////////////////////////////////
// Helper class for dealing with entity resolution
/////////////////////////////////////////////////
*/
/**
 * Small adapter that exposes the SAX EntityResolver registered on
 * the enclosing parser via the Stax {@link XMLResolver} interface.
 */
final class ResolverProxy
    implements XMLResolver
{
    public ResolverProxy() { }

    /**
     * Asks the SAX entity resolver (if any) to resolve the entity,
     * and returns the stream the resulting input source carries:
     * byte stream is preferred, character stream is used otherwise.
     *
     * @return an InputStream or a Reader; or null if there is no
     *   resolver, the resolver returned nothing, or the input source
     *   it returned has neither stream
     *
     * @throws XMLStreamException if resolution fails: I/O problems
     *   are reported as WstxIOException, anything else is wrapped
     *   as a generic XMLStreamException
     */
    public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace)
        throws XMLStreamException
    {
        if (mEntityResolver == null) {
            return null;
        }
        try {
            /* Hmmh. SAX expects system id to have been mangled prior
             * to call... this may work, depending on stax impl:
             */
            final URL base = new URL(baseURI);
            final String resolvedId = new URL(base, systemID).toExternalForm();
            final InputSource resolved = mEntityResolver.resolveEntity(publicID, resolvedId);
            // Returning null should be fine, actually...
            if (resolved == null) {
                return null;
            }
            final InputStream bytes = resolved.getByteStream();
            if (bytes != null) {
                return bytes;
            }
            // may be null as well, in which case nothing was found
            return resolved.getCharacterStream();
        } catch (IOException e) {
            throw new WstxIOException(e);
        } catch (Exception e) {
            throw new XMLStreamException(e.getMessage(), e);
        }
    }
}
/*
///////////////////////////////////////////////////////////////////////
// Helper classes for SAX1 support
///////////////////////////////////////////////////////////////////////
*/
/**
 * Adapter that lets a SAX1 {@link DocumentHandler} be used where a
 * SAX2 {@link ContentHandler} is expected. Events that SAX1 has a
 * counterpart for are forwarded; the rest are dropped.
 */
final static class DocHandlerWrapper
    implements ContentHandler
{
    final DocumentHandler mDocHandler;

    /**
     * Reusable wrapper, used for passing SAX2 attributes as SAX1
     * attribute list
     */
    final AttributesWrapper mAttrWrapper = new AttributesWrapper();

    DocHandlerWrapper(DocumentHandler h) {
        mDocHandler = h;
    }

    // // // Document boundaries, locator

    public void setDocumentLocator(Locator locator) {
        mDocHandler.setDocumentLocator(locator);
    }

    public void startDocument() throws SAXException {
        mDocHandler.startDocument();
    }

    public void endDocument() throws SAXException {
        mDocHandler.endDocument();
    }

    // // // Elements

    public void startElement(String uri, String localName, String qName,
                             Attributes attrs)
        throws SAXException
    {
        // SAX1 only knows about one name: qualified, if we have it
        final String name = (qName == null) ? localName : qName;
        // Also, need to wrap Attributes to look like AttributeList
        mAttrWrapper.setAttributes(attrs);
        mDocHandler.startElement(name, mAttrWrapper);
    }

    public void endElement(String uri, String localName, String qName)
        throws SAXException
    {
        mDocHandler.endElement((qName == null) ? localName : qName);
    }

    // // // Textual content, processing instructions

    public void characters(char[] ch, int start, int length)
        throws SAXException
    {
        mDocHandler.characters(ch, start, length);
    }

    public void ignorableWhitespace(char[] ch, int start, int length)
        throws SAXException
    {
        mDocHandler.ignorableWhitespace(ch, start, length);
    }

    public void processingInstruction(String target, String data)
        throws SAXException
    {
        mDocHandler.processingInstruction(target, data);
    }

    // // // Events with no equivalent in SAX1: all ignored

    public void startPrefixMapping(String prefix, String uri) { }

    public void endPrefixMapping(String prefix) { }

    public void skippedEntity(String name) { }
}
/**
 * Adapter that presents SAX2 {@link Attributes} via the SAX1
 * {@link AttributeList} interface. The wrapped instance is
 * replaceable, so that one wrapper can be reused.
 */
final static class AttributesWrapper
    implements AttributeList
{
    /**
     * Attributes currently being exposed; needs to be set via
     * {@link #setAttributes} before any of the accessors is called.
     */
    Attributes mAttrs;

    public AttributesWrapper() { }

    public void setAttributes(Attributes a) { mAttrs = a; }

    public int getLength() { return mAttrs.getLength(); }

    /**
     * @return qualified name of the attribute, if one is available;
     *   its local name otherwise
     */
    public String getName(int i)
    {
        final String qualified = mAttrs.getQName(i);
        if (qualified != null) {
            return qualified;
        }
        return mAttrs.getLocalName(i);
    }

    public String getType(int i) { return mAttrs.getType(i); }

    public String getType(String name) { return mAttrs.getType(name); }

    public String getValue(int i) { return mAttrs.getValue(i); }

    public String getValue(String name) { return mAttrs.getValue(name); }
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/sax/SAXFeature.java 0000644 0001750 0001750 00000005645 11745427074 023311 0 ustar giovanni giovanni /*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sax;
import java.util.HashMap;
/**
 * Type-safe enumeration (in pre-Java5 style) of the standard features
 * SAX (as of 2.0.2) defines. Each value is identified by the part of
 * its URI that follows the prefix all standard features share.
 */
public final class SAXFeature
{
    /**
     * URI prefix shared by all the standard features; kept separate
     * from the unique remainder ("suffix") of each feature URI.
     */
    public final static String STD_FEATURE_PREFIX = "http://xml.org/sax/features/";

    /**
     * Lookup table from suffix to instance. Has to be declared before
     * the values below, since the constructor registers each instance
     * here.
     */
    final static HashMap sInstances = new HashMap();

    // // // "Enum" values:

    final static SAXFeature EXTERNAL_GENERAL_ENTITIES = new SAXFeature("external-general-entities");
    final static SAXFeature EXTERNAL_PARAMETER_ENTITIES = new SAXFeature("external-parameter-entities");
    final static SAXFeature IS_STANDALONE = new SAXFeature("is-standalone");
    final static SAXFeature LEXICAL_HANDLER_PARAMETER_ENTITIES = new SAXFeature("lexical-handler/parameter-entities");
    final static SAXFeature NAMESPACES = new SAXFeature("namespaces");
    final static SAXFeature NAMESPACE_PREFIXES = new SAXFeature("namespace-prefixes");
    final static SAXFeature RESOLVE_DTD_URIS = new SAXFeature("resolve-dtd-uris");
    final static SAXFeature STRING_INTERNING = new SAXFeature("string-interning");
    final static SAXFeature UNICODE_NORMALIZATION_CHECKING = new SAXFeature("unicode-normalization-checking");
    final static SAXFeature USE_ATTRIBUTES2 = new SAXFeature("use-attributes2");
    final static SAXFeature USE_LOCATOR2 = new SAXFeature("use-locator2");
    final static SAXFeature USE_ENTITY_RESOLVER2 = new SAXFeature("use-entity-resolver2");
    final static SAXFeature VALIDATION = new SAXFeature("validation");
    final static SAXFeature XMLNS_URIS = new SAXFeature("xmlns-uris");
    final static SAXFeature XML_1_1 = new SAXFeature("xml-1.1");

    private final String mSuffix;

    /**
     * Creates the instance, and registers it for lookups.
     */
    private SAXFeature(String suffix)
    {
        sInstances.put(suffix, this);
        mSuffix = suffix;
    }

    /**
     * @param uri full feature URI
     *
     * @return matching instance; null if the URI does not start with
     *   the standard prefix, or the remainder is not a known suffix
     */
    public static SAXFeature findByUri(String uri)
    {
        if (!uri.startsWith(STD_FEATURE_PREFIX)) {
            return null;
        }
        final String suffix = uri.substring(STD_FEATURE_PREFIX.length());
        return findBySuffix(suffix);
    }

    /**
     * @param suffix feature URI without the standard prefix
     *
     * @return matching instance; null if suffix is not a known one
     */
    public static SAXFeature findBySuffix(String suffix)
    {
        final Object match = sInstances.get(suffix);
        return (SAXFeature) match;
    }

    public String getSuffix() {
        return mSuffix;
    }

    /**
     * @return full URI of the feature
     */
    public String toString() {
        return STD_FEATURE_PREFIX + mSuffix;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/ 0000755 0001750 0001750 00000000000 11756143457 020651 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/util/TextBuilder.java 0000644 0001750 0001750 00000006435 11745427075 023756 0 ustar giovanni giovanni package com.ctc.wstx.util;
/**
 * Class similar to {@link StringBuffer}, except that it can be used to
 * construct multiple Strings, that will share same underlying character
 * buffer. This is generally useful for closely related value Strings,
 * such as attribute values of a single XML start element.
 */
public final class TextBuilder
{
    private final static int MIN_LEN = 60;
    private final static int MAX_LEN = 120;

    private char[] mBuffer;

    /**
     * Number of characters of the buffer that are in use
     */
    private int mBufferLen;

    /**
     * Lazily constructed String that contains the current contents;
     * cleared whenever contents (or their length) change.
     */
    private String mResultString;

    /*
    ///////////////////////////////////////////////
    // Life-cycle:
    ///////////////////////////////////////////////
     */

    /**
     * @param initialSize Estimated number of values to hold; used for
     *   sizing the initial buffer, at 16 characters per value, but
     *   limited to range of 60 to 120 characters.
     */
    public TextBuilder(int initialSize)
    {
        int charSize = (initialSize << 4); // multiply by 16, then clamp
        if (charSize < MIN_LEN) {
            charSize = MIN_LEN;
        } else if (charSize > MAX_LEN) {
            charSize = MAX_LEN;
        }
        mBuffer = new char[charSize];
    }

    /**
     * Method called before starting to (re)use the buffer, will discard
     * any existing content, and start collecting new set of values.
     */
    public void reset() {
        mBufferLen = 0;
        mResultString = null;
    }

    /*
    ///////////////////////////////////////////////
    // Accessors:
    ///////////////////////////////////////////////
     */

    public boolean isEmpty() {
        return mBufferLen == 0;
    }

    /**
     * @return String that contains all the characters currently in
     *   the buffer; same instance is returned by successive calls as
     *   long as contents are not modified
     */
    public String getAllValues()
    {
        if (mResultString == null) {
            mResultString = new String(mBuffer, 0, mBufferLen);
        }
        return mResultString;
    }

    /**
     * Method that gives access to underlying character buffer
     */
    public char[] getCharBuffer() {
        return mBuffer;
    }

    /**
     * @return number of characters of the buffer that are in use
     */
    public int getCharSize() {
        return mBufferLen;
    }

    /*
    ///////////////////////////////////////////////
    // Mutators:
    ///////////////////////////////////////////////
     */

    public void append(char c) {
        if (mBuffer.length == mBufferLen) {
            resize(1);
        }
        mBuffer[mBufferLen++] = c;
        mResultString = null; // contents changed, cached String is stale
    }

    public void append(char[] src, int start, int len) {
        if (len > (mBuffer.length - mBufferLen)) {
            resize(len);
        }
        System.arraycopy(src, start, mBuffer, mBufferLen, len);
        mBufferLen += len;
        mResultString = null; // contents changed, cached String is stale
    }

    /**
     * Method for explicitly setting the number of characters in use;
     * needed when contents are written directly in the buffer (see
     * {@link #getCharBuffer} and {@link #bufferFull}).
     */
    public void setBufferSize(int newSize) {
        mBufferLen = newSize;
        // Caller has likely modified buffer directly, can't trust cache
        mResultString = null;
    }

    /**
     * Method called by a caller that writes directly in the buffer,
     * when the buffer has been filled up. Marks the whole buffer as
     * being in use, and replaces it with a bigger one that has the
     * same contents.
     *
     * @param needSpaceFor Minimum number of additional characters
     *   that the new buffer needs to have room for; buffer grows
     *   by at least 50% regardless
     *
     * @return the new, bigger buffer
     */
    public char[] bufferFull(int needSpaceFor) {
        mBufferLen = mBuffer.length;
        resize(needSpaceFor);
        mResultString = null;
        return mBuffer;
    }

    /*
    ///////////////////////////////////////////////
    // Debugging:
    ///////////////////////////////////////////////
     */

    public String toString() {
        return new String(mBuffer, 0, mBufferLen);
    }

    /*
    ///////////////////////////////////////////////
    // Internal methods:
    ///////////////////////////////////////////////
     */

    /**
     * Replaces the buffer with one that is at least 50% bigger, and
     * in any case big enough to hold given number of characters in
     * addition to the ones currently in use. Contents are retained.
     */
    private void resize(int needSpaceFor) {
        char[] old = mBuffer;
        int oldLen = old.length;
        int addition = oldLen >> 1; // Grow by 50%
        // Only need to add what does not fit in the unused part
        needSpaceFor -= (oldLen - mBufferLen);
        if (addition < needSpaceFor) {
            addition = needSpaceFor;
        }
        mBuffer = new char[oldLen+addition];
        System.arraycopy(old, 0, mBuffer, 0, mBufferLen);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/BijectiveNsMap.java 0000644 0001750 0001750 00000024265 11745427074 024366 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2005 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.util;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import com.ctc.wstx.util.DataUtil;
/**
* Helper class that implements "bijective map" (Map that allows use of values
* as keys and vice versa, bidirectional access), and is specifically
* used for storing namespace binding information.
* One thing worth noting is that Strings stored are NOT assumed to have
* been unified (interned) -- if they were, different implementation would
* be more optimal.
*
* Currently only used by stream writers, but could be more generally useful
* too.
*/
public final class BijectiveNsMap
{
/*
///////////////////////////////////////////////
// Constants
///////////////////////////////////////////////
*/
/**
* Let's plan for having up to 14 explicit namespace declarations (2
* defaults, for 'xml' and 'xmlns', are pre-populated)
*/
final static int DEFAULT_ARRAY_SIZE = 2 * 16;
/*
///////////////////////////////////////////////
// Member vars
///////////////////////////////////////////////
*/
final int mScopeStart;
/**
* Array that contains { prefix, ns-uri } pairs, up to (but not including)
* index {@link #mScopeEnd}.
*/
String[] mNsStrings;
int mScopeEnd;
/*
///////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////
*/
private BijectiveNsMap(int scopeStart, String[] strs)
{
mScopeStart = mScopeEnd = scopeStart;
mNsStrings = strs;
}
public static BijectiveNsMap createEmpty()
{
String[] strs = new String[DEFAULT_ARRAY_SIZE];
strs[0] = XMLConstants.XML_NS_PREFIX;
strs[1] = XMLConstants.XML_NS_URI;
strs[2] = XMLConstants.XMLNS_ATTRIBUTE;
strs[3] = XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
/* Let's consider pre-defined ones to be 'out of scope', i.e.
* conceptually be part of (missing) parent's mappings.
*/
return new BijectiveNsMap(4, strs);
}
public BijectiveNsMap createChild() {
return new BijectiveNsMap(mScopeEnd, mNsStrings);
}
/*
///////////////////////////////////////////////
// Public API, accessors
///////////////////////////////////////////////
*/
public String findUriByPrefix(String prefix)
{
/* This is quite simple: just need to locate the last mapping
* for the prefix, if any:
*/
String[] strs = mNsStrings;
int phash = prefix.hashCode();
for (int ix = mScopeEnd - 2; ix >= 0; ix -= 2) {
String thisP = strs[ix];
if (thisP == prefix ||
(thisP.hashCode() == phash && thisP.equals(prefix))) {
return strs[ix+1];
}
}
return null;
}
public String findPrefixByUri(String uri)
{
/* Finding a valid binding for the given URI is trickier, since
* mappings can be masked by others... so, we need to first find
* most recent binding, from the freshest one, and then verify
* it's still unmasked; if not, continue with the first loop,
* and so on.
*/
String[] strs = mNsStrings;
int uhash = uri.hashCode();
main_loop:
for (int ix = mScopeEnd - 1; ix > 0; ix -= 2) {
String thisU = strs[ix];
if (thisU == uri ||
(thisU.hashCode() == uhash && thisU.equals(uri))) {
// match, but has it been masked?
String prefix = strs[ix-1];
/* only need to check, if it wasn't within current scope
* (no masking allowed within scopes)
*/
if (ix < mScopeStart) {
int phash = prefix.hashCode();
for (int j = ix+1, end = mScopeEnd; j < end; j += 2) {
String thisP = strs[j];
if (thisP == prefix ||
(thisP.hashCode() == phash && thisP.equals(prefix))) {
// Masking... got to continue the main loop:
continue main_loop;
}
}
}
// Ok, unmasked one, can return
return prefix;
}
}
return null;
}
public List getPrefixesBoundToUri(String uri, List l)
{
/* Same problems (masking) apply here, as well as with
* findPrefixByUri...
*/
String[] strs = mNsStrings;
int uhash = uri.hashCode();
main_loop:
for (int ix = mScopeEnd - 1; ix > 0; ix -= 2) {
String thisU = strs[ix];
if (thisU == uri ||
(thisU.hashCode() == uhash && thisU.equals(uri))) {
// match, but has it been masked?
String prefix = strs[ix-1];
/* only need to check, if it wasn't within current scope
* (no masking allowed within scopes)
*/
if (ix < mScopeStart) {
int phash = prefix.hashCode();
for (int j = ix+1, end = mScopeEnd; j < end; j += 2) {
String thisP = strs[j];
if (thisP == prefix ||
(thisP.hashCode() == phash && thisP.equals(prefix))) {
// Masking... got to continue the main loop:
continue main_loop;
}
}
}
// Ok, unmasked one, can add
if (l == null) {
l = new ArrayList();
}
l.add(prefix);
}
}
return l;
}
public int size() {
return (mScopeEnd >> 1);
}
public int localSize() {
return ((mScopeEnd - mScopeStart) >> 1);
}
/*
///////////////////////////////////////////////
// Public API, mutators
///////////////////////////////////////////////
*/
/**
* Method to add a new prefix-to-URI mapping for the current scope.
* Note that it should NOT be used for the default namespace
* declaration
*
* @param prefix Prefix to bind
* @param uri URI to bind to the prefix
*
* @return If the prefix was already bound, the URI it was bound to:
* null if it's a new binding for the current scope.
*/
public String addMapping(String prefix, String uri)
{
String[] strs = mNsStrings;
int phash = prefix.hashCode();
for (int ix = mScopeStart, end = mScopeEnd; ix < end; ix += 2) {
String thisP = strs[ix];
if (thisP == prefix ||
(thisP.hashCode() == phash && thisP.equals(prefix))) {
// Overriding an existing mapping
String old = strs[ix+1];
strs[ix+1] = uri;
return old;
}
}
// no previous binding, let's just add it at the end
if (mScopeEnd >= strs.length) {
// let's just double the array sizes...
strs = DataUtil.growArrayBy(strs, strs.length);
mNsStrings = strs;
}
strs[mScopeEnd++] = prefix;
strs[mScopeEnd++] = uri;
return null;
}
/**
* Method used to add a dynamic binding, and return the prefix
* used to bind the specified namespace URI.
*/
public String addGeneratedMapping(String prefixBase, NamespaceContext ctxt,
String uri, int[] seqArr)
{
String[] strs = mNsStrings;
int seqNr = seqArr[0];
String prefix;
main_loop:
while (true) {
/* We better intern the resulting prefix? Or not?
* TODO: maybe soft cache these for other docs?
*/
prefix = (prefixBase + seqNr).intern();
++seqNr;
/* Ok, let's see if we have a mapping (masked or not) for
* the prefix. If we do, let's just not use it: we could
* of course mask it (unless it's in current scope), but
* it's easier to just get a "virgin" prefix...
*/
int phash = prefix.hashCode();
for (int ix = mScopeEnd - 2; ix >= 0; ix -= 2) {
String thisP = strs[ix];
if (thisP == prefix ||
(thisP.hashCode() == phash && thisP.equals(prefix))) {
continue main_loop;
}
}
/* So far so good... but do we have a root context that might
* have something too?
*/
if (ctxt != null && ctxt.getNamespaceURI(prefix) != null) {
continue;
}
break;
}
seqArr[0] = seqNr;
// Ok, good; then let's just add it in...
if (mScopeEnd >= strs.length) {
// let's just double the array sizes...
strs = DataUtil.growArrayBy(strs, strs.length);
mNsStrings = strs;
}
strs[mScopeEnd++] = prefix;
strs[mScopeEnd++] = uri;
return prefix;
}
/*
///////////////////////////////////////////////
// Standard overridden methods
///////////////////////////////////////////////
*/
public String toString() {
return "["+getClass().toString()+"; "+size()+" entries; of which "
+localSize()+" local]";
}
/*
///////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////
*/
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/TextAccumulator.java 0000644 0001750 0001750 00000003770 11745427075 024646 0 ustar giovanni giovanni package com.ctc.wstx.util;
/**
 * Simple utility class used to efficiently accumulate and concatenate
 * text passed in various forms.
 *<p>
 * A single String that is added is retained as is (and returned without
 * copying); a buffer is only used when there is more than one segment, or
 * when text is passed as a character array.
 */
public final class TextAccumulator
{
    /** The only segment added so far, if it was added as a String. */
    private String mText = null;

    /* !!! JDK 1.5: when we can upgrade to Java 5, can convert
     *  to using StringBuilder instead.
     */
    /** Buffer for concatenated segments; not used together with mText. */
    private StringBuffer mBuilder = null;

    public TextAccumulator() { }

    /**
     * @return True if any non-empty text has been added since
     *   construction, or since the last call to {@link #getAndClear}
     */
    public boolean hasText() {
        return !(mBuilder == null && mText == null);
    }

    /**
     * Adds the given String, unless it is empty.
     */
    public void addText(String text)
    {
        final int addedLen = text.length();
        if (addedLen <= 0) {
            return;
        }
        // Earlier single String has to be moved into a buffer first
        if (mText != null) {
            moveTextToBuffer(addedLen);
        }
        if (mBuilder == null) {
            // First segment: can just hold on to the String itself
            mText = text;
            return;
        }
        mBuilder.append(text);
    }

    /**
     * Adds characters of the array from <code>start</code> (inclusive)
     * to <code>end</code> (exclusive), unless the range is empty.
     */
    public void addText(char[] buf, int start, int end)
    {
        final int count = end - start;
        if (count <= 0) {
            return;
        }
        if (mText != null) {
            moveTextToBuffer(count);
        } else if (mBuilder == null) {
            /* A buffer is used even for the first segment here, since a
             * String would have to be constructed from the chars anyway.
             */
            mBuilder = new StringBuffer(count);
        }
        mBuilder.append(buf, start, count);
    }

    /**
     * @return Concatenation of all text added (empty String if none);
     *   accumulated text is discarded as a side effect
     */
    public String getAndClear()
    {
        if (mText != null) {
            String single = mText;
            mText = null;
            return single;
        }
        if (mBuilder == null) {
            return "";
        }
        String joined = mBuilder.toString();
        mBuilder = null;
        return joined;
    }

    /**
     * Replaces the single retained String with a new buffer that contains
     * it, sized to also fit <code>extraLen</code> more characters.
     */
    private void moveTextToBuffer(int extraLen)
    {
        StringBuffer sb = new StringBuffer(mText.length() + extraLen);
        sb.append(mText);
        mBuilder = sb;
        mText = null;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/URLUtil.java 0000644 0001750 0001750 00000016043 11745427075 023017 0 ustar giovanni giovanni package com.ctc.wstx.util;
import java.io.*;
import java.net.URI;
import java.net.URL;
import java.net.URLDecoder;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
/**
 * Static helper methods for converting system identifiers to
 * {@link URL}s and {@link URI}s, and for opening streams for URLs.
 */
public final class URLUtil
{
    private URLUtil() { }

    /**
     * Method that tries to figure out how to create valid URL from a system
     * id, without additional contextual information.
     *
     * @param sysId System identifier: either something that looks like
     *   a full URL, or a reference to a local file
     *
     * @throws IOException If the system id can not be converted to a URL
     */
    public static URL urlFromSystemId(String sysId)
        throws IOException
    {
        try {
            if (looksLikeFullUrl(sysId)) {
                return new URL(sysId);
            }
            // Ok, let's just assume it's local file reference...
            /* 24-May-2006, TSa: Amazingly, File.toURL() does show in
             *   profiling, for small docs. The problem is that deep down it
             *   tries to check physical file system, to check if the File
             *   pointed to is a directory: and that is (relatively speaking)
             *   a very expensive call. Since in this particular case it
             *   should never be a dir (and/or doesn't matter), let's just
             *   implement conversion locally
             */
            return new URL("file", "", absoluteSlashPath(sysId));
        } catch (MalformedURLException e) {
            throwIOException(e, sysId);
            return null; // never gets here
        }
    }

    /**
     * Method similar to {@link #urlFromSystemId(String)}, but one that
     * constructs a {@link URI} instead of a {@link URL}.
     *
     * @throws IOException If the system id can not be converted to a URI
     *
     * @since 4.1
     */
    public static URI uriFromSystemId(String sysId) throws IOException
    {
        try {
            if (looksLikeFullUrl(sysId)) {
                return new URI(sysId);
            }
            return new URI("file", absoluteSlashPath(sysId), null);
        } catch (URISyntaxException e) {
            throwIOException(e, sysId);
            return null; // never gets here
        }
    }

    /**
     * Method that resolves the system id against given context URL.
     *
     * @param ctxt Context to resolve against; if null, behaves like
     *   {@link #urlFromSystemId(String)}
     *
     * @throws IOException If the system id can not be converted to a URL
     */
    public static URL urlFromSystemId(String sysId, URL ctxt) throws IOException
    {
        if (ctxt == null) {
            return urlFromSystemId(sysId);
        }
        try {
            return new URL(ctxt, sysId);
        } catch (MalformedURLException e) {
            throwIOException(e, sysId);
            return null; // never gets here
        }
    }

    /**
     * Method that tries to create and return URL that denotes current
     * working directory. Usually used to create a context, when one is
     * not explicitly passed.
     */
    public static URL urlFromCurrentDir()
        throws java.net.MalformedURLException /* an IOException */
    {
        /* This seems to work; independent of whether there happens to
         * be such/file dir or not.
         */
        return new File("a").getAbsoluteFile().getParentFile().toURL();
    }

    /**
     * Method that tries to get a stream (ideally, optimal one) to read from
     * the specified URL.
     * Currently it just means creating a simple file input stream if the
     * URL points to a (local) file, and otherwise relying on URL classes
     * input stream creation method.
     */
    public static InputStream inputStreamFromURL(URL url)
        throws IOException
    {
        if ("file".equals(url.getProtocol())) {
            /* As per [WSTX-82], can not do this if the path refers
             * to a network drive on windows. This fixes the problem;
             * might not be needed on all platforms (NFS?), but should not
             * matter a lot: performance penalty of extra wrapping is more
             * relevant when accessing local file system.
             */
            String host = url.getHost();
            if (host == null || host.length() == 0) {
                /* One more test: if there are quoted characters, need
                 * to decode them [WSTX-207]:
                 */
                String path = url.getPath();
                if (path.indexOf('%') >= 0) {
                    path = decodePath(path);
                }
                return new FileInputStream(path);
            }
        }
        return url.openStream();
    }

    /**
     * Method that tries to get a stream (ideally, optimal one) to write to
     * the resource specified by given URL.
     * Currently it just means creating a simple file output stream if the
     * URL points to a (local) file, and otherwise relying on URL classes
     * output stream creation method.
     *<p>
     * Note: unlike with {@link #inputStreamFromURL}, path of a file URL
     * is used as is, without decoding percent-escaped characters.
     */
    public static OutputStream outputStreamFromURL(URL url)
        throws IOException
    {
        if ("file".equals(url.getProtocol())) {
            /* As per [WSTX-82], can not do this if the path refers
             * to a network drive on windows.
             */
            String host = url.getHost();
            if (host == null || host.length() == 0) {
                return new FileOutputStream(url.getPath());
            }
        }
        return url.openConnection().getOutputStream();
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Private helper methods
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Heuristic check to see if the system id looks like a full URL, as
     * opposed to a file path.
     */
    private static boolean looksLikeFullUrl(String sysId)
    {
        /* Ok, does it look like a full URL? For one, you need a colon. Also,
         * to reduce likelihood of collision with Windows paths, let's only
         * accept it if there are 3 preceding other chars...
         * Not sure if Mac might be a problem? (it uses ':' as file path
         * separator, alas, at least prior to MacOS X)
         */
        int ix = sysId.indexOf(':', 0);
        /* Also, protocols are generally fairly short, usually 3 or 4
         * chars (http, ftp, urn); so let's put upper limit of 8 chars too
         */
        return (ix >= 3 && ix <= 8);
    }

    /**
     * Converts a file reference into an absolute path that uses slashes
     * as separators and starts with a slash.
     */
    private static String absoluteSlashPath(String sysId)
    {
        String absPath = new java.io.File(sysId).getAbsolutePath();
        // Need to convert colons/backslashes to regular slashes?
        char sep = File.separatorChar;
        if (sep != '/') {
            absPath = absPath.replace(sep, '/');
        }
        if (absPath.length() > 0 && absPath.charAt(0) != '/') {
            absPath = "/" + absPath;
        }
        return absPath;
    }

    /**
     * Decodes percent-escaped characters (as UTF-8) of a URL path.
     */
    private static String decodePath(String path)
        throws UnsupportedEncodingException
    {
        /* URLDecoder implements form decoding, which also converts plus
         * signs to spaces. That is not valid for paths, where '+' is just
         * a regular character; so plus signs need to be escaped first.
         */
        if (path.indexOf('+') >= 0) {
            int len = path.length();
            StringBuffer sb = new StringBuffer(len + 8);
            for (int i = 0; i < len; ++i) {
                char c = path.charAt(i);
                if (c == '+') {
                    sb.append("%2B");
                } else {
                    sb.append(c);
                }
            }
            path = sb.toString();
        }
        return URLDecoder.decode(path, "UTF-8");
    }

    /**
     * Helper method that converts URL/URI-specific exception to a more
     * general IO exception, with the original exception set as the
     * root cause. Never returns normally.
     */
    private static void throwIOException(Exception mex, String sysId)
        throws IOException
    {
        IOException ie = new IOException("[resolving systemId '"+sysId+"']: "+mex.toString());
        /* Class already depends on JDK 1.4 (java.net.URI), so root cause
         * can be set directly.
         */
        ie.initCause(mex);
        throw ie;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/DataUtil.java 0000644 0001750 0001750 00000007504 11745427074 023227 0 ustar giovanni giovanni package com.ctc.wstx.util;
import java.lang.reflect.Array;
import java.util.*;
/**
 * Static helper methods for canonical (shared) instances of simple
 * immutable values, and for common operations on arrays and collections.
 */
public final class DataUtil
{
    final static char[] EMPTY_CHAR_ARRAY = new char[0];

    /**
     * If baseline requirement was JDK 1.5, we wouldn't need to
     * cache Integer instances like this (since it has
     * Integer.valueOf() which does it); but until then, we
     * alas need our known canonicalization.
     */
    final static Integer[] INTS = new Integer[100];
    static {
        for (int i = 0; i < INTS.length; ++i) {
            INTS[i] = new Integer(i);
        }
    }

    private DataUtil() { }

    /*
    ////////////////////////////////////////////////////////////
    // Pooling for immutable objects
    ////////////////////////////////////////////////////////////
    */

    /**
     * @return Shared zero-length char array
     */
    public static char[] getEmptyCharArray() {
        return EMPTY_CHAR_ARRAY;
    }

    /**
     * @return Integer with given value; shared instance for values
     *   from 0 to 99 (inclusive), a new instance for others
     */
    public static Integer Integer(int i)
    {
        /* !!! 13-Sep-2008, TSa: JDK 1.5 can use Integer.valueOf(int)
         *   which does the same. When upgrading baseline, can get rid
         *   of this method.
         */
        if (i < 0 || i >= INTS.length) {
            return new Integer(i);
        }
        return INTS[i];
    }

    /*
    ////////////////////////////////////////////////////////////
    // Methods for common operations on std data structs
    ////////////////////////////////////////////////////////////
    */

    /**
     * Method that can be used to efficiently check if 2 collections
     * share at least one common element.
     *
     * @return True if there is at least one element that's common
     *   to both Collections, ie. that is contained in both of them.
     */
    public static boolean anyValuesInCommon(Collection c1, Collection c2)
    {
        // Let's always iterate over smaller collection:
        if (c1.size() > c2.size()) {
            Collection tmp = c1;
            c1 = c2;
            c2 = tmp;
        }
        Iterator it = c1.iterator();
        while (it.hasNext()) {
            if (c2.contains(it.next())) {
                return true;
            }
        }
        return false;
    }

    final static String NO_TYPE = "Illegal to pass null; can not determine component type";

    /**
     * Method that allocates a new array of same component type as the
     * given one, about 50% longer, and copies contents of the given
     * array to it. The new array is always at least one element longer.
     *
     * @param arr Array to grow; may be of any component type
     *
     * @return Newly allocated array (of same type as <code>arr</code>)
     *
     * @throws IllegalArgumentException If <code>arr</code> is null
     */
    public static Object growArrayBy50Pct(Object arr)
    {
        if (arr == null) {
            throw new IllegalArgumentException(NO_TYPE);
        }
        Object old = arr;
        int len = Array.getLength(arr);
        /* Half of the length rounds down to 0 for arrays with 0 or 1
         * elements: need to ensure array does actually grow.
         */
        int addition = Math.max(1, len >> 1);
        arr = Array.newInstance(arr.getClass().getComponentType(), len + addition);
        System.arraycopy(old, 0, arr, 0, len);
        return arr;
    }

    /**
     * Method similar to {@link #growArrayBy50Pct}, but it also ensures that
     * the new size is at least as big as the specified minimum size.
     *
     * @throws IllegalArgumentException If <code>arr</code> is null
     */
    public static Object growArrayToAtLeast(Object arr, int minLen)
    {
        if (arr == null) {
            throw new IllegalArgumentException(NO_TYPE);
        }
        Object old = arr;
        int oldLen = Array.getLength(arr);
        int newLen = oldLen + ((oldLen + 1) >> 1);
        if (newLen < minLen) {
            newLen = minLen;
        }
        arr = Array.newInstance(arr.getClass().getComponentType(), newLen);
        System.arraycopy(old, 0, arr, 0, oldLen);
        return arr;
    }

    /**
     * @param arr Array to grow; if null, a new array with
     *   <code>more</code> elements is returned
     * @param more Number of elements to add
     *
     * @return New array that starts with contents of <code>arr</code>
     */
    public static String[] growArrayBy(String[] arr, int more)
    {
        if (arr == null) {
            return new String[more];
        }
        String[] old = arr;
        int len = arr.length;
        arr = new String[len + more];
        System.arraycopy(old, 0, arr, 0, len);
        return arr;
    }

    /**
     * @param arr Array to grow; if null, a new array with
     *   <code>more</code> elements is returned
     * @param more Number of elements to add
     *
     * @return New array that starts with contents of <code>arr</code>
     */
    public static int[] growArrayBy(int[] arr, int more)
    {
        if (arr == null) {
            return new int[more];
        }
        int[] old = arr;
        int len = arr.length;
        arr = new int[len + more];
        System.arraycopy(old, 0, arr, 0, len);
        return arr;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/EmptyNamespaceContext.java 0000644 0001750 0001750 00000003467 11745427074 026004 0 ustar giovanni giovanni package com.ctc.wstx.util;
import java.io.Writer;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamWriter;
import org.codehaus.stax2.ri.EmptyIterator;
/**
* Dummy {@link NamespaceContext} (and {@link BaseNsContext})
* implementation that is usually used in
* non-namespace-aware mode.
*
* Note: differs from Stax2 reference implementation's version * slightly, since it needs to support Woodstox specific extensions * for efficient namespace declaration serialization. */ public final class EmptyNamespaceContext extends BaseNsContext { final static EmptyNamespaceContext sInstance = new EmptyNamespaceContext(); private EmptyNamespaceContext() { } public static EmptyNamespaceContext getInstance() { return sInstance; } /* ///////////////////////////////////////////// // Extended API ///////////////////////////////////////////// */ public Iterator getNamespaces() { return EmptyIterator.getInstance(); } /** * Method called by the matching start element class to * output all namespace declarations active in current namespace * scope, if any. */ public void outputNamespaceDeclarations(Writer w) { ; // nothing to output } public void outputNamespaceDeclarations(XMLStreamWriter w) { ; // nothing to output } /* ///////////////////////////////////////////////// // Template methods sub-classes need to implement ///////////////////////////////////////////////// */ public String doGetNamespaceURI(String prefix) { return null; } public String doGetPrefix(String nsURI) { return null; } public Iterator doGetPrefixes(String nsURI) { return EmptyIterator.getInstance(); } } woodstox-4.1.3/src/java/com/ctc/wstx/util/StringVector.java 0000644 0001750 0001750 00000015604 11745427075 024152 0 ustar giovanni giovanni package com.ctc.wstx.util; /** * Data container similar {@link java.util.List} (from storage perspective), * but that can be used in multiple ways. For some uses it acts more like * type-safe String list/vector; for others as order associative list of * String-to-String mappings. 
*/ public final class StringVector { private String[] mStrings; private int mSize; /* /////////////////////////////////////////////////////// // Life-cycle: /////////////////////////////////////////////////////// */ public StringVector(int initialCount) { mStrings = new String[initialCount]; } /* /////////////////////////////////////////////////////// // Basic accessors /////////////////////////////////////////////////////// */ public int size() { return mSize; } public boolean isEmpty() { return mSize == 0; } public String getString(int index) { if (index < 0 || index >= mSize) { throw new IllegalArgumentException("Index "+index+" out of valid range; current size: "+mSize+"."); } return mStrings[index]; } public String getLastString() { if (mSize < 1) { throw new IllegalStateException("getLastString() called on empty StringVector."); } return mStrings[mSize-1]; } public String[] getInternalArray() { return mStrings; } public String[] asArray() { String[] strs = new String[mSize]; System.arraycopy(mStrings, 0, strs, 0, mSize); return strs; } public boolean containsInterned(String value) { String[] str = mStrings; for (int i = 0, len = mSize; i < len; ++i) { if (str[i] == value) { return true; } } return false; } /* /////////////////////////////////////////////////////// // Mutators: /////////////////////////////////////////////////////// */ public void addString(String str) { if (mSize == mStrings.length) { String[] old = mStrings; int oldSize = old.length; mStrings = new String[oldSize + (oldSize << 1)]; System.arraycopy(old, 0, mStrings, 0, oldSize); } mStrings[mSize++] = str; } public void addStrings(String str1, String str2) { if ((mSize + 2) > mStrings.length) { String[] old = mStrings; int oldSize = old.length; mStrings = new String[oldSize + (oldSize << 1)]; System.arraycopy(old, 0, mStrings, 0, oldSize); } mStrings[mSize] = str1; mStrings[mSize+1] = str2; mSize += 2; } public void setString(int index, String str) { mStrings[index] = str; } public void 
clear(boolean removeRefs) { if (removeRefs) { for (int i = 0, len = mSize; i < len; ++i) { mStrings[i] = null; } } mSize = 0; } public String removeLast() { String result = mStrings[--mSize]; mStrings[mSize] = null; return result; } public void removeLast(int count) { while (--count >= 0) { mStrings[--mSize] = null; } } /* /////////////////////////////////////////////////////// // Specialized "map accessors": /////////////////////////////////////////////////////// */ /** * Specialized access method; treats vector as a Map, with 2 Strings * per entry; first one being key, second value. Further, keys are * assumed to be canonicalized with passed in key (ie. either intern()ed, * or resolved from symbol table). * Starting from the * end (assuming even number of entries), tries to find an entry with * matching key, and if so, returns value. */ public String findLastFromMap(String key) { int index = mSize; while ((index -= 2) >= 0) { if (mStrings[index] == key) { return mStrings[index+1]; } } return null; } public String findLastNonInterned(String key) { int index = mSize; while ((index -= 2) >= 0) { String curr = mStrings[index]; if (curr == key || (curr != null && curr.equals(key))) { return mStrings[index+1]; } } return null; } public int findLastIndexNonInterned(String key) { int index = mSize; while ((index -= 2) >= 0) { String curr = mStrings[index]; if (curr == key || (curr != null && curr.equals(key))) { return index; } } return -1; } public String findLastByValueNonInterned(String value) { for (int index = mSize-1; index > 0; index -= 2) { String currVal = mStrings[index]; if (currVal == value || (currVal != null && currVal.equals(value))) { return mStrings[index-1]; } } return null; } public int findLastIndexByValueNonInterned(String value) { for (int index = mSize-1; index > 0; index -= 2) { String currVal = mStrings[index]; if (currVal == value || (currVal != null && currVal.equals(value))) { return index-1; } } return -1; } /* // Not needed any more public 
Iterator findAllByValueNonInterned(String value) { String first = null; ArrayList all = null; for (int index = mSize-1; index > 0; index -= 2) { String currVal = mStrings[index]; if (currVal == value || (currVal != null && currVal.equals(value))) { if (first == null) { first = mStrings[index-1]; } else { if (all == null) { all = new ArrayList(); all.add(first); } all.add(mStrings[index-1]); } } } if (all != null) { return all.iterator(); } if (first != null) { return new SingletonIterator(first); } return EmptyIterator.getInstance(); } */ /* /////////////////////////////////////////////////////// // Other methods /////////////////////////////////////////////////////// */ public String toString() { StringBuffer sb = new StringBuffer(mSize * 16); sb.append("[(size = "); sb.append(mSize); sb.append(" ) "); for (int i = 0; i < mSize; ++i) { if (i > 0) { sb.append(", "); } sb.append('"'); sb.append(mStrings[i]); sb.append('"'); sb.append(" == "); sb.append(Integer.toHexString(System.identityHashCode(mStrings[i]))); } sb.append(']'); return sb.toString(); } } woodstox-4.1.3/src/java/com/ctc/wstx/util/StringUtil.java 0000644 0001750 0001750 00000024300 11745427075 023616 0 ustar giovanni giovanni package com.ctc.wstx.util; import java.util.Collection; import java.util.Iterator; public final class StringUtil { final static char CHAR_SPACE = ' '; // 0x0020 private final static char INT_SPACE = 0x0020; static String sLF = null; public static String getLF() { String lf = sLF; if (lf == null) { try { lf = (String) System.getProperty("line.separator"); sLF = (lf == null) ? "\n" : lf; } catch (Throwable t) { // Doh.... 
whatever; most likely SecurityException sLF = lf = "\n"; } } return lf; } public static void appendLF(StringBuffer sb) { sb.append(getLF()); } public static String concatEntries(Collection coll, String sep, String lastSep) { if (lastSep == null) { lastSep = sep; } int len = coll.size(); StringBuffer sb = new StringBuffer(16 + (len << 3)); Iterator it = coll.iterator(); int i = 0; while (it.hasNext()) { if (i == 0) { ; } else if (i == (len - 1)) { sb.append(lastSep); } else { sb.append(sep); } ++i; sb.append(it.next()); } return sb.toString(); } /** * Method that will check character array passed, and remove all * "extra" spaces (leading and trailing space), and normalize * other white space (more than one consequtive space character * replaced with a single space). *
* NOTE: we only remove explicit space characters (char code 0x0020); * the reason being that other white space must have come from * non-normalizable sources, ie. via entity expansion, and is thus * not to be normalized * * @param buf Buffer that contains the String to check * @param origStart Offset of the first character of the text to check * in the buffer * @param origEnd Offset of the character following the last character * of the text (as per usual Java API convention) * * @return Normalized String, if any white space was removed or * normalized; null if no changes were necessary. */ public static String normalizeSpaces(char[] buf, int origStart, int origEnd) { --origEnd; int start = origStart; int end = origEnd; // First let's trim start... while (start <= end && buf[start] == CHAR_SPACE) { ++start; } // Was it all empty? if (start > end) { return ""; } /* Nope, need to trim from the end then (note: it's known that char * at index 'start' is not a space, at this point) */ while (end > start && buf[end] == CHAR_SPACE) { --end; } /* Ok, may have changes or not: now need to normalize * intermediate duplicate spaces. We also now that the * first and last characters can not be spaces. */ int i = start+1; while (i < end) { if (buf[i] == CHAR_SPACE) { if (buf[i+1] == CHAR_SPACE) { break; } // Nah; no hole for these 2 chars! i += 2; } else { ++i; } } // Hit the end? if (i >= end) { // Any changes? if (start == origStart && end == origEnd) { return null; // none } return new String(buf, start, (end-start)+1); } /* Nope, got a hole, need to constuct the damn thing. Shouldn't * happen too often... 
so let's just use StringBuffer() */ StringBuffer sb = new StringBuffer(end-start); // can't be longer sb.append(buf, start, i-start); // won't add the starting space while (i <= end) { char c = buf[i++]; if (c == CHAR_SPACE) { sb.append(CHAR_SPACE); // Need to skip dups while (true) { c = buf[i++]; if (c != CHAR_SPACE) { sb.append(c); break; } } } else { sb.append(c); } } return sb.toString(); } public static boolean isAllWhitespace(String str) { for (int i = 0, len = str.length(); i < len; ++i) { if (str.charAt(i) > CHAR_SPACE) { return false; } } return true; } public static boolean isAllWhitespace(char[] ch, int start, int len) { len += start; for (; start < len; ++start) { if (ch[start] > CHAR_SPACE) { return false; } } return true; } /** * Internal constant used to denote END-OF-STRING */ private final static int EOS = 0x10000; /** * Method that implements a loose String compairon for encoding * Strings. It will work like {@link String#equalsIgnoreCase}, * except that it will also ignore all hyphen, underscore and * space characters. */ public static boolean equalEncodings(String str1, String str2) { final int len1 = str1.length(); final int len2 = str2.length(); // Need to loop completely over both Strings for (int i1 = 0, i2 = 0; i1 < len1 || i2 < len2; ) { int c1 = (i1 >= len1) ? EOS : str1.charAt(i1++); int c2 = (i2 >= len2) ? EOS : str2.charAt(i2++); // Can first do a quick comparison (usually they are equal) if (c1 == c2) { continue; } // if not equal, maybe there are WS/hyphen/underscores to skip while (c1 <= INT_SPACE || c1 == '_' || c1 == '-') { c1 = (i1 >= len1) ? EOS : str1.charAt(i1++); } while (c2 <= INT_SPACE || c2 == '_' || c2 == '-') { c2 = (i2 >= len2) ? EOS : str2.charAt(i2++); } // Ok, how about case differences, then? if (c1 != c2) { // If one is EOF, can't match (one is substring of the other) if (c1 == EOS || c2 == EOS) { return false; } if (c1 < 127) { // ascii is easy... 
if (c1 <= 'Z' && c1 >= 'A') { c1 = c1 + ('a' - 'A'); } } else { c1 = Character.toLowerCase((char)c1); } if (c2 < 127) { // ascii is easy... if (c2 <= 'Z' && c2 >= 'A') { c2 = c2 + ('a' - 'A'); } } else { c2 = Character.toLowerCase((char)c2); } if (c1 != c2) { return false; } } } // If we got this far, we are ok as long as we got through it all return true; } public static boolean encodingStartsWith(String enc, String prefix) { int len1 = enc.length(); int len2 = prefix.length(); int i1 = 0, i2 = 0; // Need to loop completely over both Strings while (i1 < len1 || i2 < len2) { int c1 = (i1 >= len1) ? EOS : enc.charAt(i1++); int c2 = (i2 >= len2) ? EOS : prefix.charAt(i2++); // Can first do a quick comparison (usually they are equal) if (c1 == c2) { continue; } // if not equal, maybe there are WS/hyphen/underscores to skip while (c1 <= CHAR_SPACE || c1 == '_' || c1 == '-') { c1 = (i1 >= len1) ? EOS : enc.charAt(i1++); } while (c2 <= CHAR_SPACE || c2 == '_' || c2 == '-') { c2 = (i2 >= len2) ? EOS : prefix.charAt(i2++); } // Ok, how about case differences, then? if (c1 != c2) { if (c2 == EOS) { // Prefix done, good! return true; } if (c1 == EOS) { // Encoding done, not good return false; } if (Character.toLowerCase((char)c1) != Character.toLowerCase((char)c2)) { return false; } } } // Ok, prefix was exactly the same as encoding... that's fine return true; } /** * Method that will remove all non-alphanumeric characters, and optionally * upper-case included letters, from the given String. 
*/ public static String trimEncoding(String str, boolean upperCase) { int i = 0; int len = str.length(); // Let's first check if String is fine as is: for (; i < len; ++i) { char c = str.charAt(i); if (c <= CHAR_SPACE || !Character.isLetterOrDigit(c)) { break; } } if (i == len) { return str; } // Nope: have to trim it StringBuffer sb = new StringBuffer(); if (i > 0) { sb.append(str.substring(0, i)); } for (; i < len; ++i) { char c = str.charAt(i); if (c > CHAR_SPACE && Character.isLetterOrDigit(c)) { if (upperCase) { c = Character.toUpperCase(c); } sb.append(c); } } return sb.toString(); } public static boolean matches(String str, char[] cbuf, int offset, int len) { if (str.length() != len) { return false; } for (int i = 0; i < len; ++i) { if (str.charAt(i) != cbuf[offset+i]) { return false; } } return true; } /** *
* Note that it is assumed that any "weird" white space * (xml 1.1 LSEP and NEL) have been replaced by canonical * alternatives (linefeed for element content, regular space * for attributes) */ public final static boolean isSpace(char c) { return ((int) c) <= 0x0020; } } woodstox-4.1.3/src/java/com/ctc/wstx/util/ElementId.java 0000644 0001750 0001750 00000010716 11745427074 023365 0 ustar giovanni giovanni package com.ctc.wstx.util; import javax.xml.stream.Location; import com.ctc.wstx.cfg.ErrorConsts; /** * Simple container Object used to store information about id attribute * values, and references to such (as of yet undefined) values. *
* Instances can be in one of 2 modes: either in fully defined mode, * in which case information refers to location where value was defined * (ie. we had id as a value of ID type attribute); or in undefined mode, * in which case information refers to the first reference. *
* Note: this class is designed to be used with {@link ElementIdMap}, * and as a result has some information specifically needed by the * map implementation (such as collision links). */ public final class ElementId { /** * Flag that indicates whether this Object presents a defined id * value (value of an ID attribute) or just a reference to one. */ private boolean mDefined; /* ///////////////////////////////////////////////// // Information about id value or value reference, // depending on mDefined flag ///////////////////////////////////////////////// */ /** * Actual id value */ private final String mIdValue; /** * Location of either definition (if {@link #mDefined} is true; or * first reference (otherwise). Used when reporting errors; either * a referenced id has not been defined, or there are multiple * definitions of same id. */ private Location mLocation; /** * Name of element for which this id refers. */ private PrefixedName mElemName; /** * Name of the attribute that contains this id value (often "id", * but need not be) */ private PrefixedName mAttrName; /* //////////////////////////////////////////////////// // Linking information, needed by the map to keep // track of collided ids, as well as undefined ids //////////////////////////////////////////////////// */ private ElementId mNextUndefined; /** * Pointer to the next element within collision chain. 
*/ private ElementId mNextColl; /* ///////////////////////////////////////////////// // Life cycle ///////////////////////////////////////////////// */ ElementId(String id, Location loc, boolean defined, PrefixedName elemName, PrefixedName attrName) { mIdValue = id; mLocation = loc; mDefined = defined; mElemName = elemName; mAttrName = attrName; } protected void linkUndefined(ElementId undefined) { if (mNextUndefined != null) { throw new IllegalStateException("ElementId '"+this+"' already had net undefined set ('"+mNextUndefined+"')"); } mNextUndefined = undefined; } protected void setNextColliding(ElementId nextColl) { // May add/remove link, no point in checking mNextColl = nextColl; } /* ///////////////////////////////////////////////// // Public API ///////////////////////////////////////////////// */ public String getId() { return mIdValue; } public Location getLocation() { return mLocation; } public PrefixedName getElemName() { return mElemName; } public PrefixedName getAttrName() { return mAttrName; } public boolean isDefined() { return mDefined; } public boolean idMatches(char[] buf, int start, int len) { if (mIdValue.length() != len) { return false; } // Assumes it's always at least one char long if (buf[start] != mIdValue.charAt(0)) { return false; } int i = 1; len += start; while (++start < len) { if (buf[start] != mIdValue.charAt(i)) { return false; } ++i; } return true; } public boolean idMatches(String idStr) { return mIdValue.equals(idStr); } public ElementId nextUndefined() { return mNextUndefined; } public ElementId nextColliding() { return mNextColl; } public void markDefined(Location defLoc) { if (mDefined) { // sanity check throw new IllegalStateException(ErrorConsts.ERR_INTERNAL); } mDefined = true; mLocation = defLoc; } /* ///////////////////////////////////////////////// // Other methods ///////////////////////////////////////////////// */ public String toString() { return mIdValue; } } woodstox-4.1.3/src/java/com/ctc/wstx/util/package.html 
0000644 0001750 0001750 00000000370 11745427075 023131 0 ustar giovanni giovanni
Contains utility classes that are not directly Woodstox specific, but are for now only used by Woodstox.
Note that some of the more generic classes may eventually be moved to more
generic packages under the <code>com.ctc</code> package.
woodstox-4.1.3/src/java/com/ctc/wstx/util/ArgUtil.java 0000644 0001750 0001750 00000004005 11745427074 023060 0 ustar giovanni giovanni package com.ctc.wstx.util;
/**
 * Simple static utility class that contains (static) utility methods useful
 * when parsing non-typesafe arguments (String-only configuration, command
 * line args).
 */
public final class ArgUtil
{
    private ArgUtil() { }

    /**
     * Converts a loosely typed property value into a boolean.
     *
     * @param prop Name of the property; only used for error messages
     * @param value Value to convert: null (taken to mean false),
     *   a {@link Boolean}, or a String that equals "true" or "false"
     *   (ignoring case)
     *
     * @return Boolean value that <code>value</code> represents
     *
     * @throws IllegalArgumentException If value is of an unsupported type,
     *   or is a String that is neither "true" nor "false"
     */
    public static boolean convertToBoolean(String prop, Object value)
    {
        if (value == null) {
            return false;
        }
        if (value instanceof Boolean) {
            return ((Boolean) value).booleanValue();
        }
        if (value instanceof String) {
            String str = (String) value;
            if (str.equalsIgnoreCase("false")) {
                return false;
            }
            if (str.equalsIgnoreCase("true")) {
                return true;
            }
            throw new IllegalArgumentException("Invalid String value for property '"+prop+"': expected Boolean value.");
        }
        throw new IllegalArgumentException("Invalid value type ("+value.getClass()+") for property '"+prop+"': expected Boolean value.");
    }

    /**
     * Converts a loosely typed property value into an int, and verifies
     * that it is not below the given minimum.
     *
     * @param prop Name of the property; only used for error messages
     * @param value Value to convert: null (taken to mean 0), a
     *   {@link Number} (converted using <code>intValue()</code>), or a
     *   String that {@link Integer#parseInt(String)} accepts
     * @param minValue Smallest value accepted
     *
     * @return Integer value that <code>value</code> represents
     *
     * @throws IllegalArgumentException If value is of an unsupported type,
     *   is a String that can not be parsed as an integer (in which case
     *   the underlying {@link NumberFormatException} is set as the cause),
     *   or is less than <code>minValue</code>
     */
    public static int convertToInt(String prop, Object value, int minValue)
    {
        int i;
        if (value == null) {
            i = 0;
        } else if (value instanceof Number) {
            i = ((Number) value).intValue();
        } else if (value instanceof String) {
            try {
                i = Integer.parseInt((String) value);
            } catch (NumberFormatException nex) {
                IllegalArgumentException iae = new IllegalArgumentException("Invalid String value for property '"+prop+"': expected a number (Integer).");
                // Let's not lose the root cause (initCause() works on old JDKs too)
                iae.initCause(nex);
                throw iae;
            }
        } else {
            throw new IllegalArgumentException("Invalid value type ("+value.getClass()+") for property '"+prop+"': expected Integer value.");
        }
        if (i < minValue) {
            throw new IllegalArgumentException("Invalid numeric value ("+i
                +") for property '"+prop
                +"': minimum is "+minValue+".");
        }
        return i;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/PrefixedName.java 0000644 0001750 0001750 00000012354 11745427074 024066 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.util;
import javax.xml.namespace.QName;
/**
* Simple key Object to be used for storing/accessing of potentially namespace
* scoped element and attribute names.
*
* One important note about usage is that two of the name components (prefix * and local name) HAVE to have been interned some way, as all comparisons * are done using identity comparison; whereas URI is NOT necessarily * interned. *
* Note that the main reason this class is mutable -- unlike most key classes * -- is that this allows reusing key objects for access, as long as the code * using it knows ramifications of trying to modify a key that's used * in a data structure. *
* Note, too, that the hash code is cached as this class is mostly used as * a Map key, and hash code is used a lot. */ public final class PrefixedName implements Comparable // to allow alphabetic ordering { private String mPrefix, mLocalName; volatile int mHash = 0; /* /////////////////////////////////////////////////// // Life-cycle /////////////////////////////////////////////////// */ public PrefixedName(String prefix, String localName) { mLocalName = localName; mPrefix = (prefix != null && prefix.length() == 0) ? null : prefix; } public PrefixedName reset(String prefix, String localName) { mLocalName = localName; mPrefix = (prefix != null && prefix.length() == 0) ? null : prefix; mHash = 0; return this; } public static PrefixedName valueOf(QName n) { return new PrefixedName(n.getPrefix(), n.getLocalPart()); } /* /////////////////////////////////////////////////// // Accessors: /////////////////////////////////////////////////// */ public String getPrefix() { return mPrefix; } public String getLocalName() { return mLocalName; } /** * @return True, if this attribute name would result in a namespace * binding (ie. it's "xmlns" or starts with "xmlns:"). */ public boolean isaNsDeclaration() { if (mPrefix == null) { return mLocalName == "xmlns"; } return mPrefix == "xmlns"; } /** * Method used to check for xml reserved attribute names, like * "xml:space" and "xml:id". *
* Note: it is assumed that the passed-in localName is also * interned. */ public boolean isXmlReservedAttr(boolean nsAware, String localName) { if (nsAware) { if ("xml" == mPrefix) { return mLocalName == localName; } } else { if (mLocalName.length() == (4 + localName.length())) { return (mLocalName.startsWith("xml:") && mLocalName.endsWith(localName)); } } return false; } /* /////////////////////////////////////////////////// // Overridden standard methods: /////////////////////////////////////////////////// */ public String toString() { if (mPrefix == null || mPrefix.length() == 0) { return mLocalName; } StringBuffer sb = new StringBuffer(mPrefix.length() + 1 + mLocalName.length()); sb.append(mPrefix); sb.append(':'); sb.append(mLocalName); return sb.toString(); } public boolean equals(Object o) { if (o == this) { return true; } if (!(o instanceof PrefixedName)) { // also filters out nulls return false; } PrefixedName other = (PrefixedName) o; if (mLocalName != other.mLocalName) { // assumes equality return false; } return (mPrefix == other.mPrefix); } public int hashCode() { int hash = mHash; if (hash == 0) { hash = mLocalName.hashCode(); if (mPrefix != null) { hash ^= mPrefix.hashCode(); } mHash = hash; } return hash; } public int compareTo(Object o) { PrefixedName other = (PrefixedName) o; // First, by prefix, then by local name: String op = other.mPrefix; // Missing prefix is ordered before existing prefix if (op == null || op.length() == 0) { if (mPrefix != null && mPrefix.length() > 0) { return 1; } } else if (mPrefix == null || mPrefix.length() == 0) { return -1; } else { int result = mPrefix.compareTo(op); if (result != 0) { return result; } } return mLocalName.compareTo(other.mLocalName); } } woodstox-4.1.3/src/java/com/ctc/wstx/util/ElementIdMap.java 0000644 0001750 0001750 00000032130 11745427074 024015 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License 
specified in the file LICENSE which is * included with the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.util; import javax.xml.stream.Location; /** * This class is a specialized type-safe linked hash map used for * storing {@link ElementId} instances. {@link ElementId} instances * represent both id definitions (values of element attributes that * have type ID in DTD), and references (values of element attributes * of type IDREF and IDREFS). These definitions and references are * stored for the purpose of verifying * that all referenced id values are defined, and that none are defined * more than once. *
* Note: there are 2 somewhat distinct usage modes, by DTDValidator and
* by MSV-based validators.
* DTDs pass raw character arrays, whereas
* MSV-based validators operate on Strings. This is the main reason
* for 2 distinct sets of methods.
*/
public final class ElementIdMap
{
/**
* Default initial table size; set so that usually it need not
* be expanded.
*/
protected static final int DEFAULT_SIZE = 128;
protected static final int MIN_SIZE = 16;
/**
* Let's use 80% fill factor...
*/
protected static final int FILL_PCT = 80;
/*
////////////////////////////////////////
// Actual hash table structure
////////////////////////////////////////
*/
/**
* Actual hash table area
*/
protected ElementId[] mTable;
/**
* Current size (number of entries); needed to know if and when
* rehash.
*/
protected int mSize;
/**
* Limit that indicates maximum size this instance can hold before
* it needs to be expanded and rehashed. Calculated using fill
* factor passed in to constructor.
*/
protected int mSizeThreshold;
/**
* Mask used to get index from hash values; equal to
* <code>mTable.length - 1</code>, when <code>mTable.length</code> is
* a power of two.
*/
protected int mIndexMask;
/*
////////////////////////////////////////
// Linked list info
////////////////////////////////////////
*/
protected ElementId mHead;
protected ElementId mTail;
/*
////////////////////////////////////////
// Life-cycle:
////////////////////////////////////////
*/
/**
 * Default constructor; creates a map with the default initial
 * table size ({@link #DEFAULT_SIZE}).
 */
public ElementIdMap()
{
    // Just delegate to the sizing constructor
    this(DEFAULT_SIZE);
}
/**
 * Constructor that allows specifying the initial table size; mostly
 * useful for tests that want to exercise rehashing with a small table.
 *
 * @param initialSize Requested size; actual table length is
 *   {@link #MIN_SIZE} doubled until it is at least this big
 */
public ElementIdMap(int initialSize)
{
    // Keep doubling from the minimum until request is covered
    int capacity = MIN_SIZE;
    for (; capacity < initialSize; capacity += capacity) { }

    mHead = null;
    mTail = null;
    mSize = 0;
    mTable = new ElementId[capacity];
    // With power-of-two length, mask is simply length minus one
    mIndexMask = capacity - 1;
    mSizeThreshold = (capacity * FILL_PCT) / 100;
}
/*
////////////////////////////////////////////////////
// Public API
////////////////////////////////////////////////////
*/
/**
 * @return Head of the linked list of referenced ids; the list is
 *   pruned (when ids get defined) so that head is the first
 *   (in document order) undefined id. Null if there is none.
 */
public ElementId getFirstUndefined()
{
    // No searching needed, pruning keeps head up to date
    return mHead;
}
/**
 * Method to call when a reference to an id value is seen. Looks for an
 * existing entry (reference or definition) for the id, and returns it
 * if there is one; otherwise adds a new "undefined" placeholder entry,
 * both in the hash table and at the end of the list of undefined ids.
 *
 * @param buffer Buffer that contains the id value
 * @param start Offset of the first character of the id in buffer
 * @param len Length of the id, in characters
 * @param hash Hash code of the id value, calculated by caller
 * @param loc Location of the reference
 * @param elemName Name of the element that has the referencing attribute
 * @param attrName Name of the referencing attribute
 *
 * @return Existing entry for the id, if any; newly added placeholder
 *   if not
 */
public ElementId addReferenced(char[] buffer, int start, int len, int hash,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    int bucket = (hash & mIndexMask);
    // First: is there already an entry for this id?
    for (ElementId curr = mTable[bucket]; curr != null; curr = curr.nextColliding()) {
        if (curr.idMatches(buffer, start, len)) {
            return curr;
        }
    }
    // Nope; placeholder needed. May have to expand the table first:
    if (mSize >= mSizeThreshold) {
        rehash();
        // mask changed, so bucket has to be recalculated
        bucket = (hash & mIndexMask);
    }
    ++mSize;
    ElementId entry = new ElementId(new String(buffer, start, len),
            loc, false, elemName, attrName);
    // Entry becomes the new head of the bucket's collision chain
    entry.setNextColliding(mTable[bucket]);
    mTable[bucket] = entry;
    // And is appended to the list of undefined ids
    if (mHead == null) {
        mHead = entry;
    } else {
        mTail.linkUndefined(entry);
    }
    mTail = entry;
    return entry;
}
/**
 * String-based variant of the method to call when a reference to an
 * id value is seen; hash code is calculated here. Returns the existing
 * entry (reference or definition) for the id if there is one; otherwise
 * adds a new "undefined" placeholder entry, both in the hash table and
 * at the end of the list of undefined ids.
 *
 * @param idStr Id value being referenced
 * @param loc Location of the reference
 * @param elemName Name of the element that has the referencing attribute
 * @param attrName Name of the referencing attribute
 *
 * @return Existing entry for the id, if any; newly added placeholder
 *   if not
 */
public ElementId addReferenced(String idStr,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    final int hash = calcHash(idStr);
    int bucket = (hash & mIndexMask);
    // First: is there already an entry for this id?
    for (ElementId curr = mTable[bucket]; curr != null; curr = curr.nextColliding()) {
        if (curr.idMatches(idStr)) {
            return curr;
        }
    }
    // Nope; placeholder needed. May have to expand the table first:
    if (mSize >= mSizeThreshold) {
        rehash();
        // mask changed, so bucket has to be recalculated
        bucket = (hash & mIndexMask);
    }
    ++mSize;
    ElementId entry = new ElementId(idStr, loc, false, elemName, attrName);
    // Entry becomes the new head of the bucket's collision chain
    entry.setNextColliding(mTable[bucket]);
    mTable[bucket] = entry;
    // And is appended to the list of undefined ids
    if (mHead == null) {
        mHead = entry;
    } else {
        mTail.linkUndefined(entry);
    }
    mTail = entry;
    return entry;
}
/**
 * Method to call when a definition of an id value is seen. There are
 * three possible outcomes:
 *<ul>
 * <li>No entry exists for the id: a new defined entry is added to the
 *   hash table (but not to the list of undefined ids).
 *  </li>
 * <li>An undefined (reference) entry exists: it is marked as defined,
 *   and if it was the head of the list of undefined ids, head is moved
 *   forward past all entries that are by now defined.
 *  </li>
 * <li>A defined entry exists: it is returned as is; caller is expected
 *   to detect the duplicate and report the problem.
 *  </li>
 *</ul>
 *
 * @param buffer Buffer that contains the id value
 * @param start Offset of the first character of the id in buffer
 * @param len Length of the id, in characters
 * @param hash Hash code of the id value, calculated by caller
 * @param loc Location of the definition
 * @param elemName Name of the element that has the id attribute
 * @param attrName Name of the id attribute
 *
 * @return Entry for the id; either pre-existing or newly added
 */
public ElementId addDefined(char[] buffer, int start, int len, int hash,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    int bucket = (hash & mIndexMask);
    ElementId entry = mTable[bucket];
    while (entry != null && !entry.idMatches(buffer, start, len)) {
        entry = entry.nextColliding();
    }

    if (entry == null) {
        // Completely new id: only goes in the table, since it is defined
        if (mSize >= mSizeThreshold) { // but is there room?
            rehash();
            bucket = (hash & mIndexMask);
        }
        ++mSize;
        entry = new ElementId(new String(buffer, start, len),
                loc, true, elemName, attrName);
        entry.setNextColliding(mTable[bucket]);
        mTable[bucket] = entry;
        return entry;
    }

    // Duplicate definition? Not handled here; caller will check and report
    if (entry.isDefined()) {
        return entry;
    }

    // Otherwise was only referenced so far; upgrade to defined
    entry.markDefined(loc);
    if (entry == mHead) {
        // Head must point to first entry still undefined, if any
        ElementId next = mHead.nextUndefined();
        while (next != null && next.isDefined()) {
            next = next.nextUndefined();
        }
        mHead = next;
        if (next == null) { // no undefined ids remain
            mTail = null;
        }
    }
    return entry;
}
/**
 * Variant of <code>addDefined</code> that takes the id as a String and
 * calculates the hash code itself (using {@link #calcHash(String)},
 * which reads the first character, so the id must not be empty).
 *<p>
 * If no entry matches the id, a new entry flagged as defined is created
 * and linked to the hash table only. If a matching entry exists and is
 * already defined, it is returned unchanged (caller is expected to
 * handle duplicates). Otherwise the existing entry is marked as defined
 * at <code>loc</code>, and if it was the head of the undefined list,
 * the head is moved past it and past any directly following entries
 * that are already defined.
 *
 * @param idStr Id being defined
 * @param loc Location passed to the new entry, or used to mark an
 *   existing entry as defined
 * @param elemName Passed to the entry constructor; only used when a
 *   new entry gets created
 * @param attrName Passed to the entry constructor; only used when a
 *   new entry gets created
 *
 * @return Entry that matches the id; either a new or an existing one
 */
public ElementId addDefined(String idStr,
        Location loc, PrefixedName elemName, PrefixedName attrName)
{
    final int hash = calcHash(idStr);
    int slot = (hash & mIndexMask);
    ElementId entry = mTable[slot];

    // Skip colliding entries that are for some other id
    while (entry != null && !entry.idMatches(idStr)) {
        entry = entry.nextColliding();
    }

    if (entry == null) {
        /* First time this id is seen: add to the hash table, but not
         * to the linked list, since it is not undefined.
         */
        if (mSize >= mSizeThreshold) {
            rehash();
            slot = (hash & mIndexMask);
        }
        ++mSize;
        entry = new ElementId(idStr, loc, true, elemName, attrName);
        entry.setNextColliding(mTable[slot]);
        mTable[slot] = entry;
        return entry;
    }

    /* Existing entry: if it is already defined there is nothing to do
     * here (caller may want to report it); otherwise it gets upgraded.
     */
    if (!entry.isDefined()) {
        entry.markDefined(loc);
        // Head of the undefined list? Then unlink it, and any defined
        // entries that come right after it
        if (entry == mHead) {
            mHead = mHead.nextUndefined();
            while (mHead != null && mHead.isDefined()) {
                mHead = mHead.nextUndefined();
            }
            if (mHead == null) { // list became empty
                mTail = null;
            }
        }
    }
    return entry;
}
/**
* Implementation of a hashing method for variable length
* Strings. Most of the time intention is that this calculation
* is done by caller during parsing, not here; however, sometimes
* it needs to be done for parsed "String" too.
*
* Note: identical to {@link com.ctc.wstx.util.SymbolTable#calcHash}, * although not required to be. * * @param len Length of String; has to be at least 1 (caller guarantees * this pre-condition) */ public static int calcHash(char[] buffer, int start, int len) { int hash = (int) buffer[0]; for (int i = 1; i < len; ++i) { hash = (hash * 31) + (int) buffer[i]; } return hash; } public static int calcHash(String key) { int hash = (int) key.charAt(0); for (int i = 1, len = key.length(); i < len; ++i) { hash = (hash * 31) + (int) key.charAt(i); } return hash; } /* ////////////////////////////////////////////////////////// // Internal methods ////////////////////////////////////////////////////////// */ /** * Method called when size (number of entries) of symbol table grows * so big that load factor is exceeded. Since size has to remain * power of two, arrays will then always be doubled. Main work * is really redistributing old entries into new String/Bucket * entries. */ private void rehash() { int size = mTable.length; /* Let's grow aggressively; this should minimize number of * resizes, while adding to mem usage. But since these Maps * are never long-lived (only during parsing and validation of * a single doc), that shouldn't greatly matter. 
*/ int newSize = (size << 2); ElementId[] oldSyms = mTable; mTable = new ElementId[newSize]; // Let's update index mask, threshold, now (needed for rehashing) mIndexMask = newSize - 1; mSizeThreshold <<= 2; int count = 0; // let's do sanity check for (int i = 0; i < size; ++i) { for (ElementId id = oldSyms[i]; id != null; ) { ++count; int index = calcHash(id.getId()) & mIndexMask; ElementId nextIn = id.nextColliding(); id.setNextColliding(mTable[index]); mTable[index] = id; id = nextIn; } } if (count != mSize) { ExceptionUtil.throwInternal("on rehash(): had "+mSize+" entries; now have "+count+"."); } } } woodstox-4.1.3/src/java/com/ctc/wstx/util/SimpleCache.java 0000644 0001750 0001750 00000002730 11745427075 023672 0 ustar giovanni giovanni package com.ctc.wstx.util; import java.util.*; /** * Simple Map implementation usable for caches where contents do not * expire, but where size needs to remain bounded. *
* Note: we probably should use weak references, or something similar * to limit maximum memory usage. This could be implemented in many * ways, perhaps by using two areas: first, smaller one, with strong * refs, and secondary bigger one that uses soft references. */ public final class SimpleCache { protected final LimitMap mItems; protected final int mMaxSize; public SimpleCache(int maxSize) { mItems = new LimitMap(maxSize); mMaxSize = maxSize; } public Object find(Object key) { return mItems.get(key); } public void add(Object key, Object value) { mItems.put(key, value); } /* /////////////////////////////////////////////////////////////////////// // Helper classes /////////////////////////////////////////////////////////////////////// */ final static class LimitMap extends LinkedHashMap { private static final long serialVersionUID = 1L; protected final int mMaxSize; public LimitMap(int size) { super(size, 0.8f, true); // Let's not allow silly low values... mMaxSize = size; } public boolean removeEldestEntry(Map.Entry eldest) { return (size() >= mMaxSize); } } } woodstox-4.1.3/src/java/com/ctc/wstx/util/DefaultXmlSymbolTable.java 0000644 0001750 0001750 00000005431 11745427074 025720 0 ustar giovanni giovanni package com.ctc.wstx.util; import com.ctc.wstx.util.SymbolTable; /** * Factory class used for instantiating pre-populated XML symbol * tables. Such tables already have basic String constants that * XML standard defines. */ public final class DefaultXmlSymbolTable { /** * Root symbol table from which child instances are derived. */ final static SymbolTable sInstance; final static String mNsPrefixXml; final static String mNsPrefixXmlns; /* Although theoretically there'd be no strict need to pre-populate * the default table, if all access was done using suggested usage * patterns (reuse input factories consistently, esp. for same types * of documents), it is possible some developers just use each factory * just once. 
As such, it does matter how tables are pre-populated. * Thus, let's use limited sensible set of predefined prefixes and * names. */ static { /* 128 means it's ok without resize up to ~96 symbols; true that * default symbols added will be interned. */ sInstance = new SymbolTable(true, 128); // Let's add default namespace binding prefixes mNsPrefixXml = sInstance.findSymbol("xml"); mNsPrefixXmlns = sInstance.findSymbol("xmlns"); /* No need to add keywords, as they are checked directly by * Reader, without constructing Strings. */ // Ok, any common prefixes? // or local names (element, attribute)? sInstance.findSymbol("id"); sInstance.findSymbol("name"); // XML Schema? // prefixes: sInstance.findSymbol("xsd"); sInstance.findSymbol("xsi"); // local names: sInstance.findSymbol("type"); // How about some common prefixes and names for Soap? // commonly used prefixes: sInstance.findSymbol("soap"); sInstance.findSymbol("SOAP-ENC"); sInstance.findSymbol("SOAP-ENV"); // local names: sInstance.findSymbol("Body"); sInstance.findSymbol("Envelope"); } /* /////////////////////////////////////////////////// // Public API, factory method(s): /////////////////////////////////////////////////// */ /** * Method that will return an instance of SymbolTable that has basic * XML 1.0 constants pre-populated. 
*/ public static SymbolTable getInstance() { return sInstance.makeChild(); } /* /////////////////////////////////////////////////// // Public API, efficient access to (shared) // constants values: /////////////////////////////////////////////////// */ public static String getXmlSymbol() { return mNsPrefixXml; } public static String getXmlnsSymbol() { return mNsPrefixXmlns; } } woodstox-4.1.3/src/java/com/ctc/wstx/util/XmlChars.java 0000644 0001750 0001750 00000053431 11745427075 023242 0 ustar giovanni giovanni package com.ctc.wstx.util; /** * Simple utility class that encapsulates logic of determining validity * of characters outside basic 7-bit range of Unicode, for XML 1.0 */ public final class XmlChars { /* We don't need full 64k bits... (0x80 - 0x312C) / 32. But to * simplify things, let's just include first 0x80 entries in there etc */ final static int SIZE = (0x3140 >> 5); // 32 bits per int final static int[] sXml10StartChars = new int[SIZE]; static { SETBITS(sXml10StartChars, 0xC0, 0xD6); SETBITS(sXml10StartChars, 0xD8, 0xF6); SETBITS(sXml10StartChars, 0xF8, 0xFF); SETBITS(sXml10StartChars, 0x100, 0x131); SETBITS(sXml10StartChars, 0x134, 0x13e); SETBITS(sXml10StartChars, 0x141, 0x148); SETBITS(sXml10StartChars, 0x14a, 0x17e); SETBITS(sXml10StartChars, 0x180, 0x1c3); SETBITS(sXml10StartChars, 0x1cd, 0x1f0); SETBITS(sXml10StartChars, 0x1f4, 0x1f5); SETBITS(sXml10StartChars, 0x1fa, 0x217); SETBITS(sXml10StartChars, 0x250, 0x2a8); SETBITS(sXml10StartChars, 0x2bb, 0x2c1); SETBITS(sXml10StartChars, 0x386); SETBITS(sXml10StartChars, 0x388, 0x38a); SETBITS(sXml10StartChars, 0x38c); SETBITS(sXml10StartChars, 0x38e, 0x3a1); SETBITS(sXml10StartChars, 0x3a3, 0x3ce); SETBITS(sXml10StartChars, 0x3d0, 0x3d6); SETBITS(sXml10StartChars, 0x3da); SETBITS(sXml10StartChars, 0x3dc); SETBITS(sXml10StartChars, 0x3de); SETBITS(sXml10StartChars, 0x3e0); SETBITS(sXml10StartChars, 0x3e2, 0x3f3); SETBITS(sXml10StartChars, 0x401, 0x40c); SETBITS(sXml10StartChars, 0x40e, 0x44f); 
SETBITS(sXml10StartChars, 0x451, 0x45c); SETBITS(sXml10StartChars, 0x45e, 0x481); SETBITS(sXml10StartChars, 0x490, 0x4c4); SETBITS(sXml10StartChars, 0x4c7, 0x4c8); SETBITS(sXml10StartChars, 0x4cb, 0x4cc); SETBITS(sXml10StartChars, 0x4d0, 0x4eb); SETBITS(sXml10StartChars, 0x4ee, 0x4f5); SETBITS(sXml10StartChars, 0x4f8, 0x4f9); SETBITS(sXml10StartChars, 0x531, 0x556); SETBITS(sXml10StartChars, 0x559); SETBITS(sXml10StartChars, 0x561, 0x586); SETBITS(sXml10StartChars, 0x5d0, 0x5ea); SETBITS(sXml10StartChars, 0x5f0, 0x5f2); SETBITS(sXml10StartChars, 0x621, 0x63a); SETBITS(sXml10StartChars, 0x641, 0x64a); SETBITS(sXml10StartChars, 0x671, 0x6b7); SETBITS(sXml10StartChars, 0x6ba, 0x6be); SETBITS(sXml10StartChars, 0x6c0, 0x6ce); SETBITS(sXml10StartChars, 0x6d0, 0x6d3); SETBITS(sXml10StartChars, 0x6d5); SETBITS(sXml10StartChars, 0x6e5, 0x6e6); SETBITS(sXml10StartChars, 0x905, 0x939); SETBITS(sXml10StartChars, 0x93d); SETBITS(sXml10StartChars, 0x958, 0x961); SETBITS(sXml10StartChars, 0x985, 0x98c); SETBITS(sXml10StartChars, 0x98f, 0x990); SETBITS(sXml10StartChars, 0x993, 0x9a8); SETBITS(sXml10StartChars, 0x9aa, 0x9b0); SETBITS(sXml10StartChars, 0x9b2); SETBITS(sXml10StartChars, 0x9b6, 0x9b9); SETBITS(sXml10StartChars, 0x9dc); SETBITS(sXml10StartChars, 0x9dd); SETBITS(sXml10StartChars, 0x9df, 0x9e1); SETBITS(sXml10StartChars, 0x9f0); SETBITS(sXml10StartChars, 0x9f1); SETBITS(sXml10StartChars, 0xA05, 0xA0A); SETBITS(sXml10StartChars, 0xA0F); SETBITS(sXml10StartChars, 0xA10); SETBITS(sXml10StartChars, 0xA13, 0xA28); SETBITS(sXml10StartChars, 0xA2A, 0xA30); SETBITS(sXml10StartChars, 0xA32); SETBITS(sXml10StartChars, 0xA33); SETBITS(sXml10StartChars, 0xA35); SETBITS(sXml10StartChars, 0xA36); SETBITS(sXml10StartChars, 0xA38); SETBITS(sXml10StartChars, 0xA39); SETBITS(sXml10StartChars, 0xA59, 0xA5C); SETBITS(sXml10StartChars, 0xA5E); SETBITS(sXml10StartChars, 0xA72, 0xA74); SETBITS(sXml10StartChars, 0xA85, 0xA8B); SETBITS(sXml10StartChars, 0xA8D); SETBITS(sXml10StartChars, 0xA8F, 
0xA91); SETBITS(sXml10StartChars, 0xA93, 0xAA8); SETBITS(sXml10StartChars, 0xAAA, 0xAB0); SETBITS(sXml10StartChars, 0xAB2, 0xAB3); SETBITS(sXml10StartChars, 0xAB5, 0xAB9); SETBITS(sXml10StartChars, 0xABD); SETBITS(sXml10StartChars, 0xAE0); SETBITS(sXml10StartChars, 0xB05, 0xB0C); SETBITS(sXml10StartChars, 0xB0F); SETBITS(sXml10StartChars, 0xB10); SETBITS(sXml10StartChars, 0xB13, 0xB28); SETBITS(sXml10StartChars, 0xB2A, 0xB30); SETBITS(sXml10StartChars, 0xB32); SETBITS(sXml10StartChars, 0xB33); SETBITS(sXml10StartChars, 0xB36, 0xB39); SETBITS(sXml10StartChars, 0xB3D); SETBITS(sXml10StartChars, 0xB5C); SETBITS(sXml10StartChars, 0xB5D); SETBITS(sXml10StartChars, 0xB5F, 0xB61); SETBITS(sXml10StartChars, 0xB85, 0xB8A); SETBITS(sXml10StartChars, 0xB8E, 0xB90); SETBITS(sXml10StartChars, 0xB92, 0xB95); SETBITS(sXml10StartChars, 0xB99, 0xB9A); SETBITS(sXml10StartChars, 0xB9C); SETBITS(sXml10StartChars, 0xB9E); SETBITS(sXml10StartChars, 0xB9F); SETBITS(sXml10StartChars, 0xBA3); SETBITS(sXml10StartChars, 0xBA4); SETBITS(sXml10StartChars, 0xBA8, 0xBAA); SETBITS(sXml10StartChars, 0xBAE, 0xBB5); SETBITS(sXml10StartChars, 0xBB7, 0xBB9); SETBITS(sXml10StartChars, 0xC05, 0xC0C); SETBITS(sXml10StartChars, 0xC0E, 0xC10); SETBITS(sXml10StartChars, 0xC12, 0xC28); SETBITS(sXml10StartChars, 0xC2A, 0xC33); SETBITS(sXml10StartChars, 0xC35, 0xC39); SETBITS(sXml10StartChars, 0xC60); SETBITS(sXml10StartChars, 0xC61); SETBITS(sXml10StartChars, 0xC85, 0xC8C); SETBITS(sXml10StartChars, 0xC8E, 0xC90); SETBITS(sXml10StartChars, 0xC92, 0xCA8); SETBITS(sXml10StartChars, 0xCAA, 0xCB3); SETBITS(sXml10StartChars, 0xCB5, 0xCB9); SETBITS(sXml10StartChars, 0xCDE); SETBITS(sXml10StartChars, 0xCE0); SETBITS(sXml10StartChars, 0xCE1); SETBITS(sXml10StartChars, 0xD05, 0xD0C); SETBITS(sXml10StartChars, 0xD0E, 0xD10); SETBITS(sXml10StartChars, 0xD12, 0xD28); SETBITS(sXml10StartChars, 0xD2A, 0xD39); SETBITS(sXml10StartChars, 0xD60); SETBITS(sXml10StartChars, 0xD61); SETBITS(sXml10StartChars, 0xE01, 0xE2E); 
SETBITS(sXml10StartChars, 0xE30); SETBITS(sXml10StartChars, 0xE32); SETBITS(sXml10StartChars, 0xE33); SETBITS(sXml10StartChars, 0xE40, 0xE45); SETBITS(sXml10StartChars, 0xE81); SETBITS(sXml10StartChars, 0xE82); SETBITS(sXml10StartChars, 0xE84); SETBITS(sXml10StartChars, 0xE87); SETBITS(sXml10StartChars, 0xE88); SETBITS(sXml10StartChars, 0xE8A); SETBITS(sXml10StartChars, 0xE8D); SETBITS(sXml10StartChars, 0xE94, 0xE97); SETBITS(sXml10StartChars, 0xE99, 0xE9F); SETBITS(sXml10StartChars, 0xEA1, 0xEA3); SETBITS(sXml10StartChars, 0xEA5); SETBITS(sXml10StartChars, 0xEA7); SETBITS(sXml10StartChars, 0xEAA); SETBITS(sXml10StartChars, 0xEAB); SETBITS(sXml10StartChars, 0xEAD); SETBITS(sXml10StartChars, 0xEAE); SETBITS(sXml10StartChars, 0xEB0); SETBITS(sXml10StartChars, 0xEB2); SETBITS(sXml10StartChars, 0xEB3); SETBITS(sXml10StartChars, 0xEBD); SETBITS(sXml10StartChars, 0xEC0, 0xEC4); SETBITS(sXml10StartChars, 0xF40, 0xF47); SETBITS(sXml10StartChars, 0xF49, 0xF69); SETBITS(sXml10StartChars, 0x10a0, 0x10c5); SETBITS(sXml10StartChars, 0x10d0, 0x10f6); SETBITS(sXml10StartChars, 0x1100); SETBITS(sXml10StartChars, 0x1102, 0x1103); SETBITS(sXml10StartChars, 0x1105, 0x1107); SETBITS(sXml10StartChars, 0x1109); SETBITS(sXml10StartChars, 0x110b, 0x110c); SETBITS(sXml10StartChars, 0x110e, 0x1112); SETBITS(sXml10StartChars, 0x113c); SETBITS(sXml10StartChars, 0x113e); SETBITS(sXml10StartChars, 0x1140); SETBITS(sXml10StartChars, 0x114c); SETBITS(sXml10StartChars, 0x114e); SETBITS(sXml10StartChars, 0x1150); SETBITS(sXml10StartChars, 0x1154, 0x1155); SETBITS(sXml10StartChars, 0x1159); SETBITS(sXml10StartChars, 0x115f, 0x1161); SETBITS(sXml10StartChars, 0x1163); SETBITS(sXml10StartChars, 0x1165); SETBITS(sXml10StartChars, 0x1167); SETBITS(sXml10StartChars, 0x1169); SETBITS(sXml10StartChars, 0x116d, 0x116e); SETBITS(sXml10StartChars, 0x1172, 0x1173); SETBITS(sXml10StartChars, 0x1175); SETBITS(sXml10StartChars, 0x119e); SETBITS(sXml10StartChars, 0x11a8); SETBITS(sXml10StartChars, 0x11ab); 
SETBITS(sXml10StartChars, 0x11ae, 0x11af); SETBITS(sXml10StartChars, 0x11b7, 0x11b8); SETBITS(sXml10StartChars, 0x11ba); SETBITS(sXml10StartChars, 0x11bc, 0x11c2); SETBITS(sXml10StartChars, 0x11eb); SETBITS(sXml10StartChars, 0x11f0); SETBITS(sXml10StartChars, 0x11f9); SETBITS(sXml10StartChars, 0x1e00, 0x1e9b); SETBITS(sXml10StartChars, 0x1ea0, 0x1ef9); SETBITS(sXml10StartChars, 0x1f00, 0x1f15); SETBITS(sXml10StartChars, 0x1f18, 0x1f1d); SETBITS(sXml10StartChars, 0x1f20, 0x1f45); SETBITS(sXml10StartChars, 0x1f48, 0x1f4d); SETBITS(sXml10StartChars, 0x1f50, 0x1f57); SETBITS(sXml10StartChars, 0x1f59); SETBITS(sXml10StartChars, 0x1f5b); SETBITS(sXml10StartChars, 0x1f5d); SETBITS(sXml10StartChars, 0x1f5f, 0x1f7d); SETBITS(sXml10StartChars, 0x1f80, 0x1fb4); SETBITS(sXml10StartChars, 0x1fb6, 0x1fbc); SETBITS(sXml10StartChars, 0x1fbe); SETBITS(sXml10StartChars, 0x1fc2, 0x1fc4); SETBITS(sXml10StartChars, 0x1fc6, 0x1fcc); SETBITS(sXml10StartChars, 0x1fd0, 0x1fd3); SETBITS(sXml10StartChars, 0x1fd6, 0x1fdb); SETBITS(sXml10StartChars, 0x1fe0, 0x1fec); SETBITS(sXml10StartChars, 0x1ff2, 0x1ff4); SETBITS(sXml10StartChars, 0x1ff6, 0x1ffc); SETBITS(sXml10StartChars, 0x2126); SETBITS(sXml10StartChars, 0x212a, 0x212b); SETBITS(sXml10StartChars, 0x212e); SETBITS(sXml10StartChars, 0x2180, 0x2182); SETBITS(sXml10StartChars, 0x3041, 0x3094); SETBITS(sXml10StartChars, 0x30a1, 0x30fa); SETBITS(sXml10StartChars, 0x3105, 0x312c); // note: AC00 - D7A3 handled separately // [86] Ideographic (but note: > 0x312c handled separately) SETBITS(sXml10StartChars, 0x3007); SETBITS(sXml10StartChars, 0x3021, 0x3029); } final static int[] sXml10Chars = new int[SIZE]; static { // Let's start with all valid start chars: System.arraycopy(sXml10StartChars, 0, sXml10Chars, 0, SIZE); // [87] CombiningChar ::= SETBITS(sXml10Chars, 0x300, 0x345); SETBITS(sXml10Chars, 0x360, 0x361); SETBITS(sXml10Chars, 0x483, 0x486); SETBITS(sXml10Chars, 0x591, 0x5a1); SETBITS(sXml10Chars, 0x5a3, 0x5b9); SETBITS(sXml10Chars, 0x5bb, 
0x5bd); SETBITS(sXml10Chars, 0x5bf); SETBITS(sXml10Chars, 0x5c1, 0x5c2); SETBITS(sXml10Chars, 0x5c4); SETBITS(sXml10Chars, 0x64b, 0x652); SETBITS(sXml10Chars, 0x670); SETBITS(sXml10Chars, 0x6d6, 0x6dc); SETBITS(sXml10Chars, 0x6dd, 0x6df); SETBITS(sXml10Chars, 0x6e0, 0x6e4); SETBITS(sXml10Chars, 0x6e7, 0x6e8); SETBITS(sXml10Chars, 0x6ea, 0x6ed); SETBITS(sXml10Chars, 0x901, 0x903); SETBITS(sXml10Chars, 0x93c); SETBITS(sXml10Chars, 0x93e, 0x94c); SETBITS(sXml10Chars, 0x94d); SETBITS(sXml10Chars, 0x951, 0x954); SETBITS(sXml10Chars, 0x962); SETBITS(sXml10Chars, 0x963); SETBITS(sXml10Chars, 0x981, 0x983); SETBITS(sXml10Chars, 0x9bc); SETBITS(sXml10Chars, 0x9be); SETBITS(sXml10Chars, 0x9bf); SETBITS(sXml10Chars, 0x9c0, 0x9c4); SETBITS(sXml10Chars, 0x9c7); SETBITS(sXml10Chars, 0x9c8); SETBITS(sXml10Chars, 0x9cb, 0x9cd); SETBITS(sXml10Chars, 0x9d7); SETBITS(sXml10Chars, 0x9e2); SETBITS(sXml10Chars, 0x9e3); SETBITS(sXml10Chars, 0xA02); SETBITS(sXml10Chars, 0xA3C); SETBITS(sXml10Chars, 0xA3E); SETBITS(sXml10Chars, 0xA3F); SETBITS(sXml10Chars, 0xA40, 0xA42); SETBITS(sXml10Chars, 0xA47); SETBITS(sXml10Chars, 0xA48); SETBITS(sXml10Chars, 0xA4B, 0xA4D); SETBITS(sXml10Chars, 0xA70); SETBITS(sXml10Chars, 0xA71); SETBITS(sXml10Chars, 0xA81, 0xA83); SETBITS(sXml10Chars, 0xABC); SETBITS(sXml10Chars, 0xABE, 0xAC5); SETBITS(sXml10Chars, 0xAC7, 0xAC9); SETBITS(sXml10Chars, 0xACB, 0xACD); SETBITS(sXml10Chars, 0xB01, 0xB03); SETBITS(sXml10Chars, 0xB3C); SETBITS(sXml10Chars, 0xB3E, 0xB43); SETBITS(sXml10Chars, 0xB47); SETBITS(sXml10Chars, 0xB48); SETBITS(sXml10Chars, 0xB4B, 0xB4D); SETBITS(sXml10Chars, 0xB56); SETBITS(sXml10Chars, 0xB57); SETBITS(sXml10Chars, 0xB82); SETBITS(sXml10Chars, 0xB83); SETBITS(sXml10Chars, 0xBBE, 0xBC2); SETBITS(sXml10Chars, 0xBC6, 0xBC8); SETBITS(sXml10Chars, 0xBCA, 0xBCD); SETBITS(sXml10Chars, 0xBD7); SETBITS(sXml10Chars, 0xC01, 0xC03); SETBITS(sXml10Chars, 0xC3E, 0xC44); SETBITS(sXml10Chars, 0xC46, 0xC48); SETBITS(sXml10Chars, 0xC4A, 0xC4D); 
SETBITS(sXml10Chars, 0xC55, 0xC56); SETBITS(sXml10Chars, 0xC82, 0xC83); SETBITS(sXml10Chars, 0xCBE, 0xCC4); SETBITS(sXml10Chars, 0xCC6, 0xCC8); SETBITS(sXml10Chars, 0xCCA, 0xCCD); SETBITS(sXml10Chars, 0xCD5, 0xCD6); SETBITS(sXml10Chars, 0xD02, 0xD03); SETBITS(sXml10Chars, 0xD3E, 0xD43); SETBITS(sXml10Chars, 0xD46, 0xD48); SETBITS(sXml10Chars, 0xD4A, 0xD4D); SETBITS(sXml10Chars, 0xD57); SETBITS(sXml10Chars, 0xE31); SETBITS(sXml10Chars, 0xE34, 0xE3A); SETBITS(sXml10Chars, 0xE47, 0xE4E); SETBITS(sXml10Chars, 0xEB1); SETBITS(sXml10Chars, 0xEB4, 0xEB9); SETBITS(sXml10Chars, 0xEBB, 0xEBC); SETBITS(sXml10Chars, 0xEC8, 0xECD); SETBITS(sXml10Chars, 0xF18, 0xF19); SETBITS(sXml10Chars, 0xF35); SETBITS(sXml10Chars, 0xF37); SETBITS(sXml10Chars, 0xF39); SETBITS(sXml10Chars, 0xF3E); SETBITS(sXml10Chars, 0xF3F); SETBITS(sXml10Chars, 0xF71, 0xF84); SETBITS(sXml10Chars, 0xF86, 0xF8B); SETBITS(sXml10Chars, 0xF90, 0xF95); SETBITS(sXml10Chars, 0xF97); SETBITS(sXml10Chars, 0xF99, 0xFAD); SETBITS(sXml10Chars, 0xFB1, 0xFB7); SETBITS(sXml10Chars, 0xFB9); SETBITS(sXml10Chars, 0x20D0, 0x20DC); SETBITS(sXml10Chars, 0x20E1); SETBITS(sXml10Chars, 0x302A, 0x302F); SETBITS(sXml10Chars, 0x3099); SETBITS(sXml10Chars, 0x309A); // [88] Digit: SETBITS(sXml10Chars, 0x660, 0x669); SETBITS(sXml10Chars, 0x6f0, 0x6f9); SETBITS(sXml10Chars, 0x966, 0x96f); SETBITS(sXml10Chars, 0x9e6, 0x9ef); SETBITS(sXml10Chars, 0xa66, 0xa6f); SETBITS(sXml10Chars, 0xae6, 0xaef); SETBITS(sXml10Chars, 0xb66, 0xb6f); SETBITS(sXml10Chars, 0xbe7, 0xbef); SETBITS(sXml10Chars, 0xc66, 0xc6f); SETBITS(sXml10Chars, 0xce6, 0xcef); SETBITS(sXml10Chars, 0xd66, 0xd6f); SETBITS(sXml10Chars, 0xe50, 0xe59); SETBITS(sXml10Chars, 0xed0, 0xed9); SETBITS(sXml10Chars, 0xf20, 0xf29); // [89] Extender: SETBITS(sXml10Chars, 0xb7); SETBITS(sXml10Chars, 0x2d0); SETBITS(sXml10Chars, 0x2d1); SETBITS(sXml10Chars, 0x387); SETBITS(sXml10Chars, 0x640); SETBITS(sXml10Chars, 0xE46); SETBITS(sXml10Chars, 0xEC6); SETBITS(sXml10Chars, 0x3005); 
SETBITS(sXml10Chars, 0x3031, 0x3035); SETBITS(sXml10Chars, 0x309d, 0x309e); SETBITS(sXml10Chars, 0x30fc, 0x30fe); } private XmlChars() { } public final static boolean is10NameStartChar(char c) { // First, let's deal with outliers if (c > 0x312C) { // Most valid chars are below this.. if (c < 0xAC00) { return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms } if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars return true; } /* As to surrogate pairs... let's do the bare minimum; * 0xD800 - 0xDBFF (high surrogate) are ok; low surrogates * can only follow high one */ return (c <= 0xDBFF && c >= 0xD800); } // but then we'll just need to use the table... int ix = (int) c; return (sXml10StartChars[ix >> 5] & (1 << (ix & 31))) != 0; } public final static boolean is10NameChar(char c) { // First, let's deal with outliers if (c > 0x312C) { // Most valid chars are below this.. if (c < 0xAC00) { return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms } if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars return true; } /* As to surrogate pairs... let's do the bare minimum; * 0xD800 - 0xDFFF (high, low surrogate) are ok (need to * check pairing in future) */ return (c >= 0xD800 && c <= 0xDFFF); } // but then we'll just need to use the table... int ix = (int) c; return (sXml10Chars[ix >> 5] & (1 << (ix & 31))) != 0; } public final static boolean is11NameStartChar(char c) { // Others are checked block-by-block: if (c <= 0x2FEF) { if (c < 0x300) { if (c < 0x00C0) { // 8-bit ctrl chars return false; } // most of the rest are fine... 
return (c != 0xD7 && c != 0xF7); } if (c >= 0x2C00) { // 0x2C00 - 0x2FEF are ok return true; } if (c < 0x370 || c > 0x218F) { // 0x300 - 0x36F, 0x2190 - 0x2BFF invalid return false; } if (c < 0x2000) { // 0x370 - 0x37D, 0x37F - 0x1FFF are ok return (c != 0x37E); } if (c >= 0x2070) { // 0x2070 - 0x218F are ok return (c <= 0x218F); } // And finally, 0x200C - 0x200D return (c == 0x200C || c == 0x200D); } // 0x3000 and above: if (c >= 0x3001) { /* Hmmh, let's allow high surrogates here, without checking * that they are properly followed... crude basic support, * I know, but allows valid combinations, just doesn't catch * invalid ones */ if (c <= 0xDBFF) { // 0x3001 - 0xD7FF (chars), // 0xD800 - 0xDBFF (high surrogate) are ok (unlike DC00-DFFF) return true; } if (c >= 0xF900 && c <= 0xFFFD) { /* Check above removes low surrogate (since one can not * START an identifier), and byte-order markers.. */ return (c <= 0xFDCF || c >= 0xFDF0); } } return false; } public final static boolean is11NameChar(char c) { // Others are checked block-by-block: if (c <= 0x2FEF) { if (c < 0x2000) { // only 8-bit ctrl chars and 0x37E to filter out return (c >= 0x00C0 && c != 0x37E) || (c == 0xB7); } if (c >= 0x2C00) { // 0x100 - 0x1FFF, 0x2C00 - 0x2FEF are ok return true; } if (c < 0x200C || c > 0x218F) { // 0x2000 - 0x200B, 0x2190 - 0x2BFF invalid return false; } if (c >= 0x2070) { // 0x2070 - 0x218F are ok return true; } // And finally, 0x200C - 0x200D, 0x203F - 0x2040 are ok return (c == 0x200C || c == 0x200D || c == 0x203F || c == 0x2040); } // 0x3000 and above: if (c >= 0x3001) { /* Hmmh, let's allow surrogate heres, without checking that * they have proper ordering. For non-first name chars, both are * ok, for valid names. 
Crude basic support, * I know, but allows valid combinations, just doesn't catch * invalid ones */ if (c <= 0xDFFF) { // 0x3001 - 0xD7FF (chars), // 0xD800 - 0xDFFF (high, low surrogate) are ok: return true; } if (c >= 0xF900 && c <= 0xFFFD) { /* Check above removes other invalid chars (below valid * range), and byte-order markers (0xFFFE, 0xFFFF). */ return (c <= 0xFDCF || c >= 0xFDF0); } } return false; } private static void SETBITS(int[] array, int start, int end) { int bit1 = (start & 31); int bit2 = (end & 31); start >>= 5; end >>= 5; /* Ok; this is not perfectly optimal, but should be good enough... * we'll only do one-by-one at the ends. */ if (start == end) { for (; bit1 <= bit2; ++bit1) { array[start] |= (1 << bit1); } } else { for (int bit = bit1; bit <= 31; ++bit) { array[start] |= (1 << bit); } while (++start < end) { array[start] = -1; } for (int bit = 0; bit <= bit2; ++bit) { array[end] |= (1 << bit); } } } private static void SETBITS(int[] array, int point) { int ix = (point >> 5); int bit = (point & 31); array[ix] |= (1 << bit); } } woodstox-4.1.3/src/java/com/ctc/wstx/util/SymbolTable.java 0000644 0001750 0001750 00000057455 11745427075 023750 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in the file LICENSE which is * included with the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.util; /** * This class is a kind of specialized type-safe Map, from char array to * String value. 
Specialization means that in addition to type-safety * and specific access patterns (key char array, Value optionally interned * String; values added on access if necessary), and that instances are * meant to be used concurrently, but by using well-defined mechanisms * to obtain such concurrently usable instances. Main use for the class * is to store symbol table information for things like compilers and * parsers; especially when number of symbols (keywords) is limited. *
* For optimal performance, usage pattern should be one where matches
* should be very common (esp. after "warm-up"), and as with most hash-based
* maps/sets, that hash codes are uniformly distributed. Also, collisions
* are slightly more expensive than with HashMap or HashSet, since hash codes
* are not used in resolving collisions; that is, equals() comparison is
* done with all symbols in same bucket index.
* Finally, rehashing is also more expensive, as hash codes are not
* stored; rehashing requires all entries' hash codes to be recalculated.
* Reason for not storing hash codes is reduced memory usage, hoping
* for better memory locality.
*
* Usual usage pattern is to create a single "master" instance, and either * use that instance in sequential fashion, or to create derived "child" * instances, which after use, are asked to return possible symbol additions * to master instance. In either case benefit is that symbol table gets * initialized so that further uses are more efficient, as eventually all * symbols needed will already be in symbol table. At that point no more * Symbol String allocations are needed, nor changes to symbol table itself. *
* Note that while individual SymbolTable instances are NOT thread-safe * (much like generic collection classes), concurrently used "child" * instances can be freely used without synchronization. However, using * master table concurrently with child instances can only be done if * access to master instance is read-only (ie. no modifications done). */ public class SymbolTable { /** * Default initial table size; no need to make it miniscule, due * to couple of things: first, overhead of array reallocation * is significant, * and second, overhead of rehashing is also non-negligible. *
* Let's use 128 as the default; it allows for up to 96 symbols, * and uses about 512 bytes on 32-bit machines. */ protected static final int DEFAULT_TABLE_SIZE = 128; protected static final float DEFAULT_FILL_FACTOR = 0.75f; protected static final String EMPTY_STRING = ""; /* //////////////////////////////////////// // Configuration: //////////////////////////////////////// */ /** * Flag that determines whether Strings to be added need to be * interned before being added or not. Forcing intern()ing will add * some overhead when adding new Strings, but may be beneficial if such * Strings are generally used by other parts of system. Note that even * without interning, all returned String instances are guaranteed * to be comparable with equality (==) operator; it's just that such * guarantees are not made for Strings other classes return. */ protected boolean mInternStrings; /* //////////////////////////////////////// // Actual symbol table data: //////////////////////////////////////// */ /** * Primary matching symbols; it's expected most match occur from * here. */ protected String[] mSymbols; /** * Overflow buckets; if primary doesn't match, lookup is done * from here. *
* Note: Number of buckets is half of number of symbol entries, on
* assumption there's less need for buckets.
*/
protected Bucket[] mBuckets;
/**
* Current size (number of entries); needed to know if and when
* rehash.
*/
protected int mSize;
/**
* Limit that indicates maximum size this instance can hold before
* it needs to be expanded and rehashed. Calculated using fill
* factor passed in to constructor.
*/
protected int mSizeThreshold;
/**
* Mask used to get index from hash values; equal to
* <code>mSymbols.length - 1</code>, where <code>mSymbols.length</code>
* is a power of two (the overflow bucket array is half that size).
*/
protected int mIndexMask;
/*
////////////////////////////////////////
// Information about concurrency
////////////////////////////////////////
*/
/**
* Version of this table instance; used when deriving new concurrently
* used versions from existing 'master' instance.
*/
protected int mThisVersion;
/**
* Flag that indicates if any changes have been made to the data;
* used to both determine if bucket array needs to be copied when
* (first) change is made, and potentially if updated bucket list
* is to be resync'ed back to master instance.
*/
protected boolean mDirty;
/*
////////////////////////////////////////
// Life-cycle:
////////////////////////////////////////
*/
/**
 * Constructs a master symbol table instance that has interning
 * enabled, and that uses the default table size
 * ({@link #DEFAULT_TABLE_SIZE}) and the default fill factor
 * ({@link #DEFAULT_FILL_FACTOR}).
 */
public SymbolTable() {
    this(true, DEFAULT_TABLE_SIZE, DEFAULT_FILL_FACTOR);
}
/**
 * Constructs a master symbol table instance that uses the default
 * table size ({@link #DEFAULT_TABLE_SIZE}) and the default fill
 * factor ({@link #DEFAULT_FILL_FACTOR}).
 *
 * @param internStrings Whether Strings to add are intern()ed or not
 */
public SymbolTable(boolean internStrings) {
    this(internStrings, DEFAULT_TABLE_SIZE, DEFAULT_FILL_FACTOR);
}
/**
* Method for constructing a master symbol table instance that uses
* the default fill factor ({@link #DEFAULT_FILL_FACTOR}); simply
* delegates to the main (3-argument) constructor.
*
* @param internStrings Whether Strings to add are intern()ed or not
* @param initialSize Minimum initial size for the table; passed as is
* to the main constructor, which validates it
*/
public SymbolTable(boolean internStrings, int initialSize) {
this(internStrings, initialSize, DEFAULT_FILL_FACTOR);
}
/**
* Main method for constructing a master symbol table instance; will
* be called by other public constructors.
*
* @param internStrings Whether Strings to add are intern()ed or not
* @param initialSize Minimum initial size for bucket array; internally
* will always use a power of two equal to or bigger than this value.
* @param fillFactor Maximum fill factor allowed for bucket table;
* when more entries are added, table will be expanded.
*/
public SymbolTable(boolean internStrings, int initialSize,
float fillFactor)
{
mInternStrings = internStrings;
// Let's start versions from 1
mThisVersion = 1;
// And we'll also set flags so no copying of buckets is needed:
mDirty = true;
// No point in requesting funny initial sizes...
if (initialSize < 1) {
throw new IllegalArgumentException("Can not use negative/zero initial size: "+initialSize);
}
/* Initial size has to be a power of two. Also, let's not honour
* sizes that are ridiculously small...
*/
{
int currSize = 4;
while (currSize < initialSize) {
currSize += currSize;
}
initialSize = currSize;
}
mSymbols = new String[initialSize];
mBuckets = new Bucket[initialSize >> 1];
// Mask is easy to calc for powers of two.
mIndexMask = initialSize - 1;
mSize = 0;
// Sanity check for fill factor:
if (fillFactor < 0.01f) {
throw new IllegalArgumentException("Fill factor can not be lower than 0.01.");
}
if (fillFactor > 10.0f) { // just to catch stupid values, ie. useless from performance perspective
throw new IllegalArgumentException("Fill factor can not be higher than 10.0.");
}
mSizeThreshold = (int) (initialSize * fillFactor + 0.5);
}
/**
 * Internal constructor used when creating child instances: the new
 * instance starts out referring to the very same arrays that were
 * passed in, and is flagged as not dirty so that the arrays get copied
 * before the first new entry is added.
 */
private SymbolTable(boolean internStrings, String[] symbols,
        Bucket[] buckets, int size, int sizeThreshold,
        int indexMask, int version)
{
    // Data structures, passed in as is (no copies made here)
    mSymbols = symbols;
    mBuckets = buckets;
    mIndexMask = indexMask;

    // Current fill state
    mSize = size;
    mSizeThreshold = sizeThreshold;

    // Configuration and versioning
    mInternStrings = internStrings;
    mThisVersion = version;

    // Not dirty: arrays need to be copied if/when adding new entries
    mDirty = false;
}
/**
* "Factory" method; will create a new child instance of this symbol
* table. It will be a copy-on-write instance, ie. it will only use
* read-only copy of parent's data, but when changes are needed, a
* copy will be created.
*
* Note: while this method is synchronized, it is generally not * safe to both use makeChild/mergeChild, AND to use instance * actively. Instead, a separate 'root' instance should be used * on which only makeChild/mergeChild are called, but instance itself * is not used as a symbol table. */ public synchronized SymbolTable makeChild() { return new SymbolTable(mInternStrings, mSymbols, mBuckets, mSize, mSizeThreshold, mIndexMask, mThisVersion+1); } /** * Method that allows contents of child table to potentially be * "merged in" with contents of this symbol table. *
* Note that caller has to make sure symbol table passed in is * really a child or sibling of this symbol table. */ public synchronized void mergeChild(SymbolTable child) { // Let's do a basic sanity check first: if (child.size() <= size()) { // nothing to add return; } // Okie dokie, let's get the data in! mSymbols = child.mSymbols; mBuckets = child.mBuckets; mSize = child.mSize; mSizeThreshold = child.mSizeThreshold; mIndexMask = child.mIndexMask; mThisVersion++; // to prevent other children from overriding // Dirty flag... well, let's just clear it, to force copying just // in case. Shouldn't really matter, for master tables. mDirty = false; /* However, we have to mark child as dirty, so that it will not * be modifying arrays we "took over" (since child may have * returned an updated table before it stopped fully using * the SymbolTable: for example, it may still use it for * parsing PI targets in epilog) */ child.mDirty = false; } /* //////////////////////////////////////////////////// // Public API, configuration //////////////////////////////////////////////////// */ public void setInternStrings(boolean state) { mInternStrings = state; } /* //////////////////////////////////////////////////// // Public API, generic accessors: //////////////////////////////////////////////////// */ public int size() { return mSize; } public int version() { return mThisVersion; } public boolean isDirty() { return mDirty; } public boolean isDirectChildOf(SymbolTable t) { /* Actually, this doesn't really prove it is a child (would have to * use sequence number, or identityHash to really prove it), but * it's good enough if relationship is known to exist. */ /* (for real check, one would need to child/descendant stuff; or * at least an identity hash... or maybe even just a _static_ global * counter for instances... maybe that would actually be worth * doing?) 
*/ if (mThisVersion == (t.mThisVersion + 1)) { return true; } return false; } /* //////////////////////////////////////////////////// // Public API, accessing symbols: //////////////////////////////////////////////////// */ /** * Main access method; will check if actual symbol String exists; * if so, returns it; if not, will create, add and return it. * * @return The symbol matching String in input array */ /* public String findSymbol(char[] buffer, int start, int len) { return findSymbol(buffer, start, len, calcHash(buffer, start, len)); } */ public String findSymbol(char[] buffer, int start, int len, int hash) { // Sanity check: if (len < 1) { return EMPTY_STRING; } hash &= mIndexMask; String sym = mSymbols[hash]; // Optimal case; checking existing primary symbol for hash index: if (sym != null) { // Let's inline primary String equality checking: if (sym.length() == len) { int i = 0; do { if (sym.charAt(i) != buffer[start+i]) { break; } } while (++i < len); // Optimal case; primary match found if (i == len) { return sym; } } // How about collision bucket? Bucket b = mBuckets[hash >> 1]; if (b != null) { sym = b.find(buffer, start, len); if (sym != null) { return sym; } } } // Need to expand? if (mSize >= mSizeThreshold) { rehash(); /* Need to recalc hash; rare occurence (index mask has been * recalculated as part of rehash) */ hash = calcHash(buffer, start, len) & mIndexMask; } else if (!mDirty) { // Or perhaps we need to do copy-on-write? copyArrays(); mDirty = true; } ++mSize; String newSymbol = new String(buffer, start, len); if (mInternStrings) { newSymbol = newSymbol.intern(); } // Ok; do we need to add primary entry, or a bucket? if (mSymbols[hash] == null) { mSymbols[hash] = newSymbol; } else { int bix = hash >> 1; mBuckets[bix] = new Bucket(newSymbol, mBuckets[bix]); } return newSymbol; } /** * Similar to {link #findSymbol}, but will not add passed in symbol * if it is not in symbol table yet. 
*/ public String findSymbolIfExists(char[] buffer, int start, int len, int hash) { // Sanity check: if (len < 1) { return EMPTY_STRING; } hash &= mIndexMask; String sym = mSymbols[hash]; // Optimal case; checking existing primary symbol for hash index: if (sym != null) { // Let's inline primary String equality checking: if (sym.length() == len) { int i = 0; do { if (sym.charAt(i) != buffer[start+i]) { break; } } while (++i < len); // Optimal case; primary match found if (i == len) { return sym; } } // How about collision bucket? Bucket b = mBuckets[hash >> 1]; if (b != null) { sym = b.find(buffer, start, len); if (sym != null) { return sym; } } } return null; } /** * Similar to to {@link #findSymbol(char[],int,int,int)}; used to either * do potentially cheap intern() (if table already has intern()ed version), * or to pre-populate symbol table with known values. */ public String findSymbol(String str) { int len = str.length(); // Sanity check: if (len < 1) { return EMPTY_STRING; } int index = calcHash(str) & mIndexMask; String sym = mSymbols[index]; // Optimal case; checking existing primary symbol for hash index: if (sym != null) { // Let's inline primary String equality checking: if (sym.length() == len) { int i = 0; for (; i < len; ++i) { if (sym.charAt(i) != str.charAt(i)) { break; } } // Optimal case; primary match found if (i == len) { return sym; } } // How about collision bucket? Bucket b = mBuckets[index >> 1]; if (b != null) { sym = b.find(str); if (sym != null) { return sym; } } } // Need to expand? if (mSize >= mSizeThreshold) { rehash(); /* Need to recalc hash; rare occurence (index mask has been * recalculated as part of rehash) */ index = calcHash(str) & mIndexMask; } else if (!mDirty) { // Or perhaps we need to do copy-on-write? copyArrays(); mDirty = true; } ++mSize; if (mInternStrings) { str = str.intern(); } // Ok; do we need to add primary entry, or a bucket? 
if (mSymbols[index] == null) { mSymbols[index] = str; } else { int bix = index >> 1; mBuckets[bix] = new Bucket(str, mBuckets[bix]); } return str; } /** * Implementation of a hashing method for variable length * Strings. Most of the time intention is that this calculation * is done by caller during parsing, not here; however, sometimes * it needs to be done for parsed "String" too. * * @param len Length of String; has to be at least 1 (caller guarantees * this pre-condition) */ public static int calcHash(char[] buffer, int start, int len) { int hash = (int) buffer[0]; for (int i = 1; i < len; ++i) { hash = (hash * 31) + (int) buffer[i]; } return hash; } public static int calcHash(String key) { int hash = (int) key.charAt(0); for (int i = 1, len = key.length(); i < len; ++i) { hash = (hash * 31) + (int) key.charAt(i); } return hash; } /* ////////////////////////////////////////////////////////// // Internal methods ////////////////////////////////////////////////////////// */ /** * Method called when copy-on-write is needed; generally when first * change is made to a derived symbol table. */ private void copyArrays() { String[] oldSyms = mSymbols; int size = oldSyms.length; mSymbols = new String[size]; System.arraycopy(oldSyms, 0, mSymbols, 0, size); Bucket[] oldBuckets = mBuckets; size = oldBuckets.length; mBuckets = new Bucket[size]; System.arraycopy(oldBuckets, 0, mBuckets, 0, size); } /** * Method called when size (number of entries) of symbol table grows * so big that load factor is exceeded. Since size has to remain * power of two, arrays will then always be doubled. Main work * is really redistributing old entries into new String/Bucket * entries. 
*/ private void rehash() { int size = mSymbols.length; int newSize = size + size; String[] oldSyms = mSymbols; Bucket[] oldBuckets = mBuckets; mSymbols = new String[newSize]; mBuckets = new Bucket[newSize >> 1]; // Let's update index mask, threshold, now (needed for rehashing) mIndexMask = newSize - 1; mSizeThreshold += mSizeThreshold; int count = 0; // let's do sanity check /* Need to do two loops, unfortunately, since spillover area is * only half the size: */ for (int i = 0; i < size; ++i) { String symbol = oldSyms[i]; if (symbol != null) { ++count; int index = calcHash(symbol) & mIndexMask; if (mSymbols[index] == null) { mSymbols[index] = symbol; } else { int bix = index >> 1; mBuckets[bix] = new Bucket(symbol, mBuckets[bix]); } } } size >>= 1; for (int i = 0; i < size; ++i) { Bucket b = oldBuckets[i]; while (b != null) { ++count; String symbol = b.getSymbol(); int index = calcHash(symbol) & mIndexMask; if (mSymbols[index] == null) { mSymbols[index] = symbol; } else { int bix = index >> 1; mBuckets[bix] = new Bucket(symbol, mBuckets[bix]); } b = b.getNext(); } } if (count != mSize) { throw new IllegalStateException("Internal error on SymbolTable.rehash(): had "+mSize+" entries; now have "+count+"."); } } /* ////////////////////////////////////////////////////////// // Test/debug support: ////////////////////////////////////////////////////////// */ public double calcAvgSeek() { int count = 0; for (int i = 0, len = mSymbols.length; i < len; ++i) { if (mSymbols[i] != null) { ++count; } } for (int i = 0, len = mBuckets.length; i < len; ++i) { Bucket b = mBuckets[i]; int cost = 2; while (b != null) { count += cost; ++cost; b = b.getNext(); } } return ((double) count) / ((double) mSize); } /* ////////////////////////////////////////////////////////// // Bucket class ////////////////////////////////////////////////////////// */ /** * This class is a symbol table entry. Each entry acts as a node * in a linked list. 
*/ static final class Bucket { private final String mSymbol; private final Bucket mNext; public Bucket(String symbol, Bucket next) { mSymbol = symbol; mNext = next; } public String getSymbol() { return mSymbol; } public Bucket getNext() { return mNext; } public String find(char[] buf, int start, int len) { String sym = mSymbol; Bucket b = mNext; while (true) { // Inlined equality comparison: if (sym.length() == len) { int i = 0; do { if (sym.charAt(i) != buf[start+i]) { break; } } while (++i < len); if (i == len) { return sym; } } if (b == null) { break; } sym = b.getSymbol(); b = b.getNext(); } return null; } public String find(String str) { String sym = mSymbol; Bucket b = mNext; while (true) { if (sym.equals(str)) { return sym; } if (b == null) { break; } sym = b.getSymbol(); b = b.getNext(); } return null; } } } woodstox-4.1.3/src/java/com/ctc/wstx/util/WordSet.java 0000644 0001750 0001750 00000037456 11745427075 023121 0 ustar giovanni giovanni package com.ctc.wstx.util; import java.util.*; /** * An efficient (both memory and time) implementation of a Set used to * verify that a given * word is contained within the set. The general usage pattern is expected * to be such that most checks are positive, ie. that the word indeed * is contained in the set. *
* Performance of the set is comparable to that of {@link java.util.TreeSet} * for Strings, ie. 2-3x slower than {@link java.util.HashSet} when * using pre-constructed Strings. This is generally result of algorithmic * complexity of structures; Word and Tree sets are roughly logarithmic * to the whole data, whereas Hash set is linear to the length of key. * However: *
* Although this is an efficient set for a specific set of usage patterns,
* one restriction is that the full set of words to include has to be
* known before constructing the set. Also, the size of the set is
* limited to total word content of about 20k characters. NOTE: the
* factory methods of this class ({@link #constructSet} and
* {@link #constructRaw}) do not currently verify this limit, so callers
* need to make sure it is not exceeded.
*/
public final class WordSet
{
    /* Layout of the compressed data, as produced by Builder below:
     *
     * - Branch node: entry count, followed by that many (char, link)
     *   pairs, in the order of the (sorted) words. Link is the index of
     *   the node to continue from; or 0 (CHAR_NULL) if the word ends
     *   with the character. If one of the words ends exactly at the
     *   node, the first pair is the "runt" marker (CHAR_NULL, CHAR_NULL).
     * - Leaf node: NEGATIVE_OFFSET + number of remaining characters,
     *   followed by those characters.
     *
     * NOTE(review): since CHAR_NULL doubles as the runt marker, words
     * (and inputs) are presumably not expected to contain NUL characters.
     * Also, links and leaf lengths are stored as chars without range
     * checks, so overly big word sets are not detected here.
     */

    final static char CHAR_NULL = (char) 0;

    /**
     * Offset added to numbers to mark 'negative' numbers. Asymmetric,
     * since range of negative markers needed is smaller than positive
     * numbers...
     */
    final static int NEGATIVE_OFFSET = 0xC000;

    /**
     * This is actually just a guess; but in general linear search should
     * be faster for short sequences (definitely for 4 or less; maybe up
     * to 8 or less?)
     */
    final static int MIN_BINARY_SEARCH = 7;

    /**
     * Compressed presentation of the word set.
     */
    final char[] mData;

    /*
    ////////////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////////////
     */

    private WordSet(char[] data) {
        mData = data;
    }

    /**
     * @param wordSet Sorted set of the words (Strings) to include
     *
     * @return Set that contains exactly the given words
     */
    public static WordSet constructSet(TreeSet wordSet)
    {
        return new WordSet(new Builder(wordSet).construct());
    }

    /**
     * @param wordSet Sorted set of the words (Strings) to include
     *
     * @return Compressed data for the given words, usable with the
     *   static <code>contains</code> methods
     */
    public static char[] constructRaw(TreeSet wordSet)
    {
        return new Builder(wordSet).construct();
    }

    /*
    ////////////////////////////////////////////////
    // Public API
    ////////////////////////////////////////////////
     */

    /**
     * @param buf Buffer that contains the word to check
     * @param start Index of the first character of the word
     * @param end Index following the last character of the word
     */
    public boolean contains(char[] buf, int start, int end) {
        return contains(mData, buf, start, end);
    }

    /**
     * @param data Compressed word set data (see {@link #constructRaw})
     * @param str Buffer that contains the word to check
     * @param start Index of the first character of the word
     * @param end Index following the last character of the word
     *
     * @return True if the word is included in the set data
     */
    public static boolean contains(char[] data, char[] str, int start, int end)
    {
        // Set without any words has no data, and contains nothing
        if (data.length == 0) {
            return false;
        }
        int ptr = 0; // pointer to compressed set data

        do {
            int left = end-start;
            // End of input String? Need to have the runt entry:
            if (left == 0) {
                // (leaf with nothing left occurs for set with just "")
                if (data[ptr] == NEGATIVE_OFFSET) {
                    return true;
                }
                return (data[ptr+1] == CHAR_NULL);
            }
            int count = data[ptr++];
            // Nope, but do we have an end marker?
            if (count >= NEGATIVE_OFFSET) {
                // How many chars do we need to have left to match?
                int expCount = count - NEGATIVE_OFFSET;
                if (left != expCount) {
                    return false;
                }
                while (start < end) {
                    if (data[ptr] != str[start]) {
                        return false;
                    }
                    ++ptr;
                    ++start;
                }
                return true;
            }
            // No, need to find the branch to follow, if any
            int link = findLink(data, ptr, count, str[start++]);
            if (link < 0) { // No match!
                return false;
            }
            ptr = link;
        } while (ptr != 0);

        // If we reached an end state, must match the length
        return (start == end);
    }

    public boolean contains(String str) {
        return contains(mData, str);
    }

    /**
     * @param data Compressed word set data (see {@link #constructRaw})
     * @param str Word to check
     *
     * @return True if the word is included in the set data
     */
    public static boolean contains(char[] data, String str)
    {
        // Set without any words has no data, and contains nothing
        if (data.length == 0) {
            return false;
        }
        // Let's use same vars as array-based code, to keep the two alike
        int ptr = 0; // pointer to compressed set data
        int start = 0;
        int end = str.length();

        do {
            int left = end-start;
            // End of input String? Need to have the runt entry:
            if (left == 0) {
                // (leaf with nothing left occurs for set with just "")
                if (data[ptr] == NEGATIVE_OFFSET) {
                    return true;
                }
                return (data[ptr+1] == CHAR_NULL);
            }
            int count = data[ptr++];
            // Nope, but do we have an end marker?
            if (count >= NEGATIVE_OFFSET) {
                // How many chars do we need to have left to match?
                int expCount = count - NEGATIVE_OFFSET;
                if (left != expCount) {
                    return false;
                }
                while (start < end) {
                    if (data[ptr] != str.charAt(start)) {
                        return false;
                    }
                    ++ptr;
                    ++start;
                }
                return true;
            }
            // No, need to find the branch to follow, if any
            int link = findLink(data, ptr, count, str.charAt(start++));
            if (link < 0) { // No match!
                return false;
            }
            ptr = link;
        } while (ptr != 0);

        // If we reached an end state, must match the length
        return (start == end);
    }

    /*
    ////////////////////////////////////////////////
    // Private methods
    ////////////////////////////////////////////////
     */

    /**
     * Helper method that finds the entry for given character from
     * entries of a single branch node.
     *
     * @param data Compressed word set data
     * @param first Index of the first (char, link) pair of the node
     * @param count Number of pairs the node has
     * @param c Character to find the entry for
     *
     * @return Link of the matching entry (0 for "word ends here"), if
     *   one was found; -1 if not
     */
    private static int findLink(char[] data, int first, int count, char c)
    {
        // Linear or binary search?
        if (count < MIN_BINARY_SEARCH) {
            /* Only entries of this node may be checked: node can have
             * just a single entry (words "abc" and "abd" share the
             * first 2 nodes), in which case what follows is other data.
             */
            for (int ix = first, last = first + (count << 1); ix < last; ix += 2) {
                if (data[ix] == c) {
                    return data[ix+1];
                }
            }
            return -1;
        }
        int low = 0;
        int high = count-1;
        while (low <= high) {
            int mid = (low + high) >>> 1;
            int ix = first + (mid << 1);
            int diff = data[ix] - c;
            if (diff > 0) { // char was 'higher', need to go down
                high = mid-1;
            } else if (diff < 0) { // lower, need to go up
                low = mid+1;
            } else { // match
                return data[ix+1];
            }
        }
        return -1;
    }

    /*
    ////////////////////////////////////////////////
    // Helper classes
    ////////////////////////////////////////////////
     */

    private final static class Builder
    {
        final String[] mWords;

        char[] mData;

        /**
         * Number of characters currently used from mData
         */
        int mSize;

        public Builder(TreeSet wordSet) {
            int wordCount = wordSet.size();
            mWords = new String[wordCount];
            wordSet.toArray(mWords);

            /* Let's guess approximate size we should need, assuming
             * average word length of 6 characters, and 100% overhead
             * in structure:
             */
            int size = wordCount * 12;
            if (size < 256) {
                size = 256;
            }
            mData = new char[size];
        }

        /**
         * @return Raw character data that contains compressed structure
         *   of the word set; empty array if there are no words
         */
        public char[] construct()
        {
            // No words, no data (contains() methods know about this)
            if (mWords.length == 0) {
                return new char[0];
            }
            // Let's check degenerate case of 1 word:
            if (mWords.length == 1) {
                constructLeaf(0, 0);
            } else {
                constructBranch(0, 0, mWords.length);
            }

            char[] result = new char[mSize];
            System.arraycopy(mData, 0, result, 0, mSize);
            return result;
        }

        /**
         * Method that is called recursively to build the data
         * representation for a branch, ie. part of word set tree
         * that still has more than one ending
         *
         * @param charIndex Index of the character in words to consider
         *   for this round
         * @param start Index of the first word to be processed
         * @param end Index of the word after last word to be processed
         *   (so that number of words is <code>end - start</code>)
         */
        private void constructBranch(int charIndex, int start, int end)
        {
            // If more than one entry, need to divide into groups

            // First, need to add placeholder for branch count:
            if (mSize >= mData.length) {
                expand(1);
            }
            final int countIndex = mSize;
            mData[mSize++] = 0; // placeholder!

            /* structStart will point to second char of first entry
             * (which will temporarily have entry count, eventually 'link'
             * to continuation)
             */
            int structStart = mSize + 1;
            int groupCount = 0;
            int groupStart = start;
            String[] words = mWords;

            /* First thing we need to do is a special check for the
             * first entry -- it may be "runt" word, one that has no
             * more chars but also has a longer version ("id" vs.
             * "identifier"). If there is such a word, it'll always
             * be first in alphabetic ordering:
             */
            final boolean gotRunt = (words[groupStart].length() == charIndex);
            if (gotRunt) {
                if ((mSize + 2) > mData.length) {
                    expand(2);
                }
                /* Nulls mark both imaginary branching null char and
                 * "missing link" to the rest
                 */
                mData[mSize++] = CHAR_NULL;
                mData[mSize++] = CHAR_NULL;

                // Ok, let's then ignore that entry
                ++groupStart;
                ++groupCount;
            }

            // Ok, then, let's find the ('real') groupings:
            while (groupStart < end) {
                // Inner loop, let's find the group:
                char c = words[groupStart].charAt(charIndex);
                int j = groupStart+1;
                while (j < end && words[j].charAt(charIndex) == c) {
                    ++j;
                }
                /* Ok, let's store the char in there, along with count;
                 * count will be needed in second pass, and will then get
                 * overwritten with actual data later on
                 */
                if ((mSize + 2) > mData.length) {
                    expand(2);
                }
                mData[mSize++] = c;
                mData[mSize++] = (char) (j - groupStart); // entries in group
                groupStart = j;
                ++groupCount;
            }

            /* Ok, groups found; need to loop through them, recursively
             * calling branch and/or leaf methods
             */
            // first let's output the header, ie. group count:
            mData[countIndex] = (char) groupCount;
            groupStart = start;

            // Do we have the "runt" to skip?
            if (gotRunt) {
                structStart += 2;
                ++groupStart;
            }

            int structEnd = mSize;
            ++charIndex;
            /* Note: has to access array via mData (not a local copy),
             * since nested calls may end up reallocating it.
             */
            for (; structStart < structEnd; structStart += 2) {
                groupCount = (int) mData[structStart]; // no sign expansion, is ok
                // Ok, count gotten, can now put the 'link' (pointer) in there
                mData[structStart] = (char) mSize;
                if (groupCount == 1) {
                    /* One optimization; if it'd lead to a single runt
                     * entry, we can just add 'null' link:
                     */
                    String word = words[groupStart];
                    if (word.length() == charIndex) {
                        mData[structStart] = CHAR_NULL;
                    } else { // otherwise, let's just create end state:
                        constructLeaf(charIndex, groupStart);
                    }
                } else {
                    constructBranch(charIndex, groupStart,
                                    groupStart + groupCount);
                }
                groupStart += groupCount;
            }

            // done!
        }

        /**
         * Method called to add leaf entry to word set; basically
         * "here is the rest of the only matching word"
         *
         * @param charIndex Index of the first character of the word
         *   that has not yet been included in the structure
         * @param wordIndex Index of the word in the word array
         */
        private void constructLeaf(int charIndex, int wordIndex)
        {
            String word = mWords[wordIndex];
            int len = word.length();
            char[] data = mData;

            // need room for 1 header char, rest of the word
            if ((mSize + len + 1) >= data.length) {
                data = expand(len+1);
            }

            data[mSize++] = (char) (NEGATIVE_OFFSET + (len - charIndex));
            for (; charIndex < len; ++charIndex) {
                data[mSize++] = word.charAt(charIndex);
            }
        }

        /**
         * Method called to grow the data array, so that it has room
         * for at least given number of additional characters.
         *
         * @return The new (bigger) data array
         */
        private char[] expand(int needSpace)
        {
            char[] old = mData;
            int len = old.length;
            int newSize = len + ((len < 4096) ? len : (len >> 1));

            /* Let's verify we get enough; should always be true but
             * better safe than sorry
             */
            if (newSize < (mSize + needSpace)) {
                newSize = mSize + needSpace + 64;
            }
            mData = new char[newSize];
            System.arraycopy(old, 0, mData, 0, len);
            return mData;
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/WordResolver.java 0000644 0001750 0001750 00000046505 11745427075 024162 0 ustar giovanni giovanni package com.ctc.wstx.util;
import java.util.*;
/**
* A specialized Map/Symbol table - like data structure that can be used
* for both checking whether a word (passed in as a char array) exists
* in certain set of words AND getting that word as a String.
* It is reasonably efficient both time and memory-wise, at least for
* certain use cases; specifically, if there is no existing key to use,
* it is a more efficient way to get to a shared copy of that String.
* The general usage pattern is expected
* to be such that most checks are positive, ie. that the word indeed
* is contained in the structure.
*<p>
* Although this is an efficient data struct for a specific set of usage
* patterns, one restriction is that the full set of words to include has to
* be known before constructing the instance. Also, the size of the set is
* limited to total word content of about 20k characters.
*<p>
* TODO: Should document the internal data structure... */ public final class WordResolver { /** * Maximum number of words (Strings) an instance can contain */ public final static int MAX_WORDS = 0x2000; final static char CHAR_NULL = (char) 0; /** * Offset added to numbers to mark 'negative' numbers. Asymmetric, * since range of negative markers needed is smaller than positive * numbers... */ final static int NEGATIVE_OFFSET = 0x10000 - MAX_WORDS; /** * This is actually just a guess; but in general linear search should * be faster for short sequences (definitely for 4 or less; maybe up * to 8 or less?) */ final static int MIN_BINARY_SEARCH = 7; /** * Compressed presentation of the word set. */ final char[] mData; /** * Array of actual words returned resolved for matches. */ final String[] mWords; /* //////////////////////////////////////////////// // Life-cycle //////////////////////////////////////////////// */ private WordResolver(String[] words, char[] index) { mWords = words; mData = index; } /** * Tries to construct an instance given ordered set of words. *
* Note: currently maximum number of words that can be contained
* is limited to {@link #MAX_WORDS}; additionally, maximum length
* of all such words can not exceed roughly 28000 characters.
*
* @return WordResolver constructed for given set of words, if
* the word set size is not too big; null to indicate "too big"
* instance.
*/
public static WordResolver constructInstance(TreeSet wordSet)
{
    // Too many words to index: signaled with null, not an exception
    return (wordSet.size() > MAX_WORDS)
        ? null
        : new Builder(wordSet).construct();
}
/*
////////////////////////////////////////////////
// Public API
////////////////////////////////////////////////
*/
/**
* Accessor for the number of words this resolver was built from
* (length of the internal word array).
*
* @return Number of words contained
*/
public int size() {
return mWords.length;
}
/*
public int indexSize() {
return mData.length;
}
*/
/**
 * Method for finding the shared String instance for a word that is
 * contained in a character array.
 *
 * @param str Character array that contains the word to find
 * @param start Index of the first character of the word
 * @param end Index following the last character of the word,
 *   so that <code>end - start</code> equals word length (similar
 *   to the way <code>String.substring()</code> has).
 *
 * @return (Shared) string instance of the word, if it exists in
 *   the word set; null if not.
 */
public String find(char[] str, final int start, final int end)
{
    char[] data = mData;

    // 03-Jan-2006, TSa: Special case; one entry
    if (data == null) {
        return findFromOne(str, start, end);
    }

    int ptr = 0; // pointer to compressed set data
    int offset = start;

    while (true) {
        // End of input String? Need to match the runt entry!
        if (offset == end) {
            if (data[ptr+1] == CHAR_NULL) {
                return mWords[data[ptr+2] - NEGATIVE_OFFSET];
            }
            return null;
        }

        int count = data[ptr++];
        // Need to find the branch to follow, if any
        char c = str[offset++];
        int link = -1; // links are chars, so never negative

        // Linear or binary search?
        if (count < MIN_BINARY_SEARCH) {
            /* Only the entries of this node may be checked: node can
             * have just one entry (when all its words continue with
             * the same character), and what follows it is other data.
             */
            for (int ix = ptr, branchEnd = ptr + (count << 1); ix < branchEnd; ix += 2) {
                if (data[ix] == c) {
                    link = data[ix+1];
                    break;
                }
            }
        } else { // Ok, binary search:
            int low = 0;
            int high = count-1;
            while (low <= high) {
                int mid = (low + high) >>> 1;
                int ix = ptr + (mid << 1);
                int diff = data[ix] - c;
                if (diff > 0) { // char was 'higher', need to go down
                    high = mid-1;
                } else if (diff < 0) { // lower, need to go up
                    low = mid+1;
                } else { // match (so far)
                    link = data[ix+1];
                    break;
                }
            }
        }
        if (link < 0) { // No match!
            return null;
        }
        ptr = link;

        // Ok; now, is it the end?
        if (ptr >= NEGATIVE_OFFSET) {
            // Yes, link identifies the only word that can match
            String word = mWords[ptr - NEGATIVE_OFFSET];
            int expLen = (end - start);
            if (word.length() != expLen) {
                return null;
            }
            // Beginning of the word was matched already; check the rest
            for (int i = offset - start; offset < end; ++i, ++offset) {
                if (word.charAt(i) != str[offset]) {
                    return null;
                }
            }
            return word;
        }
    }
    // never gets here
}
// Handles the one-word case (no index data): compares the given buffer
// segment directly against the first entry of the word array.
private String findFromOne(char[] str, final int start, final int end)
{
    final String only = mWords[0];

    // Lengths have to match first
    if (only.length() != (end - start)) {
        return null;
    }
    // ... and then all the characters
    for (int src = start, ix = 0; src < end; ++src, ++ix) {
        if (str[src] != only.charAt(ix)) {
            return null;
        }
    }
    return only;
}
/**
 * Method for finding the shared String instance that is equal to
 * the given word.
 *
 * @param str Word to find
 *
 * @return (Shared) string instance of the word, if it exists in
 *   the word set; null if not.
 */
public String find(String str)
{
    char[] data = mData;

    // 03-Jan-2006, TSa: Special case; one entry
    if (data == null) {
        String word = mWords[0];
        return word.equals(str) ? word : null;
    }

    int ptr = 0; // pointer to compressed set data
    int offset = 0;
    int end = str.length();

    while (true) {
        // End of input String? Need to match the runt entry!
        if (offset == end) {
            if (data[ptr+1] == CHAR_NULL) {
                return mWords[data[ptr+2] - NEGATIVE_OFFSET];
            }
            return null;
        }

        int count = data[ptr++];
        // Need to find the branch to follow, if any
        char c = str.charAt(offset++);
        int link = -1; // links are chars, so never negative

        // Linear or binary search?
        if (count < MIN_BINARY_SEARCH) {
            /* Only the entries of this node may be checked: node can
             * have just one entry (when all its words continue with
             * the same character), and what follows it is other data.
             */
            for (int ix = ptr, branchEnd = ptr + (count << 1); ix < branchEnd; ix += 2) {
                if (data[ix] == c) {
                    link = data[ix+1];
                    break;
                }
            }
        } else { // Ok, binary search:
            int low = 0;
            int high = count-1;
            while (low <= high) {
                int mid = (low + high) >>> 1;
                int ix = ptr + (mid << 1);
                int diff = data[ix] - c;
                if (diff > 0) { // char was 'higher', need to go down
                    high = mid-1;
                } else if (diff < 0) { // lower, need to go up
                    low = mid+1;
                } else { // match (so far)
                    link = data[ix+1];
                    break;
                }
            }
        }
        if (link < 0) { // No match!
            return null;
        }
        ptr = link;

        // Ok; now, is it the end?
        if (ptr >= NEGATIVE_OFFSET) {
            // Yes, link identifies the only word that can match
            String word = mWords[ptr - NEGATIVE_OFFSET];
            if (word.length() != str.length()) {
                return null;
            }
            // Beginning of the word was matched already; check the rest
            for (; offset < end; ++offset) {
                if (word.charAt(offset) != str.charAt(offset)) {
                    return null;
                }
            }
            return word;
        }
    }
    // never gets here
}
/*
////////////////////////////////////////////////
// Re-defined public methods
////////////////////////////////////////////////
*/
// Produces a comma-separated listing of the contained words, in the
// order they are stored in.
public String toString()
{
    final int count = mWords.length;
    StringBuffer sb = new StringBuffer(16 + (count << 3));
    if (count > 0) {
        sb.append(mWords[0]);
        for (int i = 1; i < count; ++i) {
            sb.append(", ").append(mWords[i]);
        }
    }
    return sb.toString();
}
/*
////////////////////////////////////////////////
// Private methods
////////////////////////////////////////////////
*/
/*
////////////////////////////////////////////////
// Helper classes
////////////////////////////////////////////////
*/
private final static class Builder
{
final String[] mWords;
char[] mData;
/**
* Number of characters currently used from mData
*/
int mSize;
public Builder(TreeSet wordSet)
{
int wordCount = wordSet.size();
mWords = new String[wordCount];
wordSet.toArray(mWords);
/* 03-Jan-2006, TSa: Special case: just one entry; if so,
* let's leave char array null, and just have the String
* array with one entry.
*/
if (wordCount < 2) {
if (wordCount == 0) {
throw new IllegalArgumentException(); // not legal
}
mData = null;
} else {
/* Let's guess approximate size we should need, assuming
* average word length of 6 characters, overhead matching
* compression (ie. about 1-to-1 ratio overall)
*/
int size = wordCount * 6;
if (size < 256) {
size = 256;
}
mData = new char[size];
}
}
/**
 * Method that builds the index data (if any is needed) and wraps it,
 * along with the words, in a resolver instance.
 *
 * @return Resolver for the words of this builder; or null if the
 *   index data would take more than <code>NEGATIVE_OFFSET</code> chars
 */
public WordResolver construct()
{
    /* 03-Jan-2006, TSa: Special case: just one entry; if so,
     * char array was left as null, and there is nothing to build.
     */
    if (mData == null) {
        return new WordResolver(mWords, null);
    }

    constructBranch(0, 0, mWords.length);

    // Too big?
    if (mSize > NEGATIVE_OFFSET) {
        return null;
    }

    // Only the part actually used is passed along
    char[] index = new char[mSize];
    System.arraycopy(mData, 0, index, 0, mSize);
    return new WordResolver(mWords, index);
}
/**
* Method that is called recursively to build the data
* representation for a branch, ie. part of word set tree
* that still has more than one ending
*
* @param charIndex Index of the character in words to consider
* for this round
* @param start Index of the first word to be processed
* @param end Index of the word after last word to be processed
* (so that number of words is end - start - 1
*/
private void constructBranch(int charIndex, int start, int end)
{
// If more than one entry, need to divide into groups
// First, need to add placeholder for branch count:
if (mSize >= mData.length) {
expand(1);
}
mData[mSize++] = 0; // placeholder!
/* structStart will point to second char of first entry
* (which will temporarily have entry count, eventually 'link'
* to continuation)
*/
int structStart = mSize + 1;
int groupCount = 0;
int groupStart = start;
String[] words = mWords;
boolean gotRunt;
/* First thing we need to do is a special check for the
* first entry -- it may be "runt" word, one that has no
* more chars but also has a longer version ("id" vs.
* "identifier"). If so, it needs to be marked; this is done
* by adding a special entry before other entries (since such
* entry would always be ordered first alphabetically)
*/
if (words[groupStart].length() == charIndex) { // yup, got one:
if ((mSize + 2) > mData.length) {
expand(2);
}
/* First null marks the "missing" char (or, end-of-word);
* and then we need the index
*/
mData[mSize++] = CHAR_NULL;
mData[mSize++] = (char) (NEGATIVE_OFFSET + groupStart);
// Ok, let's then ignore that entry
++groupStart;
++groupCount;
gotRunt = true;
} else {
gotRunt = false;
}
// Ok, then, let's find the ('real') groupings:
while (groupStart < end) {
// Inner loop, let's find the group:
char c = words[groupStart].charAt(charIndex);
int j = groupStart+1;
while (j < end && words[j].charAt(charIndex) == c) {
++j;
}
/* Ok, let's store the char in there, along with count;
* count will be needed in second, and will then get
* overwritten with actual data later on
*/
if ((mSize + 2) > mData.length) {
expand(2);
}
mData[mSize++] = c;
mData[mSize++] = (char) (j - groupStart); // entries in group
groupStart = j;
++groupCount;
}
/* Ok, groups found; need to loop through them, recursively
* calling branch and/or leaf methods
*/
// first let's output the header, ie. group count:
mData[structStart-2] = (char) groupCount;
groupStart = start;
// Do we have the "runt" to skip?
if (gotRunt) {
structStart += 2;
++groupStart;
}
int structEnd = mSize;
++charIndex;
for (; structStart < structEnd; structStart += 2) {
groupCount = (int) mData[structStart]; // no sign expansion, is ok
/* Ok, count gotten, can either create a branch (if more than
* one entry) or leaf (just one entry)
*/
if (groupCount == 1) {
mData[structStart] = (char) (NEGATIVE_OFFSET + groupStart);
} else {
mData[structStart] = (char) mSize;
constructBranch(charIndex, groupStart,
groupStart + groupCount);
}
groupStart += groupCount;
}
// done!
}
private char[] expand(int needSpace)
{
char[] old = mData;
int len = old.length;
int newSize = len + ((len < 4096) ? len : (len >> 1));
/* Let's verify we get enough; should always be true but
* better safe than sorry
*/
if (newSize < (mSize + needSpace)) {
newSize = mSize + needSpace + 64;
}
mData = new char[newSize];
System.arraycopy(old, 0, mData, 0, len);
return mData;
}
}
/*
////////////////////////////////////////////////////
// Simple test driver, useful for debugging
// (uncomment if needed -- commented out so it won't
// affect coverage testing)
////////////////////////////////////////////////////
*/
/*
public static void main(String[] args)
{
if (args.length < 2) {
System.err.println("Usage: "+WordResolver.class+" word1 [word2] ... [wordN] keyword");
System.exit(1);
}
String key = args[args.length-1];
TreeSet words = new TreeSet();
for (int i = 0; i < args.length-1; ++i) {
words.add(args[i]);
}
WordResolver set = WordResolver.constructInstance(words);
//outputData(set.mData);
// Ok, and then the test!
char[] keyA = new char[key.length() + 4];
key.getChars(0, key.length(), keyA, 2);
//System.out.println("Word '"+key+"' found via array search: "+WordResolver.find(data, keyA, 2, key.length() + 2));
System.out.println("Word '"+key+"' found via array search: "+set.find(keyA, 2, key.length() + 2));
}
static void outputData(char[] data)
{
for (int i = 0; i < data.length; ++i) {
char c = data[i];
System.out.print(Integer.toHexString(i)+" ["+Integer.toHexString(c)+"]");
if (c > 32 && c <= 127) { // printable char (letter)
System.out.println(" -> '"+c+"'");
} else {
System.out.println();
}
}
}
*/
}
woodstox-4.1.3/src/java/com/ctc/wstx/util/TextBuffer.java 0000644 0001750 0001750 00000132071 11745427075 023575 0 ustar giovanni giovanni package com.ctc.wstx.util;
import java.io.*;
import java.util.ArrayList;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.validation.XMLValidator;
import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.dtd.DTDEventListener;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.StringUtil;
/**
* TextBuffer is a class similar to {@link StringBuffer}, with
* following differences:
*
* Over time more and more cruft has accumulated here, mostly to
* support efficient access to collected text. Since access is
* easiest to do efficiently using callbacks, this class now needs
* to know the interfaces of SAX classes and validators.
*
* Notes about usage: for debugging purposes, it's suggested to use * {@link #toString} method, as opposed to * {@link #contentsAsArray} or {@link #contentsAsString}. Internally * resulting code paths may or may not be different, WRT caching. * * @author Tatu Saloranta */ public final class TextBuffer { /* 23-Mar-2006, TSa: Memory buffer clearing is a significant overhead * for small documents, no need to use huge buffer -- it will expand * as necessary for larger docs, but commonly text segments just * aren't that long. */ /** * Size of the first text segment buffer to allocate; need not contain * the biggest segment, since new ones will get allocated as needed. * However, it's sensible to use something that often is big enough * to contain segments. */ final static int DEF_INITIAL_BUFFER_SIZE = 500; // 1k /** * We will also restrict maximum length of individual segments * to allocate (not including cases where we must return a single * segment). Value is somewhat arbitrary, let's use it so that * memory used is no more than 1/2 megabytes. */ final static int MAX_SEGMENT_LENGTH = 256 * 1024; final static int INT_SPACE = 0x0020; // // // Configuration: private final ReaderConfig mConfig; // // // Shared read-only input buffer: /** * Shared input buffer; stored here in case some input can be returned * as is, without being copied to collector's own buffers. Note that * this is read-only for this Objet. */ private char[] mInputBuffer; /** * Character offset of first char in input buffer; -1 to indicate * that input buffer currently does not contain any useful char data */ private int mInputStart; /** * When using shared buffer, offset after the last character in * shared buffer */ private int mInputLen; // // // Internal non-shared collector buffers: private boolean mHasSegments = false; /** * List of segments prior to currently active segment. 
*/ private ArrayList mSegments; // // // Currently used segment; not (yet) contained in mSegments /** * Amount of characters in segments in {@link mSegments} */ private int mSegmentSize; private char[] mCurrentSegment; /** * Number of characters in currently active (last) segment */ private int mCurrentSize; // // // Temporary caching for Objects to return /** * String that will be constructed when the whole contents are * needed; will be temporarily stored in case asked for again. */ private String mResultString; private char[] mResultArray; // // // Canonical indentation objects (up to 32 spaces, 8 tabs) public final static int MAX_INDENT_SPACES = 32; public final static int MAX_INDENT_TABS = 8; // Let's add one more space at the end, for safety... private final static String sIndSpaces = // 123456789012345678901234567890123 "\n "; private final static char[] sIndSpacesArray = sIndSpaces.toCharArray(); private final static String[] sIndSpacesStrings = new String[sIndSpacesArray.length]; private final static String sIndTabs = // 1 2 3 4 5 6 7 8 9 "\n\t\t\t\t\t\t\t\t\t"; private final static char[] sIndTabsArray = sIndTabs.toCharArray(); private final static String[] sIndTabsStrings = new String[sIndTabsArray.length]; /* ////////////////////////////////////////////// // Life-cycle ////////////////////////////////////////////// */ private TextBuffer(ReaderConfig cfg) { mConfig = cfg; } public static TextBuffer createRecyclableBuffer(ReaderConfig cfg) { return new TextBuffer(cfg); } public static TextBuffer createTemporaryBuffer() { return new TextBuffer(null); } /** * Method called to indicate that the underlying buffers should now * be recycled if they haven't yet been recycled. Although caller * can still use this text buffer, it is not advisable to call this * method if that is likely, since next time a buffer is needed, * buffers need to reallocated. * Note: calling this method automatically also clears contents * of the buffer. 
*/ public void recycle(boolean force) { if (mConfig != null && mCurrentSegment != null) { if (force) { /* If we are allowed to wipe out all existing data, it's * quite easy; we'll just wipe out contents, and return * biggest buffer: */ resetWithEmpty(); } else { /* But if there's non-shared data (ie. buffer is still * in use), can't return it yet: */ if (mInputStart < 0 && (mSegmentSize + mCurrentSize) > 0) { return; } // If no data (or only shared data), can continue if (mSegments != null && mSegments.size() > 0) { // No need to use anything from list, curr segment not null mSegments.clear(); mSegmentSize = 0; } } char[] buf = mCurrentSegment; mCurrentSegment = null; mConfig.freeMediumCBuffer(buf); } } /** * Method called to clear out any content text buffer may have, and * initializes buffer to use non-shared data. */ public void resetWithEmpty() { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } mCurrentSize = 0; } /** * Similar to {@link #resetWithEmpty}, but actively marks current * text content to be empty string (whereas former method leaves * content as undefined). */ public void resetWithEmptyString() { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = ""; mResultArray = null; if (mHasSegments) { clearSegments(); } mCurrentSize = 0; } /** * Method called to initialize the buffer with a shared copy of data; * this means that buffer will just have pointers to actual data. It * also means that if anything is to be appended to the buffer, it * will first have to unshare it (make a local copy). 
*/ public void resetWithShared(char[] buf, int start, int len) { // Let's first mark things we need about input buffer mInputBuffer = buf; mInputStart = start; mInputLen = len; // Then clear intermediate values, if any: mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } } public void resetWithCopy(char[] buf, int start, int len) { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } else { if (mCurrentSegment == null) { mCurrentSegment = allocBuffer(len); } mCurrentSize = mSegmentSize = 0; } append(buf, start, len); } /** * Method called to make sure there is a non-shared segment to use, without * appending any content yet. */ public void resetInitialized() { resetWithEmpty(); if (mCurrentSegment == null) { mCurrentSegment = allocBuffer(0); } } private final char[] allocBuffer(int needed) { int size = Math.max(needed, DEF_INITIAL_BUFFER_SIZE); char[] buf = null; if (mConfig != null) { buf = mConfig.allocMediumCBuffer(size); if (buf != null) { return buf; } } return new char[size]; } private final void clearSegments() { mHasSegments = false; /* Since the current segment should be the biggest one * (as we allocate 50% bigger each time), let's retain it, * and clear others */ mSegments.clear(); mCurrentSize = mSegmentSize = 0; } public void resetWithIndentation(int indCharCount, char indChar) { mInputStart = 0; mInputLen = indCharCount+1; String text; if (indChar == '\t') { // tabs? mInputBuffer = sIndTabsArray; text = sIndTabsStrings[indCharCount]; if (text == null) { sIndTabsStrings[indCharCount] = text = sIndTabs.substring(0, mInputLen); } } else { // nope, spaces (should assert indChar?) 
mInputBuffer = sIndSpacesArray; text = sIndSpacesStrings[indCharCount]; if (text == null) { sIndSpacesStrings[indCharCount] = text = sIndSpaces.substring(0, mInputLen); } } mResultString = text; /* Should not need the explicit non-shared array; no point in * pre-populating it (can be changed if this is not true) */ mResultArray = null; // And then reset internal input buffers, if necessary: if (mSegments != null && mSegments.size() > 0) { mSegments.clear(); mCurrentSize = mSegmentSize = 0; } } /* ////////////////////////////////////////////// // Accessors for implementing StAX interface: ////////////////////////////////////////////// */ /** * @return Number of characters currently stored by this collector */ public int size() { if (mInputStart >= 0) { // shared copy from input buf return mInputLen; } // local segmented buffers return mSegmentSize + mCurrentSize; } public int getTextStart() { /* Only shared input buffer can have non-zero offset; buffer * segments start at 0, and if we have to create a combo buffer, * that too will start from beginning of the buffer */ return (mInputStart >= 0) ? mInputStart : 0; } public char[] getTextBuffer() { // Are we just using shared input buffer? if (mInputStart >= 0) { return mInputBuffer; } // Nope; but does it fit in just one segment? 
if (mSegments == null || mSegments.size() == 0) { return mCurrentSegment; } // Nope, need to have/create a non-segmented array and return it return contentsAsArray(); } /* ///////////////////////////////////////////////// // Accessors for implementing StAX2 Typed access ///////////////////////////////////////////////// */ /** * Generic pass-through method which call given decoder * with accumulated data */ public void decode(TypedValueDecoder tvd) throws IllegalArgumentException { char[] buf; int start, end; if (mInputStart >= 0) { // shared buffer, common case buf = mInputBuffer; start = mInputStart; end = start + mInputLen; } else { buf = getTextBuffer(); start = 0; end = mSegmentSize + mCurrentSize; } // Need to trim first while (true) { if (start >= end) { tvd.handleEmptyValue(); return; } if (!StringUtil.isSpace(buf[start])) { break; } ++start; } // Trailing space? while (--end > start && StringUtil.isSpace(buf[end])) { } tvd.decode(buf, start, end+1); } /** * Pass-through decode method called to find find the next token, * decode it, and repeat the process as long as there are more * tokens and the array decoder accepts more entries. * All tokens processed will be "consumed", such that they will * not be visible via buffer. * * @return Number of tokens decoded; 0 means that no (more) tokens * were found from this buffer. */ public int decodeElements(TypedArrayDecoder tad, InputProblemReporter rep) throws TypedXMLStreamException { int count = 0; /* First: for simplicity, we require a single flat buffer to * decode from. 
Second: to be able to update start location * (to keep track of what's available), we need to fake that * we are using a shared buffer (since that has offset) */ if (mInputStart < 0) { if (mHasSegments) { mInputBuffer = buildResultArray(); mInputLen = mInputBuffer.length; // let's also clear segments since they are not needed any more clearSegments(); } else { // just current buffer, easier to fake mInputBuffer = mCurrentSegment; mInputLen = mCurrentSize; } mInputStart = 0; } // And then let's decode int ptr = mInputStart; final int end = ptr + mInputLen; final char[] buf = mInputBuffer; int start = ptr; try { decode_loop: while (ptr < end) { // First, any space to skip? while (buf[ptr] <= INT_SPACE) { if (++ptr >= end) { break decode_loop; } } // Then let's figure out non-space char (token) start = ptr; ++ptr; while (ptr < end && buf[ptr] > INT_SPACE) { ++ptr; } ++count; int tokenEnd = ptr; ++ptr; // to skip trailing space (or, beyond end) // And there we have it if (tad.decodeValue(buf, start, tokenEnd)) { break; } } } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception /* Hmmh. This is probably not an accurate location... but * we can't do much better as content we have has been * normalized already. */ Location loc = rep.getLocation(); // -1 to move it back after being advanced earlier (to skip trailing space) String lexical = new String(buf, start, (ptr-start-1)); throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae); } finally { mInputStart = ptr; mInputLen = end-ptr; } return count; } /** * Method that needs to be called to configure given base64 decoder * with textual contents collected by this buffer. * * @param dec Decoder that will need data * @param firstChunk Whether this is the first segment fed or not; * if it is, state needs to be fullt reset; if not, only partially. 
*/ public void initBinaryChunks(Base64Variant v, CharArrayBase64Decoder dec, boolean firstChunk) { if (mInputStart < 0) { // non-shared dec.init(v, firstChunk, mCurrentSegment, 0, mCurrentSize, mSegments); } else { // shared dec.init(v, firstChunk, mInputBuffer, mInputStart, mInputLen, null); } } /* ////////////////////////////////////////////// // Accessors: ////////////////////////////////////////////// */ public String contentsAsString() { if (mResultString == null) { // Has array been requested? Can make a shortcut, if so: if (mResultArray != null) { mResultString = new String(mResultArray); } else { // Do we use shared array? if (mInputStart >= 0) { if (mInputLen < 1) { return (mResultString = ""); } mResultString = new String(mInputBuffer, mInputStart, mInputLen); } else { // nope... need to copy // But first, let's see if we have just one buffer int segLen = mSegmentSize; int currLen = mCurrentSize; if (segLen == 0) { // yup mResultString = (currLen == 0) ? "" : new String(mCurrentSegment, 0, currLen); } else { // no, need to combine StringBuffer sb = new StringBuffer(segLen + currLen); // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, mCurrentSize); mResultString = sb.toString(); } } } } return mResultString; } /** * Similar to {@link #contentsAsString}, but constructs a StringBuffer * for further appends. 
* * @param extraSpace Number of extra characters to preserve in StringBuffer * beyond space immediately needed to hold the contents */ public StringBuffer contentsAsStringBuffer(int extraSpace) { if (mResultString != null) { return new StringBuffer(mResultString); } if (mResultArray != null) { StringBuffer sb = new StringBuffer(mResultArray.length + extraSpace); sb.append(mResultArray, 0, mResultArray.length); return sb; } if (mInputStart >= 0) { // shared array if (mInputLen < 1) { return new StringBuffer(); } StringBuffer sb = new StringBuffer(mInputLen + extraSpace); sb.append(mInputBuffer, mInputStart, mInputLen); return sb; } int segLen = mSegmentSize; int currLen = mCurrentSize; StringBuffer sb = new StringBuffer(segLen + currLen + extraSpace); // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, currLen); return sb; } public void contentsToStringBuffer(StringBuffer sb) { if (mResultString != null) { sb.append(mResultString); } else if (mResultArray != null) { sb.append(mResultArray); } else if (mInputStart >= 0) { // shared array if (mInputLen > 0) { sb.append(mInputBuffer, mInputStart, mInputLen); } } else { // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, mCurrentSize); } } public char[] contentsAsArray() { char[] result = mResultArray; if (result == null) { mResultArray = result = buildResultArray(); } return result; } public int contentsToArray(int srcStart, char[] dst, int dstStart, int len) { // Easy to copy from shared buffer: if (mInputStart >= 0) { int amount = mInputLen - srcStart; if (amount > len) { amount = len; } else if (amount < 0) { amount = 0; } if (amount 
> 0) { System.arraycopy(mInputBuffer, mInputStart+srcStart, dst, dstStart, amount); } return amount; } /* Could also check if we have array, but that'd only help with * braindead clients that get full array first, then segments... * which hopefully aren't that common */ // Copying from segmented array is bit more involved: int totalAmount = 0; if (mSegments != null) { for (int i = 0, segc = mSegments.size(); i < segc; ++i) { char[] segment = (char[]) mSegments.get(i); int segLen = segment.length; int amount = segLen - srcStart; if (amount < 1) { // nothing from this segment? srcStart -= segLen; continue; } if (amount >= len) { // can get rest from this segment? System.arraycopy(segment, srcStart, dst, dstStart, len); return (totalAmount + len); } // Can get some from this segment, offset becomes zero: System.arraycopy(segment, srcStart, dst, dstStart, amount); totalAmount += amount; dstStart += amount; len -= amount; srcStart = 0; } } // Need to copy anything from last segment? if (len > 0) { int maxAmount = mCurrentSize - srcStart; if (len > maxAmount) { len = maxAmount; } if (len > 0) { // should always be true System.arraycopy(mCurrentSegment, srcStart, dst, dstStart, len); totalAmount += len; } } return totalAmount; } /** * Method that will stream contents of this buffer into specified * Writer. */ public int rawContentsTo(Writer w) throws IOException { // Let's first see if we have created helper objects: if (mResultArray != null) { w.write(mResultArray); return mResultArray.length; } if (mResultString != null) { w.write(mResultString); return mResultString.length(); } // Do we use shared array? 
if (mInputStart >= 0) { if (mInputLen > 0) { w.write(mInputBuffer, mInputStart, mInputLen); } return mInputLen; } // Nope, need to do full segmented output int rlen = 0; if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); w.write(ch); rlen += ch.length; } } if (mCurrentSize > 0) { w.write(mCurrentSegment, 0, mCurrentSize); rlen += mCurrentSize; } return rlen; } public Reader rawContentsViaReader() throws IOException { // Let's first see if we have created helper objects: if (mResultArray != null) { return new CharArrayReader(mResultArray); } if (mResultString != null) { return new StringReader(mResultString); } // Do we use shared array? if (mInputStart >= 0) { if (mInputLen > 0) { return new CharArrayReader(mInputBuffer, mInputStart, mInputLen); } return new StringReader(""); } // or maybe it's all in the current segment if (mSegments == null || mSegments.size() == 0) { return new CharArrayReader(mCurrentSegment, 0, mCurrentSize); } // Nope, need to do full segmented output return new BufferReader(mSegments, mCurrentSegment, mCurrentSize); } public boolean isAllWhitespace() { if (mInputStart >= 0) { // using single shared buffer? char[] buf = mInputBuffer; int i = mInputStart; int last = i + mInputLen; for (; i < last; ++i) { if (buf[i] > INT_SPACE) { return false; } } return true; } // Nope, need to do full segmented output if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] buf = (char[]) mSegments.get(i); for (int j = 0, len2 = buf.length; j < len2; ++j) { if (buf[j] > INT_SPACE) { return false; } } } } char[] buf = mCurrentSegment; for (int i = 0, len = mCurrentSize; i < len; ++i) { if (buf[i] > INT_SPACE) { return false; } } return true; } /** * Method that can be used to check if the contents of the buffer end * in specified String. 
* * @return True if the textual content buffer contains ends with the * specified String; false otherwise */ public boolean endsWith(String str) { /* Let's just play this safe; should seldom if ever happen... * and because of that, can be sub-optimal, performancewise, to * alternatives. */ if (mInputStart >= 0) { unshare(16); } int segIndex = (mSegments == null) ? 0 : mSegments.size(); int inIndex = str.length() - 1; char[] buf = mCurrentSegment; int bufIndex = mCurrentSize-1; while (inIndex >= 0) { if (str.charAt(inIndex) != buf[bufIndex]) { return false; } if (--inIndex == 0) { break; } if (--bufIndex < 0) { if (--segIndex < 0) { // no more data? return false; } buf = (char[]) mSegments.get(segIndex); bufIndex = buf.length-1; } } return true; } /** * Note: it is assumed that this method is not used often enough to * be a bottleneck, or for long segments. Based on this, it is optimized * for common simple cases where there is only one single character * segment to use; fallback for other cases is to create such segment. */ public boolean equalsString(String str) { int expLen = str.length(); // First the easy check; if we have a shared buf: if (mInputStart >= 0) { if (mInputLen != expLen) { return false; } for (int i = 0; i < expLen; ++i) { if (str.charAt(i) != mInputBuffer[mInputStart+i]) { return false; } } return true; } // Otherwise, segments: if (expLen != size()) { return false; } char[] seg; if (mSegments == null || mSegments.size() == 0) { // just one segment, still easy seg = mCurrentSegment; } else { /* Ok; this is the sub-optimal case. Could obviously juggle through * segments, but probably not worth the hassle, we seldom if ever * get here... 
*/ seg = contentsAsArray(); } for (int i = 0; i < expLen; ++i) { if (seg[i] != str.charAt(i)) { return false; } } return true; } /* ////////////////////////////////////////////// // Access using SAX handlers: ////////////////////////////////////////////// */ public void fireSaxCharacterEvents(ContentHandler h) throws SAXException { if (mResultArray != null) { // already have single array? h.characters(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? h.characters(mInputBuffer, mInputStart, mInputLen); } else { if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); h.characters(ch, 0, ch.length); } } if (mCurrentSize > 0) { h.characters(mCurrentSegment, 0, mCurrentSize); } } } public void fireSaxSpaceEvents(ContentHandler h) throws SAXException { if (mResultArray != null) { // only happens for indentation h.ignorableWhitespace(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? h.ignorableWhitespace(mInputBuffer, mInputStart, mInputLen); } else { if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); h.ignorableWhitespace(ch, 0, ch.length); } } if (mCurrentSize > 0) { h.ignorableWhitespace(mCurrentSegment, 0, mCurrentSize); } } } public void fireSaxCommentEvent(LexicalHandler h) throws SAXException { // Comment can not be split, so may need to combine the array if (mResultArray != null) { // only happens for indentation h.comment(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? 
h.comment(mInputBuffer, mInputStart, mInputLen); } else if (mSegments != null && mSegments.size() > 0) { char[] ch = contentsAsArray(); h.comment(ch, 0, ch.length); } else { h.comment(mCurrentSegment, 0, mCurrentSize); } } public void fireDtdCommentEvent(DTDEventListener l) { // Comment can not be split, so may need to combine the array if (mResultArray != null) { // only happens for indentation l.dtdComment(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? l.dtdComment(mInputBuffer, mInputStart, mInputLen); } else if (mSegments != null && mSegments.size() > 0) { char[] ch = contentsAsArray(); l.dtdComment(ch, 0, ch.length); } else { l.dtdComment(mCurrentSegment, 0, mCurrentSize); } } /* ////////////////////////////////////////////// // Support for validation ////////////////////////////////////////////// */ public void validateText(XMLValidator vld, boolean lastSegment) throws XMLStreamException { // Shared buffer? Let's just pass that if (mInputStart >= 0) { vld.validateText(mInputBuffer, mInputStart, mInputStart + mInputLen, lastSegment); } else { /* Otherwise, can either create a combine buffer, or construct * a String. While former could be more efficient, let's do latter * for now since current validator implementations work better * with Strings. */ vld.validateText(contentsAsString(), lastSegment); } } /* ////////////////////////////////////////////// // Public mutators: ////////////////////////////////////////////// */ /** * Method called to make sure that buffer is not using shared input * buffer; if it is, it will copy such contents to private buffer. */ public void ensureNotShared() { if (mInputStart >= 0) { unshare(16); } } public void append(char c) { // Using shared buffer so far? if (mInputStart >= 0) { unshare(16); } mResultString = null; mResultArray = null; // Room in current segment? 
char[] curr = mCurrentSegment; if (mCurrentSize >= curr.length) { expand(1); curr = mCurrentSegment; } curr[mCurrentSize++] = c; } public void append(char[] c, int start, int len) { // Can't append to shared buf (sanity check) if (mInputStart >= 0) { unshare(len); } mResultString = null; mResultArray = null; // Room in current segment? char[] curr = mCurrentSegment; int max = curr.length - mCurrentSize; if (max >= len) { System.arraycopy(c, start, curr, mCurrentSize, len); mCurrentSize += len; } else { // No room for all, need to copy part(s): if (max > 0) { System.arraycopy(c, start, curr, mCurrentSize, max); start += max; len -= max; } /* And then allocate new segment; we are guaranteed to now * have enough room in segment. */ expand(len); // note: curr != mCurrentSegment after this System.arraycopy(c, start, mCurrentSegment, 0, len); mCurrentSize = len; } } public void append(String str) { // Can't append to shared buf (sanity check) int len = str.length(); if (mInputStart >= 0) { unshare(len); } mResultString = null; mResultArray = null; // Room in current segment? char[] curr = mCurrentSegment; int max = curr.length - mCurrentSize; if (max >= len) { str.getChars(0, len, curr, mCurrentSize); mCurrentSize += len; } else { // No room for all, need to copy part(s): if (max > 0) { str.getChars(0, max, curr, mCurrentSize); len -= max; } /* And then allocate new segment; we are guaranteed to now * have enough room in segment. */ expand(len); str.getChars(max, max+len, mCurrentSegment, 0); mCurrentSize = len; } } /* ////////////////////////////////////////////// // Raw access, for high-performance use: ////////////////////////////////////////////// */ public char[] getCurrentSegment() { /* Since the intention of the caller is to directly add stuff into * buffers, we should NOT have anything in shared buffer... ie. may * need to unshare contents. 
*/ if (mInputStart >= 0) { unshare(1); } else { char[] curr = mCurrentSegment; if (curr == null) { mCurrentSegment = allocBuffer(0); } else if (mCurrentSize >= curr.length) { // Plus, we better have room for at least one more char expand(1); } } return mCurrentSegment; } public int getCurrentSegmentSize() { return mCurrentSize; } public void setCurrentLength(int len) { mCurrentSize = len; } public char[] finishCurrentSegment() { if (mSegments == null) { mSegments = new ArrayList(); } mHasSegments = true; mSegments.add(mCurrentSegment); int oldLen = mCurrentSegment.length; mSegmentSize += oldLen; char[] curr = new char[calcNewSize(oldLen)]; mCurrentSize = 0; mCurrentSegment = curr; return curr; } /** * Method used to determine size of the next segment to * allocate to contain textual content. */ private int calcNewSize(int latestSize) { // Let's grow segments by 50%, when over 8k int incr = (latestSize < 8000) ? latestSize : (latestSize >> 1); int size = latestSize + incr; // but let's not create too big chunks return Math.min(size, MAX_SEGMENT_LENGTH); } /* ////////////////////////////////////////////// // Standard methods: ////////////////////////////////////////////// */ /** * Note: calling this method may not be as efficient as calling * {@link #contentsAsString}, since it's not guaranteed that resulting * String is cached. */ public String toString() { return contentsAsString(); } /* ////////////////////////////////////////////// // Internal methods: ////////////////////////////////////////////// */ /** * Method called if/when we need to append content when we have been * initialized to use shared buffer. */ public void unshare(int needExtra) { int len = mInputLen; mInputLen = 0; char[] inputBuf = mInputBuffer; mInputBuffer = null; int start = mInputStart; mInputStart = -1; // Is buffer big enough, or do we need to reallocate? 
int needed = len+needExtra; if (mCurrentSegment == null || needed > mCurrentSegment.length) { mCurrentSegment = allocBuffer(needed); } if (len > 0) { System.arraycopy(inputBuf, start, mCurrentSegment, 0, len); } mSegmentSize = 0; mCurrentSize = len; } /** * Method called when current segment is full, to allocate new * segment. * * @param roomNeeded Number of characters that the resulting * new buffer must have */ private void expand(int roomNeeded) { // First, let's move current segment to segment list: if (mSegments == null) { mSegments = new ArrayList(); } char[] curr = mCurrentSegment; mHasSegments = true; mSegments.add(curr); int oldLen = curr.length; mSegmentSize += oldLen; int newSize = Math.max(roomNeeded, calcNewSize(oldLen)); curr = new char[newSize]; mCurrentSize = 0; mCurrentSegment = curr; } private char[] buildResultArray() { if (mResultString != null) { // Can take a shortcut... return mResultString.toCharArray(); } char[] result; // Do we use shared array? if (mInputStart >= 0) { if (mInputLen < 1) { return DataUtil.getEmptyCharArray(); } result = new char[mInputLen]; System.arraycopy(mInputBuffer, mInputStart, result, 0, mInputLen); } else { // nope int size = size(); if (size < 1) { return DataUtil.getEmptyCharArray(); } int offset = 0; result = new char[size]; if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); int currLen = curr.length; System.arraycopy(curr, 0, result, offset, currLen); offset += currLen; } } System.arraycopy(mCurrentSegment, 0, result, offset, mCurrentSize); } return result; } private final static class BufferReader extends Reader { ArrayList _Segments; char[] _CurrentSegment; final int _CurrentLength; int _SegmentIndex; int _SegmentOffset; int _CurrentOffset; public BufferReader(ArrayList segs, char[] currSeg, int currSegLen) { _Segments = segs; _CurrentSegment = currSeg; _CurrentLength = currSegLen; _SegmentIndex = 0; _SegmentOffset = _CurrentOffset = 0; } 
public void close() { _Segments = null; _CurrentSegment = null; } public void mark(int x) throws IOException { throw new IOException("mark() not supported"); } public boolean markSupported() { return false; } public int read(char[] cbuf, int offset, int len) { if (len < 1) { return 0; } int origOffset = offset; // First need to copy stuff from previous segments while (_Segments != null) { char[] curr = (char[]) _Segments.get(_SegmentIndex); int max = curr.length - _SegmentOffset; if (len <= max) { // this is enough System.arraycopy(curr, _SegmentOffset, cbuf, offset, len); _SegmentOffset += len; offset += len; return (offset - origOffset); } // Not enough, but helps... if (max > 0) { System.arraycopy(curr, _SegmentOffset, cbuf, offset, max); offset += max; } if (++_SegmentIndex >= _Segments.size()) { // last one _Segments = null; } else { _SegmentOffset = 0; } } // ok, anything to copy from the active segment? if (len > 0 && _CurrentSegment != null) { int max = _CurrentLength - _CurrentOffset; if (len >= max) { // reading it all len = max; System.arraycopy(_CurrentSegment, _CurrentOffset, cbuf, offset, len); _CurrentSegment = null; } else { System.arraycopy(_CurrentSegment, _CurrentOffset, cbuf, offset, len); _CurrentOffset += len; } offset += len; } return (origOffset == offset) ? -1 : (offset - origOffset); } public boolean ready() { return true; } public void reset() throws IOException { throw new IOException("reset() not supported"); } public long skip(long amount) { /* Note: implementation is almost identical to that of read(); * difference being that no data is copied. */ if (amount < 0) { return 0L; } long origAmount= amount; while (_Segments != null) { char[] curr = (char[]) _Segments.get(_SegmentIndex); int max = curr.length - _SegmentOffset; if (max >= amount) { // this is enough _SegmentOffset += (int) amount; return origAmount; } // Not enough, but helps... 
amount -= max; if (++_SegmentIndex >= _Segments.size()) { // last one _Segments = null; } else { _SegmentOffset = 0; } } // ok, anything left in the active segment? if (amount > 0 && _CurrentSegment != null) { int max = _CurrentLength - _CurrentOffset; if (amount >= max) { // reading it all amount -= max; _CurrentSegment = null; } else { amount = 0L; _CurrentOffset += (int) amount; } } return (amount == origAmount) ? -1L : (origAmount - amount); } } } woodstox-4.1.3/src/java/com/ctc/wstx/util/BaseNsContext.java 0000644 0001750 0001750 00000010251 11745427074 024231 0 ustar giovanni giovanni /* Woodstox XML processor * * Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi * * Licensed under the License specified in file LICENSE, included with * the source code. * You may not use this file except in compliance with the License. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.ctc.wstx.util; import java.io.IOException; import java.io.Writer; import java.util.Iterator; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; import org.codehaus.stax2.ri.SingletonIterator; import com.ctc.wstx.cfg.ErrorConsts; /** * Abstract base class that defines extra features defined by most * NamespaceContext implementations Wodstox uses. */ public abstract class BaseNsContext implements NamespaceContext { /** * This is the URI returned for default namespace, when it hasn't * been explicitly declared; could be either "" or null. 
*/ protected final static String UNDECLARED_NS_URI = ""; /* ///////////////////////////////////////////// // NamespaceContext API ///////////////////////////////////////////// */ public final String getNamespaceURI(String prefix) { /* First the known offenders; invalid args, 2 predefined xml namespace * prefixes */ if (prefix == null) { throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG); } if (prefix.length() > 0) { if (prefix.equals(XMLConstants.XML_NS_PREFIX)) { return XMLConstants.XML_NS_URI; } if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; } } return doGetNamespaceURI(prefix); } public final String getPrefix(String nsURI) { /* First the known offenders; invalid args, 2 predefined xml namespace * prefixes */ if (nsURI == null || nsURI.length() == 0) { throw new IllegalArgumentException("Illegal to pass null/empty prefix as argument."); } if (nsURI.equals(XMLConstants.XML_NS_URI)) { return XMLConstants.XML_NS_PREFIX; } if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) { return XMLConstants.XMLNS_ATTRIBUTE; } return doGetPrefix(nsURI); } public final Iterator getPrefixes(String nsURI) { /* First the known offenders; invalid args, 2 predefined xml namespace * prefixes */ if (nsURI == null || nsURI.length() == 0) { throw new IllegalArgumentException("Illegal to pass null/empty prefix as argument."); } if (nsURI.equals(XMLConstants.XML_NS_URI)) { return new SingletonIterator(XMLConstants.XML_NS_PREFIX); } if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) { return new SingletonIterator(XMLConstants.XMLNS_ATTRIBUTE); } return doGetPrefixes(nsURI); } /* ///////////////////////////////////////////// // Extended API ///////////////////////////////////////////// */ public abstract Iterator getNamespaces(); /** * Method called by the matching start element class to * output all namespace declarations active in current namespace * scope, if any. 
*/ public abstract void outputNamespaceDeclarations(Writer w) throws IOException; public abstract void outputNamespaceDeclarations(XMLStreamWriter w) throws XMLStreamException; /* ///////////////////////////////////////////////// // Template methods sub-classes need to implement ///////////////////////////////////////////////// */ public abstract String doGetNamespaceURI(String prefix); public abstract String doGetPrefix(String nsURI); public abstract Iterator doGetPrefixes(String nsURI); } woodstox-4.1.3/src/java/com/ctc/wstx/util/ExceptionUtil.java 0000644 0001750 0001750 00000004040 11745427074 024304 0 ustar giovanni giovanni package com.ctc.wstx.util; public final class ExceptionUtil { private ExceptionUtil() { } /** * Method that can be used to convert any Throwable to a RuntimeException; * conversion is only done for checked exceptions. */ public static void throwRuntimeException(Throwable t) { // Unchecked? Can re-throw as is throwIfUnchecked(t); // Otherwise, let's just change its type: RuntimeException rex = new RuntimeException("[was "+t.getClass()+"] "+t.getMessage()); // And indicate the root cause setInitCause(rex, t); throw rex; } public static void throwAsIllegalArgument(Throwable t) { // Unchecked? Can re-throw as is throwIfUnchecked(t); // Otherwise, let's just change its type: IllegalArgumentException rex = new IllegalArgumentException("[was "+t.getClass()+"] "+t.getMessage()); // And indicate the root cause setInitCause(rex, t); throw rex; } public static void throwIfUnchecked(Throwable t) { // If it's not checked, let's throw it as is if (t instanceof RuntimeException) { throw (RuntimeException) t; } if (t instanceof Error) { throw (Error) t; } } /** * This method is just added for convenience, and only to be used for * assertion style of exceptions. For errors that actually occur, method * with the string arg should be called instead. 
*/ public static void throwGenericInternal() { throwInternal(null); } public static void throwInternal(String msg) { if (msg == null) { msg = "[no description]"; } throw new RuntimeException("Internal error: "+msg); } public static void setInitCause(Throwable newT, Throwable rootT) { /* [WSTX-110]: Better make sure we do not already have * a chained exception... */ if (newT.getCause() == null) { newT.initCause(rootT); } } } woodstox-4.1.3/src/java/com/ctc/wstx/util/InternCache.java 0000644 0001750 0001750 00000004040 11745427074 023673 0 ustar giovanni giovanni package com.ctc.wstx.util; import java.util.LinkedHashMap; import java.util.Map; /** * Singleton class that implements "fast intern" functionality, essentially * adding a layer that caches Strings that have been previously intern()ed, * but that probably shouldn't be added to symbol tables. * This is usually used by improving intern()ing of things like namespace * URIs. *
* Note: that this class extends {@link LinkedHashMap} is an implementation
* detail -- no code should ever directly call Map methods.
*/
public final class InternCache extends LinkedHashMap //
* Note: this is not really safe, as caller can modify the array, but
* since this method is thought to provide fast access, let's avoid making
* copy here.
*/
public char[] getReplacementChars()
{
    // Hands out the internal array itself; no defensive copy is made
    final char[] replacement = this.mRepl;
    return replacement;
}
// // // Type information
public boolean isExternal()
{
    // This entity implementation always reports itself as not external
    return false;
}
public boolean isParsed()
{
    // This entity implementation always reports itself as parsed
    return true;
}
public WstxInputSource expand(WstxInputSource parent,
        XMLResolver res, ReaderConfig cfg,
        int xmlVersion)
{
    /* 26-Dec-2006, TSa: the last argument (source) is intentionally
     * null: internal entity declaration context should never be used;
     * when expanding, reference context is to be used instead.
     * (res, cfg and xmlVersion are not used by this implementation.)
     */
    final char[] replacement = mRepl;
    return InputSourceFactory.constructCharArraySource(
            parent, mName, replacement, 0, replacement.length,
            mContentLocation, null);
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/stax/ 0000755 0001750 0001750 00000000000 11756143457 020653 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/stax/package.html 0000644 0001750 0001750 00000000124 11745427074 023127 0 ustar giovanni giovanni
* TODO:
*
* Currently supported configuration options fall into two categories. First,
* all properties from {@link XMLInputFactory} (such as, say,
* {@link XMLInputFactory#IS_NAMESPACE_AWARE}) are at least recognized, and
* most are supported. Second, there are additional properties, defined in
* constant class {@link WstxInputProperties}, that are supported.
* See {@link WstxInputProperties} for further explanation of these 'custom'
* properties.
*
* @author Tatu Saloranta
*/
public class WstxInputFactory
extends XMLInputFactory2
implements ReaderCreator,
InputConfigFlags
{
/**
* Let's limit max size to 3/4 of 16k, since this corresponds
* to 64k main hash index. This should not be too low, but could
* perhaps be further lowered?
* Checked by updateSymbolTable(): a child table larger than this
* causes the shared table to be reset to the root table.
*/
final static int MAX_SYMBOL_TABLE_SIZE = 12000;
/**
* Number of generations should not matter as much as raw
* size... but let's still cap it at some number. 500 generations
* seems reasonable for flushing (note: does not count uses
* where no new symbols were added).
* Checked by updateSymbolTable() against the child table's version.
*/
final static int MAX_SYMBOL_TABLE_GENERATIONS = 500;
/*
///////////////////////////////////////////////////////////
// Actual storage of configuration settings
///////////////////////////////////////////////////////////
*/
/**
* Current configurations for this factory
*/
protected final ReaderConfig mConfig;
// // // Stax - mandated objects:
/**
* Event allocator set via setEventAllocator() (or the ALLOCATOR
* property); null until one has been set.
*/
protected XMLEventAllocator mAllocator = null;
// // // Other configuration objects:
/**
* Cache of DTD subsets; created lazily by addCachedDTD(), so it is
* null until the first entry is added.
*/
protected SimpleCache mDTDCache = null;
/*
///////////////////////////////////////////////////////////
// Objects shared by actual parsers
///////////////////////////////////////////////////////////
*/
/**
* 'Root' symbol table, used for creating actual symbol table instances,
* but never as is.
*/
final static SymbolTable mRootSymbols = DefaultXmlSymbolTable.getInstance();
static {
/* By default, let's enable intern()ing of names (element, attribute,
* prefixes) added to symbol table. This is likely to make some
* access (attr by QName) and comparison of element/attr names
* more efficient. Although it will add some overhead on adding
* new symbols to symbol table that should be rather negligible.
*
* Also note that always doing intern()ing allows for more efficient
* access during DTD validation.
*/
mRootSymbols.setInternStrings(true);
}
/**
* Actual current 'parent' symbol table; concrete instances will be
* created from this instance using
* Note: parser is only to call this method, if passed-in symbol
* table was modified, ie new entry/ies were added in addition to
* whatever was in root table.
*/
public synchronized void updateSymbolTable(SymbolTable t)
{
    /* Only a direct descendant of the current shared table is merged
     * back; this prevents siblings from repeatedly overwriting each
     * other's additions (multiple direct children each having added
     * symbols of their own).
     */
    if (!t.isDirectChildOf(mSymbols)) {
        return;
    }

    /* 07-Apr-2006, TSa: huge symbol tables may become a hindrance
     * rather than a benefit (pathological cases with random names,
     * or very long running processes), so both the number of
     * generations and, more importantly, the maximum size of the
     * shared table are limited.
     */
    final boolean tooLarge = t.size() > MAX_SYMBOL_TABLE_SIZE;
    if (tooLarge || t.version() > MAX_SYMBOL_TABLE_GENERATIONS) {
        // Over a limit: start again from the bare root table
        mSymbols = mRootSymbols;
    } else {
        mSymbols.mergeChild(t);
    }
}
/**
 * Stores given external subset in the DTD cache under given id;
 * the cache itself is created on first use, sized from the
 * configured DTD cache size.
 */
public synchronized void addCachedDTD(DTDId id, DTDSubset extSubset)
{
    // Lazy creation: no cache exists until first DTD is added
    if (null == this.mDTDCache) {
        final int cacheSize = mConfig.getDtdCacheSize();
        this.mDTDCache = new SimpleCache(cacheSize);
    }
    this.mDTDCache.add(id, extSubset);
}
/*
/////////////////////////////////////////////////////
// Stax, XMLInputFactory; factory methods
/////////////////////////////////////////////////////
*/
// // // Filtered reader factory methods
/**
 * Constructs an event reader that applies given filter on top of given
 * reader; the reader is first wrapped via
 * Stax2EventReaderAdapter.wrapIfNecessary().
 */
public XMLEventReader createFilteredReader(XMLEventReader reader, EventFilter filter)
{
    return new Stax2FilteredEventReader(
            Stax2EventReaderAdapter.wrapIfNecessary(reader),
            filter);
}
/**
 * Constructs a stream reader that applies given filter on top of given
 * reader. If the filter does not accept the reader's initial state,
 * the filtered reader is advanced once before being returned.
 */
public XMLStreamReader createFilteredReader(XMLStreamReader reader, StreamFilter filter)
    throws XMLStreamException
{
    Stax2FilteredStreamReader filtered = new Stax2FilteredStreamReader(reader, filter);
    /* [WSTX-111] As per Stax 1.0 TCK, the filtered reader is apparently
     * expected to be automatically forwarded to the first acceptable
     * event. This differs from how the RI works, but since the specs
     * say nothing about filtered readers, TCK is considered "more
     * formal" for now, and that behavior is implemented.
     */
    final boolean initialAccepted = filter.accept(filtered); // START_DOCUMENT ok?
    if (!initialAccepted) {
        // Nope; moving forward should do the trick
        filtered.next();
    }
    return filtered;
}
// // // Event reader factory methods
/**
 * Creates an event reader over given input stream, with no system id
 * and no explicit encoding.
 */
public XMLEventReader createXMLEventReader(InputStream in)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return new WstxEventReader(createEventAllocator(),
            createSR(null, in, null, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader over given input stream, using given
 * encoding; no system id is passed.
 */
public XMLEventReader createXMLEventReader(InputStream in, String enc)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return new WstxEventReader(createEventAllocator(),
            createSR(null, in, enc, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader over given Reader; no system id is passed.
 */
public XMLEventReader createXMLEventReader(Reader r)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    // no auto-close, since caller has access to the reader
    final boolean autoCloseInput = false;
    return new WstxEventReader(createEventAllocator(),
            createSR(null, r, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader over given JAXP Source.
 */
public XMLEventReader createXMLEventReader(javax.xml.transform.Source source)
    throws XMLStreamException
{
    // true -> stream reader will be used via an event reader
    final boolean forEventReader = true;
    return new WstxEventReader(createEventAllocator(),
            createSR(source, forEventReader));
}
/**
 * Creates an event reader over given input stream, using given system
 * id; no explicit encoding is passed.
 */
public XMLEventReader createXMLEventReader(String systemId, InputStream in)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return new WstxEventReader(createEventAllocator(),
            createSR(systemId, in, null, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader over given Reader, using given system id.
 */
public XMLEventReader createXMLEventReader(String systemId, Reader r)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    // no auto-close, since caller has access to the reader
    final boolean autoCloseInput = false;
    return new WstxEventReader(createEventAllocator(),
            createSR(systemId, r, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader on top of an existing stream reader; the
 * stream reader is first wrapped via
 * Stax2ReaderAdapter.wrapIfNecessary().
 */
public XMLEventReader createXMLEventReader(XMLStreamReader sr)
    throws XMLStreamException
{
    return new WstxEventReader(
            createEventAllocator(),
            Stax2ReaderAdapter.wrapIfNecessary(sr));
}
// // // Stream reader factory methods
/**
 * Creates a stream reader over given input stream, with no system id
 * and no explicit encoding.
 */
public XMLStreamReader createXMLStreamReader(InputStream in)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return createSR(null, in, null, forEventReader, autoCloseInput);
}
/**
 * Creates a stream reader over given input stream, using given
 * encoding; no system id is passed.
 */
public XMLStreamReader createXMLStreamReader(InputStream in, String enc)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return createSR(null, in, enc, forEventReader, autoCloseInput);
}
/**
 * Creates a stream reader over given Reader; no system id is passed.
 */
public XMLStreamReader createXMLStreamReader(Reader r)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    // no auto-close, since caller has access to the reader
    final boolean autoCloseInput = false;
    return createSR(null, r, forEventReader, autoCloseInput);
}
/**
 * Creates a stream reader over given JAXP Source.
 */
public XMLStreamReader createXMLStreamReader(javax.xml.transform.Source src)
    throws XMLStreamException
{
    /* false -> not for event reader use. There is no auto-close
     * setting to pass here; the called method decides that.
     */
    final boolean forEventReader = false;
    return createSR(src, forEventReader);
}
/**
 * Creates a stream reader over given input stream, using given system
 * id; no explicit encoding is passed.
 */
public XMLStreamReader createXMLStreamReader(String systemId, InputStream in)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    // no auto-close, since caller has access to the input stream
    final boolean autoCloseInput = false;
    return createSR(systemId, in, null, forEventReader, autoCloseInput);
}
/**
 * Creates a stream reader over given Reader, using given system id.
 */
public XMLStreamReader createXMLStreamReader(String systemId, Reader r)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    // no auto-close, since caller has access to the Reader
    final boolean autoCloseInput = false;
    return createSR(systemId, r, forEventReader, autoCloseInput);
}
/*
///////////////////////////////////////////////////////////
// Stax, XMLInputFactory; generic accessors/mutators
///////////////////////////////////////////////////////////
*/
/**
 * Returns the value the configuration object has for given property;
 * if that is null and the property is XMLInputFactory.ALLOCATOR, the
 * factory's event allocator is returned instead.
 */
public Object getProperty(String name)
{
    final Object configured = mConfig.getProperty(name);
    if (configured != null) {
        return configured;
    }
    // Event allocator not available via J2ME subset...
    if (name.equals(XMLInputFactory.ALLOCATOR)) {
        return getEventAllocator();
    }
    return null;
}
/**
 * Passes the property to the configuration object; if that reports
 * the property as not set and the name is XMLInputFactory.ALLOCATOR,
 * the value is installed as this factory's event allocator.
 */
public void setProperty(String propName, Object value)
{
    final boolean handledByConfig = mConfig.setProperty(propName, value);
    if (handledByConfig) {
        return;
    }
    if (XMLInputFactory.ALLOCATOR.equals(propName)) {
        setEventAllocator((XMLEventAllocator) value);
    }
}
/**
 * @return Event allocator currently set on this factory (may be null)
 */
public XMLEventAllocator getEventAllocator()
{
    return this.mAllocator;
}
/**
 * @return Reporter held by this factory's configuration object
 */
public XMLReporter getXMLReporter()
{
    return this.mConfig.getXMLReporter();
}
/**
 * @return Resolver held by this factory's configuration object
 */
public XMLResolver getXMLResolver()
{
    return this.mConfig.getXMLResolver();
}
/**
 * @return Whatever the configuration object reports for given
 *   property name
 */
public boolean isPropertySupported(String name)
{
    return this.mConfig.isPropertySupported(name);
}
/**
 * Replaces the event allocator of this factory with given one.
 */
public void setEventAllocator(XMLEventAllocator allocator)
{
    this.mAllocator = allocator;
}
/**
 * Sets given reporter on this factory's configuration object.
 */
public void setXMLReporter(XMLReporter r)
{
    this.mConfig.setXMLReporter(r);
}
/**
 * Sets given resolver on this factory's configuration object.
 * Note: where possible, Wstx-specific
 * {@link ReaderConfig#setEntityResolver} is preferable, since this
 * method just wraps the passed-in resolver.
 */
public void setXMLResolver(XMLResolver r)
{
    this.mConfig.setXMLResolver(r);
}
/*
///////////////////////////////////////////////////////////
// Stax2 implementation
///////////////////////////////////////////////////////////
*/
// // // Stax2, additional factory methods:
/**
 * Creates an event reader for the document at given URL, using a
 * private (non-shared) configuration.
 */
public XMLEventReader2 createXMLEventReader(URL src)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    /* auto-close enabled, since caller has no access to the underlying
     * input stream created from the URL
     */
    final boolean autoCloseInput = true;
    return new WstxEventReader(createEventAllocator(),
            createSR(createPrivateConfig(), src, forEventReader, autoCloseInput));
}
/**
 * Creates an event reader for the document stored in given file.
 */
public XMLEventReader2 createXMLEventReader(File f)
    throws XMLStreamException
{
    final boolean forEventReader = true;
    /* auto-close enabled, since caller has no access to the underlying
     * input stream created from the File
     */
    final boolean autoCloseInput = true;
    return new WstxEventReader(createEventAllocator(),
            createSR(f, forEventReader, autoCloseInput));
}
/**
 * Creates a stream reader for the document at given URL, using a
 * private (non-shared) configuration.
 */
public XMLStreamReader2 createXMLStreamReader(URL src)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    /* auto-close enabled, since caller has no access to the underlying
     * input stream created from the URL
     */
    final boolean autoCloseInput = true;
    return createSR(createPrivateConfig(), src, forEventReader, autoCloseInput);
}
/**
 * Convenience factory method for parsing a document stored in the
 * specified file.
 */
public XMLStreamReader2 createXMLStreamReader(File f)
    throws XMLStreamException
{
    final boolean forEventReader = false;
    /* auto-close enabled, since caller has no access to the underlying
     * input stream created from the File
     */
    final boolean autoCloseInput = true;
    return createSR(f, forEventReader, autoCloseInput);
}
// // // Stax2 "Profile" mutators
/**
 * Applies the "XML conformance" profile by delegating to this
 * factory's configuration object.
 */
public void configureForXmlConformance() {
    this.mConfig.configureForXmlConformance();
}
/**
 * Applies the "convenience" profile by delegating to this factory's
 * configuration object.
 */
public void configureForConvenience() {
    this.mConfig.configureForConvenience();
}
/**
 * Applies the "speed" profile by delegating to this factory's
 * configuration object.
 */
public void configureForSpeed() {
    this.mConfig.configureForSpeed();
}
/**
 * Applies the "low memory usage" profile by delegating to this
 * factory's configuration object.
 */
public void configureForLowMemUsage() {
    this.mConfig.configureForLowMemUsage();
}
/**
 * Applies the "round-tripping" profile by delegating to this factory's
 * configuration object.
 */
public void configureForRoundTripping() {
    this.mConfig.configureForRoundTripping();
}
/*
///////////////////////////////////////////////////////////
// Woodstox-specific configuration access
///////////////////////////////////////////////////////////
*/
/**
 * @return The configuration object of this factory itself (not a copy)
 */
public ReaderConfig getConfig()
{
    return this.mConfig;
}
/*
///////////////////////////////////////////////////////////
// Internal methods:
///////////////////////////////////////////////////////////
*/
/**
* Bottleneck method used for creating ALL full stream reader instances
* (via other createSR() methods and directly)
*
* @param forER True, if the reader is being constructed to be used
* by an event reader; false if it is not (or the purpose is not known)
* @param autoCloseInput Whether the underlying input source should be
* actually closed when encountering EOF, or when
* Note: defined as public method because it needs to be called by
* SAX implementation.
*
* @param systemId System id used for this reader (if any)
* @param bs Bootstrapper to use for creating actual underlying
* physical reader
* @param forER Flag to indicate whether it will be used via
* Event API (will affect some configuration settings), true if it
* will be, false if not (or not known)
* @param autoCloseInput Whether the underlying input source should be
* actually closed when encountering EOF, or when
* Note: only public so that other woodstox components outside of
* this package can access it.
*/
public ReaderConfig createPrivateConfig()
{
    // Non-shared config gets its own child of the current symbol table
    return this.mConfig.createNonShared(
            this.mSymbols.makeChild());
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/compat/ 0000755 0001750 0001750 00000000000 11756143457 021157 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/compat/package.html 0000644 0001750 0001750 00000000612 11745427074 023435 0 ustar giovanni giovanni
* Note: choice of java.util.logging logging is only based on the
* fact that it is guaranteed to be present (we have JDK 1.4 baseline
* requirement) so that we do not add external dependencies.
* It is not a recommendation for using JUL per se; most users would
* do well to just use slf4j or log4j directly instead.
*
* @author Tatu Saloranta
*
* @since 3.2.8
*/
public final class QNameCreator
{
    /**
     * Creator object that creates QNames using proper 3-arg constructor.
     * Left as null if the probe done in the static initializer fails
     * (3-arg constructor not usable); {@link #create} then falls back to
     * the 2-arg constructor.
     */
    private final static Helper _helper;
    static {
        Helper h = null;
        try {
            // Not sure where it'll fail, constructor or create...
            Helper h0 = new Helper();
            /*QName n =*/ h0.create("http://dummy", "elem", "ns");
            h = h0;
        } catch (Throwable t) {
            String msg = "Could not construct QNameCreator.Helper; assume 3-arg QName constructor not available and use 2-arg method instead. Problem: "+t.getMessage();
            try {
                Logger.getLogger("com.ctc.wstx.compat.QNameCreator").warning(msg);
            } catch (Throwable t2) { // just in case JUL craps out...
                // report the logging failure (t2); original problem is part of msg
                System.err.println("ERROR: failed to log error using Logger (problem "+t2.getMessage()+"), original problem: "+msg);
            }
        }
        _helper = h;
    }

    /**
     * Constructs a QName, including the prefix when the 3-arg QName
     * constructor is usable.
     *
     * @param uri Namespace URI of the name
     * @param localName Local part of the name
     * @param prefix Prefix of the name; dropped if only the 2-arg
     *   constructor is available
     *
     * @return Newly constructed QName
     */
    public static QName create(String uri, String localName, String prefix)
    {
        if (_helper == null) { // can't use 3-arg constructor; but 2-arg will be there
            return new QName(uri, localName);
        }
        return _helper.create(uri, localName, prefix);
    }

    /**
     * Helper class used to encapsulate calls to the missing method.
     */
    private final static class Helper
    {
        public Helper() { }

        /**
         * Arguments are in the same order as those of the 3-arg QName
         * constructor: namespace URI, local part, prefix.
         */
        public QName create(String nsURI, String localName, String prefix)
        {
            return new QName(nsURI, localName, prefix);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/exc/ 0000755 0001750 0001750 00000000000 11756143457 020453 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/exc/WstxException.java 0000644 0001750 0001750 00000005704 11745427074 024146 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.exc;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.StringUtil;
/**
 * Base class for all implementations of {@link XMLStreamException}
 * Wstx uses.
 */
public class WstxException
    extends XMLStreamException
{
    private static final long serialVersionUID = 1L;

    /**
     * D'oh. Super-class munges and hides the message, have to duplicate here.
     * May be null if constructed with a null message (or from a Throwable
     * that has no message).
     */
    final protected String mMsg;

    public WstxException(String msg) {
        super(msg);
        mMsg = msg;
    }

    public WstxException(Throwable th) {
        super(th.getMessage(), th);
        mMsg = th.getMessage();
        // 13-Aug-2004, TSa: Better make sure root cause is set...
        ExceptionUtil.setInitCause(this, th);
    }

    public WstxException(String msg, Location loc) {
        super(msg, loc);
        mMsg = msg;
    }

    public WstxException(String msg, Location loc, Throwable th) {
        super(msg, loc, th);
        mMsg = msg;
        // 13-Aug-2004, TSa: Better make sure root cause is set...
        ExceptionUtil.setInitCause(this, th);
    }

    /**
     * Method is overridden for two main reasons: first, default method
     * does not display public/system id information, even if it exists, and
     * second, default implementation can not handle nested Location
     * information.
     *
     * @return Super-class message if there is no location description;
     *   otherwise the original message (empty if it was null), a linefeed,
     *   and " at " followed by the location description
     */
    public String getMessage()
    {
        String locMsg = getLocationDesc();
        /* Better not use super's message if we do have location information,
         * since parent's message contains (part of) Location
         * info; something we can regenerate better...
         */
        if (locMsg == null) {
            return super.getMessage();
        }
        // Message may legitimately be null; must not fail when describing it
        String msg = (mMsg == null) ? "" : mMsg;
        StringBuffer sb = new StringBuffer(msg.length() + locMsg.length() + 20);
        sb.append(msg);
        StringUtil.appendLF(sb);
        sb.append(" at ");
        sb.append(locMsg);
        return sb.toString();
    }

    public String toString()
    {
        return getClass().getName()+": "+getMessage();
    }

    /*
    ////////////////////////////////////////////////////////
    // Internal methods:
    ////////////////////////////////////////////////////////
     */

    /**
     * @return String representation of the location, or null if this
     *   exception has no location
     */
    protected String getLocationDesc()
    {
        Location loc = getLocation();
        return (loc == null) ? null : loc.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/exc/WstxLazyException.java 0000644 0001750 0001750 00000003331 11745427074 025000 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.exc;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.util.ExceptionUtil;
/**
 * Alternative exception class Woodstox code uses when it is not allowed
 * to throw an instance of {@link XMLStreamException}; this generally
 * happens when doing lazy parsing.
 */
public class WstxLazyException
    extends RuntimeException
{
    private static final long serialVersionUID = 1L;

    /** The wrapped exception; also installed as the cause. */
    final XMLStreamException mOrig;

    /**
     * @param origEx Exception to wrap; its message is used as the message
     *   of this exception, and it becomes the cause
     */
    public WstxLazyException(XMLStreamException origEx)
    {
        // Standard chaining constructor also sets the root cause
        super(origEx.getMessage(), origEx);
        mOrig = origEx;
    }

    /**
     * Wraps given exception in a {@link WstxLazyException} and throws it;
     * never returns normally.
     */
    public static void throwLazily(XMLStreamException ex)
        throws WstxLazyException
    {
        throw new WstxLazyException(ex);
    }

    /**
     * Need to override this, to be able to dynamically construct and
     * display the location information...
     */
    public String getMessage()
    {
        return "["+getClass().getName()+"] "+mOrig.getMessage();
    }

    public String toString()
    {
        return "["+getClass().getName()+"] "+mOrig.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/exc/package.html 0000644 0001750 0001750 00000000241 11745427074 022727 0 ustar giovanni giovanni
* The current implementation does not add much beyond basic
* {@link XMLValidationException}, except for fixing some problems that
* underlying {@link XMLStreamException} has.
*
* Note that some of the code is shared with {@link WstxException}. Unfortunately
* it is not possible to extend it, however, since it extends basic
* {@link XMLStreamException}, not {@link XMLValidationException}.
*
* One more thing to note: unlike some other exception classes, these
* exceptions do not have chained root causes. That's why no special
* handling is necessary for setting the root cause in backwards compatible
* way.
*/
public class WstxValidationException
    extends XMLValidationException
{
    private static final long serialVersionUID = 1L;

    protected WstxValidationException(XMLValidationProblem cause, String msg)
    {
        super(cause, msg);
    }

    protected WstxValidationException(XMLValidationProblem cause, String msg,
            Location loc)
    {
        super(cause, msg, loc);
    }

    /**
     * Factory method: builds an exception for given validation problem,
     * using the problem's message and, when it has one, its location.
     */
    public static WstxValidationException create(XMLValidationProblem cause)
    {
        // Should always get a message
        final Location where = cause.getLocation();
        if (where != null) {
            return new WstxValidationException(cause, cause.getMessage(), where);
        }
        return new WstxValidationException(cause, cause.getMessage());
    }

    /*
    /////////////////////////////////////////////////////////
    // Overridden methods from XMLStreamException
    /////////////////////////////////////////////////////////
     */

    /**
     * Overridden because the default implementation neither displays
     * public/system id information (even when available) nor handles
     * nested Location information.
     *
     * @return Super-class message if no location description exists;
     *   otherwise the validation problem's message, a linefeed, and
     *   " at " followed by the location description
     */
    public String getMessage()
    {
        final String where = getLocationDesc();
        /* With location information available, super's message is best
         * avoided: it contains (part of) the Location info, which can
         * be regenerated better here.
         */
        if (where == null) {
            return super.getMessage();
        }
        final String problemText = getValidationProblem().getMessage();
        StringBuffer out = new StringBuffer(problemText.length() + where.length() + 20);
        out.append(problemText);
        StringUtil.appendLF(out);
        out.append(" at ").append(where);
        return out.toString();
    }

    public String toString()
    {
        return getClass().getName() + ": " + getMessage();
    }

    /*
    ////////////////////////////////////////////////////////
    // Internal methods:
    ////////////////////////////////////////////////////////
     */

    /**
     * @return String form of the location, or null when there is none
     */
    protected String getLocationDesc()
    {
        final Location where = getLocation();
        if (where == null) {
            return null;
        }
        return where.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/exc/WstxUnexpectedCharException.java 0000644 0001750 0001750 00000001300 11745427074 026755 0 ustar giovanni giovanni package com.ctc.wstx.exc;
import javax.xml.stream.Location;
/**
 * Exception type used when the tokenizer/parser runs into a character
 * that is not legal in the current context, even if the character
 * itself is not necessarily invalid as such. A typical example is
 * missing white space between an attribute value and the name of the
 * next attribute.
 */
public class WstxUnexpectedCharException
    extends WstxParsingException
{
    private static final long serialVersionUID = 1L;

    /**
     * The offending character, as passed to the constructor.
     */
    final char mChar;

    /**
     * @param msg Message passed as is to the base class
     * @param loc Location passed as is to the base class
     * @param c Character that was not expected
     */
    public WstxUnexpectedCharException(String msg, Location loc, char c)
    {
        super(msg, loc);
        this.mChar = c;
    }

    /**
     * @return The unexpected character this exception was constructed with
     */
    public char getChar()
    {
        return this.mChar;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/osgi/ 0000755 0001750 0001750 00000000000 11756143457 020635 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/osgi/package.html 0000644 0001750 0001750 00000000262 11745427074 023114 0 ustar giovanni giovanni
* Note that the implementation is only to be used for use with
*
* Some notes regarding missing/incomplete functionality:
*
* Note: explicit empty element (written using
*
* Note: while this is often the same as {@link #mCurrElem},
* it's not always. Specifically, an empty element (written
* explicitly using
* Note: since the actual Reader to use after bootstrapping is pre-constructed,
* the local input buffer can (and should) be quite small.
*/
public final class ReaderBootstrapper
extends InputBootstrapper
{
final static char CHAR_BOM_MARKER = (char) 0xFEFF;
/*
////////////////////////////////////////
// Configuration
////////////////////////////////////////
*/
/**
* Underlying Reader to use for reading content.
*/
final Reader mIn;
/**
* Encoding identifier processing application passed in; if not null,
* will be compared to actual xml declaration based encoding (if
* declaration found)
*/
final String mInputEncoding;
/*
///////////////////////////////////////////////////////////////
// Input buffering
///////////////////////////////////////////////////////////////
*/
private char[] mCharBuffer;
private int mInputPtr;
private int mInputEnd;
/*
////////////////////////////////////////
// Life-cycle
////////////////////////////////////////
*/
private ReaderBootstrapper(String pubId, String sysId, Reader r, String appEncoding)
{
super(pubId, sysId);
mIn = r;
if (appEncoding == null) { // may still be able to figure it out
if (r instanceof InputStreamReader) {
appEncoding = ((InputStreamReader) r).getEncoding();
}
}
mInputEncoding = appEncoding;
}
/*
////////////////////////////////////////
// Public API
////////////////////////////////////////
*/
/**
 * Factory method for constructing a bootstrapper for given Reader.
 *
 * @param pubId Public id handed to the constructor
 * @param sysId System id handed to the constructor
 * @param r Eventual reader that will be reading actual content, after
 * bootstrapping finishes
 * @param appEncoding Encoding that application declared; may be null.
 * If not null, will be compared to actual declaration found; and
 * incompatibility reported as a potential (but not necessarily fatal)
 * problem.
 *
 * @return Newly constructed bootstrapper instance
 */
public static ReaderBootstrapper getInstance(String pubId, String sysId,
Reader r, String appEncoding)
{
return new ReaderBootstrapper(pubId, sysId, r, appEncoding);
}
/**
 * Method called to do actual bootstrapping: loads the first characters
 * from the underlying Reader, skips an optional BOM, and if an xml
 * declaration signature is found, parses the declaration and (when
 * both the declared and the application-provided encoding are known)
 * cross-checks the two.
 *
 * @param cfg Reader configuration; handed to the encoding check and to
 *   the merged reader, if one has to be constructed
 * @param mainDoc Passed as is to <code>readXmlDecl</code>
 * @param xmlVersion Passed as is to <code>readXmlDecl</code>
 *
 * @return Actual reader to use for reading xml content: the original
 *   Reader if everything that was buffered got consumed; otherwise a
 *   {@link MergedReader} that is given the unconsumed part of the buffer
 *   along with the original Reader
 */
public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
    throws IOException, XMLStreamException
{
    /* First order of business: allocate input buffer. Not done during
     * construction for simplicity; that way config object need not be
     * passed before actual bootstrap method is called.
     * The buffer only has to hold the beginning of the document (it is
     * refilled by loadMore() while the declaration is being read), so
     * a small one is enough.
     */
    if (mCharBuffer == null) {
        mCharBuffer = new char[128];
    }
    /* The declaration signature ("<?xml" followed by a white space
     * char) takes 6 chars, and it may be preceded by a BOM; so try to
     * get at least 7. Return value can be ignored, since mInputEnd
     * tells how much was actually loaded.
     */
    initialLoad(7);
    if (mInputEnd >= 7) {
        char c = mCharBuffer[mInputPtr];
        // BOM to skip?
        if (c == CHAR_BOM_MARKER) {
            c = mCharBuffer[++mInputPtr];
        }
        if (c == '<') {
            if (mCharBuffer[mInputPtr+1] == '?'
                && mCharBuffer[mInputPtr+2] == 'x'
                && mCharBuffer[mInputPtr+3] == 'm'
                && mCharBuffer[mInputPtr+4] == 'l'
                && mCharBuffer[mInputPtr+5] <= CHAR_SPACE) {
                // Yup, got the declaration ok!
                mInputPtr += 6; // skip declaration
                readXmlDecl(mainDoc, xmlVersion);
                if (mFoundEncoding != null && mInputEncoding != null) {
                    verifyXmlEncoding(cfg);
                }
            }
        } else {
            /* We may also get something that would be invalid xml
             * ("garbage" char; neither '<' nor space). If so, and
             * it's one of "well-known" cases, we can not only throw
             * an exception but also indicate a clue as to what is likely
             * to be wrong.
             */
            /* Specifically, UTF-8 read via, say, ISO-8859-1 reader, can
             * "leak" marker (0xEF, 0xBB, 0xBF). While we could just eat
             * it, there's bound to be other problems cropping up, so let's
             * inform about the problem right away.
             */
            if (c == 0xEF) {
                throw new WstxIOException("Unexpected first character (char code 0xEF), not valid in xml document: could be mangled UTF-8 BOM marker. Make sure that the Reader uses correct encoding or pass an InputStream instead");
            }
        }
    }
    /* Ok, now; do we have unused chars we have read that need to
     * be merged in?
     */
    if (mInputPtr < mInputEnd) {
        return new MergedReader(cfg, mIn, mCharBuffer, mInputPtr, mInputEnd);
    }
    return mIn;
}
/**
 * @return Encoding resolved at construction: the one the application
 *   passed, or the one reported by the Reader if it is an
 *   InputStreamReader; may be null
 */
public String getInputEncoding() {
return mInputEncoding;
}
/**
 * @return Sum of the count of characters in previously processed
 *   buffers and the current offset within the current buffer
 */
public int getInputTotal() {
return mInputProcessed + mInputPtr;
}
/**
 * @return Distance of the current input pointer from the start of the
 *   current row
 */
public int getInputColumn() {
return (mInputPtr - mInputRowStart);
}
/*
////////////////////////////////////////
// Internal methods, parsing
////////////////////////////////////////
*/
/**
 * Compares the encoding known for the input against the encoding found
 * from the xml declaration, and if the two are not considered equal,
 * reports a warning through the configured reporter (if there is one).
 *
 * @param cfg Configuration from which the reporter is obtained
 */
protected void verifyXmlEncoding(ReaderConfig cfg)
    throws XMLStreamException
{
    final String inputEnc = mInputEncoding;

    // Close enough?
    if (StringUtil.equalEncodings(inputEnc, mFoundEncoding)) {
        return;
    }

    /* Ok, maybe the difference is just with endianness indicator?
     * (UTF-16BE vs. UTF-16)?
     */
    // !!! TBI

    final XMLReporter reporter = cfg.getXMLReporter();
    if (reporter == null) { // no one to tell about it
        return;
    }
    final Location loc = getLocation();
    final String type = ErrorConsts.WT_XML_DECL;
    final String msg = MessageFormat.format(ErrorConsts.W_MIXED_ENCODINGS,
            new Object[] { mFoundEncoding, inputEnc });
    /* 30-May-2008, tatus: Should wrap all the info as XMLValidationProblem
     *   since that's Woodstox' contract wrt. relatedInformation field.
     */
    final XMLValidationProblem prob = new XMLValidationProblem(loc, msg,
            XMLValidationProblem.SEVERITY_WARNING, type);
    reporter.report(msg, type, prob, loc);
}
/*
/////////////////////////////////////////////////////
// Internal methods, loading input data
/////////////////////////////////////////////////////
*/
/**
 * Resets the buffer pointers and reads from the underlying Reader,
 * from the beginning of the buffer, until at least the requested
 * number of characters is available.
 *
 * @param minimum Minimum number of characters needed
 *
 * @return True if at least <code>minimum</code> characters were loaded;
 *   false if a read returned less than one character before that
 *   (whatever was read until then remains in the buffer)
 */
protected boolean initialLoad(int minimum)
    throws IOException
{
    mInputPtr = 0;
    for (mInputEnd = 0; mInputEnd < minimum; ) {
        final int room = mCharBuffer.length - mInputEnd;
        final int got = mIn.read(mCharBuffer, mInputEnd, room);
        if (got < 1) {
            return false;
        }
        mInputEnd += got;
    }
    return true;
}
/**
 * Replaces contents of the buffer with the next block of characters
 * read from the underlying Reader; pointer is reset to the start of
 * the buffer.
 *
 * @throws WstxEOFException If the read returns less than one character
 */
protected void loadMore()
throws IOException, WstxException
{
/* Need to make sure offsets are properly updated for error
* reporting purposes, and do this now while previous amounts
* are still known.
*/
mInputProcessed += mInputEnd;
// Row start is kept relative to the buffer, so it shifts back by the discarded amount
mInputRowStart -= mInputEnd;
mInputPtr = 0;
mInputEnd = mIn.read(mCharBuffer, 0, mCharBuffer.length);
if (mInputEnd < 1) {
throw new WstxEOFException(ParsingErrorMsgs.SUFFIX_IN_XML_DECL,
getLocation());
}
}
/*
/////////////////////////////////////////////////////
// Implementations of abstract parsing methods
/////////////////////////////////////////////////////
*/
/**
 * Moves the input pointer back by one, so that the most recently
 * returned character gets returned again by the next read.
 */
protected void pushback() {
--mInputPtr;
}
/**
 * @return Next character: directly from the buffer if it has one
 *   available, otherwise via {@link #nextChar} (which loads more input)
 */
protected int getNext()
    throws IOException, WstxException
{
    if (mInputPtr < mInputEnd) {
        return mCharBuffer[mInputPtr++];
    }
    return nextChar();
}
/**
 * Skips characters up to and including space (code 0x20), keeping track
 * of linefeeds and reporting null characters, and returns the first
 * character after them.
 *
 * @param reqWs If true, at least one character has to be skipped before
 *   the returned one; if none was, the problem is reported using
 *   <code>reportUnexpectedChar</code>
 *
 * @return First character with code above that of space
 */
protected int getNextAfterWs(boolean reqWs)
    throws IOException, WstxException
{
    for (int skipped = 0; ; ++skipped) {
        final char c = (mInputPtr < mInputEnd) ?
            mCharBuffer[mInputPtr++] : nextChar();

        if (c > CHAR_SPACE) {
            if (reqWs && skipped == 0) {
                reportUnexpectedChar(c, ERR_XMLDECL_EXP_SPACE);
            }
            return c;
        }
        if (c == CHAR_CR || c == CHAR_LF) {
            skipCRLF(c);
        } else if (c == CHAR_NULL) {
            reportNull();
        }
    }
}
/**
 * Matches input against the given keyword, starting from the second
 * character of the keyword (the first one is not compared here;
 * presumably the caller has already matched it).
 *<p>
 * Null characters in input are reported before the comparison is done:
 * since {@link #CHAR_NULL} is also the "match succeeded" marker,
 * returning a mismatching null character as is would make the failed
 * match look like a successful one.
 *
 * @param exp Keyword expected
 *
 * @return First character that does not match expected, if any;
 *    CHAR_NULL if match succeeded
 */
protected int checkKeyword(String exp)
    throws IOException, WstxException
{
    int len = exp.length();

    for (int ptr = 1; ptr < len; ++ptr) {
        char c = (mInputPtr < mInputEnd) ?
            mCharBuffer[mInputPtr++] : nextChar();

        // Must be checked first; see the method comment for details
        if (c == CHAR_NULL) {
            reportNull();
        }
        if (c != exp.charAt(ptr)) {
            return c;
        }
    }
    return CHAR_NULL;
}
/**
 * Reads characters up to (and including) the closing quote character,
 * storing as many of them as fit into the given buffer.
 *
 * @param kw Buffer in which characters of the value are stored;
 *   characters that do not fit are read but dropped
 * @param quoteChar Character that ends the value
 *
 * @return Number of characters stored, if that is less than length
 *   of the buffer; -1 otherwise (buffer got completely filled, so the
 *   value may have been truncated)
 */
protected int readQuotedValue(char[] kw, int quoteChar)
throws IOException, WstxException
{
int i = 0;
int len = kw.length;
while (true) {
char c = (mInputPtr < mInputEnd) ?
mCharBuffer[mInputPtr++] : nextChar();
// Linefeeds only update row information here; the char itself is still stored below
if (c == CHAR_CR || c == CHAR_LF) {
skipCRLF(c);
} else if (c == CHAR_NULL) {
reportNull();
}
if (c == quoteChar) {
return (i < len) ? i : -1;
}
// Let's just truncate longer values, but match quote
if (i < len) {
kw[i++] = c;
}
}
}
/**
 * @return Newly constructed location object built from the public and
 *   system ids and the current offset, row and column information
 */
protected Location getLocation()
{
// NOTE(review): offset is one less than the input pointer; presumably so that
// it points to the character most recently read -- confirm
return new WstxInputLocation(null, mPublicId, mSystemId,
mInputProcessed + mInputPtr - 1,
mInputRow, mInputPtr - mInputRowStart);
}
/*
/////////////////////////////////////////////////////
// Internal methods, single-byte access methods
/////////////////////////////////////////////////////
*/
/**
 * Returns the next character, first loading more input via
 * {@link #loadMore} if the buffer has been exhausted.
 *
 * @return Character at the input pointer, which is then advanced by one
 */
protected char nextChar()
throws IOException, WstxException
{
if (mInputPtr >= mInputEnd) {
loadMore();
}
return mCharBuffer[mInputPtr++];
}
/**
 * Method called after a linefeed character has been read, to update
 * row information. If the character was a carriage return, the
 * following character is also consumed if it is a linefeed, so that
 * the two-character sequence is counted as a single row change.
 *
 * @param lf Linefeed character that was just read
 */
protected void skipCRLF(char lf)
throws IOException, WstxException
{
if (lf == CHAR_CR) {
char c = (mInputPtr < mInputEnd) ?
mCharBuffer[mInputPtr++] : nextChar();
if (c != BYTE_LF) {
--mInputPtr; // pushback if not 2-char/byte lf
}
}
++mInputRow;
// New row starts at the first character after the linefeed
mInputRowStart = mInputPtr;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/WstxInputSource.java 0000644 0001750 0001750 00000017143 11745427074 024320 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
import java.io.IOException;
import java.net.URL;
import javax.xml.stream.XMLStreamException;
/**
* Interface that defines API actual parsers (stream readers)
* can use to read input from various input sources.
* Needed to abstract out details of getting input from primary input
* files, secondary (potentially cached) referenced documents, and from
* parsed entities, as well as for allowing hierarchic location
* information for error reporting.
*/
public abstract class WstxInputSource
{
/**
* Parent in input source stack
*/
protected final WstxInputSource mParent;
/**
* Name/id of the entity that was expanded to produce this input source;
* null if not entity-originated. Used for catching recursive expansions
* of entities.
*/
protected final String mFromEntity;
/**
* Scope of the reader when this entity was initially expanded. Snapshot
* that will generally be used by the reader to match scoping
* limitations, such as proper nesting entity expansion with respect
* to element and declaration nesting.
*/
protected int mScopeId = 0;
/*
//////////////////////////////////////////////////////////
// Life-cycle:
//////////////////////////////////////////////////////////
*/
/**
 * @param parent Parent of this source in the input source stack
 * @param fromEntity Name/id of the entity that was expanded to produce
 *   this input source; null if not entity-originated
 */
protected WstxInputSource(WstxInputSource parent, String fromEntity)
{
mParent = parent;
mFromEntity = fromEntity;
}
/**
* Method that can be called to override originally defined source.
*
* @param url New base URL to set; may be null.
*
* @since 4.0
*/
public abstract void overrideSource(URL url);
/*
//////////////////////////////////////////////////////////
// Basic accessors:
//////////////////////////////////////////////////////////
*/
public final WstxInputSource getParent() {
return mParent;
}
/**
 * Checks whether this input source, or any of its ancestors in the
 * input source stack, was expanded from the specified entity.
 *<p>
 * Entity ids are expected to have been interned (by whatever
 * uniqueness mechanism is in use), which is why they are compared by
 * identity and not with <code>equals</code>.
 *
 * @param entityId Id of the entity to look for; null never matches
 *
 * @return True if a source expanded from the entity was found
 */
public boolean isOrIsExpandedFrom(String entityId)
{
    if (entityId == null) { // should never be, really
        return false;
    }
    for (WstxInputSource src = this; src != null; src = src.mParent) {
        if (src.mFromEntity == entityId) {
            return true;
        }
    }
    return false;
}
/**
* @return True, if this input source was directly expanded from an
* internal entity (general, parsed); false if not (from external
* entity, DTD ext. subset, main document)
*/
public abstract boolean fromInternalEntity();
/*
//////////////////////////////////////////////////////////
// Location info:
//////////////////////////////////////////////////////////
*/
public abstract URL getSource();
public abstract String getPublicId();
public abstract String getSystemId();
/**
* Method usually called to get a parent location for another input
* source. Works since at this point context (line, row, chars) information
* has already been saved to this object.
*/
protected abstract WstxInputLocation getLocation();
public abstract WstxInputLocation getLocation(long total, int row, int col);
public String getEntityId() { return mFromEntity; }
public int getScopeId() { return mScopeId; }
/*
//////////////////////////////////////////////////////////
// Actual input handling
//////////////////////////////////////////////////////////
*/
/**
* Method called by Reader when current input has changed to come
* from this input source. Should reset/initialize input location
* information Reader keeps, for error messages to work ok.
* Stores the scope id, then delegates the rest to
* {@link #doInitInputLocation}.
*
* @param reader Reader whose data structures are to be used for
* returning data read
* @param currScopeId Scope id to store as the scope id of this
* input source
*/
public final void initInputLocation(WstxInputData reader, int currScopeId) {
mScopeId = currScopeId;
doInitInputLocation(reader);
}
/**
* Method sub-classes implement to do the actual work of
* {@link #initInputLocation}, after scope id has been stored.
*
* @param reader Reader passed to {@link #initInputLocation}
*/
protected abstract void doInitInputLocation(WstxInputData reader);
/**
* Method called to read at least one more char from input source, and
* update input data appropriately.
*
* @return Number of characters read from the input source (at least 1),
* if it had any input; -1 if input source has no more input.
*/
public abstract int readInto(WstxInputData reader)
throws IOException, XMLStreamException;
/**
* Method called by reader when it needs to have at least the specified
* number of consecutive input characters in its buffer, and currently
* does not. If so, it asks input source to do whatever it has to do
* to try to get more data, if possible (including moving stuff in
* input buffer if necessary and possible).
*
* @param reader Reader whose input buffer is to be filled
* @param minAmount Minimum number of characters needed
*
* @return True if input source was able to provide specific number of
* characters or more; false if not. In latter case, source is free
* to return zero or more characters any way.
*/
public abstract boolean readMore(WstxInputData reader, int minAmount)
throws IOException, XMLStreamException;
/**
* Method Reader calls when this input source is being stored, when
* a nested input source gets used instead (due to entity expansion).
* Needs to get location info from Reader and store it in this Object.
*/
public abstract void saveContext(WstxInputData reader);
/**
* Method Reader calls when this input source is resumed as the
* current source. Needs to update Reader's input location data
* used for error messages etc.
*/
public abstract void restoreContext(WstxInputData reader);
/**
* Method reader calls for this input source when it has encountered
* EOF. This may or may not close the underlying stream/reader; what
* happens depends on configuration
*/
public abstract void close() throws IOException;
/**
* Method reader MAY call to force full closing of the underlying
* input stream(s)/reader(s). No checks are done regarding configuration,
* but input source object is to deal gracefully with multiple calls
* (ie. it's not an error for reader to call this more than once).
*/
public abstract void closeCompletely() throws IOException;
/*
//////////////////////////////////////////////////////////
// Overridden standard methods:
//////////////////////////////////////////////////////////
*/
public String toString() {
StringBuffer sb = new StringBuffer(80);
sb.append("
* Note: public to give access for unit tests that need it...
*/
public static WstxInputSource sourceFromString(WstxInputSource parent, ReaderConfig cfg,
                                               String refName, int xmlVersion,
                                               String refContent)
    throws IOException, XMLStreamException
{
    /* Content comes from memory, so there is no public id to pass
     * (null); the reference name is used as the system id as well.
     */
    final Reader content = new StringReader(refContent);
    return sourceFromR(parent, cfg, refName, xmlVersion, content, null, refName);
}
/**
 * Constructs an entity input source that reads from the given stream,
 * after bootstrapping it (as a non-main document).
 *
 * @param parent Input source from which the reference was made; may be
 *   null, in which case there is no base URL to resolve the system id
 *   against
 * @param cfg Reader configuration to use
 * @param refName Name of the reference; passed on to the source constructed
 * @param xmlVersion Xml version passed to the bootstrapper
 * @param is Stream to read content from
 * @param pubId Public id of the content, if any
 * @param sysId System id of the content; if non-empty, used for
 *   determining the source URL
 */
private static WstxInputSource sourceFromIS(WstxInputSource parent,
                                            ReaderConfig cfg,
                                            String refName, int xmlVersion,
                                            InputStream is,
                                            String pubId, String sysId)
    throws IOException, XMLStreamException
{
    StreamBootstrapper bs = StreamBootstrapper.getInstance(pubId, sysId, is);
    Reader r = bs.bootstrapInput(cfg, false, xmlVersion);
    // Parent is optional, same as with Reader-based sources
    URL ctxt = (parent == null) ? null : parent.getSource();

    // If we got a real sys id, we do know the source...
    if (sysId != null && sysId.length() > 0) {
        ctxt = URLUtil.urlFromSystemId(sysId, ctxt);
    }
    return InputSourceFactory.constructEntitySource
        (cfg, parent, refName, bs, pubId, sysId, xmlVersion, ctxt, r);
}
/**
 * Constructs an entity input source that reads from the given Reader,
 * after bootstrapping it (as a non-main document).
 *
 * @param parent Input source from which the reference was made; may be null
 * @param sysId System id of the content; if non-empty, used for
 *   determining the source URL
 */
private static WstxInputSource sourceFromR(WstxInputSource parent, ReaderConfig cfg,
                                           String refName, int xmlVersion,
                                           Reader r,
                                           String pubId, String sysId)
    throws IOException, XMLStreamException
{
    /* Last null -> no app-provided encoding (doesn't matter for non-
     * main-level handling)
     */
    final ReaderBootstrapper bootstrapper = ReaderBootstrapper.getInstance(pubId, sysId, r, null);
    final Reader actual = bootstrapper.bootstrapInput(cfg, false, xmlVersion);

    URL ctxt = null;
    if (parent != null) {
        ctxt = parent.getSource();
    }
    final boolean gotSysId = (sysId != null) && (sysId.length() > 0);
    if (gotSysId) {
        ctxt = URLUtil.urlFromSystemId(sysId, ctxt);
    }
    return InputSourceFactory.constructEntitySource
        (cfg, parent, refName, bootstrapper, pubId, sysId, xmlVersion, ctxt, actual);
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/CompletelyCloseable.java 0000644 0001750 0001750 00000000226 11745427074 025073 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.IOException;
/**
 * Interface for objects that expose a "close completely" operation.
 */
public interface CompletelyCloseable
{
    /**
     * Presumably forces full closing of the underlying resource(s),
     * as opposed to a regular close -- exact semantics are defined by
     * implementations.
     *
     * @throws IOException If closing fails
     */
    void closeCompletely() throws IOException;
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/package.html 0000644 0001750 0001750 00000000346 11745427074 022565 0 ustar giovanni giovanni
* Main reason for the input data portion to be factored out of main
* class is that this way it can also be passed to nested input handling
* Objects, which can then manipulate input buffers of the caller,
* efficiently.
*/
public class WstxInputData
{
// // // Some well-known chars:
/**
* Null-character is used as return value from some method(s), since
* it is not a legal character in an XML document.
*/
public final static char CHAR_NULL = '\u0000';
public final static char INT_NULL = 0;
public final static char CHAR_SPACE = (char) 0x0020;
public final static char INT_SPACE = 0x0020;
/**
* This constant defines the highest Unicode character allowed
* in XML content.
*/
public final static int MAX_UNICODE_CHAR = 0x10FFFF;
/*
////////////////////////////////////////////////////
// Character validity constants, structs
////////////////////////////////////////////////////
*/
/**
* We will only use validity array for first 256 characters, mostly
* because after those characters it's easier to do fairly simple
* block checks.
*/
private final static int VALID_CHAR_COUNT = 0x100;
// These are the same for both 1.0 and 1.1...
// private final static int FIRST_VALID_FOR_FIRST = 0x0041; // 'A'
// private final static int FIRST_VALID_FOR_REST = 0x002D; // '-'
// Markers stored in the validity table: not a name char at all; valid
// anywhere in a name; valid anywhere except as the first char
private final static byte NAME_CHAR_INVALID_B = (byte) 0;
private final static byte NAME_CHAR_ALL_VALID_B = (byte) 1;
private final static byte NAME_CHAR_VALID_NONFIRST_B = (byte) -1;
// Table indexed by char code (0 - 255), filled in by the static block below
private final static byte[] sCharValidity = new byte[VALID_CHAR_COUNT];
static {
/* First, since all valid-as-first chars are also valid-as-other chars,
* we'll initialize common chars:
*/
sCharValidity['_'] = NAME_CHAR_ALL_VALID_B;
for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {
sCharValidity['A' + i] = NAME_CHAR_ALL_VALID_B;
sCharValidity['a' + i] = NAME_CHAR_ALL_VALID_B;
}
// not all chars in range 0xC0 - 0xFF are fully valid, but it is simplest
// to first mark the whole range...
for (int i = 0xC0; i < VALID_CHAR_COUNT; ++i) {
sCharValidity[i] = NAME_CHAR_ALL_VALID_B;
}
// ... now we can 'revert' ones not fully valid:
sCharValidity[0xD7] = NAME_CHAR_INVALID_B;
sCharValidity[0xF7] = NAME_CHAR_INVALID_B;
/* And then we can proceed with ones only valid-as-other.
*/
sCharValidity['-'] = NAME_CHAR_VALID_NONFIRST_B;
sCharValidity['.'] = NAME_CHAR_VALID_NONFIRST_B;
sCharValidity[0xB7] = NAME_CHAR_VALID_NONFIRST_B;
for (int i = '0'; i <= '9'; ++i) {
sCharValidity[i] = NAME_CHAR_VALID_NONFIRST_B;
}
}
/**
* Public identifiers only use 7-bit ascii range.
*/
private final static int VALID_PUBID_CHAR_COUNT = 0x80;
// Table indexed by char code (0 - 127); entries left as zero mark invalid chars
private final static byte[] sPubidValidity = new byte[VALID_PUBID_CHAR_COUNT];
// private final static byte PUBID_CHAR_INVALID_B = (byte) 0;
private final static byte PUBID_CHAR_VALID_B = (byte) 1;
static {
// Ascii letters, both cases
for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {
sPubidValidity['A' + i] = PUBID_CHAR_VALID_B;
sPubidValidity['a' + i] = PUBID_CHAR_VALID_B;
}
// Digits
for (int i = '0'; i <= '9'; ++i) {
sPubidValidity[i] = PUBID_CHAR_VALID_B;
}
// 3 main white space types are valid (linefeed, carriage return, space)
sPubidValidity[0x0A] = PUBID_CHAR_VALID_B;
sPubidValidity[0x0D] = PUBID_CHAR_VALID_B;
sPubidValidity[0x20] = PUBID_CHAR_VALID_B;
// And many of punctuation/separator ascii chars too:
sPubidValidity['-'] = PUBID_CHAR_VALID_B;
sPubidValidity['\''] = PUBID_CHAR_VALID_B;
sPubidValidity['('] = PUBID_CHAR_VALID_B;
sPubidValidity[')'] = PUBID_CHAR_VALID_B;
sPubidValidity['+'] = PUBID_CHAR_VALID_B;
sPubidValidity[','] = PUBID_CHAR_VALID_B;
sPubidValidity['.'] = PUBID_CHAR_VALID_B;
sPubidValidity['/'] = PUBID_CHAR_VALID_B;
sPubidValidity[':'] = PUBID_CHAR_VALID_B;
sPubidValidity['='] = PUBID_CHAR_VALID_B;
sPubidValidity['?'] = PUBID_CHAR_VALID_B;
sPubidValidity[';'] = PUBID_CHAR_VALID_B;
sPubidValidity['!'] = PUBID_CHAR_VALID_B;
sPubidValidity['*'] = PUBID_CHAR_VALID_B;
sPubidValidity['#'] = PUBID_CHAR_VALID_B;
sPubidValidity['@'] = PUBID_CHAR_VALID_B;
sPubidValidity['$'] = PUBID_CHAR_VALID_B;
sPubidValidity['_'] = PUBID_CHAR_VALID_B;
sPubidValidity['%'] = PUBID_CHAR_VALID_B;
}
/*
////////////////////////////////////////////////////
// Configuration
////////////////////////////////////////////////////
*/
/**
* Flag that indicates whether XML content is to be treated as per
* XML 1.1 specification or not (if not, it'll use xml 1.0).
*/
protected boolean mXml11 = false;
/*
////////////////////////////////////////////////////
// Current input data
////////////////////////////////////////////////////
*/
/**
* Current buffer from which data is read; generally data is read into
* buffer from input source, but not always (especially when using nested
* input contexts when expanding parsed entity references etc).
*/
protected char[] mInputBuffer;
/**
* Pointer to next available character in buffer
*/
protected int mInputPtr = 0;
/**
* Index of character after last available one in the buffer.
*/
protected int mInputEnd = 0;
/*
////////////////////////////////////////////////////
// Current input location information
////////////////////////////////////////////////////
*/
/**
* Number of characters that were contained in previous blocks
* (blocks that were already processed prior to the current buffer).
*/
protected long mCurrInputProcessed = 0L;
/**
* Current row location of current point in input buffer, starting
* from 1
*/
protected int mCurrInputRow = 1;
/**
* Current index of the first character of the current row in input
* buffer. Needed to calculate column position, if necessary; benefit
* of not having column itself is that this only has to be updated
* once per line.
*/
protected int mCurrInputRowStart = 0;
/*
////////////////////////////////////////////////////
// Life-cycle
////////////////////////////////////////////////////
*/
protected WstxInputData() {
}
/**
* Copies the input buffer reference, buffer pointers and input location
* information from given instance to this one. Note that the buffer
* itself is not copied: both instances refer to the same array
* after the call.
*<p>
* Note: Only public due to sub-classes needing to call this on
* base class instance from different package (confusing?)
*
* @param src Instance to copy state from
*/
public void copyBufferStateFrom(WstxInputData src)
{
mInputBuffer = src.mInputBuffer;
mInputPtr = src.mInputPtr;
mInputEnd = src.mInputEnd;
mCurrInputProcessed = src.mCurrInputProcessed;
mCurrInputRow = src.mCurrInputRow;
mCurrInputRowStart = src.mCurrInputRowStart;
}
/*
////////////////////////////////////////////////////
// Public/package API, character classes
////////////////////////////////////////////////////
*/
/**
 * Checks whether given character can start an XML name, using the
 * rules of the xml version (1.0 or 1.1) this instance is configured
 * for. Colon (:) is NOT accepted here; caller has to check for it
 * separately (since it generally affects namespace mapping of a
 * qualified name).
 *
 * @param c Character to check
 *
 * @return True if the character is a valid first character of a name
 */
protected final boolean isNameStartChar(char c)
{
    if (c > 0x7A) { // past 'z': need the version-specific tables
        return mXml11 ? XmlChars.is11NameStartChar(c) : XmlChars.is10NameStartChar(c);
    }
    // Up to 'z', rules are the same for 1.0 and 1.1: letters and underscore
    return (c >= 'a')
        || (c >= 'A' && c <= 'Z')
        || (c == '_');
}
/**
 * Checks whether given character can be used in an XML name in a
 * position other than the first one, using the rules of the xml
 * version (1.0 or 1.1) this instance is configured for. Colon (:) is
 * NOT accepted here; caller has to check for it separately (since it
 * generally affects namespace mapping of a qualified name).
 *
 * @param c Character to check
 *
 * @return True if the character is a valid non-first character of a name
 */
protected final boolean isNameChar(char c)
{
    if (c > 0x7A) { // past 'z': need the version-specific tables
        return mXml11 ? XmlChars.is11NameChar(c) : XmlChars.is10NameChar(c);
    }
    // Up to 'z': letters, digits, and a few punctuation chars
    return (c >= 'a')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || (c == '.') || (c == '-') || (c == '_');
}
/**
 * Static variant of the name start character check, for callers that
 * pass the configuration explicitly.
 *
 * @param c Character to check
 * @param nsAware Whether namespace-aware rules are used; if not, colon
 *   is accepted as a name start character too
 * @param xml11 True to use xml 1.1 rules, false to use xml 1.0 rules
 *
 * @return True if the character is a valid first character of a name
 */
public final static boolean isNameStartChar(char c, boolean nsAware, boolean xml11)
{
    if (c > 0x7A) { // past 'z': need the version-specific tables
        return xml11 ? XmlChars.is11NameStartChar(c) : XmlChars.is10NameStartChar(c);
    }
    // Up to 'z', rules are the same for 1.0 and 1.1
    if (c == ':') {
        return !nsAware;
    }
    return (c >= 'a')
        || (c >= 'A' && c <= 'Z')
        || (c == '_');
}
/**
 * Static variant of the name character check, for callers that pass
 * the configuration explicitly.
 *
 * @param c Character to check
 * @param nsAware Whether namespace-aware rules are used; if not, colon
 *   is accepted as a name character too
 * @param xml11 True to use xml 1.1 rules, false to use xml 1.0 rules
 *
 * @return True if the character is a valid non-first character of a name
 */
public final static boolean isNameChar(char c, boolean nsAware, boolean xml11)
{
    if (c > 0x7A) { // past 'z': need the version-specific tables
        return xml11 ? XmlChars.is11NameChar(c) : XmlChars.is10NameChar(c);
    }
    if (c == ':') {
        return !nsAware;
    }
    return (c >= 'a')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || (c == '.') || (c == '-') || (c == '_');
}
/**
* Method that can be called to check whether given String contains
* any characters that are not legal XML names.
*<p>
* Note that an empty String is not reported as a problem (-1 is
* returned for it).
*
* @param name Name to check
* @param nsAware Whether namespace-aware rules are used; if not,
* colon is accepted as a name character
* @param xml11 True to use xml 1.1 rules, false to use xml 1.0 rules
*
* @return Index of the first illegal xml name characters, if any;
* -1 if the name is completely legal
*/
public final static int findIllegalNameChar(String name, boolean nsAware, boolean xml11)
{
int len = name.length();
if (len < 1) {
return -1;
}
char c = name.charAt(0);
// First char legal?
if (c <= 0x7A) { // 'z' or earlier
if (c < 0x61) { // 'a' - 'z' (0x61 - 0x7A) are ok
if (c < 0x41) { // before 'A' just white space (except colon)
if (c != ':' || nsAware) { // ':' == 0x3A
return 0;
}
} else if ((c > 0x5A) && (c != '_')) {
// 'A' - 'Z' and '_' are ok
return 0;
}
}
} else {
if (xml11) {
if (!XmlChars.is11NameStartChar(c)) {
return 0;
}
} else {
if (!XmlChars.is10NameStartChar(c)) {
return 0;
}
}
}
// Rest of the chars: each branch that accepts the char continues;
// falling through to the end of the loop body means it is illegal
for (int i = 1; i < len; ++i) {
c = name.charAt(i);
if (c <= 0x7A) { // 'z' or earlier
if (c >= 0x61) { // 'a' - 'z' are ok
continue;
}
if (c <= 0x5A) {
if (c >= 0x41) { // 'A' - 'Z' ok too
continue;
}
// As are 0-9, '.' and '-'
if ((c >= 0x30 && c <= 0x39) || (c == '.') || (c == '-')) {
continue;
}
// And finally, colon, in non-ns-aware mode
if (c == ':' && !nsAware) { // ':' == 0x3A
continue;
}
} else if (c == 0x5F) { // '_' is ok too
continue;
}
} else {
if (xml11) {
if (XmlChars.is11NameChar(c)) {
continue;
}
} else {
if (XmlChars.is10NameChar(c)) {
continue;
}
}
}
return i;
}
return -1;
}
/**
 * Method that can be called to check whether given String contains
 * any characters that are not legal in an XML name token (nmtoken).
 * Unlike with names, there are no additional restrictions for the
 * first character: every character, including the first one, just
 * has to be a valid name character.
 *<p>
 * Note that an empty String is not reported as a problem (-1 is
 * returned for it).
 *
 * @param nmtoken Token to check
 * @param nsAware Whether namespace-aware rules are used; if not,
 *   colon is accepted as a name character
 * @param xml11 True to use xml 1.1 rules, false to use xml 1.0 rules
 *
 * @return Index of the first illegal character, if any;
 *   -1 if the token is completely legal
 */
public final static int findIllegalNmtokenChar(String nmtoken, boolean nsAware, boolean xml11)
{
    final int len = nmtoken.length();
    // No special handling for the first char: loop has to cover it too
    for (int i = 0; i < len; ++i) {
        if (!isNameChar(nmtoken.charAt(i), nsAware, xml11)) {
            return i;
        }
    }
    return -1;
}
/**
 * @return True if code of the character is that of space (0x20) or
 *   lower. Note that this includes all control characters below
 *   space, not just the white space ones.
 */
public final static boolean isSpaceChar(char c)
{
return (c <= CHAR_SPACE);
}
/**
 * Builds a description of given character, for use in messages.
 *
 * @param c Character to describe
 *
 * @return For ISO control characters, a marker with the character code
 *   (the character itself is not included); for others, the character
 *   in single quotes followed by its code in decimal, and for
 *   codes above 255, also in hexadecimal
 */
public static String getCharDesc(char c)
{
    final int code = c;
    if (Character.isISOControl(c)) {
        return "(CTRL-CHAR, code " + code + ")";
    }
    String desc = "'" + c + "' (code " + code;
    if (code > 255) {
        desc += " / 0x" + Integer.toHexString(code);
    }
    return desc + ")";
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/StreamBootstrapper.java 0000644 0001750 0001750 00000100076 11745427074 025010 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ParsingErrorMsgs;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.exc.*;
/**
* Input bootstrap class used with streams, when encoding is not known
* (when encoding is specified by application, a reader is constructed,
* and then reader-based bootstrapper is used).
* after last valid byte in the buffer
*/
private StreamBootstrapper(String pubId, String sysId, byte[] data, int start, int end)
{
super(pubId, sysId);
// Content comes from the given block: there is no stream to read from
mIn = null;
// NOTE(review): presumably false because the buffer is owned by the caller,
// as the factory method comment states -- confirm
mRecycleBuffer = false;
mByteBuffer = data;
mInputPtr = start;
mInputEnd = end;
}
/*
////////////////////////////////////////
// Public API
////////////////////////////////////////
*/
/**
* Factory method used when the underlying data provider is an
* actual stream.
*/
public static StreamBootstrapper getInstance(String pubId, String sysId, InputStream in)
{
    // Straight delegation to the stream-based constructor
    final StreamBootstrapper bootstrapper = new StreamBootstrapper(pubId, sysId, in);
    return bootstrapper;
}
/**
* Factory method used when the underlying data provider is a pre-allocated
* block source, and no stream is used.
* Additionally the buffer passed is not owned by the bootstrapper
* or Reader that is created, so it is not to be recycled.
*/
public static StreamBootstrapper getInstance(String pubId, String sysId, byte[] data, int start, int end)
{
    // Straight delegation to the block-based constructor
    final StreamBootstrapper bootstrapper = new StreamBootstrapper(pubId, sysId, data, start, end);
    return bootstrapper;
}
/**
 * Method that figures out the encoding to use (based on physical
 * encoding resolved by {@link #resolveStreamEncoding}, and on xml
 * declaration, if one is found), stores the result as the input
 * encoding, and then constructs a Reader to use for decoding
 * the content.
 *
 * @param cfg Configuration object; used for getting input buffer
 *   length and allocating the byte buffer (if one was not passed),
 *   and passed to constructed readers
 * @param mainDoc Passed as is to <code>readXmlDecl</code>
 * @param xmlVersion Passed to <code>readXmlDecl</code>; also used for
 *   determining xml 1.1 handling if there is no xml declaration
 *
 * @return One of specialized readers (UTF-8, ISO-Latin1, Ascii, UTF-32)
 *   if normalized encoding matches; otherwise an
 *   {@link InputStreamReader} for the encoding
 */
public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
throws IOException, XMLStreamException
{
String normEnc = null;
// First, let's get the buffers...
int bufSize = cfg.getInputBufferLength();
if (bufSize < MIN_BUF_SIZE) {
bufSize = MIN_BUF_SIZE;
}
if (mByteBuffer == null) { // non-null if we were passed a buffer
mByteBuffer = cfg.allocFullBBuffer(bufSize);
}
// Physical encoding (bytes per char, endianness) comes first
resolveStreamEncoding();
if (hasXmlDecl()) {
// note: readXmlDecl will set mXml11Handling too
readXmlDecl(mainDoc, xmlVersion);
if (mFoundEncoding != null) {
normEnc = verifyXmlEncoding(mFoundEncoding);
}
} else {
/* We'll actually then just inherit whatever main doc had...
* (or in case there was no parent, just copy the 'unknown')
*/
mXml11Handling = (XmlConsts.XML_V_11 == xmlVersion);
}
// Now, have we figured out the encoding?
if (normEnc == null) { // not via xml declaration
/* 21-Sep-2007, TSa: As with any non-UTF-8 encoding, declaration
* isn't optional any more. Besides, we need that information
* anyway to know which variant it is.
*/
if (mEBCDIC) {
if (mFoundEncoding == null || mFoundEncoding.length() == 0) {
reportXmlProblem("Missing encoding declaration: underlying encoding looks like an EBCDIC variant, but no xml encoding declaration found");
}
// Hmmh. What should be the canonical name? Let's just use found encoding?
normEnc = mFoundEncoding;
} else if (mBytesPerChar == 2) { // UTF-16, BE/LE
normEnc = mBigEndian ? CharsetNames.CS_UTF16BE : CharsetNames.CS_UTF16LE;
} else if (mBytesPerChar == 4) { // UCS-4... ?
/* 22-Mar-2005, TSa: JDK apparently has no way of dealing
* with these encodings... not sure if and how it should
* be dealt with, really. Name could be UCS-4xx... or
* perhaps UTF-32xx
*/
normEnc = mBigEndian ? CharsetNames.CS_UTF32BE : CharsetNames.CS_UTF32LE;
} else {
// Ok, default has to be UTF-8, as per XML specs
normEnc = CharsetNames.CS_UTF8;
}
}
// May still get refined below (UTF-16/UTF-32 without explicit endianness)
mInputEncoding = normEnc;
/* And then the reader. Let's figure out if we can use our own fast
* implementations first:
*/
BaseReader r;
// Normalized, can thus use straight equality checks now
if (normEnc == CharsetNames.CS_UTF8) {
r = new UTF8Reader(cfg, mIn, mByteBuffer, mInputPtr, mInputEnd, mRecycleBuffer);
} else if (normEnc == CharsetNames.CS_ISO_LATIN1) {
r = new ISOLatinReader(cfg, mIn, mByteBuffer, mInputPtr, mInputEnd, mRecycleBuffer);
} else if (normEnc == CharsetNames.CS_US_ASCII) {
r = new AsciiReader(cfg, mIn, mByteBuffer, mInputPtr, mInputEnd, mRecycleBuffer);
} else if (normEnc.startsWith(CharsetNames.CS_UTF32)) {
// let's augment with actual endianness info
if (normEnc == CharsetNames.CS_UTF32) {
mInputEncoding = mBigEndian ? CharsetNames.CS_UTF32BE : CharsetNames.CS_UTF32LE;
}
r = new UTF32Reader(cfg, mIn, mByteBuffer, mInputPtr, mInputEnd,
mRecycleBuffer, mBigEndian);
} else {
// Nah, JDK needs to try it
// Ok; first, do we need to merge stuff back?
InputStream in = mIn;
// Bytes already buffered (but not consumed) must be served first
if (mInputPtr < mInputEnd) {
in = new MergedStream(cfg, in, mByteBuffer, mInputPtr, mInputEnd);
}
/* 20-Jan-2006, TSa: Ok; although it is possible to declare
* stream as 'UTF-16', JDK may need help in figuring out
* the right order, so let's be explicit:
*/
if (normEnc == CharsetNames.CS_UTF16) {
mInputEncoding = normEnc = mBigEndian ? CharsetNames.CS_UTF16BE : CharsetNames.CS_UTF16LE;
}
try {
// note: returns directly; xml 1.1 compliancy setting below is not applied
return new InputStreamReader(in, normEnc);
} catch (UnsupportedEncodingException usex) {
throw new WstxIOException("Unsupported encoding: "+usex.getMessage());
}
}
// Only our own readers get here; they need to know about xml 1.1 handling
if (mXml11Handling) {
r.setXmlCompliancy(XmlConsts.XML_V_11);
}
return r;
}
/**
* Since this class only gets used when encoding is not explicitly
* passed, need use the encoding that was auto-detected...
*/
public String getInputEncoding() {
    // Simple accessor for the encoding stored by bootstrapInput()
    final String encoding = mInputEncoding;
    return encoding;
}
/**
 * @return Total offset within input: sum of processed count and
 *   current input pointer, divided by bytes-per-char value if that
 *   is above 1
 */
public int getInputTotal() {
    final int byteTotal = mInputProcessed + mInputPtr;
    return (mBytesPerChar > 1) ? (byteTotal / mBytesPerChar) : byteTotal;
}
/**
 * @return Distance between current input pointer and start of the
 *   current row, divided by bytes-per-char value if that is above 1
 */
public int getInputColumn() {
    final int byteColumn = mInputPtr - mInputRowStart;
    return (mBytesPerChar > 1) ? (byteColumn / mBytesPerChar) : byteColumn;
}
/*
////////////////////////////////////////
// Internal methods, parsing
////////////////////////////////////////
*/
/**
* Method called to try to figure out physical encoding the underlying
* input stream uses.
*/
protected void resolveStreamEncoding()
throws IOException, WstxException
{
// Let's first set defaults:
mBytesPerChar = 0;
mBigEndian = true;
/* Ok; first just need 4 bytes for determining bytes-per-char from
* BOM or first char(s) of likely xml declaration:
*/
if (ensureLoaded(4)) {
bomblock:
do { // BOM/auto-detection block
int quartet = (mByteBuffer[0] << 24)
| ((mByteBuffer[1] & 0xFF) << 16)
| ((mByteBuffer[2] & 0xFF) << 8)
| (mByteBuffer[3] & 0xFF);
/* Handling of (usually) optional BOM (required for
* multi-byte formats); first 32-bit charsets:
*/
switch (quartet) {
case 0x0000FEFF:
mBigEndian = true;
mInputPtr = mBytesPerChar = 4;
break bomblock;
case 0xFFFE0000: // UCS-4, LE?
mInputPtr = mBytesPerChar = 4;
mBigEndian = false;
break bomblock;
case 0x0000FFFE: // UCS-4, in-order...
reportWeirdUCS4("2143");
break bomblock;
case 0x0FEFF0000: // UCS-4, in-order...
reportWeirdUCS4("3412");
break bomblock;
}
// Ok, if not, how about 16-bit encoding BOMs?
int msw = quartet >>> 16;
if (msw == 0xFEFF) { // UTF-16, BE
mInputPtr = mBytesPerChar = 2;
mBigEndian = true;
break;
}
if (msw == 0xFFFE) { // UTF-16, LE
mInputPtr = mBytesPerChar = 2;
mBigEndian = false;
break;
}
// And if not, then UTF-8 BOM?
if ((quartet >>> 8) == 0xEFBBBF) { // UTF-8
mInputPtr = 3;
mBytesPerChar = 1;
mBigEndian = true; // doesn't really matter
break;
}
/* And if that wasn't succesful, how about auto-detection
* for ' 0);
// Let's update location markers to ignore BOM.
mInputProcessed = -mInputPtr;
mInputRowStart = mInputPtr;
}
/* Hmmh. If we haven't figured it out, let's just assume
* UTF-8 as per XML specs:
*/
mByteSizeFound = (mBytesPerChar != 0);
if (!mByteSizeFound) {
mBytesPerChar = 1;
mBigEndian = true; // doesn't matter
}
}
/**
* @return Normalized encoding name
*/
protected String verifyXmlEncoding(String enc)
    throws WstxException
{
    final String norm = CharsetNames.normalize(enc);

    /* Name is normalized, so identity comparisons are used for
     * matching. Let's verify declared encoding is compatible with
     * what was detected; first, the encodings with single-byte units:
     */
    if (norm == CharsetNames.CS_UTF8
        || norm == CharsetNames.CS_ISO_LATIN1
        || norm == CharsetNames.CS_US_ASCII) {
        verifyEncoding(norm, 1);
        return norm;
    }
    // Then 2-byte ones
    if (norm == CharsetNames.CS_UTF16) {
        /* 22-Mar-2005, TSa: BOM is not required here (check for it
         * has been disabled): only unit size is verified
         */
        verifyEncoding(norm, 2);
        return norm;
    }
    if (norm == CharsetNames.CS_UTF16LE) {
        verifyEncoding(norm, 2, false);
        return norm;
    }
    if (norm == CharsetNames.CS_UTF16BE) {
        verifyEncoding(norm, 2, true);
        return norm;
    }
    // And finally 4-byte ones
    if (norm == CharsetNames.CS_UTF32) {
        // As with UTF-16, BOM is not required
        verifyEncoding(norm, 4);
        return norm;
    }
    if (norm == CharsetNames.CS_UTF32LE) {
        verifyEncoding(norm, 4, false);
        return norm;
    }
    if (norm == CharsetNames.CS_UTF32BE) {
        verifyEncoding(norm, 4, true);
    }
    // Anything else is returned without verification
    return norm;
}
/*
/////////////////////////////////////////////////////
// Internal methods, loading input data
/////////////////////////////////////////////////////
*/
/**
 * Method that tries to make sure that at least specified number of
 * bytes are available in the buffer, reading more from the input
 * stream (if there is one) as necessary.
 *
 * @param minimum Number of bytes needed, counting from current
 *   input pointer
 *
 * @return True if enough bytes are available; false if not (no
 *   stream to read from, or read returned less than one byte)
 */
protected boolean ensureLoaded(int minimum)
    throws IOException
{
    /* Let's assume here buffer has enough room -- this will always
     * be true for the limited used this method gets
     */
    for (int available = mInputEnd - mInputPtr; available < minimum; ) {
        if (mIn == null) { // block source, nothing more to read
            return false;
        }
        final int count = mIn.read(mByteBuffer, mInputEnd, mByteBuffer.length - mInputEnd);
        if (count < 1) {
            return false;
        }
        mInputEnd += count;
        available += count;
    }
    return true;
}
/**
 * Method that discards current buffer contents, and reads the next
 * block from the input stream to the beginning of the buffer.
 *
 * @throws WstxEOFException If there is no stream, or if read returns
 *   less than one byte
 */
protected void loadMore()
    throws IOException, WstxException
{
    /* Offsets need to be updated before the read, while the amount
     * held by the buffer is still known. Note that these are all in
     * bytes, not chars (for multibyte encodings)
     */
    final int consumed = mInputEnd;
    mInputProcessed += consumed;
    mInputRowStart -= consumed;
    mInputPtr = 0;

    int count = -1;
    if (mIn != null) {
        count = mIn.read(mByteBuffer, 0, mByteBuffer.length);
    }
    mInputEnd = count;
    if (count < 1) {
        throw new WstxEOFException(ParsingErrorMsgs.SUFFIX_IN_XML_DECL,
                                   getLocation());
    }
}
/*
/////////////////////////////////////////////////////
// Implementations of abstract parsing methods
/////////////////////////////////////////////////////
*/
/**
 * Moves input pointer back by one character: that is, by absolute
 * value of the bytes-per-char setting (which is negative for
 * translated input).
 */
protected void pushback() {
    final int step = (mBytesPerChar < 0) ? -mBytesPerChar : mBytesPerChar;
    mInputPtr -= step;
}
/**
 * Reads the next character, using the access method that matches
 * the current bytes-per-char setting.
 *
 * @return Next character; for single-byte input, unsigned value of
 *   the next byte
 */
protected int getNext()
    throws IOException, WstxException
{
    switch (mBytesPerChar) {
    case 1: // plain single-byte access
        {
            final byte b = (mInputPtr < mInputEnd) ?
                mByteBuffer[mInputPtr++] : nextByte();
            return (b & 0xFF);
        }
    case -1: // need to translate
        return nextTranslated();
    default:
        return nextMultiByte();
    }
}
/**
 * Skips white space, if any, and then reads the character that
 * follows.
 *
 * @param reqWs If true, at least one white space character must have
 *   been skipped; if none was, problem is reported via
 *   <code>reportUnexpectedChar</code>
 *
 * @return First character after skipped white space
 */
protected int getNextAfterWs(boolean reqWs)
    throws IOException, WstxException
{
    final int skipped;
    switch (mBytesPerChar) {
    case 1: // single byte
        skipped = skipSbWs();
        break;
    case -1: // translated
        skipped = skipTranslatedWs();
        break;
    default: // multi byte
        skipped = skipMbWs();
    }

    if (reqWs && skipped == 0) {
        reportUnexpectedChar(getNext(), ERR_XMLDECL_EXP_SPACE);
    }

    // And then the equivalent of getNext(), inlined
    if (mBytesPerChar == 1) {
        final byte b = (mInputPtr < mInputEnd) ?
            mByteBuffer[mInputPtr++] : nextByte();
        return (b & 0xFF);
    }
    return (mBytesPerChar == -1) ? nextTranslated() : nextMultiByte();
}
/**
* @return First character that does not match expected, if any;
* CHAR_NULL if match succeeded
*/
protected int checkKeyword(String exp)
    throws IOException, WstxException
{
    // Just need to dispatch to the variant that matches the encoding
    switch (mBytesPerChar) {
    case 1:
        return checkSbKeyword(exp);
    case -1:
        return checkTranslatedKeyword(exp);
    default:
        return checkMbKeyword(exp);
    }
}
/**
 * Reads characters into given buffer until the closing quote
 * character is found, or until buffer is full. Linefeeds (CR, LF
 * and CR+LF) are stored as a single LF.
 *
 * @param kw Buffer to store characters in
 * @param quoteChar Character that ends the value
 *
 * @return Number of characters stored, if closing quote was found
 *   before buffer got full; -1 otherwise
 */
protected int readQuotedValue(char[] kw, int quoteChar)
    throws IOException, WstxException
{
    final int capacity = kw.length;
    final boolean singleByte = (mBytesPerChar == 1);
    final boolean multiByte = (mBytesPerChar > 1);

    for (int count = 0; count < capacity; ) {
        int c;

        if (singleByte) {
            byte b = (mInputPtr < mInputEnd) ?
                mByteBuffer[mInputPtr++] : nextByte();
            if (b == BYTE_NULL) {
                reportNull();
            }
            if (b == BYTE_CR || b == BYTE_LF) {
                skipSbLF(b);
                b = BYTE_LF;
            }
            c = (b & 0xFF);
        } else if (multiByte) {
            c = nextMultiByte();
            if (c == CHAR_CR || c == CHAR_LF) {
                skipMbLF(c);
                c = CHAR_LF;
            }
        } else { // translated
            c = nextTranslated();
            if (c == CHAR_CR || c == CHAR_LF) {
                skipTranslatedLF(c);
                c = CHAR_LF;
            }
        }

        if (c == quoteChar) {
            // count is always below capacity within the loop
            return count;
        }
        kw[count++] = (char) c;
    }

    /* If we end up this far, we ran out of buffer space... let's let
     * caller figure that out, though
     */
    return -1;
}
/**
 * Checks whether input starts with the xml declaration signature:
 * characters '&lt;', '?', 'x', 'm' and 'l', followed by a character
 * that is not above <code>CHAR_SPACE</code>. If so, signature is
 * skipped; if not, input pointer is left where it was.
 *
 * @return True if signature was found (and skipped); false if not
 *   (including the case of not having enough input for the check)
 */
protected boolean hasXmlDecl()
    throws IOException, WstxException
{
    /* Separate handling for common and fast case; 1/variable byte
     * encodings that have ASCII subset: can check bytes directly
     */
    if (mBytesPerChar == 1) {
        // There has to be at least 6 bytes available
        if (!ensureLoaded(6)) {
            return false;
        }
        final byte[] buf = mByteBuffer;
        final int base = mInputPtr;
        final boolean found = (buf[base] == '<')
            && (buf[base+1] == '?')
            && (buf[base+2] == 'x')
            && (buf[base+3] == 'm')
            && (buf[base+4] == 'l')
            && ((buf[base+5] & 0xFF) <= CHAR_SPACE);
        if (found) { // let's skip stuff so far
            mInputPtr = base + 6;
        }
        return found;
    }

    // Otherwise either translated (EBCDIC), or fixed-length multi-byte
    final boolean translated = (mBytesPerChar == -1);
    // Is there enough data for checks?
    if (!ensureLoaded(translated ? 6 : (6 * mBytesPerChar))) {
        return false;
    }
    final int start = mInputPtr; // if we have to 'unread' chars
    final boolean found;
    if (translated) {
        found = (nextTranslated() == '<')
            && (nextTranslated() == '?')
            && (nextTranslated() == 'x')
            && (nextTranslated() == 'm')
            && (nextTranslated() == 'l')
            && (nextTranslated() <= CHAR_SPACE);
    } else {
        found = (nextMultiByte() == '<')
            && (nextMultiByte() == '?')
            && (nextMultiByte() == 'x')
            && (nextMultiByte() == 'm')
            && (nextMultiByte() == 'l')
            && (nextMultiByte() <= CHAR_SPACE);
    }
    if (!found) { // push data back
        mInputPtr = start;
    }
    return found;
}
/**
 * @return Location object constructed from public and system ids,
 *   current offset (minus one), row and column
 */
protected Location getLocation()
{
    /* For fixed-size multi-byte encodings byte offsets need to be
     * divided to get character locations; for other encodings they
     * are used as is (xml declaration only uses shortest codepoints,
     * so that char count == byte count).
     */
    final int charWidth = (mBytesPerChar > 1) ? mBytesPerChar : 1;
    final int total = (mInputProcessed + mInputPtr) / charWidth;
    final int col = (mInputPtr - mInputRowStart) / charWidth;

    return new WstxInputLocation(null, mPublicId, mSystemId,
                                 total - 1, // 0-based
                                 mInputRow, col);
}
/*
/////////////////////////////////////////////////////
// Internal methods, single-byte access methods
/////////////////////////////////////////////////////
*/
/**
 * @return Next byte from the buffer; more input is loaded first if
 *   buffer has been fully consumed
 */
protected byte nextByte()
    throws IOException, WstxException
{
    if (mInputPtr < mInputEnd) {
        return mByteBuffer[mInputPtr++];
    }
    // Buffer exhausted: load resets the pointer
    loadMore();
    return mByteBuffer[mInputPtr++];
}
/**
 * Skips white space (bytes with unsigned value not above
 * <code>CHAR_SPACE</code>) from single-byte input; linefeeds update
 * row information, and null bytes are reported.
 *
 * @return Number of white space characters skipped (linefeed counts
 *   as one)
 */
protected int skipSbWs()
    throws IOException, WstxException
{
    int skipped = 0;

    for (;;) {
        final byte b = (mInputPtr < mInputEnd) ?
            mByteBuffer[mInputPtr++] : nextByte();

        if ((b & 0xFF) > CHAR_SPACE) {
            // Not white space: un-read it, and we are done
            --mInputPtr;
            return skipped;
        }
        if (b == BYTE_CR || b == BYTE_LF) {
            skipSbLF(b);
        } else if (b == BYTE_NULL) {
            reportNull();
        }
        ++skipped;
    }
}
/**
 * Method called after reading a linefeed byte (CR or LF) from
 * single-byte input: consumes LF that follows CR (if any), and
 * updates row information.
 *
 * @param lfByte Linefeed byte that was just read
 */
protected void skipSbLF(byte lfByte)
    throws IOException, WstxException
{
    if (lfByte == BYTE_CR) {
        final byte following;
        if (mInputPtr < mInputEnd) {
            following = mByteBuffer[mInputPtr++];
        } else {
            following = nextByte();
        }
        if (following != BYTE_LF) {
            mInputPtr -= 1; // pushback if not 2-char/byte lf
        }
    }
    mInputRow += 1;
    mInputRowStart = mInputPtr;
}
/**
* @return First character that does not match expected, if any;
* CHAR_NULL if match succeeded
*/
protected int checkSbKeyword(String expected)
    throws IOException, WstxException
{
    final int len = expected.length();
    // note: comparison starts from the second character (index 1)
    int ptr = 1;

    while (ptr < len) {
        final byte b = (mInputPtr < mInputEnd) ?
            mByteBuffer[mInputPtr++] : nextByte();
        if (b == BYTE_NULL) {
            reportNull();
        }
        final int actual = (b & 0xFF);
        if (actual != expected.charAt(ptr)) {
            return actual;
        }
        ++ptr;
    }
    return CHAR_NULL;
}
/*
/////////////////////////////////////////////////////
// Internal methods, multi-byte/translated access/checks
/////////////////////////////////////////////////////
*/
/**
 * Reads the next character from input that uses fixed-length
 * multi-byte encoding: 2 bytes per character if bytes-per-char
 * setting is 2, otherwise 4; combined using current endianness.
 * Null characters are reported.
 *
 * @return Character decoded
 */
protected int nextMultiByte()
    throws IOException, WstxException
{
    final byte b1 = (mInputPtr < mInputEnd) ?
        mByteBuffer[mInputPtr++] : nextByte();
    final byte b2 = (mInputPtr < mInputEnd) ?
        mByteBuffer[mInputPtr++] : nextByte();
    final int c;

    if (mBytesPerChar == 2) {
        final byte hi = mBigEndian ? b1 : b2;
        final byte lo = mBigEndian ? b2 : b1;
        c = ((hi & 0xFF) << 8) | (lo & 0xFF);
    } else {
        // Has to be 4 bytes
        final byte b3 = (mInputPtr < mInputEnd) ?
            mByteBuffer[mInputPtr++] : nextByte();
        final byte b4 = (mInputPtr < mInputEnd) ?
            mByteBuffer[mInputPtr++] : nextByte();
        final byte msb, upper, lower, lsb;

        if (mBigEndian) {
            msb = b1; upper = b2; lower = b3; lsb = b4;
        } else {
            msb = b4; upper = b3; lower = b2; lsb = b1;
        }
        // note: most significant byte is not masked
        c = (msb << 24) | ((upper & 0xFF) << 16)
            | ((lower & 0xFF) << 8) | (lsb & 0xFF);
    }

    // Let's catch null chars early
    if (c == 0) {
        reportNull();
    }
    return c;
}
/**
 * Reads the next byte, and maps it to a character using the
 * single-byte translation table.
 *
 * @return Translated character; for negative table entries, the
 *   negated value
 */
protected int nextTranslated()
    throws IOException, WstxException
{
    final byte raw = (mInputPtr < mInputEnd) ?
        mByteBuffer[mInputPtr++] : nextByte();
    final int mapped = mSingleByteTranslation[raw & 0xFF];
    // negative entries mark special chars... won't care for now
    return (mapped < 0) ? -mapped : mapped;
}
/**
 * Skips white space (characters not above <code>CHAR_SPACE</code>)
 * from multi-byte input; linefeeds update row information.
 *
 * @return Number of white space characters skipped (linefeed counts
 *   as one)
 */
protected int skipMbWs()
    throws IOException, WstxException
{
    int skipped = 0;

    for (;;) {
        final int c = nextMultiByte();

        if (c > CHAR_SPACE) {
            // Not white space: un-read the whole character
            mInputPtr -= mBytesPerChar;
            return skipped;
        }
        if (c == CHAR_CR || c == CHAR_LF) {
            skipMbLF(c);
        } else if (c == CHAR_NULL) {
            reportNull();
        }
        ++skipped;
    }
}
/**
 * Skips white space from translated input; in addition to characters
 * not above <code>CHAR_SPACE</code>, <code>CHAR_NEL</code> is
 * skipped as well. Linefeeds (CR, LF) update row information, and
 * null characters are reported.
 *
 * @return Number of white space characters skipped
 */
protected int skipTranslatedWs()
    throws IOException, WstxException
{
    int skipped = 0;

    for (;;) {
        final int c = nextTranslated();

        // Hmmh. Are we to accept NEL (0x85)?
        if (c > CHAR_SPACE && c != CHAR_NEL) {
            --mInputPtr; // un-read
            return skipped;
        }
        if (c == CHAR_CR || c == CHAR_LF) {
            skipTranslatedLF(c);
        } else if (c == CHAR_NULL) {
            reportNull();
        }
        ++skipped;
    }
}
/**
 * Method called after reading a linefeed character (CR or LF) from
 * multi-byte input: consumes LF that follows CR (if any), and
 * updates row information.
 *
 * @param lf Linefeed character that was just read
 */
protected void skipMbLF(int lf)
    throws IOException, WstxException
{
    // Next char is only read after CR; un-read if it is not LF
    if (lf == CHAR_CR && nextMultiByte() != CHAR_LF) {
        mInputPtr -= mBytesPerChar;
    }
    mInputRow += 1;
    mInputRowStart = mInputPtr;
}
/**
 * Method called after reading a linefeed character (CR or LF) from
 * translated input: consumes LF that follows CR (if any), and
 * updates row information.
 *
 * @param lf Linefeed character that was just read
 */
protected void skipTranslatedLF(int lf)
    throws IOException, WstxException
{
    // Next char is only read after CR; un-read if it is not LF
    if (lf == CHAR_CR && nextTranslated() != CHAR_LF) {
        --mInputPtr;
    }
    mInputRow += 1;
    mInputRowStart = mInputPtr;
}
/**
* @return First character that does not match expected, if any;
* CHAR_NULL if match succeeded
*/
protected int checkMbKeyword(String expected)
    throws IOException, WstxException
{
    final int len = expected.length();
    // note: comparison starts from the second character (index 1)
    int ptr = 1;

    while (ptr < len) {
        final int actual = nextMultiByte();
        if (actual == BYTE_NULL) {
            reportNull();
        }
        if (actual != expected.charAt(ptr)) {
            return actual;
        }
        ++ptr;
    }
    return CHAR_NULL;
}
/**
 * Variant of keyword check used for translated input.
 *
 * @return First character that does not match expected, if any;
 *   CHAR_NULL if match succeeded
 */
protected int checkTranslatedKeyword(String expected)
    throws IOException, WstxException
{
    final int len = expected.length();
    // note: comparison starts from the second character (index 1)
    int ptr = 1;

    while (ptr < len) {
        final int actual = nextTranslated();
        if (actual == BYTE_NULL) {
            reportNull();
        }
        if (actual != expected.charAt(ptr)) {
            return actual;
        }
        ++ptr;
    }
    return CHAR_NULL;
}
/*
////////////////////////////////////////
// Other private methods:
////////////////////////////////////////
*/
/**
 * Verifies that the declared encoding uses the same number of bytes
 * per character as the physical encoding that was detected (if any
 * was); reports a problem if not.
 *
 * @param id Name of the declared encoding (used in messages)
 * @param bpc Bytes per character the declared encoding uses
 */
private void verifyEncoding(String id, int bpc)
    throws WstxException
{
    // Nothing to verify if unit size was not detected, or if it matches
    if (!mByteSizeFound || bpc == mBytesPerChar) {
        return;
    }
    // [WSTX-138]: Needs to detect EBCDIC discrepancy
    if (mEBCDIC) {
        reportXmlProblem("Declared encoding '"+id+"' incompatible with auto-detected physical encoding (EBCDIC variant), can not decode input since actual code page not known");
    }
    reportXmlProblem("Declared encoding '"+id+"' uses "+bpc
                     +" bytes per character; but physical encoding appeared to use "+mBytesPerChar+"; cannot decode");
}
/**
 * Verifies that the declared encoding matches the detected physical
 * encoding (if any was detected) both by bytes per character and by
 * endianness; reports a problem if not.
 *
 * @param id Name of the declared encoding (used in messages)
 * @param bpc Bytes per character the declared encoding uses
 * @param bigEndian True if the declared encoding is big-endian
 */
private void verifyEncoding(String id, int bpc, boolean bigEndian)
    throws WstxException
{
    if (!mByteSizeFound) { // nothing to compare against
        return;
    }
    verifyEncoding(id, bpc);
    if (bigEndian == mBigEndian) {
        return;
    }
    final String declaredOrder = bigEndian ? "big" : "little";
    reportXmlProblem("Declared encoding '"+id+"' has different endianness ("
                     +declaredOrder+" endian) than what physical ordering appeared to be; cannot decode");
}
/**
 * Reports unsupported UCS-4 byte ordering, by throwing an exception.
 *
 * @param type Description of the byte order, included in the message
 *
 * @throws CharConversionException Always
 */
private void reportWeirdUCS4(String type)
    throws IOException
{
    final String msg = "Unsupported UCS-4 endianness ("+type+") detected";
    throw new CharConversionException(msg);
}
/*
private void reportMissingBOM(String enc)
throws WstxException
{
throw new WstxException("Missing BOM for encoding '"+enc+"'; can not be omitted",
getLocation());
}
*/
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/UTF8Writer.java 0000644 0001750 0001750 00000033553 11745427074 023100 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.*;
import com.ctc.wstx.api.WriterConfig;
/**
* Specialized buffering UTF-8 writer used by
* {@link com.ctc.wstx.sw.XmlWriter}.
* The main reason for custom version is to allow for efficient
* buffer recycling; the second benefit is that encoder has less
* overhead for short content encoding (compared to JDK default
* codecs).
*/
public final class UTF8Writer
    extends Writer
    implements CompletelyCloseable
{
    /**
     * Buffer length passed when allocating the output buffer
     */
    private final static int DEFAULT_BUF_LEN = 4000;

    // // // Boundaries of first (high) and second (low) surrogate ranges

    final static int SURR1_FIRST = 0xD800;
    final static int SURR1_LAST = 0xDBFF;
    final static int SURR2_FIRST = 0xDC00;
    final static int SURR2_LAST = 0xDFFF;

    /**
     * Configuration object used for allocating and releasing the
     * output buffer; may be null, in which case buffer is allocated
     * directly (and not released to anyone)
     */
    final WriterConfig mConfig;

    /**
     * Whether {@link #close} should also close the underlying stream
     */
    final boolean mAutoCloseOutput;

    final OutputStream mOut;

    /**
     * Output buffer; set to null when writer is closed
     */
    byte[] mOutBuffer;

    /**
     * Offset that leaves 4 'spare' bytes at the end of the buffer, so
     * that the longest encoded character always fits if output pointer
     * is not beyond this offset
     */
    final int mOutBufferLast;

    int mOutPtr;

    /**
     * When outputting chars outside of BMP, surrogate pairs need to be
     * coalesced. To do this, both parts must be known first; and since
     * it is possible pairs may be split between calls, we need temporary
     * storage for the first half
     */
    int mSurrogate = 0;

    /**
     * @param cfg Configuration used for buffer allocation; may be null
     * @param out Stream to write encoded content to
     * @param autoclose Whether {@link #close} is to close the stream too
     */
    public UTF8Writer(WriterConfig cfg, OutputStream out, boolean autoclose)
    {
        mConfig = cfg;
        mAutoCloseOutput = autoclose;
        mOut = out;
        mOutBuffer = (mConfig == null) ? new byte[DEFAULT_BUF_LEN] : cfg.allocFullBBuffer(DEFAULT_BUF_LEN);
        /* Max. expansion for a single char (in unmodified UTF-8) is
         * 4 bytes (or 3 depending on how you view it -- 4 when recombining
         * surrogate pairs)
         */
        mOutBufferLast = mOutBuffer.length - 4;
        mOutPtr = 0;
    }

    /*
    ////////////////////////////////////////////////////////
    // CompletelyCloseable impl
    ////////////////////////////////////////////////////////
     */

    /**
     * Closes this writer and the underlying stream, regardless of
     * the auto-close setting.
     */
    public void closeCompletely() throws IOException
    {
        _close(true);
    }

    /*
    ////////////////////////////////////////////////////////
    // java.io.Writer implementation
    ////////////////////////////////////////////////////////
     */

    /* !!! 30-Nov-2006, TSa: Due to co-variance between Appendable and
     *    Writer, this would not compile with javac 1.5, in 1.4 mode
     *    (source and target set to "1.4". Not a huge deal, but since
     *    the base impl is just fine, no point in overriding it.
     */
    /*
    public Writer append(char c)
        throws IOException
    // note: this is a JDK 1.5 method
    {
        write(c);
        return this;
    }
    */

    /**
     * Closes this writer; underlying stream is only closed if
     * auto-closing was enabled.
     */
    public void close() throws IOException
    {
        _close(mAutoCloseOutput);
    }

    /**
     * Writes out buffered content (if any), and flushes the
     * underlying stream.
     */
    public void flush()
        throws IOException
    {
        if (mOutPtr > 0 && mOutBuffer != null) {
            mOut.write(mOutBuffer, 0, mOutPtr);
            mOutPtr = 0;
        }
        mOut.flush();
    }

    public void write(char[] cbuf)
        throws IOException
    {
        write(cbuf, 0, cbuf.length);
    }

    /**
     * Encodes and buffers given characters. If the last character is
     * the first part of a surrogate pair, it is retained until the
     * next write call.
     *
     * @throws IOException For broken or unmatched surrogates, as well
     *   as for failures to write to the underlying stream
     */
    public void write(char[] cbuf, int off, int len)
        throws IOException
    {
        if (len < 2) {
            if (len == 1) {
                write(cbuf[off]);
            }
            return;
        }

        // First: do we have a leftover surrogate to deal with?
        if (mSurrogate > 0) {
            char second = cbuf[off++];
            --len;
            write(_convertSurrogate(second));
            // will have at least one more char
        }

        int outPtr = mOutPtr;
        byte[] outBuf = mOutBuffer;
        int outBufLast = mOutBufferLast; // has 4 'spare' bytes

        // All right; can just loop it nice and easy now:
        len += off; // len will now be the end of input buffer

        /* Output pointer is kept in a local variable within the loop;
         * finally-block ensures the member gets updated no matter how
         * we exit. Otherwise an exception (broken surrogate pair, write
         * failure) after an intermediate flush would leave a stale
         * pointer behind, and later flush/close would output wrong
         * content.
         */
        try {
            output_loop:
            for (; off < len; ) {
                /* First, let's ensure we can output at least 4 bytes
                 * (longest UTF-8 encoded codepoint):
                 */
                if (outPtr >= outBufLast) {
                    mOut.write(outBuf, 0, outPtr);
                    outPtr = 0;
                }

                int c = cbuf[off++];
                // And then see if we have an Ascii char:
                if (c < 0x80) { // If so, can do a tight inner loop:
                    outBuf[outPtr++] = (byte)c;
                    // Let's calc how many ascii chars we can copy at most:
                    int maxInCount = (len - off);
                    int maxOutCount = (outBufLast - outPtr);

                    if (maxInCount > maxOutCount) {
                        maxInCount = maxOutCount;
                    }
                    maxInCount += off;
                    ascii_loop:
                    while (true) {
                        if (off >= maxInCount) { // done with max. ascii seq
                            continue output_loop;
                        }
                        c = cbuf[off++];
                        if (c >= 0x80) {
                            break ascii_loop;
                        }
                        outBuf[outPtr++] = (byte) c;
                    }
                }

                // Nope, multi-byte:
                if (c < 0x800) { // 2-byte
                    outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
                    outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                } else { // 3 or 4 bytes
                    // Surrogates?
                    if (c < SURR1_FIRST || c > SURR2_LAST) {
                        outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
                        outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                        outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                        continue;
                    }
                    // Yup, a surrogate:
                    if (c > SURR1_LAST) { // must be from first range
                        throwIllegal(c);
                    }
                    mSurrogate = c;
                    // and if so, followed by another from next range
                    if (off >= len) { // unless we hit the end?
                        break;
                    }
                    c = _convertSurrogate(cbuf[off++]);
                    if (c > 0x10FFFF) { // illegal, as per RFC 3629
                        throwIllegal(c);
                    }
                    outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
                    outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                    outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                    outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                }
            }
        } finally {
            mOutPtr = outPtr;
        }
    }

    /**
     * Encodes and buffers a single character. First part of a
     * surrogate pair is only retained; it gets output, combined with
     * the second part, by the following write call.
     *
     * @throws IOException For broken or unmatched surrogates, code
     *   points above 0x10FFFF, as well as for failures to write
     *   to the underlying stream
     */
    public void write(int c)
        throws IOException
    {
        // First; do we have a left over surrogate?
        if (mSurrogate > 0) {
            c = _convertSurrogate(c);
            // If not, do we start with a surrogate?
        } else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
            // Illegal to get second part without first:
            if (c > SURR1_LAST) {
                throwIllegal(c);
            }
            // First part just needs to be held for now
            mSurrogate = c;
            return;
        }

        if (mOutPtr >= mOutBufferLast) { // let's require enough room, first
            mOut.write(mOutBuffer, 0, mOutPtr);
            mOutPtr = 0;
        }

        if (c < 0x80) { // ascii
            mOutBuffer[mOutPtr++] = (byte) c;
        } else {
            int ptr = mOutPtr;
            if (c < 0x800) { // 2-byte
                mOutBuffer[ptr++] = (byte) (0xc0 | (c >> 6));
                mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
            } else if (c <= 0xFFFF) { // 3 bytes
                mOutBuffer[ptr++] = (byte) (0xe0 | (c >> 12));
                mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
            } else { // 4 bytes
                if (c > 0x10FFFF) { // illegal, as per RFC 3629
                    throwIllegal(c);
                }
                mOutBuffer[ptr++] = (byte) (0xf0 | (c >> 18));
                mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
            }
            mOutPtr = ptr;
        }
    }

    public void write(String str)
        throws IOException
    {
        write(str, 0, str.length());
    }

    /**
     * Encodes and buffers given part of the String. If the last
     * character is the first part of a surrogate pair, it is retained
     * until the next write call.
     *
     * @throws IOException For broken or unmatched surrogates, as well
     *   as for failures to write to the underlying stream
     */
    public void write(String str, int off, int len)
        throws IOException
    {
        if (len < 2) {
            if (len == 1) {
                write(str.charAt(off));
            }
            return;
        }

        // First: do we have a leftover surrogate to deal with?
        if (mSurrogate > 0) {
            char second = str.charAt(off++);
            --len;
            write(_convertSurrogate(second));
            // will have at least one more char (case of 1 char was checked earlier on)
        }

        int outPtr = mOutPtr;
        byte[] outBuf = mOutBuffer;
        int outBufLast = mOutBufferLast; // has 4 'spare' bytes

        // All right; can just loop it nice and easy now:
        len += off; // len will now be the end of input buffer

        /* As with char array variant: finally-block ensures that the
         * output pointer member is updated even if an exception is
         * thrown after an intermediate flush.
         */
        try {
            output_loop:
            for (; off < len; ) {
                /* First, let's ensure we can output at least 4 bytes
                 * (longest UTF-8 encoded codepoint):
                 */
                if (outPtr >= outBufLast) {
                    mOut.write(outBuf, 0, outPtr);
                    outPtr = 0;
                }

                int c = str.charAt(off++);
                // And then see if we have an Ascii char:
                if (c < 0x80) { // If so, can do a tight inner loop:
                    outBuf[outPtr++] = (byte)c;
                    // Let's calc how many ascii chars we can copy at most:
                    int maxInCount = (len - off);
                    int maxOutCount = (outBufLast - outPtr);

                    if (maxInCount > maxOutCount) {
                        maxInCount = maxOutCount;
                    }
                    maxInCount += off;
                    ascii_loop:
                    while (true) {
                        if (off >= maxInCount) { // done with max. ascii seq
                            continue output_loop;
                        }
                        c = str.charAt(off++);
                        if (c >= 0x80) {
                            break ascii_loop;
                        }
                        outBuf[outPtr++] = (byte) c;
                    }
                }

                // Nope, multi-byte:
                if (c < 0x800) { // 2-byte
                    outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
                    outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                } else { // 3 or 4 bytes
                    // Surrogates?
                    if (c < SURR1_FIRST || c > SURR2_LAST) {
                        outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
                        outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                        outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                        continue;
                    }
                    // Yup, a surrogate:
                    if (c > SURR1_LAST) { // must be from first range
                        throwIllegal(c);
                    }
                    mSurrogate = c;
                    // and if so, followed by another from next range
                    if (off >= len) { // unless we hit the end?
                        break;
                    }
                    c = _convertSurrogate(str.charAt(off++));
                    if (c > 0x10FFFF) { // illegal, as per RFC 3629
                        throwIllegal(c);
                    }
                    outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
                    outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                    outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                    outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                }
            }
        } finally {
            mOutPtr = outPtr;
        }
    }

    /*
    ////////////////////////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////////////////////////
     */

    /**
     * Writes out buffered content, releases the buffer, and optionally
     * closes the underlying stream.
     *
     * @param forceClosing Whether underlying stream is to be closed
     *
     * @throws IOException If write or close fails; or, after everything
     *   else has been done, if first part of a surrogate pair was left
     *   without the second part
     */
    private final void _close(boolean forceClosing)
        throws IOException
    {
        byte[] buf = mOutBuffer;
        if (buf != null) {
            mOutBuffer = null;
            if (mOutPtr > 0) {
                mOut.write(buf, 0, mOutPtr);
                mOutPtr = 0;
            }
            if (mConfig != null) {
                mConfig.freeFullBBuffer(buf);
            }
        }

        if (forceClosing) {
            mOut.close();
        }

        /* Let's 'flush' orphan surrogate, no matter what; but only
         * after cleanly closing everything else.
         */
        int code = mSurrogate;
        if (code > 0) {
            mSurrogate = 0;
            throwIllegal(code);
        }
    }

    /**
     * Method called to calculate UTF codepoint, from a surrogate pair.
     * First part is taken from (and cleared from) the temporary
     * storage.
     *
     * @param secondPart Second part of the pair
     *
     * @throws IOException If second part is not within the range of
     *   second surrogates
     */
    private final int _convertSurrogate(int secondPart)
        throws IOException
    {
        int firstPart = mSurrogate;
        mSurrogate = 0;

        // Ok, then, is the second part valid?
        if (secondPart < SURR2_FIRST || secondPart > SURR2_LAST) {
            throw new IOException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
        }
        return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST);
    }

    /**
     * Method called to report a character that can not be output;
     * always throws an exception, with message chosen based on the
     * character code.
     */
    private void throwIllegal(int code)
        throws IOException
    {
        if (code > 0x10FFFF) { // over max?
            throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 3629");
        }
        if (code >= SURR1_FIRST) {
            if (code <= SURR1_LAST) { // Unmatched first part (closing without second part?)
                throw new IOException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")");
            }
            throw new IOException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")");
        }

        // should we ever get this?
        throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output");
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/UTF8Reader.java 0000644 0001750 0001750 00000036510 11745427074 023022 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
import java.io.*;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.XmlConsts;
/**
* Optimized Reader that reads UTF-8 encoded content from an input stream.
* In addition to doing (hopefully) optimal conversion, it can also take
* array of "pre-read" (leftover) bytes; this is necessary when preliminary
* stream/reader is trying to figure out XML encoding.
*/
public final class UTF8Reader
extends BaseReader
{
boolean mXml11 = false;
char mSurrogate = NULL_CHAR;
/**
* Total read character count; used for error reporting purposes
*/
int mCharCount = 0;
/**
* Total read byte count; used for error reporting purposes
*/
int mByteCount = 0;
/*
////////////////////////////////////////
// Life-cycle
////////////////////////////////////////
*/
/**
 * Constructs a reader instance; all arguments are passed as is
 * to the constructor of the base class.
 */
public UTF8Reader(ReaderConfig cfg, InputStream in, byte[] buf, int ptr, int len,
boolean recycleBuffer)
{
super(cfg, in, buf, ptr, len, recycleBuffer);
}
/**
 * Enables xml 1.1 specific handling if (and only if) given version
 * is <code>XmlConsts.XML_V_11</code>.
 */
public void setXmlCompliancy(int xmlVersion)
{
    final boolean isXml11 = (XmlConsts.XML_V_11 == xmlVersion);
    mXml11 = isXml11;
}
/*
////////////////////////////////////////
// Public API
////////////////////////////////////////
*/
public int read(char[] cbuf, int start, int len)
throws IOException
{
// Let's first ensure there's enough room...
if (start < 0 || (start+len) > cbuf.length) {
reportBounds(cbuf, start, len);
}
// Already EOF?
if (mByteBuffer == null) {
return -1;
}
if (len < 1) { // dummy call?
return 0;
}
len += start;
int outPtr = start;
// Ok, first; do we have a surrogate from last round?
if (mSurrogate != NULL_CHAR) {
cbuf[outPtr++] = mSurrogate;
mSurrogate = NULL_CHAR;
// No need to load more, already got one char
} else {
/* To prevent unnecessary blocking (esp. with network streams),
* we'll only require decoding of a single char
*/
int left = (mByteBufferEnd - mBytePtr);
/* So; only need to load more if we can't provide at least
* one more character. We need not do thorough check here,
* but let's check the common cases here: either completely
* empty buffer (left == 0), or one with less than max. byte
* count for a single char, and starting of a multi-byte
* encoding (this leaves possibility of a 2/3-byte char
* that is still fully accessible... but that can be checked
* by the load method)
*/
if (left < 4) {
// Need to load more?
if (left < 1 || mByteBuffer[mBytePtr] < 0) {
if (!loadMore(left)) { // (legal) EOF?
return -1;
}
}
}
}
/* This may look silly, but using a local var is indeed faster
* (if and when HotSpot properly gets things running) than
* member variable...
*/
byte[] buf = mByteBuffer;
int inPtr = mBytePtr;
int inBufLen = mByteBufferEnd;
main_loop:
while (outPtr < len) {
// At this point we have at least one byte available
int c = (int) buf[inPtr++];
/* Let's first do the quickie loop for common case; 7-bit
* ascii:
*/
if (c >= 0) { // ascii? can probably loop, then
if (c == 0x7F && mXml11) { // DEL illegal in xml1.1
int bytePos = mByteCount + inPtr - 1;
int charPos = mCharCount + (outPtr-start);
reportInvalidXml11(c, bytePos, charPos);
}
cbuf[outPtr++] = (char) c; // ok since MSB is never on
/* Ok, how many such chars could we safely process
* without overruns? (will combine 2 in-loop comparisons
* into just one)
*/
int outMax = (len - outPtr); // max output
int inMax = (inBufLen - inPtr); // max input
int inEnd = inPtr + ((inMax < outMax) ? inMax : outMax);
ascii_loop:
while (true) {
if (inPtr >= inEnd) {
break main_loop;
}
c = ((int) buf[inPtr++]) & 0xFF;
if (c >= 0x7F) { // DEL, or multi-byte
break ascii_loop;
}
cbuf[outPtr++] = (char) c;
}
if (c == 0x7F) {
if (mXml11) { // DEL illegal in xml1.1
int bytePos = mByteCount + inPtr - 1;
int charPos = mCharCount + (outPtr-start);
reportInvalidXml11(c, bytePos, charPos);
} // but not in xml 1.0
cbuf[outPtr++] = (char) c;
if(inPtr >= inEnd){
break main_loop;
}
continue main_loop;
}
}
int needed;
// Ok; if we end here, we got multi-byte combination
if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
c = (c & 0x1F);
needed = 1;
} else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
c = (c & 0x0F);
needed = 2;
} else if ((c & 0xF8) == 0xF0) {
// 4 bytes; double-char BS, with surrogates and all...
c = (c & 0x0F);
needed = 3;
} else {
reportInvalidInitial(c & 0xFF, outPtr-start);
// never gets here...
needed = 1;
}
/* Do we have enough bytes? If not, let's just push back the
* byte and leave, since we have already gotten at least one
* char decoded. This way we will only block (with read from
* input stream) when absolutely necessary.
*/
if ((inBufLen - inPtr) < needed) {
--inPtr;
break main_loop;
}
int d = (int) buf[inPtr++];
if ((d & 0xC0) != 0x080) {
reportInvalidOther(d & 0xFF, outPtr-start);
}
c = (c << 6) | (d & 0x3F);
if (needed > 1) { // needed == 1 means 2 bytes total
d = buf[inPtr++]; // 3rd byte
if ((d & 0xC0) != 0x080) {
reportInvalidOther(d & 0xFF, outPtr-start);
}
c = (c << 6) | (d & 0x3F);
if (needed > 2) { // 4 bytes? (need surrogates)
d = buf[inPtr++];
if ((d & 0xC0) != 0x080) {
reportInvalidOther(d & 0xFF, outPtr-start);
}
c = (c << 6) | (d & 0x3F);
if (c > XmlConsts.MAX_UNICODE_CHAR) {
reportInvalid(c, outPtr-start,
"(above "+Integer.toHexString(XmlConsts.MAX_UNICODE_CHAR)+") ");
}
/* Ugh. Need to mess with surrogates. Ok; let's inline them
* there, then, if there's room: if only room for one,
* need to save the surrogate for the rainy day...
*/
c -= 0x10000; // to normalize it starting with 0x0
cbuf[outPtr++] = (char) (0xD800 + (c >> 10));
// hmmh. can this ever be 0? (not legal, at least?)
c = (0xDC00 | (c & 0x03FF));
// Room for second part?
if (outPtr >= len) { // nope
mSurrogate = (char) c;
break main_loop;
}
// sure, let's fall back to normal processing:
} else {
/* Otherwise, need to check that 3-byte chars are
* legal ones (should not expand to surrogates;
* 0xFFFE and 0xFFFF are illegal)
*/
if (c >= 0xD800) {
// But first, let's check max chars:
if (c < 0xE000) {
reportInvalid(c, outPtr-start, "(a surrogate character) ");
} else if (c >= 0xFFFE) {
reportInvalid(c, outPtr-start, "");
}
} else if (mXml11 && c == 0x2028) { // LSEP?
/* 10-May-2006, TSa: Since LSEP is "non-associative",
* it needs additional handling. One way to do
* this is to convert preceding \r to \n. This
* should be implemented better when integrating
* decoder and tokenizer.
*/
if (outPtr > start && cbuf[outPtr-1] == '\r') {
cbuf[outPtr-1] = '\n';
}
c = CONVERT_LSEP_TO;
}
}
} else { // (needed == 1)
if (mXml11) { // high-order ctrl char detection...
if (c <= 0x9F) {
if (c == 0x85) { // NEL, let's convert?
c = CONVERT_NEL_TO;
} else if (c >= 0x7F) { // DEL, ctrl chars
int bytePos = mByteCount + inPtr - 1;
int charPos = mCharCount + (outPtr-start);
reportInvalidXml11(c, bytePos, charPos);
}
}
}
}
cbuf[outPtr++] = (char) c;
if (inPtr >= inBufLen) {
break main_loop;
}
}
mBytePtr = inPtr;
len = outPtr - start;
mCharCount += len;
return len;
}
/*
////////////////////////////////////////
// Internal methods
////////////////////////////////////////
*/
private void reportInvalidInitial(int mask, int offset)
throws IOException
{
// input (byte) ptr has been advanced by one, by now:
int bytePos = mByteCount + mBytePtr - 1;
int charPos = mCharCount + offset + 1;
throw new CharConversionException("Invalid UTF-8 start byte 0x"
+Integer.toHexString(mask)
+" (at char #"+charPos+", byte #"+bytePos+")");
}
private void reportInvalidOther(int mask, int offset)
throws IOException
{
int bytePos = mByteCount + mBytePtr - 1;
int charPos = mCharCount + offset;
throw new CharConversionException("Invalid UTF-8 middle byte 0x"
+Integer.toHexString(mask)
+" (at char #"+charPos+", byte #"+bytePos+")");
}
private void reportUnexpectedEOF(int gotBytes, int needed)
throws IOException
{
int bytePos = mByteCount + gotBytes;
int charPos = mCharCount;
throw new CharConversionException("Unexpected EOF in the middle of a multi-byte char: got "
+gotBytes+", needed "+needed
+", at char #"+charPos+", byte #"+bytePos+")");
}
private void reportInvalid(int value, int offset, String msg)
throws IOException
{
int bytePos = mByteCount + mBytePtr - 1;
int charPos = mCharCount + offset;
throw new CharConversionException("Invalid UTF-8 character 0x"
+Integer.toHexString(value)+msg
+" at char #"+charPos+", byte #"+bytePos+")");
}
/**
* @param available Number of "unused" bytes in the input buffer
*
* @return True, if enough bytes were read to allow decoding of at least
* one full character; false if EOF was encountered instead.
*/
private boolean loadMore(int available)
throws IOException
{
mByteCount += (mByteBufferEnd - available);
// Bytes that need to be moved to the beginning of buffer?
if (available > 0) {
/* 11-Nov-2008, TSa: can only move if we own the buffer; otherwise
* we are stuck with the data.
*/
if (mBytePtr > 0 && canModifyBuffer()) {
for (int i = 0; i < available; ++i) {
mByteBuffer[i] = mByteBuffer[mBytePtr+i];
}
mBytePtr = 0;
mByteBufferEnd = available;
}
} else {
/* Ok; here we can actually reasonably expect an EOF,
* so let's do a separate read right away:
*/
int count = readBytes();
if (count < 1) {
if (count < 0) { // -1
freeBuffers(); // to help GC?
return false;
}
// 0 count is no good; let's err out
reportStrangeStream();
}
}
/* We now have at least one byte... and that allows us to
* calculate exactly how many bytes we need!
*/
int c = (int) mByteBuffer[mBytePtr];
if (c >= 0) { // single byte (ascii) char... cool, can return
return true;
}
// Ok, a multi-byte char, let's check how many bytes we'll need:
int needed;
if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
needed = 2;
} else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
needed = 3;
} else if ((c & 0xF8) == 0xF0) {
// 4 bytes; double-char BS, with surrogates and all...
needed = 4;
} else {
reportInvalidInitial(c & 0xFF, 0);
// never gets here... but compiler whines without this:
needed = 1;
}
/* And then we'll just need to load up to that many bytes;
* if an EOF is hit, that'll be an error. But we need not do
* actual decoding here, just load enough bytes.
*/
while ((mBytePtr + needed) > mByteBufferEnd) {
int count = readBytesAt(mByteBufferEnd);
if (count < 1) {
if (count < 0) { // -1, EOF... no good!
freeBuffers();
reportUnexpectedEOF(mByteBufferEnd, needed);
}
// 0 count is no good; let's err out
reportStrangeStream();
}
}
return true;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/BranchingReaderSource.java 0000644 0001750 0001750 00000011307 11745427074 025345 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.util.TextBuffer;
/**
 * Specialized input source that can "branch" input it reads; essentially
 * both giving out read data AND also appending a copy of it to a
 * {@link TextBuffer}.
 *
 * Currently this Reader is only used as the main-level Reader, to allow for
 * branching of internal DTD subset to a text buffer if necessary.
 */
public final class BranchingReaderSource
    extends ReaderSource
{
    // // // Branching information

    /**
     * Buffer that branched content is appended to; null when no branch
     * is active.
     */
    TextBuffer mBranchBuffer = null;

    /**
     * Offset (within the input buffer) of the first character that has
     * not yet been copied to the branch buffer.
     */
    int mBranchStartOffset = 0;

    /**
     * Whether "\r" and "\r\n" are to be converted to "\n" when copying.
     */
    boolean mConvertLFs = false;

    /**
     * Flag that indicates that last char from previous buffer was
     * '\r', and that following '\n' (if there is one) needs to be
     * ignored.
     */
    boolean mGotCR = false;

    public BranchingReaderSource(ReaderConfig cfg, String pubId, String sysId, URL src,
                                 Reader r, boolean realClose)
    {
        /* null -> no parent,
         * null -> not from explicit entity (no id/name)
         */
        super(cfg, null, null, pubId, sysId, src, r, realClose);
    }

    /**
     * Copies any not-yet-branched content of the current buffer (up to
     * <code>mInputLast</code>) to the branch buffer, if a branch is
     * active, and then delegates to the super class implementation.
     */
    public int readInto(WstxInputData reader)
        throws IOException, XMLStreamException
    {
        // Need to flush out branched content?
        if (mBranchBuffer != null) {
            if (mInputLast > mBranchStartOffset) {
                appendBranched(mBranchStartOffset, mInputLast);
            }
            mBranchStartOffset = 0;
        }
        return super.readInto(reader);
    }

    /**
     * If a branch is active and there is still unread content in the
     * buffer, copies content already consumed by the reader (up to its
     * current input pointer) to the branch buffer; then delegates to the
     * super class implementation.
     */
    public boolean readMore(WstxInputData reader, int minAmount)
        throws IOException, XMLStreamException
    {
        // Existing data to output to branch?
        if (mBranchBuffer != null) {
            int ptr = reader.mInputPtr;
            int currAmount = mInputLast - ptr;
            if (currAmount > 0) {
                if (ptr > mBranchStartOffset) {
                    appendBranched(mBranchStartOffset, ptr);
                }
                mBranchStartOffset = 0;
            }
        }
        return super.readMore(reader, minAmount);
    }

    /*
    //////////////////////////////////////////////////
    // Branching methods; used mostly to make a copy
    // of parsed internal subsets.
    //////////////////////////////////////////////////
    */

    /**
     * Starts copying of input to given buffer.
     *
     * @param tb Buffer to append branched content to
     * @param startOffset Offset in the current input buffer from which
     *   copying is to start
     * @param convertLFs Whether "\r" and "\r\n" should be converted
     *   to "\n" in the copy
     */
    public void startBranch(TextBuffer tb, int startOffset,
                            boolean convertLFs)
    {
        mBranchBuffer = tb;
        mBranchStartOffset = startOffset;
        mConvertLFs = convertLFs;
        mGotCR = false;
    }

    /**
     * Ends the active branch (if any): content up to (but not including)
     * given offset is still copied to the branch buffer, after which
     * no more branching is done.
     *
     * @param endOffset Offset in the current input buffer that is one
     *   past the last character to copy
     */
    public void endBranch(int endOffset)
    {
        if (mBranchBuffer != null) {
            if (endOffset > mBranchStartOffset) {
                appendBranched(mBranchStartOffset, endOffset);
            }
            // Let's also make sure no branching is done from this point on:
            mBranchBuffer = null;
        }
    }

    /*
    //////////////////////////////////////////////////
    // Internal methods
    //////////////////////////////////////////////////
    */

    /**
     * Appends characters in range [startOffset, pastEnd[ of the input
     * buffer to the branch buffer, converting linefeeds if so configured.
     */
    private void appendBranched(int startOffset, int pastEnd) {
        // Main tricky thing here is just replacing of linefeeds...
        if (mConvertLFs) {
            char[] inBuf = mBuffer;
            /* this will also unshare() and ensure there's room for at
             * least one more char
             */
            char[] outBuf = mBranchBuffer.getCurrentSegment();
            int outPtr = mBranchBuffer.getCurrentSegmentSize();

            // Pending \n to skip?
            if (mGotCR && startOffset < pastEnd) {
                /* Flag only concerns the very first char that follows the
                 * trailing \r; has to be cleared once checked, or else
                 * leading linefeeds of all later chunks would be dropped too.
                 */
                mGotCR = false;
                if (inBuf[startOffset] == '\n') {
                    ++startOffset;
                }
            }

            while (startOffset < pastEnd) {
                char c = inBuf[startOffset++];
                if (c == '\r') {
                    if (startOffset < pastEnd) {
                        if (inBuf[startOffset] == '\n') {
                            ++startOffset;
                        }
                    } else {
                        // Can't peek at the next char yet; remember for next call
                        mGotCR = true;
                    }
                    c = '\n';
                }

                // Ok, let's add char to output:
                outBuf[outPtr++] = c;

                // Need more room?
                if (outPtr >= outBuf.length) {
                    outBuf = mBranchBuffer.finishCurrentSegment();
                    outPtr = 0;
                }
            }

            mBranchBuffer.setCurrentLength(outPtr);
        } else {
            mBranchBuffer.append(mBuffer, startOffset, pastEnd-startOffset);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/TextEscaper.java 0000644 0001750 0001750 00000004773 11745427074 023406 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.*;
/**
 * Static helper methods for writing text so that characters with special
 * meaning in the output context are replaced with entity/character
 * references.
 */
public final class TextEscaper
{
    private TextEscaper() { }

    /*
    /////////////////////////////////////////////////////////////
    // Static utility methods, for non-state-aware escaping
    /////////////////////////////////////////////////////////////
     */

    /**
     * Writes given attribute value, replacing '&lt;', '&amp;' and double
     * quote with <code>&amp;lt;</code>, <code>&amp;amp;</code> and
     * <code>&amp;quot;</code>, respectively. Enclosing quotes are NOT
     * written.
     *
     * @param w Writer to output escaped value to
     * @param value Attribute value to escape; must not be null
     *
     * @throws IOException if the underlying writer fails
     */
    public static void writeEscapedAttrValue(Writer w, String value)
        throws IOException
    {
        final int len = value.length();
        // Start of the pending run of characters that need no escaping
        int start = 0;

        for (int i = 0; i < len; ++i) {
            String escaped;
            switch (value.charAt(i)) {
            case '<':
                escaped = "&lt;";
                break;
            case '&':
                escaped = "&amp;";
                break;
            case '"':
                escaped = "&quot;";
                break;
            default:
                continue;
            }
            if (i > start) {
                w.write(value, start, i - start);
            }
            w.write(escaped);
            start = i + 1;
        }
        if (len > start) {
            w.write(value, start, len - start);
        }
    }

    /**
     * Quoting method used when outputting content that will be part of
     * DTD (internal/external subset). Additional quoting is needed for
     * percentage char, which signals parameter entities.
     *
     * @param w Writer to output escaped text to
     * @param ch Buffer that contains the text
     * @param offset Offset of the first character to output
     * @param len Number of characters to output
     *
     * @throws IOException if the underlying writer fails
     */
    public static void outputDTDText(Writer w, char[] ch, int offset, int len)
        throws IOException
    {
        final int end = offset + len;
        // Start of the pending run of characters that need no escaping
        int start = offset;

        for (int i = offset; i < end; ++i) {
            String escaped;
            switch (ch[i]) {
            case '&':
                /* Only need to quote to prevent it from being accidentally
                 * taken as part of char entity...
                 */
                escaped = "&amp;";
                break;
            case '%':
                // Need to quote, to prevent use as Param Entity marker
                escaped = "&#37;";
                break;
            case '"':
                // Need to quote assuming it encloses entity value
                escaped = "&#34;";
                break;
            default:
                continue;
            }
            if (i > start) {
                w.write(ch, start, i - start);
            }
            w.write(escaped);
            start = i + 1;
        }
        if (end > start) {
            w.write(ch, start, end - start);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/BufferRecycler.java 0000644 0001750 0001750 00000006503 11745427074 024052 0 ustar giovanni giovanni package com.ctc.wstx.io;
/**
* This is a small utility class, whose main functionality is to allow
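* simple reuse of raw byte/char buffers. It is usually used through
* a <code>ThreadLocal</code> member of the owning class, pointing to an
* instance of this class through a <code>SoftReference</code>.
*<p>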
* Regarding implementation: the key design goal is simplicity; and to
* that end, different types of buffers are handled separately. While
* code may look inelegant as a result (wouldn't it be neat to just
* have generic char[]/byte[] buffer accessors?), benefit is that
* no data structures are needed, just simple references. As long
* as usage pattern is well known (which it is, for stream readers)
* this should be highly optimal and robust implementation.
*/
public final class BufferRecycler
{
    // One slot per buffer type; a slot is null when nothing is available
    private char[] mSmallCBuffer = null; // temp buffers
    private char[] mMediumCBuffer = null; // text collector
    private char[] mFullCBuffer = null; // for actual parsing buffer

    private byte[] mFullBBuffer = null;

    public BufferRecycler() { }

    // // // Char buffers:

    // // Small buffers, for temporary parsing

    /**
     * @return Recycled small char buffer with at least <code>minSize</code>
     *   chars (ownership passes to the caller); null if there is no
     *   such buffer available
     */
    public char[] getSmallCBuffer(int minSize)
    {
        final char[] candidate = mSmallCBuffer;
        if (candidate == null || candidate.length < minSize) {
            return null;
        }
        mSmallCBuffer = null;
        return candidate;
    }

    /**
     * Makes given buffer available for reuse, replacing whichever small
     * char buffer was held before.
     */
    public void returnSmallCBuffer(char[] buffer)
    {
        mSmallCBuffer = buffer;
    }

    // // Medium buffers, for text output collection

    /**
     * @return Recycled medium char buffer with at least <code>minSize</code>
     *   chars (ownership passes to the caller); null if there is no
     *   such buffer available
     */
    public char[] getMediumCBuffer(int minSize)
    {
        final char[] candidate = mMediumCBuffer;
        if (candidate == null || candidate.length < minSize) {
            return null;
        }
        mMediumCBuffer = null;
        return candidate;
    }

    /**
     * Makes given buffer available for reuse, replacing whichever medium
     * char buffer was held before.
     */
    public void returnMediumCBuffer(char[] buffer)
    {
        mMediumCBuffer = buffer;
    }

    // // Full buffers, for parser buffering

    /**
     * @return Recycled full char buffer with at least <code>minSize</code>
     *   chars (ownership passes to the caller); null if there is no
     *   such buffer available
     */
    public char[] getFullCBuffer(int minSize)
    {
        final char[] candidate = mFullCBuffer;
        if (candidate == null || candidate.length < minSize) {
            return null;
        }
        mFullCBuffer = null;
        return candidate;
    }

    /**
     * Makes given buffer available for reuse, replacing whichever full
     * char buffer was held before.
     */
    public void returnFullCBuffer(char[] buffer)
    {
        mFullCBuffer = buffer;
    }

    // // // Byte buffers:

    // // Full byte buffers, for byte->char conversion (Readers)

    /**
     * @return Recycled full byte buffer with at least <code>minSize</code>
     *   bytes (ownership passes to the caller); null if there is no
     *   such buffer available
     */
    public byte[] getFullBBuffer(int minSize)
    {
        final byte[] candidate = mFullBBuffer;
        if (candidate == null || candidate.length < minSize) {
            return null;
        }
        mFullBBuffer = null;
        return candidate;
    }

    /**
     * Makes given buffer available for reuse, replacing whichever full
     * byte buffer was held before.
     */
    public void returnFullBBuffer(byte[] buffer)
    {
        mFullBBuffer = buffer;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/ 0000755 0001750 0001750 00000000000 11756143457 020472 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/evt/SimpleStartElement.java 0000644 0001750 0001750 00000013000 11745427074 025106 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import java.io.IOException;
import java.io.Writer;
import java.util.*;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.StartElement;
import org.codehaus.stax2.ri.EmptyIterator;
import com.ctc.wstx.io.TextEscaper;
import com.ctc.wstx.util.BaseNsContext;
/**
 * Wstx {@link StartElement} implementation used when event is constructed
 * from already objectified data, for example when constructed by the event
 * factory.
 */
public class SimpleStartElement
    extends BaseStartElement
{
    /**
     * Attributes of the element, keyed by {@link QName}; null if the
     * element has no attributes.
     */
    final Map mAttrs;

    /*
    /////////////////////////////////////////////
    // Life cycle
    /////////////////////////////////////////////
     */

    protected SimpleStartElement(Location loc, QName name, BaseNsContext nsCtxt,
                                 Map attr)
    {
        super(loc, name, nsCtxt);
        mAttrs = attr;
    }

    /**
     * Factory method called when a start element needs to be constructed
     * from an external source (most likely, non-woodstox stream reader).
     */
    public static SimpleStartElement construct(Location loc, QName name,
                                               Map attrs, List ns,
                                               NamespaceContext nsCtxt)
    {
        BaseNsContext mergedCtxt = MergedNsContext.construct(nsCtxt, ns);
        return new SimpleStartElement(loc, name, mergedCtxt, attrs);
    }

    /**
     * Factory method that builds the attribute Map (preserving iteration
     * order) and the namespace context from given iterators; either
     * iterator may be null or empty.
     */
    public static SimpleStartElement construct(Location loc, QName name,
                                               Iterator attrs, Iterator ns,
                                               NamespaceContext nsCtxt)
    {
        // Attributes first: Map is only created if there is at least one
        Map attrMap = null;
        if (attrs != null && attrs.hasNext()) {
            attrMap = new LinkedHashMap();
            do {
                Attribute a = (Attribute) attrs.next();
                attrMap.put(a.getName(), a);
            } while (attrs.hasNext());
        }

        BaseNsContext myCtxt;
        if (ns == null || !ns.hasNext()) {
            /* No local declarations; but we still specifically need 'our'
             * namespace context type, to get things output properly...
             */
            if (nsCtxt == null) {
                myCtxt = null;
            } else if (nsCtxt instanceof BaseNsContext) {
                myCtxt = (BaseNsContext) nsCtxt;
            } else {
                myCtxt = MergedNsContext.construct(nsCtxt, null);
            }
        } else {
            ArrayList declared = new ArrayList();
            do {
                declared.add((Namespace) ns.next()); // cast to catch type problems early
            } while (ns.hasNext());
            myCtxt = MergedNsContext.construct(nsCtxt, declared);
        }
        return new SimpleStartElement(loc, name, myCtxt, attrMap);
    }

    /*
    /////////////////////////////////////////////
    // Public API
    /////////////////////////////////////////////
     */

    public Attribute getAttributeByName(QName name)
    {
        if (mAttrs != null) {
            return (Attribute) mAttrs.get(name);
        }
        return null;
    }

    public Iterator getAttributes()
    {
        if (mAttrs != null) {
            return mAttrs.values().iterator();
        }
        return EmptyIterator.getInstance();
    }

    /**
     * Writes namespace declarations (via the namespace context, if any),
     * followed by all explicitly specified attributes, with values
     * escaped.
     */
    protected void outputNsAndAttr(Writer w) throws IOException
    {
        // First namespace declarations, if any:
        if (mNsCtxt != null) {
            mNsCtxt.outputNamespaceDeclarations(w);
        }
        // Then attributes, if any:
        if (mAttrs == null || mAttrs.size() <= 0) {
            return;
        }
        for (Iterator it = mAttrs.values().iterator(); it.hasNext(); ) {
            Attribute attr = (Attribute) it.next();
            // Let's only output explicit attribute values:
            if (attr.isSpecified()) {
                QName attrName = attr.getName();
                String attrPrefix = attrName.getPrefix();

                w.write(' ');
                if (attrPrefix != null && attrPrefix.length() > 0) {
                    w.write(attrPrefix);
                    w.write(':');
                }
                w.write(attrName.getLocalPart());
                w.write("=\"");
                String value = attr.getValue();
                if (value != null && value.length() > 0) {
                    TextEscaper.writeEscapedAttrValue(w, value);
                }
                w.write('"');
            }
        }
    }

    /**
     * Writes namespace declarations (via the namespace context, if any),
     * followed by all explicitly specified attributes, using given
     * stream writer.
     */
    protected void outputNsAndAttr(XMLStreamWriter w) throws XMLStreamException
    {
        // First namespace declarations, if any:
        if (mNsCtxt != null) {
            mNsCtxt.outputNamespaceDeclarations(w);
        }
        // Then attributes, if any:
        if (mAttrs == null || mAttrs.size() <= 0) {
            return;
        }
        for (Iterator it = mAttrs.values().iterator(); it.hasNext(); ) {
            Attribute attr = (Attribute) it.next();
            // Let's only output explicit attribute values:
            if (attr.isSpecified()) {
                QName attrName = attr.getName();
                String attrPrefix = attrName.getPrefix();
                String localName = attrName.getLocalPart();
                String uri = attrName.getNamespaceURI();
                w.writeAttribute(attrPrefix, uri, localName, attr.getValue());
            }
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/BaseStartElement.java 0000644 0001750 0001750 00000012166 11745427074 024543 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.evt;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import org.codehaus.stax2.XMLStreamWriter2;
import org.codehaus.stax2.ri.EmptyIterator;
import org.codehaus.stax2.ri.evt.BaseEventImpl;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.util.BaseNsContext;
/**
 * Shared base class of {@link StartElement} implementations Wstx uses.
 */
abstract class BaseStartElement
    extends BaseEventImpl
    implements StartElement
{
    protected final QName mName;

    /**
     * Namespace context of the element; may be null, in which case no
     * namespace information is available.
     */
    protected final BaseNsContext mNsCtxt;

    /*
    /////////////////////////////////////////////
    // Life cycle
    /////////////////////////////////////////////
     */

    protected BaseStartElement(Location loc, QName name, BaseNsContext nsCtxt)
    {
        super(loc);
        mName = name;
        mNsCtxt = nsCtxt;
    }

    /*
    /////////////////////////////////////////////
    // StartElement API
    /////////////////////////////////////////////
     */

    public abstract Attribute getAttributeByName(QName name);

    public abstract Iterator getAttributes();

    public final QName getName() {
        return mName;
    }

    public Iterator getNamespaces()
    {
        /* !!! 28-Sep-2004: Should refactor, since now it's up to ns context
         *   to construct namespace events... which adds unnecessary
         *   up-dependency from stream level to event objects.
         */
        if (mNsCtxt != null) {
            return mNsCtxt.getNamespaces();
        }
        return EmptyIterator.getInstance();
    }

    public NamespaceContext getNamespaceContext()
    {
        return mNsCtxt;
    }

    public String getNamespaceURI(String prefix) {
        if (mNsCtxt == null) {
            return null;
        }
        return mNsCtxt.getNamespaceURI(prefix);
    }

    /*
    /////////////////////////////////////////////////////
    // Implementation of abstract base methods, overrides
    /////////////////////////////////////////////////////
     */

    public StartElement asStartElement() { // overriden to save a cast
        return this;
    }

    public int getEventType() {
        return START_ELEMENT;
    }

    public boolean isStartElement() {
        return true;
    }

    /**
     * Writes the start tag: element name (with prefix, if it has a
     * non-empty one), followed by whatever the sub-class outputs for
     * namespace declarations and attributes.
     */
    public void writeAsEncodedUnicode(Writer w)
        throws XMLStreamException
    {
        try {
            w.write('<');
            final String elemPrefix = mName.getPrefix();
            if (elemPrefix != null && elemPrefix.length() > 0) {
                w.write(elemPrefix);
                w.write(':');
            }
            w.write(mName.getLocalPart());

            // Sub-class knows how to output namespaces and attributes:
            outputNsAndAttr(w);

            w.write('>');
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    public void writeUsing(XMLStreamWriter2 w) throws XMLStreamException
    {
        final QName elemName = mName;
        w.writeStartElement(elemName.getPrefix(), elemName.getLocalPart(),
                            elemName.getNamespaceURI());
        outputNsAndAttr(w);
    }

    protected abstract void outputNsAndAttr(Writer w) throws IOException;

    protected abstract void outputNsAndAttr(XMLStreamWriter w) throws XMLStreamException;

    /*
    ///////////////////////////////////////////
    // Standard method implementation
    //
    // note: copied from Stax2 RI's StartElementEventImpl
    ///////////////////////////////////////////
     */

    /**
     * Two start elements are considered equal if their names match, and
     * their namespace declarations and attributes compare equal when
     * iterated over (which means that ordering matters).
     */
    public boolean equals(Object o)
    {
        if (o == this) return true;
        if (o == null) return false;
        if (!(o instanceof StartElement)) return false;

        final StartElement that = (StartElement) o;

        /* Names are checked first; the rest is done by just blindly
         * iterating through ns decls and attributes. Would need to sort
         * to get canonical comparison.
         */
        return mName.equals(that.getName())
            && iteratedEquals(getNamespaces(), that.getNamespaces())
            && iteratedEquals(getAttributes(), that.getAttributes());
    }

    public int hashCode()
    {
        int result = mName.hashCode();
        result = addHash(getNamespaces(), result);
        result = addHash(getAttributes(), result);
        return result;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/WEntityDeclaration.java 0000644 0001750 0001750 00000006240 11745427074 025106 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import java.io.IOException;
import java.io.Writer;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.EntityDeclaration;
import org.codehaus.stax2.XMLStreamWriter2;
import org.codehaus.stax2.ri.evt.BaseEventImpl;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.exc.WstxIOException;
/**
 * Simple implementation of StAX entity declaration events; for the
 * most just wraps a {@link EntityDecl} instance.
 */
public abstract class WEntityDeclaration
    extends BaseEventImpl
    implements EntityDeclaration
{
    public WEntityDeclaration(Location loc)
    {
        super(loc);
    }

    public abstract String getBaseURI();

    public abstract String getName();

    public abstract String getNotationName();

    public abstract String getPublicId();

    public abstract String getReplacementText();

    public abstract String getSystemId();

    /*
    ///////////////////////////////////////////
    // Implementation of abstract base methods
    ///////////////////////////////////////////
     */

    public int getEventType() {
        return ENTITY_DECLARATION;
    }

    /**
     * Method sub-classes implement to do the actual output of the
     * declaration; called by {@link #writeAsEncodedUnicode}.
     */
    public abstract void writeEnc(Writer w) throws IOException;

    /**
     * Delegates to {@link #writeEnc}, wrapping any {@link IOException}
     * into a {@link WstxIOException}.
     */
    public void writeAsEncodedUnicode(Writer w)
        throws XMLStreamException
    {
        try {
            writeEnc(w);
        } catch (IOException ie) {
            throw new WstxIOException(ie);
        }
    }

    /**
     * This method does not make much sense for this event type -- the reason
     * being that the entity declarations can only be written as part of
     * a DTD (internal or external subset), not separately. Can basically
     * choose to either skip silently (output nothing), or throw an
     * exception.
     *
     * @throws XMLStreamException always
     */
    public void writeUsing(XMLStreamWriter2 w) throws XMLStreamException
    {
        /* Fail silently, or throw an exception? Let's do latter; at least
         * then we'll get useful (?) bug reports!
         */
        throw new XMLStreamException("Can not write entity declarations using an XMLStreamWriter");
    }

    /*
    ///////////////////////////////////////////
    // Standard method impl: note, copied
    // from Stax2 RI "EntityDeclarationEventImpl"
    ///////////////////////////////////////////
     */

    public boolean equals(Object o)
    {
        if (o == this) return true;
        if (o == null) return false;

        if (!(o instanceof EntityDeclaration)) return false;

        EntityDeclaration other = (EntityDeclaration) o;
        return stringsWithNullsEqual(getName(), other.getName())
            && stringsWithNullsEqual(getBaseURI(), other.getBaseURI())
            && stringsWithNullsEqual(getNotationName(), other.getNotationName())
            && stringsWithNullsEqual(getPublicId(), other.getPublicId())
            && stringsWithNullsEqual(getReplacementText(), other.getReplacementText())
            && stringsWithNullsEqual(getSystemId(), other.getSystemId())
            ;
    }

    public int hashCode()
    {
        /* Hmmh. Could try using most of the data, but really, name
         * should be enough for most use cases.
         * Since equals() accepts null names, a missing name must not
         * cause a NullPointerException here either.
         */
        String name = getName();
        return (name == null) ? 0 : name.hashCode();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/package.html 0000644 0001750 0001750 00000000223 11745427074 022746 0 ustar giovanni giovanni
* TODO:
*
* One of few complications here is the way start elements are constructed.
* The pattern used is double-indirection, needed to get a callback from
* the stream reader, with data we need for constructing event Object...
* but without stream reader having any understanding of event Objects
* per se.
*
* 03-Dec-2004, TSa: One additional twist is that it's now possible to
* create slightly faster event handling, by indicating that the
* fully accurate Location information is not necessary. If so,
* allocator will just use one shared Location object passed to
* all event objects constructed.
*/
public class DefaultEventAllocator
extends ElemCallback
implements XMLEventAllocator, XMLStreamConstants
{
final static DefaultEventAllocator sStdInstance = new DefaultEventAllocator(true);
/*
////////////////////////////////////////
// Configuration
////////////////////////////////////////
*/
protected final boolean mAccurateLocation;
/*
////////////////////////////////////////
// Recycled objects
////////////////////////////////////////
*/
/**
* Last used location info; only relevant to non-accurate-location
* allocators.
*/
protected Location mLastLocation = null;
/**
 * Constructs an allocator with the specified location handling.
 *
 * @param accurateLocation If true, allocator will construct instances
 *   that have accurate location information; if false, instances
 *   will only have some generic shared Location info. Latter option
 *   will reduce memory usage/thrashing a bit, and may improve speed.
 */
protected DefaultEventAllocator(boolean accurateLocation)
{
    this.mAccurateLocation = accurateLocation;
}
/**
 * Accessor for the default (accurate location) allocator; the same
 * instance is returned on every call, as it can be shared due to not
 * having any state.
 */
public static DefaultEventAllocator getDefaultInstance()
{
    return sStdInstance;
}
/**
 * Factory method for allocators that do not track accurate location;
 * a new instance is constructed on every call, since such instances
 * can not be shared, due to QName caching, as well as because of
 * Location object related state.
 */
public static DefaultEventAllocator getFastInstance()
{
    final boolean accurateLocation = false;
    return new DefaultEventAllocator(accurateLocation);
}
/*
//////////////////////////////////////////////////////////
// XMLEventAllocator implementation
//////////////////////////////////////////////////////////
*/
/**
 * Builds the event object that corresponds to the event the given
 * stream reader currently points to.
 *
 * @param r Stream reader positioned at the event to convert
 *
 * @return Newly allocated event
 *
 * @throws XMLStreamException If the reader is in a state from which no
 *   event can be created (including declaration/attribute/namespace
 *   types, which are never exposed as stand-alone events here)
 * @throws IllegalStateException If the reader reports an event type
 *   that is not recognized at all
 */
public XMLEvent allocate(XMLStreamReader r)
    throws XMLStreamException
{
    // Which Location to attach: a fresh one per event, or the single
    // recycled instance (fetched lazily on first use)?
    final Location evtLoc;
    if (!mAccurateLocation) {
        if (mLastLocation == null) {
            mLastLocation = r.getLocation();
        }
        evtLoc = mLastLocation;
    } else {
        evtLoc = r.getLocation();
    }

    switch (r.getEventType()) {

    // // // Textual events

    case CHARACTERS:
        return new CharactersEventImpl(evtLoc, r.getText(), false);
    case CDATA:
        return new CharactersEventImpl(evtLoc, r.getText(), true);
    case SPACE:
        {
            CharactersEventImpl ws = new CharactersEventImpl(evtLoc, r.getText(), false);
            ws.setWhitespaceStatus(true);
            return ws;
        }
    case COMMENT:
        return new CommentEventImpl(evtLoc, r.getText());
    case PROCESSING_INSTRUCTION:
        return new ProcInstrEventImpl(evtLoc, r.getPITarget(), r.getPIData());

    // // // Document-level events

    case START_DOCUMENT:
        return new StartDocumentEventImpl(evtLoc, r);
    case END_DOCUMENT:
        return new EndDocumentEventImpl(evtLoc);
    case DTD:
        if (!(r instanceof XMLStreamReader2)) {
            /* Plain Stax reader: no way to get at the root name (even
             * though DOCTYPE requires one), nor the identifiers; text
             * is all we have.
             */
            return new WDTD(evtLoc, null, r.getText());
        }
        {
            DTDInfo info = ((XMLStreamReader2) r).getDTDInfo();
            return new WDTD(evtLoc,
                            info.getDTDRootName(),
                            info.getDTDSystemId(), info.getDTDPublicId(),
                            info.getDTDInternalSubset(),
                            (DTDSubset) info.getProcessedDTD());
        }

    // // // Element events

    case END_ELEMENT:
        return new EndElementEventImpl(evtLoc, r);
    case START_ELEMENT:
        /* The stream reader is not supposed to know about event
         * objects, so with a Woodstox reader we use double-dispatch:
         * reader calls back withStartElement() on this allocator.
         * (WSTX-61: reader is not necessarily a Woodstox one, though)
         */
        if (r instanceof StreamReaderImpl) {
            StreamReaderImpl wstxReader = (StreamReaderImpl) r;
            BaseStartElement startElem = (BaseStartElement) wstxReader.withStartElement(this, evtLoc);
            if (startElem == null) { // incorrect state
                throw new WstxException("Trying to create START_ELEMENT when current event is "
                                        +ErrorConsts.tokenTypeDesc(wstxReader.getEventType()),
                                        evtLoc);
            }
            return startElem;
        }
        return buildGenericStartElement(r, evtLoc);

    case ENTITY_REFERENCE:
        // Only a Woodstox reader can give us the actual declaration
        if (r instanceof StreamReaderImpl) {
            EntityDecl decl = ((StreamReaderImpl) r).getCurrentEntityDecl();
            if (decl != null) {
                return new WEntityReference(evtLoc, decl);
            }
        }
        // Other impl, or undefined entity: we still know the name
        return new WEntityReference(evtLoc, r.getLocalName());

    /* Declarations are only accessible via the DTD event, and
     * namespaces/attributes via the start element: none of these
     * should ever be seen as the current event.
     */
    case ENTITY_DECLARATION:
    case NOTATION_DECLARATION:
    case NAMESPACE:
    case ATTRIBUTE:
        throw new WstxException("Internal error: should not get "
                                +ErrorConsts.tokenTypeDesc(r.getEventType()));
    default:
        throw new IllegalStateException("Unrecognized event type "+r.getEventType()+".");
    }
}

/**
 * Helper that constructs a start element event using nothing but the
 * standard stream reader accessors (plus the non-transient namespace
 * context, if the reader is a Stax2 one). Used for readers that are
 * not Woodstox readers; less efficient than the callback-based path.
 */
private XMLEvent buildGenericStartElement(XMLStreamReader r, Location evtLoc)
    throws XMLStreamException
{
    NamespaceContext nsContext = null;
    if (r instanceof XMLStreamReader2) {
        nsContext = ((XMLStreamReader2) r).getNonTransientNamespaceContext();
    }

    // Attributes, keyed by name, in document order (null if none)
    Map attrsByName = null;
    final int attrCount = r.getAttributeCount();
    if (attrCount >= 1) {
        attrsByName = new LinkedHashMap();
        for (int ix = 0; ix < attrCount; ++ix) {
            QName attrName = r.getAttributeName(ix);
            attrsByName.put(attrName, new AttributeEventImpl(evtLoc, attrName, r.getAttributeValue(ix), r.isAttributeSpecified(ix)));
        }
    }

    // Namespace declarations of this element (null if none)
    List nsDecls = null;
    final int nsCount = r.getNamespaceCount();
    if (nsCount >= 1) {
        nsDecls = new ArrayList(nsCount);
        for (int ix = 0; ix < nsCount; ++ix) {
            nsDecls.add(NamespaceEventImpl.constructNamespace(evtLoc, r.getNamespacePrefix(ix), r.getNamespaceURI(ix)));
        }
    }
    return SimpleStartElement.construct(evtLoc, r.getName(), attrsByName, nsDecls, nsContext);
}
/**
 * Allocates the event for the reader's current position and passes it
 * to the given consumer.
 *
 * @param r Stream reader positioned at the event to convert
 * @param consumer Receiver of the allocated event
 */
public void allocate(XMLStreamReader r, XMLEventConsumer consumer)
    throws XMLStreamException
{
    final XMLEvent allocated = allocate(r);
    consumer.add(allocated);
}
/**
 * @return A new allocator that uses the same location-tracking policy
 *   as this one, but shares no state with it
 */
public XMLEventAllocator newInstance() {
    final boolean sameAccuracy = mAccurateLocation;
    return new DefaultEventAllocator(sameAccuracy);
}
/*
//////////////////////////////////////////////////////////
// ElemCallback implementation
//////////////////////////////////////////////////////////
*/
/**
 * Callback through which a stream reader hands over the information
 * about its current start element (see the START_ELEMENT handling of
 * {@link #allocate(XMLStreamReader)}, which triggers this call).
 *
 * @param wasEmpty Not used by this implementation
 *
 * @return Compact start element event wrapping the given information
 */
public Object withStartElement(Location loc, QName name,
                               BaseNsContext nsCtxt, ElemAttrs attrs,
                               boolean wasEmpty)
{
    final CompactStartElement startElem = new CompactStartElement(loc, name, nsCtxt, attrs);
    return startElem;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/WEntityReference.java 0000644 0001750 0001750 00000002202 11745427074 024551 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import javax.xml.stream.Location;
import javax.xml.stream.events.EntityReference;
import javax.xml.stream.events.EntityDeclaration;
import org.codehaus.stax2.ri.evt.EntityReferenceEventImpl;
/**
 * Entity reference event that can also represent a reference to an
 * entity for which no declaration exists; needed for the (non-default,
 * non-standard) mode in which undeclared entities are allowed.
 */
public class WEntityReference
    extends EntityReferenceEventImpl
    implements EntityReference
{
    /**
     * Name taken from the reference itself; only set (non-null) when
     * the instance was constructed without a declaration.
     */
    final String mName;

    /**
     * Constructor used for references to declared entities.
     */
    public WEntityReference(Location loc, EntityDeclaration decl)
    {
        super(loc, decl);
        this.mName = null;
    }

    /**
     * Constructor used for references to undeclared entities: the name
     * is known from the reference, but there is no declaration to
     * attach.
     */
    public WEntityReference(Location loc, String name)
    {
        super(loc, (EntityDeclaration) null);
        this.mName = name;
    }

    /**
     * @return Explicitly passed name, if any; otherwise whatever the
     *   base class reports
     */
    public String getName()
    {
        return (mName == null) ? super.getName() : mName;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/WDTD.java 0000644 0001750 0001750 00000004540 11745427074 022100 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import java.util.ArrayList;
import java.util.List;
import javax.xml.stream.Location;
import org.codehaus.stax2.ri.evt.DTDEventImpl;
import com.ctc.wstx.dtd.DTDSubset;
/**
 * DTD event that gives access to the StAX-visible information of the
 * internal and external DTD subsets of a document.
 */
public class WDTD
    extends DTDEventImpl
{
    /**
     * Woodstox DTD object holding the combined definitions of internal
     * and external subsets; null if not available.
     */
    final DTDSubset mSubset;

    /*
    /////////////////////////////////////////////////////
    // Lazily constructed objects
    /////////////////////////////////////////////////////
     */

    List mEntities = null;

    List mNotations = null;

    /*
    /////////////////////////////////////////////////////
    // Constructors
    /////////////////////////////////////////////////////
     */

    /**
     * Main constructor; the others (except for the full-text one)
     * delegate here.
     */
    public WDTD(Location loc, String rootName,
                String sysId, String pubId, String intSubset,
                DTDSubset dtdSubset)
    {
        super(loc, rootName, sysId, pubId, intSubset, dtdSubset);
        this.mSubset = dtdSubset;
    }

    /**
     * Constructor used when no processed subset object is available.
     */
    public WDTD(Location loc, String rootName,
                String sysId, String pubId, String intSubset)
    {
        this(loc, rootName, sysId, pubId, intSubset, null);
    }

    /**
     * Constructor used when only partial information (no identifiers,
     * no processed subset) is available.
     */
    public WDTD(Location loc, String rootName, String intSubset)
    {
        this(loc, rootName, null, null, intSubset, null);
    }

    /**
     * Constructor used when only the full text is known.
     */
    public WDTD(Location loc, String fullText)
    {
        super(loc, fullText);
        this.mSubset = null;
    }

    /*
    /////////////////////////////////////////////////////
    // Accessors
    /////////////////////////////////////////////////////
     */

    /**
     * @return List built from the general entity list of the subset,
     *   or null if there is no subset object
     */
    public List getEntities()
    {
        if (mEntities != null || mSubset == null) {
            return mEntities;
        }
        /* Copy instead of exposing the subset's own list: subset
         * instances are cached and reused, so callers must not be
         * able to modify the shared list.
         */
        mEntities = new ArrayList(mSubset.getGeneralEntityList());
        return mEntities;
    }

    /**
     * @return List built from the notation list of the subset, or null
     *   if there is no subset object
     */
    public List getNotations() {
        if (mNotations != null || mSubset == null) {
            return mNotations;
        }
        mNotations = new ArrayList(mSubset.getNotationList());
        return mNotations;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/CompactStartElement.java 0000644 0001750 0001750 00000011541 11745427074 025253 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import org.codehaus.stax2.ri.EmptyIterator;
import org.codehaus.stax2.ri.SingletonIterator;
import org.codehaus.stax2.ri.evt.AttributeEventImpl;
import com.ctc.wstx.io.TextEscaper;
import com.ctc.wstx.sr.ElemAttrs;
import com.ctc.wstx.util.BaseNsContext;
/**
 * Wstx {@link StartElement} implementation used when directly creating
 * events from a stream reader.
 *<p>
 * Attribute information is kept in the "raw" form handed over by the
 * reader: a String array with 4 consecutive entries per attribute
 * (local name, namespace URI, prefix, value).
 */
public class CompactStartElement
    extends BaseStartElement
{
    // Need to be in sync with ones from ElemAttrs
    //private final static int OFFSET_LOCAL_NAME = 0;
    private final static int OFFSET_NS_URI = 1;
    private final static int OFFSET_NS_PREFIX = 2;
    private final static int OFFSET_VALUE = 3;

    /*
    /////////////////////////////////////////////
    // Attribute information
    /////////////////////////////////////////////
     */

    /**
     * Container object that has enough information about attributes to
     * be able to implement attribute accessor methods of this class.
     * Null if the element has no attributes.
     */
    final ElemAttrs mAttrs;

    /**
     * Array needed for accessing actual String components of the attributes
     * (null if and only if {@link #mAttrs} is null)
     */
    final String[] mRawAttrs;

    /**
     * Lazily created List that contains Attribute instances of this
     * element. Not created for the single-attribute case, which is
     * served by a singleton iterator instead.
     */
    private ArrayList mAttrList = null;

    /*
    /////////////////////////////////////////////
    // Life cycle
    /////////////////////////////////////////////
     */

    /**
     * @param attrs Attribute container of the element; may be null, in
     *   which case the element is treated as having no attributes
     */
    protected CompactStartElement(Location loc, QName name, BaseNsContext nsCtxt,
                                  ElemAttrs attrs)
    {
        super(loc, name, nsCtxt);
        mAttrs = attrs;
        mRawAttrs = (attrs == null) ? null : attrs.getRawAttrs();
    }

    /*
    /////////////////////////////////////////////
    // StartElement implementation
    /////////////////////////////////////////////
     */

    /**
     * @return Attribute event for the attribute with given name; null
     *   if the element has no such attribute
     */
    public Attribute getAttributeByName(QName name)
    {
        if (mAttrs == null) {
            return null;
        }
        int ix = mAttrs.findIndex(name);
        if (ix < 0) {
            return null;
        }
        return constructAttr(mRawAttrs, ix, !mAttrs.isDefault(ix));
    }

    /**
     * @return Iterator over the {@link Attribute} events of this
     *   element, in the order of the raw attribute array
     */
    public Iterator getAttributes()
    {
        if (mAttrList == null) { // List is lazily constructed as needed
            if (mAttrs == null) {
                return EmptyIterator.getInstance();
            }
            String[] rawAttrs = mRawAttrs;
            int rawLen = rawAttrs.length;
            int defOffset = mAttrs.getFirstDefaultOffset();
            /* Flag passed to constructAttr() must be 'true' for
             * explicitly specified attributes (same polarity as used by
             * getAttributeByName()). Entries at or after the first
             * default offset are defaulted ones, so the "is default"
             * checks need to be negated here; they used to be passed
             * as is, which made isSpecified() of iterated attributes
             * report the inverse of the actual state.
             */
            if (rawLen == 4) {
                return new SingletonIterator
                    (constructAttr(rawAttrs, 0, (defOffset != 0)));
            }
            ArrayList l = new ArrayList(rawLen >> 2);
            for (int i = 0; i < rawLen; i += 4) {
                l.add(constructAttr(rawAttrs, i, (i < defOffset)));
            }
            mAttrList = l;
        }
        return mAttrList.iterator();
    }

    /**
     * Writes namespace declarations (if a namespace context exists)
     * followed by all attributes, as they would appear within a start
     * tag, using the given Writer. Attribute values are escaped.
     */
    protected void outputNsAndAttr(Writer w) throws IOException
    {
        if (mNsCtxt != null) {
            mNsCtxt.outputNamespaceDeclarations(w);
        }
        String[] raw = mRawAttrs;
        if (raw != null) {
            for (int i = 0, len = raw.length; i < len; i += 4) {
                w.write(' ');
                String prefix = raw[i + OFFSET_NS_PREFIX];
                if (prefix != null && prefix.length() > 0) {
                    w.write(prefix);
                    w.write(':');
                }
                w.write(raw[i]); // local name
                w.write("=\"");
                TextEscaper.writeEscapedAttrValue(w, raw[i + OFFSET_VALUE]);
                w.write('"');
            }
        }
    }

    /**
     * Outputs namespace declarations (if a namespace context exists)
     * followed by all attributes, using the given stream writer.
     */
    protected void outputNsAndAttr(XMLStreamWriter w) throws XMLStreamException
    {
        if (mNsCtxt != null) {
            mNsCtxt.outputNamespaceDeclarations(w);
        }
        String[] raw = mRawAttrs;
        if (raw != null) {
            for (int i = 0, len = raw.length; i < len; i += 4) {
                String ln = raw[i];
                String prefix = raw[i + OFFSET_NS_PREFIX];
                String nsURI = raw[i + OFFSET_NS_URI];
                w.writeAttribute(prefix, nsURI, ln, raw[i + OFFSET_VALUE]);
            }
        }
    }

    /*
    /////////////////////////////////////////////
    // Internal methods
    /////////////////////////////////////////////
     */

    /**
     * Constructs the attribute event for the attribute whose 4 raw
     * entries start at given index.
     *
     * @param raw Raw attribute array
     * @param rawIndex Index of the first (local name) entry of the
     *   attribute within the array
     * @param isDef NOTE: despite its name, this flag is forwarded
     *   unchanged as the last constructor argument of
     *   {@link AttributeEventImpl}; callers within this class pass
     *   'true' for attributes that were explicitly specified (that is,
     *   NOT created from a default value)
     */
    public Attribute constructAttr(String[] raw, int rawIndex, boolean isDef)
    {
        return new AttributeEventImpl(mLocation, raw[rawIndex], raw[rawIndex+1],
                                      raw[rawIndex+2], raw[rawIndex+3], isDef);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/WstxEventReader.java 0000644 0001750 0001750 00000005316 11745427074 024432 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.evt;
import javax.xml.stream.*;
import javax.xml.stream.util.XMLEventAllocator;
import org.codehaus.stax2.XMLStreamReader2;
import org.codehaus.stax2.ri.Stax2EventReaderImpl;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.exc.WstxParsingException;
/**
 * Woodstox event reader; a thin specialization of the generic Stax2
 * reference implementation base class {@link Stax2EventReaderImpl}.
 */
public class WstxEventReader
    extends Stax2EventReaderImpl
{
    public WstxEventReader(XMLEventAllocator a, XMLStreamReader2 r)
    {
        super(a, r);
    }

    /*
    //////////////////////////////////////////////////////
    // Impl of abstract methods
    //////////////////////////////////////////////////////
     */

    /**
     * Produces error descriptions that include a description of the
     * offending event type.
     *
     * @return Description for the error types handled here; null for
     *   any other error type
     */
    protected String getErrorDesc(int errorType, int currEvent)
    {
        if (errorType == ERR_GETELEMTEXT_NOT_START_ELEM) {
            return ErrorConsts.ERR_STATE_NOT_STELEM+", got "+ErrorConsts.tokenTypeDesc(currEvent);
        }
        if (errorType == ERR_GETELEMTEXT_NON_TEXT_EVENT) {
            return "Expected a text token, got "+ErrorConsts.tokenTypeDesc(currEvent);
        }
        if (errorType == ERR_NEXTTAG_NON_WS_TEXT) {
            return "Only all-whitespace CHARACTERS/CDATA (or SPACE) allowed for nextTag(), got "+ErrorConsts.tokenTypeDesc(currEvent);
        }
        if (errorType == ERR_NEXTTAG_WRONG_TYPE) {
            return "Got "+ErrorConsts.tokenTypeDesc(currEvent)+", instead of START_ELEMENT, END_ELEMENT or SPACE";
        }
        // No specialized description for other types
        return null;
    }

    /**
     * Delegates the check to the underlying stream reader.
     */
    public boolean isPropertySupported(String name)
    {
        XMLStreamReader2 sr = (XMLStreamReader2) getStreamReader();
        return sr.isPropertySupported(name);
    }

    /**
     * Delegates the change to the underlying stream reader.
     */
    public boolean setProperty(String name, Object value)
    {
        XMLStreamReader2 sr = (XMLStreamReader2) getStreamReader();
        return sr.setProperty(name, value);
    }

    /*
    //////////////////////////////////////////////////////
    // Overrides
    //////////////////////////////////////////////////////
     */

    /**
     * Reports the problem by throwing a Woodstox-specific parsing
     * exception.
     */
    // @Override
    protected void reportProblem(String msg, Location loc)
        throws XMLStreamException
    {
        final WstxParsingException problem = new WstxParsingException(msg, loc);
        throw problem;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/WNotationDeclaration.java 0000644 0001750 0001750 00000001736 11745427074 025432 0 ustar giovanni giovanni package com.ctc.wstx.evt;
import java.net.URL;
import javax.xml.stream.Location;
import org.codehaus.stax2.ri.evt.NotationDeclarationEventImpl;
/**
 * Woodstox implementation of {@link org.codehaus.stax2.evt.NotationDeclaration2};
 * all it adds to the base implementation is the ability to pass in
 * the Base URI.
 *
 * @author Tatu Saloranta
 *
 * @since 4.0.0
 */
public class WNotationDeclaration
    extends NotationDeclarationEventImpl
{
    /**
     * Base URL against which the notation reference can be resolved,
     * if necessary; may be null.
     */
    final URL _baseURL;

    public WNotationDeclaration(Location loc,
                                String name, String pubId, String sysId,
                                URL baseURL)
    {
        super(loc, name, pubId, sysId);
        this._baseURL = baseURL;
    }

    /**
     * @return External form of the base URL, if one was passed;
     *   otherwise whatever the base class reports
     */
    //@Override
    public String getBaseURI()
    {
        return (_baseURL != null) ? _baseURL.toExternalForm() : super.getBaseURI();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/msv/ 0000755 0001750 0001750 00000000000 11756143457 020501 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/msv/W3CSchema.java 0000644 0001750 0001750 00000002677 11745427074 023073 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.msv;
import javax.xml.stream.*;
import org.codehaus.stax2.validation.*;
import com.sun.msv.grammar.xmlschema.XMLSchemaGrammar;
import com.sun.msv.verifier.regexp.xmlschema.XSREDocDecl;
/**
 * Validation schema backed by a W3C Schema grammar; a shareable
 * "blueprint" from which actual validator instances are created.
 */
public class W3CSchema
    implements XMLValidationSchema
{
    /**
     * MSV grammar object validators are created from.
     */
    protected final XMLSchemaGrammar mGrammar;

    public W3CSchema(XMLSchemaGrammar grammar)
    {
        this.mGrammar = grammar;
    }

    public String getSchemaType() {
        return XMLValidationSchema.SCHEMA_ID_W3C_SCHEMA;
    }

    /**
     * Creates a new validator that uses a fresh document declaration
     * constructed from the grammar.
     */
    public XMLValidator createValidator(ValidationContext ctxt)
        throws XMLStreamException
    {
        final XSREDocDecl docDecl = new XSREDocDecl(mGrammar);
        return new GenericMsvValidator(this, ctxt, docDecl);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/msv/package.html 0000644 0001750 0001750 00000000210 11745427074 022751 0 ustar giovanni giovanni
* Note about id context provider interface: while it'd be nice to
* separate that part out, it is unfortunately closely tied to the
* validation process. Hence it's directly implemented by this class.
*/
public final class GenericMsvValidator
extends XMLValidator
implements com.sun.msv.grammar.IDContextProvider2
{
/*
///////////////////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////////////////
*/
final protected XMLValidationSchema mParentSchema;
final protected ValidationContext mContext;
final protected DocumentDeclaration mVGM;
/*
///////////////////////////////////////////////////////////////////////
// State, helper objects
///////////////////////////////////////////////////////////////////////
*/
final protected ArrayList mAcceptors = new ArrayList();
protected Acceptor mCurrAcceptor = null;
final protected TextAccumulator mTextAccumulator = new TextAccumulator();
/**
* Map that contains information about element id (values of attributes
* or textual content with type ID) declarations and references
*/
protected ElementIdMap mIdDefs;
/*
///////////////////////////////////////////////////////////////////////
// State, positions
///////////////////////////////////////////////////////////////////////
*/
protected String mCurrAttrPrefix;
protected String mCurrAttrLocalName;
/**
* Sometimes a problem object has to be temporarily
* stored, and only reported later on. This happens
* when exceptions can not be thrown via code outside
* of Woodstox (like validation methods in MSV that do
* callbacks).
*/
protected XMLValidationProblem mProblem;
/*
///////////////////////////////////////////////////////////////////////
// Helper objects
///////////////////////////////////////////////////////////////////////
*/
final StringRef mErrorRef = new StringRef();
/**
* StartTagInfo instance need not be thread-safe, and it is not immutable
* so let's reuse one instance during a single validation.
*/
final StartTagInfo mStartTag = new StartTagInfo("", "", "", null, (IDContextProvider2) null);
/**
* This object provides limited access to attribute values of the
* currently validated element.
*/
final AttributeProxy mAttributeProxy;
/*
///////////////////////////////////////////////////////////////////////
// Construction, configuration
///////////////////////////////////////////////////////////////////////
*/
/**
 * @param parent Schema this validator was created from
 * @param ctxt Context used for accessing document information and for
 *   reporting problems
 * @param vgm MSV document declaration from which the root acceptor is
 *   created
 */
public GenericMsvValidator(XMLValidationSchema parent, ValidationContext ctxt,
                           DocumentDeclaration vgm)
{
    this.mParentSchema = parent;
    this.mContext = ctxt;
    this.mVGM = vgm;

    // Root-level acceptor first, then the attribute access proxy
    this.mCurrAcceptor = vgm.createAcceptor();
    this.mAttributeProxy = new AttributeProxy(ctxt);
}
/*
///////////////////////////////////////////////////////////////////////
// IDContextProvider2 implementation:
//
// Core RelaxNG ValidationContext implementation
// (org.relaxng.datatype.ValidationContext, base interface
// of the id provider context)
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return Base URI as reported by the validation context
 */
public String getBaseUri()
{
    final String baseUri = mContext.getBaseUri();
    return baseUri;
}
/**
 * @return True if the validation context knows a declared notation
 *   with given name
 */
public boolean isNotation(String notationName)
{
    final boolean declared = mContext.isNotationDeclared(notationName);
    return declared;
}
/**
 * @return True if the validation context knows a declared unparsed
 *   entity with given name
 */
public boolean isUnparsedEntity(String entityName)
{
    final boolean declared = mContext.isUnparsedEntityDeclared(entityName);
    return declared;
}
/**
 * @return Namespace URI the validation context reports for given
 *   prefix
 */
public String resolveNamespacePrefix(String prefix)
{
    final String nsUri = mContext.getNamespaceURI(prefix);
    return nsUri;
}
/*
///////////////////////////////////////////////////////////////////////
// IDContextProvider2 implementation, extensions over
// core ValidationContext
///////////////////////////////////////////////////////////////////////
*/
/**
 * Callback for values of ID-related datatypes: records id definitions
 * (type ID) and id references (types IDREF and IDREFS) in the id map,
 * which is created on first call.
 *<p>
 * Note: a duplicate id definition is not reported from within this
 * method. Instead, the problem object is stored in {@link #mProblem},
 * to be reported by the validation method that triggered the
 * callback, once control returns to it.
 *
 * @throws IllegalStateException If the datatype reports an id type
 *   other than ID, IDREF or IDREFS
 */
public void onID(Datatype datatype, StringToken idToken)
    throws IllegalArgumentException
{
    if (mIdDefs == null) {
        mIdDefs = new ElementIdMap();
    }

    final int kind = datatype.getIdType();
    final Location here = mContext.getValidationLocation();
    final PrefixedName elemName = getElementPName();
    final PrefixedName attrName = getAttrPName();

    if (kind == Datatype.ID_TYPE_ID) {
        String id = idToken.literal.trim();
        ElementId entry = mIdDefs.addDefined(id, here, elemName, attrName);
        // If the entry carries some other Location than the one we
        // just passed, the id had already been defined: a duplicate
        if (entry.getLocation() != here) {
            mProblem = new XMLValidationProblem(here, "Duplicate id '"+id+"', first declared at "+entry.getLocation());
            mProblem.setReporter(this);
        }
        return;
    }
    if (kind == Datatype.ID_TYPE_IDREF) {
        String id = idToken.literal.trim();
        mIdDefs.addReferenced(id, here, elemName, attrName);
        return;
    }
    if (kind == Datatype.ID_TYPE_IDREFS) {
        // White space separated list of references
        StringTokenizer refs = new StringTokenizer(idToken.literal);
        while (refs.hasMoreTokens()) {
            mIdDefs.addReferenced(refs.nextToken(), here, elemName, attrName);
        }
        return;
    }
    // sanity check
    throw new IllegalStateException("Internal error: unexpected ID datatype: "+datatype);
}
/*
///////////////////////////////////////////////////////////////////////
// XMLValidator implementation
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return Schema object that was passed to the constructor of this
 *   validator
 */
public XMLValidationSchema getSchema() {
    final XMLValidationSchema parent = mParentSchema;
    return parent;
}
/**
 * Method called when a new (start) element is encountered, after its
 * namespace information has been resolved. Validates any pending
 * text, creates the child acceptor for the element, reports problems
 * found so far, and pushes the new acceptor on the acceptor stack.
 *
 * @param localName Local name of the element
 * @param uri Namespace URI of the element; null is handled like empty
 *   String
 * @param prefix Prefix of the element; currently not used
 */
public void validateElementStart(String localName, String uri, String prefix)
    throws XMLStreamException
{
    /* [WSTX-200]: If sub-tree we were to validate has ended, we
     *   have no current acceptor, and must quit. Ideally we would
     *   really handle this more cleanly but...
     */
    if (mCurrAcceptor == null) {
        return;
    }

    // Text collected before this element needs to be validated first
    if (mTextAccumulator.hasText()) {
        doValidateText(mTextAccumulator);
    }

    /* 31-Mar-2006, TSa: MSV seems to require empty String for empty/no
     *   namespace, not null.
     */
    final String nsUri = (uri == null) ? "" : uri;

    /* Qualified name appears to be used only for error reporting, so
     * plain local name is passed instead of constructing the prefixed
     * version.
     */
    mStartTag.reinit(nsUri, localName, localName, mAttributeProxy, this);
    mCurrAcceptor = mCurrAcceptor.createChildAcceptor(mStartTag, mErrorRef);

    /* As per documentation, the side-effect of getting the error message
     * is that we also get a recoverable non-null acceptor... thus, should
     * never (?) see null acceptor being returned
     */
    if (mErrorRef.str != null) {
        reportError(mErrorRef);
    }
    // Problem stored by a callback, still waiting to be reported?
    if (mProblem != null) {
        XMLValidationProblem pending = mProblem;
        mProblem = null;
        mContext.reportProblem(pending);
    }
    mAcceptors.add(mCurrAcceptor);
}
/**
 * Validates a single attribute of the current element, if there is
 * an active acceptor; also records the attribute's name so that id
 * callbacks can refer to it.
 *
 * @param uri Namespace URI of the attribute; null is handled like
 *   empty String
 *
 * @return Always null: no normalized value is ever produced
 */
public String validateAttribute(String localName, String uri,
                                String prefix, String value)
    throws XMLStreamException
{
    mCurrAttrLocalName = localName;
    mCurrAttrPrefix = prefix;

    /* No normalization done by RelaxNG, is there? (at least nothing
     * visible to callers that is). Hence null gets returned in
     * all cases.
     */
    if (mCurrAcceptor == null) {
        return null;
    }

    // for now, let's assume we don't need prefixed version of the name
    final String qname = localName;
    // nor do we care about the datatype
    final DatatypeRef typeRef = null;
    /* 31-Mar-2006, TSa: MSV seems to require empty String for empty/no
     *   namespace, not null.
     */
    final String nsUri = (uri == null) ? "" : uri;

    boolean ok = mCurrAcceptor.onAttribute2(nsUri, localName, qname, value, this, mErrorRef, typeRef);
    if (!ok || mErrorRef.str != null) {
        reportError(mErrorRef);
    }
    // Problem stored by a callback, still waiting to be reported?
    if (mProblem != null) {
        XMLValidationProblem pending = mProblem;
        mProblem = null;
        mContext.reportProblem(pending);
    }
    return null;
}
/**
 * Variant that takes the attribute value as a slice of a character
 * array; constructs a String from the slice and delegates to the
 * String-based method.
 *
 * @param valueStart Offset of the first character of the value
 * @param valueEnd Offset right after the last character of the value
 */
public String validateAttribute(String localName, String uri,
                                String prefix,
                                char[] valueChars, int valueStart,
                                int valueEnd)
    throws XMLStreamException
{
    /* Having to construct a String is very sub-optimal... but MSV
     * doesn't deal with char arrays.
     */
    final String value = new String(valueChars, valueStart, valueEnd - valueStart);
    return validateAttribute(localName, uri, prefix, value);
}
/**
 * Method called after all attributes of the current element have been
 * passed for validation; lets the acceptor complete attribute
 * handling, and determines what kind of textual content the element
 * may have.
 *
 * @return One of the <code>CONTENT_ALLOW_</code> constants
 */
public int validateElementAndAttributes()
    throws XMLStreamException
{
    // No longer handling any attribute
    mCurrAttrPrefix = "";
    mCurrAttrLocalName = mCurrAttrPrefix;

    // If no acceptor, we are recovering, no need or use to validate text
    if (mCurrAcceptor == null) {
        return CONTENT_ALLOW_ANY_TEXT;
    }

    /* start tag info is still intact here (only attributes sent
     * since child acceptor was created)
     */
    boolean ok = mCurrAcceptor.onEndAttributes(mStartTag, mErrorRef);
    if (!ok || mErrorRef.str != null) {
        reportError(mErrorRef);
    }

    final int careLevel = mCurrAcceptor.getStringCareLevel();
    switch (careLevel) {
    case Acceptor.STRING_STRICT: // validatable (data-oriented)
        return XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT;
    case Acceptor.STRING_IGNORE: // anything (mixed content models)
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    case Acceptor.STRING_PROHIBITED: // only WS
        return XMLValidator.CONTENT_ALLOW_WS;
    default:
        throw new IllegalArgumentException("Internal error: unexpected string care level value return by MSV: "+careLevel);
    }
}
/**
 * Method called when the end of an element is encountered: validates
 * accumulated text, checks that the element's acceptor is in an
 * accepting state, pops it off the acceptor stack and advances the
 * parent's acceptor.
 *
 * @return Validation state that should be effective for the parent
 *   element state
 */
public int validateElementEnd(String localName, String uri, String prefix)
    throws XMLStreamException
{
    /* 27-Feb-2009, TSa: [WSTX-191]: MSV expects text validation to
     *   be called even if there is no text, in case there are
     *   restriction(s) on textual content. Otherwise we'll get an
     *   error.
     */
    doValidateText(mTextAccumulator);

    /* [WSTX-200]: need to avoid problems when doing sub-tree
     *   validation... not a proper solution, but has to do for
     *   now
     */
    final int topIx = mAcceptors.size() - 1;
    if (topIx < 0) {
        return XMLValidator.CONTENT_ALLOW_WS;
    }

    final Acceptor closed = (Acceptor) mAcceptors.remove(topIx);
    if (closed != null) { // may be null during error recovery? or not?
        boolean accepted = closed.isAcceptState(mErrorRef);
        if (!accepted || mErrorRef.str != null) {
            reportError(mErrorRef);
        }
    }

    // Parent's acceptor becomes current; none if root was closed
    mCurrAcceptor = (topIx == 0) ? null : (Acceptor) mAcceptors.get(topIx - 1);

    if (mCurrAcceptor == null || closed == null) {
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    }

    boolean stepped = mCurrAcceptor.stepForward(closed, mErrorRef);
    if (!stepped || mErrorRef.str != null) {
        reportError(mErrorRef);
    }

    final int careLevel = mCurrAcceptor.getStringCareLevel();
    switch (careLevel) {
    case Acceptor.STRING_STRICT: // validatable (data-oriented)
        return XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT;
    case Acceptor.STRING_IGNORE: // anything (mixed content models)
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    case Acceptor.STRING_PROHIBITED: // only WS
        return XMLValidator.CONTENT_ALLOW_WS;
    default:
        throw new IllegalArgumentException("Internal error: unexpected string care level value return by MSV: "+careLevel);
    }
}
/**
 * Adds given text segment to the accumulated text, and validates the
 * accumulated text if this was the last segment.
 *
 * @param lastTextSegment True if no more text follows before the next
 *   non-text event
 */
public void validateText(String text, boolean lastTextSegment)
    throws XMLStreamException
{
    /* Being called means that text probably does need validating
     * (not guaranteed, though; in case of multiple parallel validators,
     * only one of them may actually be interested)
     */
    mTextAccumulator.addText(text);
    if (!lastTextSegment) {
        return;
    }
    doValidateText(mTextAccumulator);
}
/**
 * Adds given text segment (passed as part of a character buffer) to
 * the accumulated text, and validates the accumulated text if this
 * was the last segment.
 *
 * @param lastTextSegment True if no more text follows before the next
 *   non-text event
 */
public void validateText(char[] cbuf, int textStart, int textEnd,
                         boolean lastTextSegment)
    throws XMLStreamException
{
    /* Being called means that text probably does need validating
     * (not guaranteed, though; in case of multiple parallel validators,
     * only one of them may actually be interested)
     */
    mTextAccumulator.addText(cbuf, textStart, textEnd);
    if (!lastTextSegment) {
        return;
    }
    doValidateText(mTextAccumulator);
}
/**
 * Method called when validation ends; if that is because the end of
 * the document was reached, verifies that all referenced ids were
 * also defined.
 *
 * @param eod True if the end of the document was reached; false if
 *   validation was cancelled, in which case nothing is checked
 */
public void validationCompleted(boolean eod)
    throws XMLStreamException
{
    // Nothing to check unless at EOF, with at least one id seen
    if (!eod || mIdDefs == null) {
        return;
    }
    final ElementId undefinedRef = mIdDefs.getFirstUndefined();
    if (undefinedRef == null) { // all references resolved
        return;
    }
    String msg = "Undefined ID '"+undefinedRef.getId()
        +"': referenced from element <"
        +undefinedRef.getElemName()+">, attribute '"
        +undefinedRef.getAttrName()+"'";
    reportError(msg, undefinedRef.getLocation());
}
/*
///////////////////////////////////////////////////////////////////////
// Attribute info access
///////////////////////////////////////////////////////////////////////
*/
// // // Access to type info
/**
 * Not yet implemented.
 *
 * @return Always null
 */
public String getAttributeType(int index)
{
    // !!! TBI
    final String noType = null;
    return noType;
}
/**
 * Not yet implemented.
 *
 * @return Always -1
 */
public int getIdAttrIndex()
{
    // !!! TBI
    final int noIndex = -1;
    return noIndex;
}
/**
 * Not yet implemented.
 *
 * @return Always -1
 */
public int getNotationAttrIndex()
{
    // !!! TBI
    final int noIndex = -1;
    return noIndex;
}
/*
///////////////////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////////////////
*/
/**
 * @return Prefixed name constructed from the current element name, as
 *   reported by the validation context
 */
PrefixedName getElementPName()
{
    final PrefixedName elemName = PrefixedName.valueOf(mContext.getCurrentElementName());
    return elemName;
}
/**
 * @return New prefixed name constructed from the prefix and local
 *   name recorded for the attribute most recently passed for
 *   validation
 */
PrefixedName getAttrPName()
{
    final PrefixedName attrName = new PrefixedName(mCurrAttrPrefix, mCurrAttrLocalName);
    return attrName;
}
/**
 * Passes the text collected so far to the current acceptor (clearing
 * the accumulator in the process), and reports a validation error if
 * the text is not accepted. Does nothing, not even clear the
 * accumulator, if there is no current acceptor.
 */
void doValidateText(TextAccumulator textAcc)
    throws XMLStreamException
{
    if (mCurrAcceptor == null) {
        return;
    }
    final String text = textAcc.getAndClear();
    final DatatypeRef typeRef = null; // type information not needed
    boolean ok = mCurrAcceptor.onText2(text, this, mErrorRef, typeRef);
    if (!ok || mErrorRef.str != null) {
        reportError(mErrorRef);
    }
}
/**
 * Reports the error whose message is held by given reference,
 * resetting the reference so that it can be reused. A generic
 * message is used if the reference holds none.
 */
private void reportError(StringRef errorRef)
    throws XMLStreamException
{
    final String pending = errorRef.str;
    errorRef.str = null;
    reportError((pending == null) ? "Unknown reason" : pending);
}
/**
 * Reports an error with given message, using the current validation
 * location of the context as the location of the problem.
 */
private void reportError(String msg)
    throws XMLStreamException
{
    final Location here = mContext.getValidationLocation();
    reportError(msg, here);
}
/**
 * Constructs an error-level validation problem with given message and
 * location, marks this validator as its reporter, and passes it to
 * the validation context.
 */
private void reportError(String msg, Location loc)
    throws XMLStreamException
{
    final XMLValidationProblem problem =
        new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR);
    problem.setReporter(this);
    mContext.reportProblem(problem);
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/msv/RelaxNGSchema.java 0000644 0001750 0001750 00000003217 11745427074 023766 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.msv;
import javax.xml.stream.*;
import org.codehaus.stax2.validation.*;
import com.sun.msv.grammar.trex.TREXGrammar;
import com.sun.msv.verifier.regexp.REDocumentDeclaration;
/**
 * Validation schema backed by a RELAX NG grammar; a shareable
 * "blueprint" from which actual validator instances are created.
 */
public class RelaxNGSchema
    implements XMLValidationSchema
{
    /**
     * The VGM (in MSV lingo): shareable schema blueprint, basically
     * the peer of this schema object. Used for creating the peer of
     * an actual validator, the root Acceptor.
     */
    protected final TREXGrammar mGrammar;

    public RelaxNGSchema(TREXGrammar grammar)
    {
        this.mGrammar = grammar;
    }

    public String getSchemaType() {
        return XMLValidationSchema.SCHEMA_ID_RELAXNG;
    }

    /**
     * Creates a new validator that uses a fresh document declaration
     * constructed from the grammar.
     */
    public XMLValidator createValidator(ValidationContext ctxt)
        throws XMLStreamException
    {
        final REDocumentDeclaration docDecl = new REDocumentDeclaration(mGrammar);
        return new GenericMsvValidator(this, ctxt, docDecl);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/ 0000755 0001750 0001750 00000000000 11756143457 020447 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDSubset.java 0000644 0001750 0001750 00000007205 11745427074 023115 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.sr.InputProblemReporter;
/**
* This is the abstract base class that implements the standard Stax2
* validation schema base class ({@link XMLValidationSchema}), and also
* specifies an extended Woodstox-specific interface for accessing
* DTD-specific things like entity expansions and notation properties.
*
* API is separated from its implementation to reduce coupling; for example,
* it is possible to have DTD subset implementations that do not implement
* validation logic, just entity expansion.
*/
public abstract class DTDSubset
implements DTDValidationSchema
{
/*
//////////////////////////////////////////////////////
// Life-cycle
//////////////////////////////////////////////////////
*/
// Only sub-classes are expected to instantiate this type
protected DTDSubset() { }
/**
* Method that will combine definitions from this internal subset with
* definitions from passed-in external subset, producing a new combined
* DTDSubset instance.
*
* @param rep Reporter passed along for problem reporting
* @param extSubset External subset to combine with this (internal) subset
*
* @return Combined subset
*/
public abstract DTDSubset combineWithExternalSubset(InputProblemReporter rep,
DTDSubset extSubset)
throws XMLStreamException;
/*
//////////////////////////////////////////////////////
// XMLValidationSchema implementation
//////////////////////////////////////////////////////
*/
/**
* Method for constructing a validator instance that operates in the
* given validation context.
*/
public abstract XMLValidator createValidator(ValidationContext ctxt)
throws XMLStreamException;
/**
* @return Schema type id constant for DTDs
*/
public String getSchemaType() {
return XMLValidationSchema.SCHEMA_ID_DTD;
}
/*
//////////////////////////////////////////////////////
// And extended DTDValidationSchema
//////////////////////////////////////////////////////
*/
public abstract int getEntityCount();
public abstract int getNotationCount();
/*
//////////////////////////////////////////////////////
// Woodstox-specific API, caching support
//////////////////////////////////////////////////////
*/
public abstract boolean isCachable();
/**
* Method used in determining whether cached external subset instance
* can be used with specified internal subset. If the external subset
* references any parameter entities that the internal subset
* (re-)defines, it can not; otherwise it can be used.
*
* @return True if this (external) subset can be reused with the
* passed-in internal subset.
* NOTE(review): earlier wording of this tag described the inverse
* condition (true when a parameter entity of the internal subset is
* referenced), which contradicts the method name and the description
* above -- confirm against implementations.
*/
public abstract boolean isReusableWith(DTDSubset intSubset);
/*
//////////////////////////////////////////////////////
// Woodstox-specific API, entity/notation handling
//////////////////////////////////////////////////////
*/
public abstract HashMap getGeneralEntityMap();
public abstract List getGeneralEntityList();
public abstract HashMap getParameterEntityMap();
public abstract HashMap getNotationMap();
public abstract List getNotationList();
public abstract HashMap getElementMap();
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDSchemaFactory.java 0000644 0001750 0001750 00000015460 11745427074 024402 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.io.*;
import java.net.URL;
import javax.xml.stream.*;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.api.ValidatorConfig;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.io.*;
import com.ctc.wstx.util.DefaultXmlSymbolTable;
import com.ctc.wstx.util.SymbolTable;
import com.ctc.wstx.util.URLUtil;
/**
* Factory for creating DTD validator schema objects (shareable stateless
* "blueprints" for creating actual validators).
*
* Due to close coupling of XML and DTD, some of the functionality
* implemented (like that of reading internal subsets embedded in XML
* documents) is only accessible by core Woodstox. The externally
* accessible functionality is that of constructing schemas from
* stand-alone (external) DTD subsets.
*/
public class DTDSchemaFactory
    extends XMLValidationSchemaFactory
{
    /*
    /////////////////////////////////////////////////////
    // Objects shared by actual parsers
    /////////////////////////////////////////////////////
     */

    /**
     * 'Root' symbol table, used for creating actual symbol table instances,
     * but never as is.
     */
    final static SymbolTable mRootSymbols = DefaultXmlSymbolTable.getInstance();
    static {
        mRootSymbols.setInternStrings(true);
    }

    /**
     * Current configurations for this factory
     */
    protected final ValidatorConfig mSchemaConfig;

    /**
     * This configuration object is used (instead of a more specific one)
     * since the actual DTD reader uses such configuration object.
     */
    protected final ReaderConfig mReaderConfig;

    public DTDSchemaFactory()
    {
        super(XMLValidationSchema.SCHEMA_ID_DTD);
        mReaderConfig = ReaderConfig.createFullDefaults();
        mSchemaConfig = ValidatorConfig.createDefaults();
    }

    /*
    ////////////////////////////////////////////////////////////
    // Stax2, Configuration methods
    ////////////////////////////////////////////////////////////
     */

    public boolean isPropertySupported(String propName)
    {
        return mSchemaConfig.isPropertySupported(propName);
    }

    public boolean setProperty(String propName, Object value)
    {
        return mSchemaConfig.setProperty(propName, value);
    }

    public Object getProperty(String propName)
    {
        return mSchemaConfig.getProperty(propName);
    }

    /*
    ////////////////////////////////////////////////////////////
    // Stax2, Factory methods
    ////////////////////////////////////////////////////////////
     */

    /**
     * Creates a schema by reading a DTD from the given stream. The stream
     * is owned by the caller; this method does not close it.
     *<p>
     * Note: the <code>encoding</code> argument is not used by this
     * implementation; only the stream and the ids are passed on to the
     * bootstrapper.
     */
    public XMLValidationSchema createSchema(InputStream in, String encoding,
                                            String publicId, String systemId)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        return doCreateSchema(rcfg, StreamBootstrapper.getInstance
                              (publicId, systemId, in), publicId, systemId, null);
    }

    /**
     * Creates a schema by reading a DTD from the given reader. The reader
     * is owned by the caller; this method does not close it.
     */
    public XMLValidationSchema createSchema(Reader r, String publicId,
                                            String systemId)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        return doCreateSchema(rcfg, ReaderBootstrapper.getInstance
                              (publicId, systemId, r, null), publicId, systemId, null);
    }

    /**
     * Creates a schema by reading a DTD from the given URL; the external
     * form of the URL is used as the system id, and the URL itself as the
     * context for resolving references.
     *
     * @throws XMLStreamException if opening or reading the resource fails
     *   (I/O problems are wrapped in {@link WstxIOException})
     */
    public XMLValidationSchema createSchema(URL url)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        InputStream in = null;
        boolean succeeded = false;
        try {
            in = URLUtil.inputStreamFromURL(url);
            XMLValidationSchema schema = doCreateSchema(rcfg,
                    StreamBootstrapper.getInstance(null, null, in),
                    null, url.toExternalForm(), url);
            succeeded = true;
            return schema;
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        } finally {
            // The stream was opened here, so on failure nobody else can
            // release it. On success the behavior is left as it was.
            if (!succeeded) {
                closeQuietly(in);
            }
        }
    }

    /**
     * Creates a schema by reading a DTD from the given file; the URL form
     * of the file is used both as the system id and as the context for
     * resolving references.
     *
     * @throws XMLStreamException if opening or reading the file fails
     *   (I/O problems are wrapped in {@link WstxIOException})
     */
    public XMLValidationSchema createSchema(File f)
        throws XMLStreamException
    {
        ReaderConfig rcfg = createPrivateReaderConfig();
        InputStream in = null;
        boolean succeeded = false;
        try {
            /* File.toURL() is deprecated since it does not escape
             * characters that are illegal in URLs; going through the
             * URI form is the documented replacement.
             */
            URL url = f.toURI().toURL();
            in = new FileInputStream(f);
            XMLValidationSchema schema = doCreateSchema(rcfg,
                    StreamBootstrapper.getInstance(null, null, in),
                    null, url.toExternalForm(), url);
            succeeded = true;
            return schema;
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        } finally {
            // See createSchema(URL): release the locally opened stream on failure
            if (!succeeded) {
                closeQuietly(in);
            }
        }
    }

    /*
    ////////////////////////////////////////////////////////////
    // Woodstox-specific API
    ////////////////////////////////////////////////////////////
     */

    /*
    ////////////////////////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////////////////////////
     */

    /**
     * The main validator construction method, called by all externally
     * visible methods.
     *
     * @param rcfg Private (non-shared) reader configuration to use
     * @param bs Bootstrapper that provides the input to read
     * @param publicId Public id of the DTD, if known
     * @param systemId System id of the DTD, if known
     * @param ctxt Context URL for resolving references; if null, one based
     *   on the current directory is used
     */
    protected XMLValidationSchema doCreateSchema
        (ReaderConfig rcfg, InputBootstrapper bs, String publicId, String systemId, URL ctxt)
        throws XMLStreamException
    {
        try {
            Reader r = bs.bootstrapInput(rcfg, false, XmlConsts.XML_V_UNKNOWN);
            if (bs.declaredXml11()) {
                rcfg.enableXml11(true);
            }
            if (ctxt == null) { // this is just needed as context for param entity expansion
                ctxt = URLUtil.urlFromCurrentDir();
            }
            /* Note: need to pass unknown for 'xmlVersion' here (as well as
             * above for bootstrapping), since this is assumed to be the main
             * level parsed document and no xml version compatibility checks
             * should be done.
             */
            WstxInputSource src = InputSourceFactory.constructEntitySource
                (rcfg, null, null, bs, publicId, systemId, XmlConsts.XML_V_UNKNOWN, ctxt, r);

            /* true -> yes, fully construct for validation
             * (does not mean it has to be used for validation, but required
             * if it is to be used for that purpose)
             */
            return FullDTDReader.readExternalSubset(src, rcfg, /*int.subset*/null, true, bs.getDeclaredVersion());
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
    }

    /**
     * Creates a non-shared copy of the reader configuration, with its own
     * child symbol table derived from the root table.
     */
    private ReaderConfig createPrivateReaderConfig()
    {
        return mReaderConfig.createNonShared(mRootSymbols.makeChild());
    }

    /**
     * Closes the given stream (if any), ignoring failures; used on error
     * paths where the original problem is the one that should propagate.
     */
    private static void closeQuietly(InputStream in)
    {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // a secondary failure must not mask the original exception
            }
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDEnumAttr.java 0000644 0001750 0001750 00000005344 11745427074 023411 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.WordResolver;
/**
* Specific attribute class for attributes that have enumerated values.
*/
public final class DTDEnumAttr
    extends DTDAttribute
{
    /**
     * Resolver that holds the enumerated values declared for this attribute.
     */
    final WordResolver mEnumValues;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public DTDEnumAttr(PrefixedName name, DefaultAttrValue defValue,
                       int specIndex, boolean nsAware, boolean xml11,
                       WordResolver enumValues)
    {
        super(name, defValue, specIndex, nsAware, xml11);
        this.mEnumValues = enumValues;
    }

    /**
     * Creates a copy of this attribute that differs only by its special
     * attribute index.
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDEnumAttr(mName, mDefValue, specIndex,
                               mCfgNsAware, mCfgXml11, mEnumValues);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType()
    {
        return TYPE_ENUMERATED;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Called by the validator so that the attribute can normalize and/or
     * validate the given value.
     *
     * @return Result of the enumeration check when it succeeds; otherwise
     *   whatever problem reporting returns
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        String matched = validateEnumValue(cbuf, start, end, normalize, mEnumValues);
        if (matched != null) {
            return matched;
        }
        // No match: report using the raw (un-normalized) value
        String raw = new String(cbuf, start, (end - start));
        String msg = "Invalid enumerated value '"+raw+"': has to be one of ("
            +mEnumValues+")";
        return reportValidationProblem(v, msg);
    }

    /**
     * Called by the validator to have the attribute verify that its
     * default value (if any) is valid for this type.
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        String def = validateDefaultNmToken(rep, normalize);
        // The default also has to be one of the listed values
        String shared = mEnumValues.find(def);
        if (shared != null) {
            if (normalize) {
                mDefValue.setValue(shared);
            }
            return;
        }
        String msg = "Invalid default value '"+def+"': has to be one of ("
            +mEnumValues+")";
        reportValidationProblem(rep, msg);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DefaultAttrValue.java 0000644 0001750 0001750 00000014770 11745427074 024535 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.text.MessageFormat;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.ValidationContext;
import org.codehaus.stax2.validation.XMLValidationProblem;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.cfg.ErrorConsts;
/**
* Simple container class used to contain information about the default
* value for an attribute. Although for most use cases a simple String
* would suffice, there are cases where additional information is needed
* (especially status of 'broken' default values, which only need to be
* reported should the default value be needed).
*/
public final class DefaultAttrValue
{
/*
////////////////////////////////////////////////////
// Constants
////////////////////////////////////////////////////
*/
// // // Default value types
public final static int DEF_DEFAULT = 1;
public final static int DEF_IMPLIED = 2;
public final static int DEF_REQUIRED = 3;
public final static int DEF_FIXED = 4;
/*
////////////////////////////////////////////////////
// Singleton instances
////////////////////////////////////////////////////
*/
final static DefaultAttrValue sImplied = new DefaultAttrValue(DEF_IMPLIED);
final static DefaultAttrValue sRequired = new DefaultAttrValue(DEF_REQUIRED);
/*
////////////////////////////////////////////////////
// State
////////////////////////////////////////////////////
*/
final int mDefValueType;
/**
* Actual expanded textual content of the default attribute value;
* normalized if appropriate in this mode.
* Note that all entities have been expanded: if a GE/PE was undefined,
* and no fatal errors were reported (non-validating mode), the
* references were just silently removed, and matching entries added
* to
* Notes about usage:
*
* Notes about thread-safety: this class is not thread-safe, since it does
* not have to be, in general case. That is, the only instances that can
* be shared are external subset instances, and those are used in read-only
* manner (with the exception of temporary arrays constructed on-demand).
*/
public final class DTDElement
{
/*
///////////////////////////////////////////////////
// Information about the element itself
///////////////////////////////////////////////////
*/
final PrefixedName mName;
/**
* Location of the (real) definition of the element; may be null for
* placeholder elements created to hold ATTLIST definitions
*/
final Location mLocation;
/**
* Base validator object for validating content model of this element;
* may be null for some simple content models (ANY, EMPTY).
*/
StructValidator mValidator;
int mAllowedContent;
/**
* True if the DTD was parsed (and is to be used) in namespace-aware
* mode.
* Affects (name) validation amongst other things.
*/
final boolean mNsAware;
/**
* True if the DTD was parsed in xml1.1 compliant mode (referenced to
* from an xml 1.1 document).
* Affects (name) validation amongst other things.
*/
final boolean mXml11;
/*
///////////////////////////////////////////////////
// Attribute info
///////////////////////////////////////////////////
*/
HashMap mAttrMap = null;
/**
* Ordered list of attributes that have 'special' properties (attribute
* is required, has a default value [regular or fixed]); these attributes
* have to be specifically checked after actual values have been resolved.
*/
ArrayList mSpecAttrList = null;
boolean mAnyFixed = false;
/**
* Flag set to true if there are any attributes that have either
* basic default value, or #FIXED default value.
*/
boolean mAnyDefaults = false;
/**
* Flag that is set to true if there is at least one attribute that
* has type that requires normalization and/or validation; that is,
* is of some other type than CDATA.
*/
boolean mValidateAttrs = false;
/**
* Id attribute instance, if one already declared for this element;
* can only have up to one such attribute per element.
*/
DTDAttribute mIdAttr;
/**
* Notation attribute instance, if one already declared for this element;
* can only have up to one such attribute per element.
*/
DTDAttribute mNotationAttr;
// // // !! If you add new attributes, make sure they get copied
// // // in #define() method !!
/*
///////////////////////////////////////////////////
// Namespace declaration defaulting...
///////////////////////////////////////////////////
*/
/**
* Set of namespace declarations with default values, if any
* (regular ns pseudo-attr declarations are just ignored)
*/
HashMap mNsDefaults = null; // [String : DTDAttribute]
/*
///////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////
*/
/**
 * Sole constructor; instances are created through the static factory
 * methods and {@link #define}.
 */
private DTDElement(Location loc, PrefixedName name,
                   StructValidator val, int allowedContent,
                   boolean nsAware, boolean xml11)
{
    // Identity and definition site
    this.mName = name;
    this.mLocation = loc;
    // Content model
    this.mValidator = val;
    this.mAllowedContent = allowedContent;
    // Mode flags
    this.mNsAware = nsAware;
    this.mXml11 = xml11;
}
/**
* Method called to create an actual element definition, matching
* an ELEMENT directive in a DTD subset.
*/
public static DTDElement createDefined(ReaderConfig cfg, Location loc, PrefixedName name,
                                       StructValidator val, int allowedContent)
{
    // sanity check: a defined element can not have the 'undefined' marker
    if (allowedContent == XMLValidator.CONTENT_ALLOW_UNDEFINED) {
        ExceptionUtil.throwInternal("trying to use XMLValidator.CONTENT_ALLOW_UNDEFINED via createDefined()");
    }
    final boolean nsAware = cfg.willSupportNamespaces();
    final boolean xml11 = cfg.isXml11();
    return new DTDElement(loc, name, val, allowedContent, nsAware, xml11);
}
/**
* Method called to create a "placeholder" element definition, needed to
* contain attribute definitions.
*/
public static DTDElement createPlaceholder(ReaderConfig cfg, Location loc, PrefixedName name)
{
    final boolean nsAware = cfg.willSupportNamespaces();
    final boolean xml11 = cfg.isXml11();
    // No validator, and content marked as undefined: that is what makes it a placeholder
    return new DTDElement(loc, name, null, XMLValidator.CONTENT_ALLOW_UNDEFINED,
                          nsAware, xml11);
}
/**
* Method called on placeholder element, to create a real instance that
* has all attribute definitions placeholder had (it'll always have at
* least one -- otherwise no placeholder was needed).
*/
public DTDElement define(Location loc, StructValidator val,
                         int allowedContent)
{
    verifyUndefined();
    // sanity check: the new definition must be a real one
    if (allowedContent == XMLValidator.CONTENT_ALLOW_UNDEFINED) {
        ExceptionUtil.throwInternal("trying to use CONTENT_ALLOW_UNDEFINED via define()");
    }
    DTDElement defined = new DTDElement(loc, mName, val, allowedContent,
                                        mNsAware, mXml11);

    // Attribute containers are handed over as is (not copied)
    defined.mAttrMap = mAttrMap;
    defined.mSpecAttrList = mSpecAttrList;
    defined.mNsDefaults = mNsDefaults;

    // Summary flags collected so far
    defined.mAnyFixed = mAnyFixed;
    defined.mAnyDefaults = mAnyDefaults;
    defined.mValidateAttrs = mValidateAttrs;

    // And the attributes of which there can be only one
    defined.mIdAttr = mIdAttr;
    defined.mNotationAttr = mNotationAttr;

    return defined;
}
/**
* Method called to "upgrade" a placeholder using a defined element,
* including adding attributes.
*/
public void defineFrom(InputProblemReporter rep, DTDElement definedElem,
                       boolean fullyValidate)
    throws XMLStreamException
{
    // Only the fully validating mode insists on this still being a placeholder
    if (fullyValidate) {
        verifyUndefined();
    }
    // Take over the content model of the defined element...
    this.mAllowedContent = definedElem.mAllowedContent;
    this.mValidator = definedElem.mValidator;
    // ... and any attributes this element does not yet have
    mergeMissingAttributesFrom(rep, definedElem, fullyValidate);
}
/**
 * Sanity check used before (re)defining this element: signals an internal
 * error unless the allowed content is still marked as undefined.
 */
private void verifyUndefined()
{
    if (mAllowedContent == XMLValidator.CONTENT_ALLOW_UNDEFINED) {
        return; // still undefined, fine
    }
    ExceptionUtil.throwInternal("redefining defined element spec");
}
/**
* Method called by DTD parser when it has read information about
* an attribute that belong to this element
*
* @return Newly created attribute Object if the attribute definition was
* added (hadn't been declared yet); null if it's a duplicate, in which
* case original definition sticks.
*/
public DTDAttribute addAttribute(InputProblemReporter rep,
                                 PrefixedName attrName, int valueType,
                                 DefaultAttrValue defValue, WordResolver enumValues,
                                 boolean fullyValidate)
    throws XMLStreamException
{
    // Attribute map is created lazily, on first declaration
    if (mAttrMap == null) {
        mAttrMap = new HashMap();
    }
    HashMap attrs = mAttrMap;

    // Attributes with special default handling also get a slot in the special list
    List specials = null;
    int specIx = -1;
    if (defValue.isSpecial()) {
        specials = getSpecialList();
        specIx = specials.size();
    }

    DTDAttribute created;

    switch (valueType) {
    case DTDAttribute.TYPE_CDATA:
        created = new DTDCdataAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_ENUMERATED:
        created = new DTDEnumAttr(attrName, defValue, specIx, mNsAware, mXml11, enumValues);
        break;

    case DTDAttribute.TYPE_ID:
        /* note: although ID attributes are not to have default value,
         * this is 'only' a validity constraint, and in dtd-aware-but-
         * not-validating mode it is apparently 'legal' to add default
         * values.
         */
        created = new DTDIdAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_IDREF:
        created = new DTDIdRefAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_IDREFS:
        created = new DTDIdRefsAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_ENTITY:
        created = new DTDEntityAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_ENTITIES:
        created = new DTDEntitiesAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_NOTATION:
        created = new DTDNotationAttr(attrName, defValue, specIx, mNsAware, mXml11, enumValues);
        break;

    case DTDAttribute.TYPE_NMTOKEN:
        created = new DTDNmTokenAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    case DTDAttribute.TYPE_NMTOKENS:
        created = new DTDNmTokensAttr(attrName, defValue, specIx, mNsAware, mXml11);
        break;

    default:
        // 18-Jan-2006, TSa: should never get here...
        ExceptionUtil.throwGenericInternal();
        created = null; // unreachable, but compiler wants it
    }

    // A non-null result means an earlier declaration exists (and sticks)
    DTDAttribute earlier = doAddAttribute(attrs, rep, created, specials, fullyValidate);
    if (earlier != null) {
        return null;
    }
    return created;
}
/**
* Method called to add a definition of a namespace-declaration
* pseudo-attribute with a default value.
*
* @return Attribute that acts as the placeholder, if the declaration
* was added; null to indicate it
* was a dup (there was an earlier declaration)
*/
public DTDAttribute addNsDefault
    (InputProblemReporter rep, PrefixedName attrName, int valueType,
     DefaultAttrValue defValue, boolean fullyValidate)
    throws XMLStreamException
{
    /* Handling is simplified: although in theory any value type could
     * be used, only CDATA is distinguished; everything else is treated
     * as NMTOKEN.
     */
    DTDAttribute nsAttr;
    if (valueType == DTDAttribute.TYPE_CDATA) {
        nsAttr = new DTDCdataAttr(attrName, defValue, -1, mNsAware, mXml11);
    } else {
        nsAttr = new DTDNmTokenAttr(attrName, defValue, -1, mNsAware, mXml11);
    }

    /* Defaults are keyed by the prefix being bound: empty String for the
     * default namespace (name without prefix), otherwise the local name
     * of the pseudo-attribute.
     */
    String declPrefix = attrName.getPrefix();
    String key = (declPrefix == null || declPrefix.length() == 0)
        ? "" : attrName.getLocalName();

    if (mNsDefaults == null) {
        mNsDefaults = new HashMap();
    } else if (mNsDefaults.containsKey(key)) {
        // duplicate; the earlier declaration sticks
        return null;
    }
    mNsDefaults.put(key, nsAttr);
    return nsAttr;
}
/**
 * Adds to this element those attribute declarations (and namespace
 * declaration defaults) of the other element that this element does not
 * yet have; existing entries are left untouched.
 *
 * @param rep Reporter passed along when adding attributes
 * @param other Element to copy missing declarations from
 * @param fullyValidate Passed along when adding attributes
 */
public void mergeMissingAttributesFrom(InputProblemReporter rep, DTDElement other,
                                       boolean fullyValidate)
    throws XMLStreamException
{
    Map theirs = other.getAttributes();
    if (mAttrMap == null) {
        mAttrMap = new HashMap();
    }
    HashMap mine = mAttrMap;

    if (theirs != null && !theirs.isEmpty()) {
        for (Iterator it = theirs.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry entry = (Map.Entry) it.next();
            // Only attributes not declared here get added
            if (mine.containsKey(entry.getKey())) {
                continue;
            }
            DTDAttribute attr = (DTDAttribute) entry.getValue();
            List specials = null;
            /* Non-special attributes can be shared as is; special ones
             * carry an index to the special list and need to be cloned
             * with the index valid for this element.
             */
            if (attr.isSpecial()) {
                specials = getSpecialList();
                attr = attr.cloneWith(specials.size());
            }
            doAddAttribute(mine, rep, attr, specials, fullyValidate);
        }
    }

    HashMap theirNs = other.mNsDefaults;
    if (theirNs == null) {
        return;
    }
    if (mNsDefaults == null) {
        mNsDefaults = new HashMap();
    }
    for (Iterator it = theirNs.entrySet().iterator(); it.hasNext(); ) {
        Map.Entry entry = (Map.Entry) it.next();
        Object prefix = entry.getKey();
        // Likewise, existing namespace defaults are not overridden
        if (!mNsDefaults.containsKey(prefix)) {
            mNsDefaults.put(prefix, entry.getValue());
        }
    }
}
/**
* @return Earlier declaration of the attribute, if any; null if
* this was a new attribute
*/
private DTDAttribute doAddAttribute(Map attrMap, InputProblemReporter rep,
                                    DTDAttribute attr, List specList,
                                    boolean fullyValidate)
    throws XMLStreamException
{
    PrefixedName attrName = attr.getName();

    // Already declared? If so, report and keep the earlier declaration
    DTDAttribute existing = (DTDAttribute) attrMap.get(attrName);
    if (existing != null) {
        rep.reportProblem(null, ErrorConsts.WT_ATTR_DECL, ErrorConsts.W_DTD_DUP_ATTR,
                          attrName, mName);
        return existing;
    }

    final int type = attr.getValueType();
    if (type == DTDAttribute.TYPE_ID) {
        // Only one such attribute per element (Specs, 1.0#3.3.1)
        if (fullyValidate && mIdAttr != null) {
            rep.throwParseError("Invalid id attribute \"{0}\" for element <{1}>: already had id attribute \""+mIdAttr.getName()+"\"", attrName, mName);
        }
        mIdAttr = attr;
    } else if (type == DTDAttribute.TYPE_NOTATION) {
        // Only one such attribute per element (Specs, 1.0#3.3.1)
        if (fullyValidate && mNotationAttr != null) {
            rep.throwParseError("Invalid notation attribute '"+attrName+"' for element <"+mName+">: already had notation attribute '"+mNotationAttr.getName()+"'");
        }
        mNotationAttr = attr;
    }

    attrMap.put(attrName, attr);
    if (specList != null) {
        specList.add(attr);
    }

    // Summary flags are sticky: once set they stay set (and the attribute is not asked)
    mAnyFixed = mAnyFixed || attr.isFixed();
    mValidateAttrs = mValidateAttrs || attr.needsValidation();
    mAnyDefaults = mAnyDefaults || attr.hasDefaultValue();

    return null;
}
/*
///////////////////////////////////////////////////
// Public API, accessors:
///////////////////////////////////////////////////
*/
/**
 * @return Name of this element
 */
public PrefixedName getName()
{
    return this.mName;
}
/**
 * @return String representation of the element name
 */
public String toString()
{
    final PrefixedName name = mName;
    return name.toString();
}
/**
 * @return Name to use when displaying this element; same as the String
 *   representation of the element name
 */
public String getDisplayName()
{
    final PrefixedName name = mName;
    return name.toString();
}
/**
 * @return Location this element was constructed with (may be null)
 */
public Location getLocation()
{
    return this.mLocation;
}
/**
 * @return True if this element has an actual definition; false if its
 *   allowed content is still marked as undefined
 */
public boolean isDefined()
{
    if (mAllowedContent == XMLValidator.CONTENT_ALLOW_UNDEFINED) {
        return false;
    }
    return true;
}
/**
* @return Constant that identifies what kind of nodes are in general
* allowed inside this element.
*/
public int getAllowedContent()
{
    // One of the XMLValidator.CONTENT_ALLOW_xxx constants
    return this.mAllowedContent;
}
/**
* Specialized accessor used by non-validating but typing 'validator':
* essentially, used to figure out whether #PCDATA is allowed or not;
* and based on that, return one of 2 allowable text values (only
* space, or anything). This is the relevant subset in non-validating
* modes, needed to properly type resulting character events.
*/
public int getAllowedContentIfSpace()
{
    // Anything up to (and including) 'white space only' maps to the lenient white space type
    if (mAllowedContent <= XMLValidator.CONTENT_ALLOW_WS) {
        return XMLValidator.CONTENT_ALLOW_WS_NONSTRICT;
    }
    return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
}
/**
 * @return The attribute map itself (not a copy); null if no map has
 *   been created yet
 */
public HashMap getAttributes()
{
    return this.mAttrMap;
}
/**
 * @return Number of entries in the special attribute list; 0 if the
 *   list has not been created
 */
public int getSpecialCount()
{
    if (mSpecAttrList == null) {
        return 0;
    }
    return mSpecAttrList.size();
}
/**
 * @return The special attribute list itself (not a copy); null if it
 *   has not been created
 */
public List getSpecialAttrs()
{
    return this.mSpecAttrList;
}
/**
* @return True if at least one of the attributes has type other than
* CDATA; false if not
*/
public boolean attrsNeedValidation()
{
    // Flag is maintained as attributes get added
    return this.mValidateAttrs;
}
/**
 * @return Current value of the flag that tracks whether any attribute
 *   added to this element is fixed
 */
public boolean hasFixedAttrs()
{
    return this.mAnyFixed;
}
/**
 * @return Current value of the flag that tracks whether any attribute
 *   added to this element has a default value
 */
public boolean hasAttrDefaultValues()
{
    return this.mAnyDefaults;
}
/**
 * @return Id attribute recorded for this element, if any; null if none
 */
public DTDAttribute getIdAttribute()
{
    return this.mIdAttr;
}
/**
 * @return Notation attribute recorded for this element, if any; null
 *   if none
 */
public DTDAttribute getNotationAttribute()
{
    return this.mNotationAttr;
}
/**
 * @return True if the map of namespace declaration defaults has been
 *   created; false if not
 */
public boolean hasNsDefaults()
{
    if (mNsDefaults == null) {
        return false;
    }
    return true;
}
/*
///////////////////////////////////////////////////
// Public API, factory methods:
///////////////////////////////////////////////////
*/
/**
 * @return New validator instance obtained from the validator of this
 *   element; null if the element has no validator
 */
public StructValidator getValidator()
{
    if (mValidator == null) {
        return null;
    }
    return mValidator.newInstance();
}
/**
 * @return The map of namespace declaration defaults itself (not a
 *   copy); null if none have been added
 */
protected HashMap getNsDefaults()
{
    return this.mNsDefaults;
}
/*
///////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////
*/
/**
 * @return The special attribute list, created on first access
 */
private List getSpecialList()
{
    if (mSpecAttrList == null) {
        mSpecAttrList = new ArrayList();
    }
    return mSpecAttrList;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/PrefixedNameSet.java 0000644 0001750 0001750 00000002462 11745427074 024337 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import com.ctc.wstx.util.PrefixedName;
public abstract class PrefixedNameSet
{
    protected PrefixedNameSet() { }

    /**
     * @return True if set contains more than one entry; false if not
     *   (empty or has one)
     */
    public abstract boolean hasMultiple();

    /**
     * @return True if the set contains specified name; false if not.
     */
    public abstract boolean contains(PrefixedName name);

    /**
     * Method used by {@link #toString(String)} to build the String
     * representation of the set.
     *
     * @param sb Buffer to append to
     * @param sep Separator passed by the caller
     */
    public abstract void appendNames(StringBuffer sb, String sep);

    /**
     * @return Representation built using ", " as the separator
     */
    public final String toString()
    {
        final String defaultSep = ", ";
        return toString(defaultSep);
    }

    /**
     * @param sep Separator to pass to {@link #appendNames}
     *
     * @return Contents {@link #appendNames} appended to an empty buffer
     */
    public final String toString(String sep)
    {
        StringBuffer buf = new StringBuffer();
        appendNames(buf, sep);
        return buf.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDNotationAttr.java 0000644 0001750 0001750 00000005735 11745427074 024304 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.WordResolver;
/**
* Specific attribute class for attributes that are of NOTATION type,
* and also contain enumerated set of legal values.
*/
public final class DTDNotationAttr
    extends DTDAttribute
{
    /**
     * Resolver that holds the notation names enumerated for this attribute.
     */
    final WordResolver mEnumValues;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public DTDNotationAttr(PrefixedName name, DefaultAttrValue defValue,
                           int specIndex, boolean nsAware, boolean xml11,
                           WordResolver enumValues)
    {
        super(name, defValue, specIndex, nsAware, xml11);
        mEnumValues = enumValues;
    }

    /**
     * Creates a copy of this attribute that differs only by its special
     * attribute index.
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDNotationAttr(mName, mDefValue, specIndex,
                                   mCfgNsAware, mCfgXml11, mEnumValues);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType() {
        return TYPE_NOTATION;
    }

    public boolean typeIsNotation() {
        return true;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *<p>
     * Note: identical to the implementation in {@link DTDEnumAttr}
     *
     * @return Result of the enumeration check when it succeeds; otherwise
     *   whatever problem reporting returns
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        String ok = validateEnumValue(cbuf, start, end, normalize, mEnumValues);
        if (ok == null) {
            String val = new String(cbuf, start, (end-start));
            return reportValidationProblem(v, "Invalid notation value '"+val+"': has to be one of ("
                                           +mEnumValues+")");
        }
        return ok;
    }

    /**
     * Method called by the validator
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        // First, basic checks that it's a valid non-empty name:
        String def = validateDefaultName(rep, normalize);

        // And then that it's one of listed values:
        String shared = mEnumValues.find(def);
        if (shared == null) {
            reportValidationProblem(rep, "Invalid default value '"+def+"': has to be one of ("
                                    +mEnumValues+")");
            /* Reporting does not necessarily throw; without returning
             * here the default value would get replaced with null below
             * (DTDEnumAttr returns at this point as well).
             */
            return;
        }

        // Ok, cool it's ok...
        if (normalize) {
            mDefValue.setValue(shared);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDTypingNonValidator.java 0000644 0001750 0001750 00000025311 11745427074 025441 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.util.DataUtil;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.ExceptionUtil;
/**
* This class is a "non-validating validator"; a validator-like object
* that handles DTD-based non-validation functionality: determining type
* information and default values. This instance does NOT implement any
* actual DTD-validation, and is to be used in DTD-aware non-validating
* mode.
*/
public class DTDTypingNonValidator
extends DTDValidatorBase
{
/*
///////////////////////////////////////////
// Element def/spec/validator stack, state
///////////////////////////////////////////
*/
/**
* Flag that indicates if current element has any attributes that
* have default values.
*/
protected boolean mHasAttrDefaults = false;
/**
* Bitset used for keeping track of defaulted attributes for which values
* have been found. Only non-null when current element does have such
* attributes
*/
protected BitSet mCurrDefaultAttrs = null;
/**
* Flag that indicates whether any of the attributes is potentially
* normalizable, and we are in attribute-normalizing mode.
*/
protected boolean mHasNormalizableAttrs = false;
/*
///////////////////////////////////////
// Temporary helper objects
///////////////////////////////////////
*/
/**
* Reusable lazily instantiated BitSet; needed to keep track of
* 'missing' attributes with default values (normal default, #FIXED).
*/
BitSet mTmpDefaultAttrs;
/*
///////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////
*/
/**
* Constructs the typing-only pseudo-validator; all arguments are passed
* as-is to the {@link DTDValidatorBase} constructor.
*/
public DTDTypingNonValidator(DTDSubset schema, ValidationContext ctxt, boolean hasNsDefaults,
Map elemSpecs, Map genEntities)
{
super(schema, ctxt, hasNsDefaults, elemSpecs, genEntities);
}
/**
* Lets callers distinguish this typing-only implementation from
* one that does actual validation.
*
* @return False, since this is not a real validator
*/
public final boolean reallyValidating() { return false; }
/*
///////////////////////////////////////
// Configuration
///////////////////////////////////////
*/
/**
* This 'validator' will not normalize any attributes,
* so let's implement this as no-op.
*<p>
* NOTE(review): the char array variant of <code>validateAttribute</code>
* in this class does call <code>attr.normalize()</code> when
* <code>mHasNormalizableAttrs</code> is set, so the statement above
* should be confirmed.
*
* @param state Ignored
*/
public void setAttrValueNormalization(boolean state) {
// nop
}
/*
///////////////////////////////////////
// XMLValidator implementation
///////////////////////////////////////
*/
//public XMLValidationSchema getSchema()
/**
 * Called when a start element is encountered: adds a stack frame for
 * the element and sets up the per-element attribute state (attribute
 * definitions, tracking of defaulted attributes, normalization flag).
 *<p>
 * Since no validation is done here, a missing element definition is not
 * a problem; the stack frame is then simply null. Also, unlike with real
 * validation, an element that has only been referred to (but not
 * defined) may still have attribute definitions.
 */
public void validateElementStart(String localName, String uri, String prefix)
    throws XMLStreamException
{
    // Definition may or may not exist:
    mTmpKey.reset(prefix, localName);
    final DTDElement def = (DTDElement) mElemSpecs.get(mTmpKey);

    // ... but the stack frame gets added either way
    final int depth = mElemCount++;
    if (depth >= mElems.length) {
        mElems = (DTDElement[]) DataUtil.growArrayBy50Pct(mElems);
    }
    mElems[depth] = mCurrElem = def;
    mAttrCount = 0;
    mIdAttrIndex = -2; // -2 as a "don't know yet" marker

    /* No definition, or definition without any attributes: there is
     * no type information to make use of, so just reset the state.
     */
    if (def == null || (mCurrAttrDefs = def.getAttributes()) == null) {
        mCurrAttrDefs = EMPTY_MAP;
        mHasAttrDefaults = false;
        mCurrDefaultAttrs = null;
        mHasNormalizableAttrs = false;
        return;
    }

    // Might attribute values need to be normalized?
    mHasNormalizableAttrs = mNormAttrs || def.attrsNeedValidation();

    // And are there attributes with default values?
    mHasAttrDefaults = def.hasAttrDefaultValues();
    if (!mHasAttrDefaults) {
        mCurrDefaultAttrs = null;
        return;
    }
    /* 'Special' count also covers #REQUIRED attributes; slightly more
     * than what is needed, but harmless.
     */
    final int specCount = def.getSpecialCount();
    BitSet seen = mTmpDefaultAttrs;
    if (seen != null) { // can recycle the existing one
        seen.clear();
    } else {
        mTmpDefaultAttrs = seen = new BitSet(specCount);
    }
    mCurrDefaultAttrs = seen;
}
/**
 * Called for each attribute of the current element, with the value as
 * a String. No validation is done; the method only records the
 * attribute definition (if any) for later type lookups, and marks the
 * attribute as explicitly specified if it is one of the 'special'
 * ones, so that no default value gets added for it.
 *
 * @return Always null, to indicate the value is to be used as is
 */
public String validateAttribute(String localName, String uri,
                                String prefix, String value)
    throws XMLStreamException
{
    final DTDAttribute spec = (DTDAttribute) mCurrAttrDefs.get(mTmpKey.reset(prefix, localName));
    final int slot = mAttrCount++;
    if (slot >= mAttrSpecs.length) {
        mAttrSpecs = (DTDAttribute[]) DataUtil.growArrayBy50Pct(mAttrSpecs);
    }
    mAttrSpecs[slot] = spec;

    /* An undeclared attribute would be a validity error, but that is
     * of no concern here: just nothing more to do for it.
     */
    if (spec == null) {
        return null;
    }
    if (mHasAttrDefaults) {
        /* Generic 'special' index is used here as well, even though
         * it also covers #REQUIRED attributes
         */
        final int specIndex = spec.getSpecialIndex();
        if (specIndex >= 0) {
            mCurrDefaultAttrs.set(specIndex);
        }
    }
    /* !!! TBI: normalization of String-valued attributes (which would
     * be needed when mHasNormalizableAttrs is set) is not implemented.
     */
    return null; // fine as is
}
/**
 * Called for each attribute of the current element, with the value
 * passed in a character buffer. Does the same book-keeping as the
 * String-valued variant, and additionally lets the attribute normalize
 * the value when normalization may be needed.
 *
 * @param valueStart Offset of the first character of the value
 * @param valueEnd End offset of the value, passed to the attribute's
 *   normalize method as is
 *
 * @return Result of the attribute's normalize call, if the attribute is
 *   declared and normalization may be needed; null otherwise
 */
public String validateAttribute(String localName, String uri,
                                String prefix,
                                char[] valueChars, int valueStart,
                                int valueEnd)
    throws XMLStreamException
{
    final DTDAttribute spec = (DTDAttribute) mCurrAttrDefs.get(mTmpKey.reset(prefix, localName));
    final int slot = mAttrCount++;
    if (slot >= mAttrSpecs.length) {
        mAttrSpecs = (DTDAttribute[]) DataUtil.growArrayBy50Pct(mAttrSpecs);
    }
    mAttrSpecs[slot] = spec;

    if (spec == null) { // undeclared; nothing more to do
        return null;
    }
    if (mHasAttrDefaults) {
        final int specIndex = spec.getSpecialIndex();
        if (specIndex >= 0) { // explicit value found, no default needed
            mCurrDefaultAttrs.set(specIndex);
        }
    }
    // may get normalized, after all; if not, value is fine as is
    return mHasNormalizableAttrs
        ? spec.normalize(this, valueChars, valueStart, valueEnd)
        : null;
}
/**
 * Called after all attributes of the current element have been
 * processed. Since there is no actual validation, the only thing to do
 * is to add default values for attributes that have one but that were
 * not explicitly specified.
 *
 * @return {@link XMLValidator#CONTENT_ALLOW_ANY_TEXT} if the element has
 *   no definition; otherwise what the element's
 *   <code>getAllowedContentIfSpace()</code> returns. The latter is what
 *   allows caller to report white space as SPACE instead of CHARACTERS.
 */
public int validateElementAndAttributes()
    throws XMLStreamException
{
    final DTDElement elem = mCurrElem;

    if (mHasAttrDefaults) {
        final BitSet seen = mCurrDefaultAttrs;
        final int specCount = elem.getSpecialCount();
        // each clear bit marks a 'special' attribute that was not specified
        for (int ix = seen.nextClearBit(0); ix < specCount; ix = seen.nextClearBit(ix+1)) {
            DTDAttribute attr = (DTDAttribute) elem.getSpecialAttrs().get(ix);
            if (attr.hasDefaultValue()) { // no default for #REQUIRED...
                doAddDefaultValue(attr);
            }
        }
    }
    /* Other content types are not to be returned, since caller would
     * not know how to deal with them when not validating.
     */
    if (elem == null) {
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    }
    return elem.getAllowedContentIfSpace();
}
/**
 * Called when an end element is encountered. Since we are not really
 * validating, only need to maintain the element stack.
 *
 * @return Content type that is in effect for the parent element:
 *   {@link XMLValidator#CONTENT_ALLOW_ANY_TEXT} if there is no parent
 *   (or it has no definition); otherwise what the parent's
 *   <code>getAllowedContentIfSpace()</code> returns
 */
public int validateElementEnd(String localName, String uri, String prefix)
    throws XMLStreamException
{
    int ix = mElemCount - 1;
    /* Stack may be empty if this instance only sees the end of an
     * element (same guard as DTDValidator has for [WSTX-200]); must not
     * let the count go negative or index the stack with -1.
     */
    if (ix < 0) {
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    }
    mElemCount = ix;
    mElems[ix] = null;
    if (ix < 1) { // root element closed
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    }
    DTDElement parent = mElems[ix-1];
    return (parent == null) ? XMLValidator.CONTENT_ALLOW_ANY_TEXT :
        parent.getAllowedContentIfSpace();
}
// base class implements these ok:
//public void validateText(String text, boolean lastTextSegment)
//public void validateText(char[] cbuf, int textStart, int textEnd, boolean lastTextSegment)
/**
* Called when validation is finished; this implementation has no
* state to verify, so the method is intentionally empty.
*
* @param eod Not used by this implementation
*/
public void validationCompleted(boolean eod)
//throws XMLStreamException
{
// fine, great, nothing to do...
}
/*
///////////////////////////////////////
// Package methods, accessors
///////////////////////////////////////
*/
/**
* Accessor for the element id map; not expected to be called for this
* non-validating implementation.
*
* @return Null, but only to keep the compiler happy: according to the
* comment below, the call to <code>ExceptionUtil.throwGenericInternal()</code>
* is expected to throw before the return statement is reached
* (TODO confirm that the helper always throws)
*/
protected ElementIdMap getIdMap()
{
/* should never be called; for now let's throw an exception, if it
* turns out it does get called can/should return an empty immutable
* map or something
*/
ExceptionUtil.throwGenericInternal();
return null; // never gets here
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDEventListener.java 0000644 0001750 0001750 00000003430 11745427074 024433 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.net.URL;
import javax.xml.stream.XMLStreamException;
/**
 * Listener interface through which content and declarations found in
 * a DTD subset get reported.
 */
public interface DTDEventListener
{
    // // // Configuration

    /**
     * @return True, if there is a listener interested in getting comment
     *   events within DTD subset (since that's optional)
     */
    boolean dtdReportComments();

    // // // Basic content events

    /**
     * Reports a processing instruction.
     */
    void dtdProcessingInstruction(String target, String data);

    /**
     * Reports a comment.
     *
     * @param data Buffer that contains the comment text
     * @param offset Offset of the text within the buffer
     * @param len Length of the text
     */
    void dtdComment(char[] data, int offset, int len);

    /**
     * Reports an entity that was skipped.
     */
    void dtdSkippedEntity(String name);

    // // // DTD declarations that must be exposed

    void dtdNotationDecl(String name, String publicId, String systemId, URL baseURL)
        throws XMLStreamException;

    void dtdUnparsedEntityDecl(String name, String publicId, String systemId, String notationName, URL baseURL)
        throws XMLStreamException;

    // // // DTD declarations that can be exposed

    /**
     * Reports an attribute declaration. Note that unlike other methods,
     * name of this one does not have the "dtd" prefix.
     *
     * @param eName Presumably name of the element attribute belongs to
     *   (TODO confirm against callers)
     * @param aName Presumably name of the attribute itself (TODO confirm)
     */
    void attributeDecl(String eName, String aName, String type, String mode, String value);

    void dtdElementDecl(String name, String model);

    void dtdExternalEntityDecl(String name, String publicId, String systemId);

    void dtdInternalEntityDecl(String name, String value);
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDValidator.java 0000644 0001750 0001750 00000034245 11745427074 023601 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.util.DataUtil;
import com.ctc.wstx.util.ElementId;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.StringUtil;
/**
* Woodstox implementation of {@link XMLValidator}; the class that
* handles DTD-based validation.
*/
public class DTDValidator
extends DTDValidatorBase
{
/*
///////////////////////////////////////
// Configuration
///////////////////////////////////////
*/
/**
* Determines if identical problems (definition of the same element,
* for example) should cause multiple error notifications or not:
* if true, will get one error per instance, if false, only the first
* one will get reported.
*/
protected boolean mReportDuplicateErrors = false;
/*
///////////////////////////////////////
// Id/idref state
///////////////////////////////////////
*/
/**
* Information about declared and referenced element ids (unique
* ids that attributes may defined, as defined by DTD)
*/
protected ElementIdMap mIdMap = null;
/*
///////////////////////////////////////////
// Element def/spec/validator stack, state
///////////////////////////////////////////
*/
/**
* Stack of validators for open elements
*/
protected StructValidator[] mValidators = null;
/**
* Bitset used for keeping track of required and defaulted attributes
* for which values have been found.
*/
protected BitSet mCurrSpecialAttrs = null;
boolean mCurrHasAnyFixed = false;
/*
///////////////////////////////////////
// Temporary helper objects
///////////////////////////////////////
*/
/**
* Reusable lazily instantiated BitSet; needed to keep track of
* missing 'special' attributes (required ones, ones with default
* values)
*/
BitSet mTmpSpecialAttrs;
/*
///////////////////////////////////////
// Life-cycle
///////////////////////////////////////
*/
/**
* Constructs a validator instance: arguments are passed as-is to the
* {@link DTDValidatorBase} constructor, after which the stack of
* structural validators is allocated with the default size.
*/
public DTDValidator(DTDSubset schema, ValidationContext ctxt, boolean hasNsDefaults,
Map elemSpecs, Map genEntities)
{
super(schema, ctxt, hasNsDefaults, elemSpecs, genEntities);
mValidators = new StructValidator[DEFAULT_STACK_SIZE];
}
/**
* @return True, since this implementation does full DTD validation
*/
public final boolean reallyValidating() { return true; }
/*
///////////////////////////////////////
// XMLValidator implementation
///////////////////////////////////////
*/
//public XMLValidationSchema getSchema();
/**
 * Method called to update information about the newly encountered (start)
 * element. At this point namespace information has been resolved, but
 * no DTD validation has been done. Validator is to do these validations,
 * including checking for attribute value (and existence) compatibility.
 *<p>
 * Three things are done here: the element definition is looked up (and
 * a problem reported if it is missing or only implicitly defined), the
 * element is checked against the content model of its parent, and
 * per-element attribute state is set up for the attribute calls that
 * follow.
 */
public void validateElementStart(String localName, String uri, String prefix)
    throws XMLStreamException
{
    /* Ok, need to find the element definition; if not found (or
     * only implicitly defined), need to throw the exception.
     */
    mTmpKey.reset(prefix, localName);

    DTDElement elem = (DTDElement) mElemSpecs.get(mTmpKey);

    /* Let's add the entry in (even if it's a null); this is necessary
     * to keep things in-sync if allowing graceful handling of validity
     * errors
     */
    int elemCount = mElemCount++;
    if (elemCount >= mElems.length) {
        // the two stacks are grown in lock-step
        mElems = (DTDElement[]) DataUtil.growArrayBy50Pct(mElems);
        mValidators = (StructValidator[]) DataUtil.growArrayBy50Pct(mValidators);
    }
    mElems[elemCount] = mCurrElem = elem;
    if (elem == null || !elem.isDefined()) {
        reportValidationProblem(ErrorConsts.ERR_VLD_UNKNOWN_ELEM, mTmpKey.toString());
    }

    // Is this element legal under the parent element?
    StructValidator pv = (elemCount > 0) ? mValidators[elemCount-1] : null;

    if (pv != null && elem != null) {
        String msg = pv.tryToValidate(elem.getName());
        if (msg != null) {
            /* Message may contain a placeholder for the end tag of the
             * parent element; if so, replace it with the actual end tag
             */
            int ix = msg.indexOf("$END");
            String pname = mElems[elemCount-1].toString();
            if (ix >= 0) {
                msg = msg.substring(0, ix) + "</"+pname+">"
                    +msg.substring(ix+4);
            }
            reportValidationProblem("Validation error, encountered element <"
                                    +elem.getName()+"> as a child of <"
                                    +pname+">: "+msg);
        }
    }

    mAttrCount = 0;
    mIdAttrIndex = -2; // -2 as a "don't know yet" marker

    // Ok, need to get the child validator, then:
    if (elem == null) {
        mValidators[elemCount] = null;
        mCurrAttrDefs = EMPTY_MAP;
        mCurrHasAnyFixed = false;
        mCurrSpecialAttrs = null;
    } else {
        mValidators[elemCount] = elem.getValidator();
        mCurrAttrDefs = elem.getAttributes();
        if (mCurrAttrDefs == null) {
            mCurrAttrDefs = EMPTY_MAP;
        }
        mCurrHasAnyFixed = elem.hasFixedAttrs();
        int specCount = elem.getSpecialCount();
        if (specCount == 0) {
            mCurrSpecialAttrs = null;
        } else {
            // BitSet is lazily created, and then reused across elements
            BitSet bs = mTmpSpecialAttrs;
            if (bs == null) {
                mTmpSpecialAttrs = bs = new BitSet(specCount);
            } else {
                bs.clear();
            }
            mCurrSpecialAttrs = bs;
        }
    }
}
/**
 * Validates a single attribute of the current element, with the value
 * passed as a String: checks that the attribute has been declared, lets
 * the attribute definition validate (and possibly normalize) the value,
 * and verifies that a #FIXED attribute has the expected value.
 *
 * @return For undeclared attributes, the value as passed in; otherwise
 *   whatever the attribute's validate method returned
 */
public String validateAttribute(String localName, String uri,
                                String prefix, String value)
    throws XMLStreamException
{
    final DTDAttribute spec = (DTDAttribute) mCurrAttrDefs.get(mTmpKey.reset(prefix, localName));

    if (spec == null) {
        // Only report error if not already recovering from an error:
        if (mCurrElem != null) {
            reportValidationProblem(ErrorConsts.ERR_VLD_UNKNOWN_ATTR,
                                    mCurrElem.toString(), mTmpKey.toString());
        }
        /* [WSTX-190] NPE if we continued (after reporter didn't
         *   throw an exception); nothing more to do, let's leave
         */
        return value;
    }

    final int slot = mAttrCount++;
    if (slot >= mAttrSpecs.length) {
        mAttrSpecs = (DTDAttribute[]) DataUtil.growArrayBy50Pct(mAttrSpecs);
    }
    mAttrSpecs[slot] = spec;

    // 'Special' attributes need to be marked as seen
    if (mCurrSpecialAttrs != null) {
        final int specIndex = spec.getSpecialIndex();
        if (specIndex >= 0) {
            mCurrSpecialAttrs.set(specIndex);
        }
    }

    final String validated = spec.validate(this, value, mNormAttrs);
    if (mCurrHasAnyFixed && spec.isFixed()) {
        // null result is taken to mean that the original value is to be used
        final String actual = (validated != null) ? validated : value;
        final String expected = spec.getDefaultValue(mContext, this);
        if (!actual.equals(expected)) {
            reportValidationProblem("Value of attribute \""+spec+"\" (element <"+mCurrElem+">) not \""+expected+"\" as expected, but \""+actual+"\"");
        }
    }
    return validated;
}
/**
 * Validates a single attribute of the current element, with the value
 * passed in a character buffer: checks that the attribute has been
 * declared, lets the attribute definition validate (and possibly
 * normalize) the value, and verifies that a #FIXED attribute has the
 * expected value.
 *
 * @param valueChars Buffer that contains the value
 * @param valueStart Offset of the first character of the value
 * @param valueEnd Offset one past the last character of the value (an
 *   end offset, NOT a length)
 *
 * @return For undeclared attributes, the value as a String; otherwise
 *   whatever the attribute's validate method returned
 */
public String validateAttribute(String localName, String uri,
                                String prefix,
                                char[] valueChars, int valueStart,
                                int valueEnd)
    throws XMLStreamException
{
    DTDAttribute attr = (DTDAttribute) mCurrAttrDefs.get(mTmpKey.reset(prefix, localName));
    if (attr == null) {
        // Only report error if not already recovering from an error:
        if (mCurrElem != null) {
            reportValidationProblem(ErrorConsts.ERR_VLD_UNKNOWN_ATTR,
                                    mCurrElem.toString(), mTmpKey.toString());
        }
        /* [WSTX-190] NPE if we continued (after reporter didn't
         *   throw an exception); nothing more to do, let's leave.
         * Note: String constructor takes a length, not an end offset.
         */
        return new String(valueChars, valueStart, valueEnd - valueStart);
    }
    int index = mAttrCount++;
    if (index >= mAttrSpecs.length) {
        mAttrSpecs = (DTDAttribute[]) DataUtil.growArrayBy50Pct(mAttrSpecs);
    }
    mAttrSpecs[index] = attr;
    if (mCurrSpecialAttrs != null) { // Need to mark that we got it
        int specIndex = attr.getSpecialIndex();
        if (specIndex >= 0) {
            mCurrSpecialAttrs.set(specIndex);
        }
    }
    String result = attr.validate(this, valueChars, valueStart, valueEnd, mNormAttrs);
    if (mCurrHasAnyFixed && attr.isFixed()) {
        String exp = attr.getDefaultValue(mContext, this);
        boolean match;
        if (result == null) { // no changes, compare against the buffer
            match = StringUtil.matches(exp, valueChars, valueStart, valueEnd - valueStart);
        } else {
            match = exp.equals(result);
        }
        if (!match) {
            // as above, need to pass length, not end offset
            String act = (result == null) ?
                new String(valueChars, valueStart, valueEnd - valueStart) : result;
            reportValidationProblem("Value of #FIXED attribute \""+attr+"\" (element <"+mCurrElem+">) not \""+exp+"\" as expected, but \""+act+"\"");
        }
    }
    return result;
}
/**
 * Called after all attributes of the current element have been
 * validated, to check for 'special' attributes that were not
 * specified: a problem is reported for missing #REQUIRED attributes,
 * and a default value is added for the others.
 *
 * @return {@link XMLValidator#CONTENT_ALLOW_ANY_TEXT} if the current
 *   element has no definition; otherwise the allowed content type of
 *   the element
 */
public int validateElementAndAttributes()
    throws XMLStreamException
{
    final DTDElement elem = mCurrElem;
    // No definition (most likely already reported as a problem)? Nothing to check
    if (elem == null) {
        return XMLValidator.CONTENT_ALLOW_ANY_TEXT;
    }

    final BitSet seen = mCurrSpecialAttrs;
    if (seen != null) {
        final int specCount = elem.getSpecialCount();
        // each clear bit marks a special attribute that was not specified
        for (int ix = seen.nextClearBit(0); ix < specCount; ix = seen.nextClearBit(ix+1)) {
            DTDAttribute attr = (DTDAttribute) elem.getSpecialAttrs().get(ix);
            if (!attr.isRequired()) {
                doAddDefaultValue(attr);
                continue;
            }
            /* [WSTX-155]: Problems if reportValidationProblem returns
             *   ok (which happens if a reporter handles it). So what
             *   to do with missing required value? First thought is
             *   to just leave it as is.
             */
            reportValidationProblem("Required attribute \"{0}\" missing from element <{1}>", attr, elem);
        }
    }
    return elem.getAllowedContent();
}
/**
 * Called when an end element is encountered: removes the stack frame
 * of the element, and verifies that the content of the element was
 * complete according to its structural validator (if it has one).
 *
 * @return Validation state that should be effective for the parent
 *   element state: {@link XMLValidator#CONTENT_ALLOW_WS} if there is
 *   no parent, {@link XMLValidator#CONTENT_ALLOW_ANY_TEXT} if the parent
 *   has no definition, and allowed content type of the parent otherwise
 */
public int validateElementEnd(String localName, String uri, String prefix)
    throws XMLStreamException
{
    // First, let's remove the top:
    int ix = mElemCount-1;
    /* [WSTX-200]: need to avoid problems when doing sub-tree
     *   validation...
     */
    if (ix < 0) {
        return XMLValidator.CONTENT_ALLOW_WS;
    }
    mElemCount = ix;

    DTDElement closingElem = mElems[ix];
    mElems[ix] = null;
    StructValidator v = mValidators[ix];
    mValidators[ix] = null;

    // Validation?
    if (v != null) {
        String msg = v.fullyValid();
        if (msg != null) {
            reportValidationProblem("Validation error, element </"
                                    +closingElem+">: "+msg);
        }
    }

    // Then let's get info from parent, if any
    if (ix < 1) { // root element closing..
        // doesn't really matter; epilog/prolog differently handled:
        return XMLValidator.CONTENT_ALLOW_WS;
    }
    /* Parent entry is null if the parent element had no definition (and
     * the problem reporter did not throw an exception for that); same
     * lenient content type is then used as when such an element is
     * started.
     */
    DTDElement parent = mElems[ix-1];
    return (parent == null) ? XMLValidator.CONTENT_ALLOW_ANY_TEXT :
        parent.getAllowedContent();
}
//public void validateText(String text, boolean lastTextSegment) ;
//public void validateText(char[] cbuf, int textStart, int textEnd, boolean lastTextSegment) ;
/**
* Called when validation is finished; delegates to {@link #checkIdRefs}
* to verify id references.
*
* @param eod Not used by this implementation
*/
public void validationCompleted(boolean eod)
throws XMLStreamException
{
/* Need to now ensure that all IDREF/IDREFS references
* point to defined ID attributes
*/
checkIdRefs();
}
/*
///////////////////////////////////////
// Package methods, accessors
///////////////////////////////////////
*/
/**
 * Accessor for the map of element ids; the map is only constructed
 * when it is asked for the first time.
 *
 * @return Id map of this validator; never null
 */
protected ElementIdMap getIdMap() {
    ElementIdMap map = mIdMap;
    if (map == null) {
        mIdMap = map = new ElementIdMap();
    }
    return map;
}
/*
///////////////////////////////////////
// Internal methods
///////////////////////////////////////
*/
/**
 * Verifies that all id references point to ids that actually have been
 * defined; if not, a validation problem is reported for the first
 * undefined one, using the location of the reference.
 */
protected void checkIdRefs()
    throws XMLStreamException
{
    // No map, no ids or references to check
    if (mIdMap == null) {
        return;
    }
    final ElementId undefined = mIdMap.getFirstUndefined();
    if (undefined == null) { // all good
        return;
    }
    reportValidationProblem("Undefined id '"+undefined.getId()
                            +"': referenced from element <"
                            +undefined.getElemName()+">, attribute '"
                            +undefined.getAttrName()+"'",
                            undefined.getLocation());
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/StarModel.java 0000644 0001750 0001750 00000003645 11745427074 023212 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.*;
/**
 * Model node for the '*' arity: matches its sub-model repeated any
 * number of times, including zero times.
 */
public class StarModel
    extends ModelNode
{
    /**
     * The sub-model that may be repeated
     */
    ModelNode mModel;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public StarModel(ModelNode model) {
        super();
        mModel = model;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * Creates a deep copy: the sub-model is cloned as well, so that
     * no Objects are shared between the original and the copy.
     */
    public ModelNode cloneModel() {
        ModelNode subCopy = mModel.cloneModel();
        return new StarModel(subCopy);
    }

    /**
     * @return Always true, since zero repetitions are allowed
     */
    public boolean isNullable() {
        return true;
    }

    // // // Following three just delegate to the sub-model:

    public void indexTokens(List tokens) {
        mModel.indexTokens(tokens);
    }

    public void addFirstPos(BitSet pos) {
        mModel.addFirstPos(pos);
    }

    public void addLastPos(BitSet pos) {
        mModel.addLastPos(pos);
    }

    /**
     * Calculates follow positions for the sub-model, and then adds
     * the closure: since the sub-model can be repeated, its first
     * positions can follow each one of its last positions.
     */
    public void calcFollowPos(BitSet[] followPosSets)
    {
        // Sub-model gets to do its own calculations first
        mModel.calcFollowPos(followPosSets);

        final BitSet firstPos = new BitSet();
        mModel.addFirstPos(firstPos);
        final BitSet lastPos = new BitSet();
        mModel.addLastPos(lastPos);

        // Index 0 is for the null entry, and is to be skipped
        for (int ix = lastPos.nextSetBit(1); ix >= 0; ix = lastPos.nextSetBit(ix+1)) {
            followPosSets[ix].or(firstPos);
        }
    }

    public String toString() {
        String sub = mModel.toString();
        return sub + "*";
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/SeqContentSpec.java 0000644 0001750 0001750 00000015152 11745427074 024212 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.*;
import com.ctc.wstx.util.PrefixedName;
/**
* Content specification that defines model that has sequence of one or more
* elements that have to come in the specified order.
*/
public class SeqContentSpec
extends ContentSpec
{
final boolean mNsAware;
final ContentSpec[] mContentSpecs;
/*
///////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////
*/
/**
* @param nsAware Stored as is in {@link #mNsAware}
* @param arity Arity marker, passed to the super class; code of this
* class checks it against '*', '?', '+' and ' ' (none)
* @param subSpecs Specifications of the members of the sequence, in
* order; the array is used as is (not copied)
*/
public SeqContentSpec(boolean nsAware, char arity, ContentSpec[] subSpecs)
{
super(arity);
mNsAware = nsAware;
mContentSpecs = subSpecs;
}
/**
 * Factory method for constructing an instance from a Collection of
 * sub-specifications.
 *
 * @param subSpecs Collection of {@link ContentSpec} instances; copied
 *   into an array, in iteration order of the Collection
 */
public static SeqContentSpec construct(boolean nsAware, char arity, Collection subSpecs)
{
    // array is of the exact size, so it is also the one that gets returned
    ContentSpec[] asArray = (ContentSpec[]) subSpecs.toArray(new ContentSpec[subSpecs.size()]);
    return new SeqContentSpec(nsAware, arity, asArray);
}
/*
///////////////////////////////////////////////////
// Public API
///////////////////////////////////////////////////
*/
/**
 * Tries to construct a simple validator for the sequence, which is
 * possible if (and only if) all sub-specs are leaves (element tokens
 * with no arity modifier).
 *
 * @return Simple sequence validator, if one can be used; null if not
 *   (in which case a DFA is needed)
 */
public StructValidator getSimpleValidator()
{
    final ContentSpec[] specs = mContentSpecs;
    final int len = specs.length;

    // A single non-leaf is enough to rule out the simple validator
    for (int i = 0; i < len; ++i) {
        if (!specs[i].isLeaf()) {
            return null;
        }
    }
    // All leaves: just need to collect the names
    final PrefixedName[] names = new PrefixedName[len];
    for (int i = 0; i < len; ++i) {
        names[i] = ((TokenContentSpec) specs[i]).getName();
    }
    return new Validator(mArity, names);
}
/**
 * Converts this specification into a model tree: the n-ary sequence is
 * first converted into a tree of binary concatenation nodes, and the
 * arity modifier (if any) is then resolved by wrapping that tree.
 */
public ModelNode rewrite()
{
    final ModelNode seq = rewrite(mContentSpecs, 0, mContentSpecs.length);

    switch (mArity) {
    case '*':
        return new StarModel(seq);
    case '?':
        return new OptionalModel(seq);
    case '+': // one, followed by any number of copies
        return new ConcatModel(seq, new StarModel(seq.cloneModel()));
    default: // no modifier
        return seq;
    }
}
/**
 * Helper method that converts a range of sub-specs into a tree of
 * binary concatenation nodes.
 *
 * @param specs Sub-specs of the sequence
 * @param first Index of the first sub-spec to include
 * @param last Index one past the last sub-spec to include
 *
 * @return Model for the range; for a range with just one sub-spec,
 *   model of that sub-spec as is
 */
private ModelNode rewrite(ContentSpec[] specs, int first, int last)
{
    // 3 or less, can convert and create; 4 or more, need to recurse:
    int count = last - first;
    if (count > 3) {
        // both halves will have at least 2 entries
        int mid = (last + first + 1) >> 1;
        return new ConcatModel(rewrite(specs, first, mid),
                               rewrite(specs, mid, last));
    }
    /* Callers are expected to pass at least 2 sub-specs, but a sequence
     * with a single member is easy enough to handle: there is nothing
     * to concatenate.
     */
    if (count == 1) {
        return specs[first].rewrite();
    }
    ConcatModel model = new ConcatModel(specs[first].rewrite(),
                                        specs[first+1].rewrite());
    if (count == 3) {
        model = new ConcatModel(model, specs[first+2].rewrite());
    }
    return model;
}
/**
 * @return Description of the sequence: comma-separated descriptions of
 *   the sub-specs within parentheses, followed by the arity marker
 *   (unless it is space, that is, there is none)
 */
public String toString()
{
    final StringBuffer out = new StringBuffer();
    final int len = mContentSpecs.length;

    out.append('(');
    if (len > 0) {
        out.append(mContentSpecs[0].toString());
    }
    for (int i = 1; i < len; ++i) {
        out.append(", ").append(mContentSpecs[i].toString());
    }
    out.append(')');
    if (mArity != ' ') {
        out.append(mArity);
    }
    return out.toString();
}
/*
///////////////////////////////////////////////////
// Validator class that can be used for simple
// sequences (ones where all members are leaf nodes)
///////////////////////////////////////////////////
*/
/**
 * Simple validator that can be used if all components of a sequence
 * are leaf nodes, ie. elements with no explicit arity modifiers.
 * Validation is done by stepping through the expected names, and by
 * counting how many times the whole sequence has been completed.
 */
final static class Validator
    extends StructValidator
{
    /**
     * Arity marker of the sequence ('*', '?', '+' or ' ')
     */
    final char mArity;

    /**
     * Names of the elements of the sequence, in order
     */
    final PrefixedName[] mNames;

    /**
     * Number of full repetitions done over the sequence
     */
    int mRounds = 0;

    /**
     * Expected next element in the sequence
     */
    int mStep = 0;

    public Validator(char arity, PrefixedName[] names)
    {
        mArity = arity;
        mNames = names;
    }

    /**
     * Sequence content specification is always stateful; can not
     * use a shared instance... so let's create new instance:
     */
    public StructValidator newInstance() {
        return new Validator(mArity, mNames);
    }

    /**
     * @return Null if the element is the one expected next in the
     *   sequence; otherwise a description of the problem
     */
    public String tryToValidate(PrefixedName elemName)
    {
        // First; have we already done that max. 1 sequence?
        final boolean oneRoundDone = (mStep == 0) && (mRounds == 1);
        if (oneRoundDone && (mArity == '?' || mArity == ' ')) {
            return "was not expecting any more elements in the sequence ("
                +concatNames(mNames)+")";
        }
        if (!elemName.equals(mNames[mStep])) {
            return expElem(mStep);
        }
        // Match; and possibly the end of a full round as well
        ++mStep;
        if (mStep == mNames.length) {
            ++mRounds;
            mStep = 0;
        }
        return null;
    }

    /**
     * @return Null if the content seen so far is complete with respect
     *   to the sequence and its arity; otherwise a description of the
     *   problem
     */
    public String fullyValid()
    {
        // In the middle of the sequence?
        if (mStep != 0) {
            return expElem(mStep)+"; got end element";
        }
        // Optional ones are fine with no rounds at all
        if (mArity == '*' || mArity == '?') {
            return null;
        }
        // Others need at least one (and multiples checked earlier)
        if (mArity == '+' || mArity == ' ') {
            return (mRounds > 0) ? null
                : "Expected sequence ("+concatNames(mNames)+"); got end element";
        }
        // should never happen:
        throw new IllegalStateException("Internal error");
    }

    private String expElem(int step)
    {
        return "expected element <"+mNames[step]+"> in sequence ("
            +concatNames(mNames)+")";
    }

    /**
     * @return Given names, separated by ", "
     */
    final static String concatNames(PrefixedName[] names)
    {
        final StringBuffer out = new StringBuffer();
        final int len = names.length;
        if (len > 0) {
            out.append(names[0].toString());
        }
        for (int i = 1; i < len; ++i) {
            out.append(", ").append(names[i].toString());
        }
        return out.toString();
    }
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DFAState.java 0000644 0001750 0001750 00000014360 11745427074 022707 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import com.ctc.wstx.util.PrefixedName;
/**
 * One state of the deterministic finite automaton (DFA) that is built
 * for validating complex DTD content models. A state knows its index,
 * whether it is accepting, and which state each element name leads to.
 */
public final class DFAState
{
    /** Index of this state; the initial state is created with index 0. */
    final int mIndex;

    /** True if the dummy end token (bit 0) belongs to this state's token set. */
    final boolean mAccepting;

    /**
     * Token positions that make up this state; only needed until
     * {@link #calcNext} has run, after which the reference is dropped.
     */
    BitSet mTokenSet;

    /** Transitions out of this state: element name to next {@link DFAState}. */
    HashMap mNext = new HashMap();

    /*
    ///////////////////////////////////////////////
    // Life-cycle:
    ///////////////////////////////////////////////
     */

    /**
     * @param index Index to assign to this state
     * @param tokenSet Set of token indexes this state consists of; bit 0
     *   is the dummy end token, so having it set marks an accepting state
     */
    public DFAState(int index, BitSet tokenSet)
    {
        mIndex = index;
        mTokenSet = tokenSet;
        // Being able to "see" the end token (index 0) means we may stop here
        mAccepting = tokenSet.get(0);
    }

    /**
     * Builds the full DFA for given content specification, and returns
     * its initial state.
     *
     * @param rootSpec Content specification to construct the DFA for
     *
     * @return Initial state (index 0) of the constructed DFA
     */
    public static DFAState constructDFA(ContentSpec rootSpec)
    {
        // Real model tree first, then wrap it so that it is followed by
        // the dummy end token:
        ModelNode model = rootSpec.rewrite();
        TokenModel endToken = TokenModel.getNullToken();
        ConcatModel root = new ConcatModel(model, endToken);

        // Index numbers get allocated next; end token has to be the
        // first entry, so it is added explicitly
        ArrayList tokenList = new ArrayList();
        tokenList.add(endToken);
        root.indexTokens(tokenList);

        // One follow-pos set and one name per indexed token
        final int tokenCount = tokenList.size();
        BitSet[] follow = new BitSet[tokenCount];
        PrefixedName[] names = new PrefixedName[tokenCount];
        int t = 0;
        while (t < tokenCount) {
            follow[t] = new BitSet(tokenCount);
            names[t] = ((TokenModel) tokenList.get(t)).getName();
            ++t;
        }
        root.calcFollowPos(follow);

        // First-pos set of the root defines the initial state
        BitSet startSet = new BitSet(tokenCount);
        root.addFirstPos(startSet);
        DFAState start = new DFAState(0, startSet);

        ArrayList states = new ArrayList();
        states.add(start);
        HashMap statesBySet = new HashMap();
        statesBySet.put(startSet, start);

        // The list may grow while we traverse it: calcNext() appends
        // any states it discovers, and those get processed in turn
        for (int s = 0; s < states.size(); ++s) {
            DFAState state = (DFAState) states.get(s);
            state.calcNext(names, follow, states, statesBySet);
        }
        return start;
    }

    /*
    ///////////////////////////////////////////////
    // Public API, accessors:
    ///////////////////////////////////////////////
     */

    public boolean isAcceptingState() {
        return mAccepting;
    }

    public int getIndex() {
        return mIndex;
    }

    /**
     * @return State that given element name leads to from this state;
     *   null if there is no transition for the name
     */
    public DFAState findNext(PrefixedName elemName) {
        return (DFAState) mNext.get(elemName);
    }

    /**
     * @return Newly constructed sorted set of all names for which this
     *   state has a transition
     */
    public TreeSet getNextNames() {
        // Sorted by natural ordering of the names
        return new TreeSet(mNext.keySet());
    }

    /**
     * Calculates transitions out of this state, creating (and
     * registering) any target states that do not yet exist.
     *
     * @param tokenNames Names of tokens, by token index; null for the
     *   dummy end token
     * @param tokenFPs Follow-pos sets of tokens, by token index
     * @param stateList List of all states known so far; new states are
     *   appended to it
     * @param stateMap Map from token set to the state that has it;
     *   new states are added to it
     */
    public void calcNext(PrefixedName[] tokenNames, BitSet[] tokenFPs,
                         List stateList, Map stateMap)
    {
        // A copy is needed, since bits get cleared below and the
        // original set may be in use as a key of the state map
        BitSet pending = (BitSet) mTokenSet.clone();
        // ... and the original is not needed by this state any more
        mTokenSet = null;

        for (int tokenIx = pending.nextSetBit(0); tokenIx >= 0;
             tokenIx = pending.nextSetBit(tokenIx + 1)) {
            PrefixedName name = tokenNames[tokenIx];
            // Dummy end token has no name, and no transition either
            if (name == null) {
                continue;
            }
            BitSet target = (BitSet) tokenFPs[tokenIx].clone();

            // Fold in all later tokens that carry the same name. Search
            // starts past tokenIx (that is, at 1 or above), so any
            // non-negative result is a match candidate.
            for (int otherIx = pending.nextSetBit(tokenIx + 1); otherIx >= 0;
                 otherIx = pending.nextSetBit(otherIx + 1)) {
                // Identity comparison, as before.
                // NOTE(review): presumably name instances are shared -- confirm
                if (tokenNames[otherIx] == name) {
                    // cleared so that outer loop will not visit it again
                    pending.clear(otherIx);
                    target.or(tokenFPs[otherIx]);
                }
            }

            // Reuse the state for this token set if there is one already
            DFAState next = (DFAState) stateMap.get(target);
            if (next == null) {
                next = new DFAState(stateList.size(), target);
                stateList.add(next);
                stateMap.put(target, next);
            }
            mNext.put(name, next);
        }
    }

    /*
    ///////////////////////////////////////////////
    // Other methods
    ///////////////////////////////////////////////
     */

    public String toString()
    {
        StringBuffer sb = new StringBuffer();
        sb.append("State #").append(mIndex).append(":\n");
        sb.append(" Accepting: ").append(mAccepting);
        sb.append("\n Next states:\n");
        for (Iterator it = mNext.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry entry = (Map.Entry) it.next();
            DFAState target = (DFAState) entry.getValue();
            sb.append(entry.getKey());
            sb.append(" -> ");
            sb.append(target.getIndex());
            sb.append("\n");
        }
        return sb.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/ModelNode.java 0000644 0001750 0001750 00000001720 11745427074 023156 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.BitSet;
import java.util.List;
/**
* Abstract base class for classes constructed from {@link ContentSpec}
* objects, when they get rewritten (when their {@link ContentSpec#rewrite}
* gets called). These nodes are then used for constructing complete DFA
* states for validation.
*/
public abstract class ModelNode
{
/*
///////////////////////////////////////////////////
// Methods needed for DFA construction
///////////////////////////////////////////////////
*/
/**
* Method that has to create a deep copy of the model, without
* sharing any of existing Objects.
*/
public abstract ModelNode cloneModel();
/**
* @return Whether this model is nullable (presumably: whether it can
* match empty content -- confirm against implementations)
*/
public abstract boolean isNullable();
/**
* Method called during DFA construction to allocate index numbers
* for tokens of this model.
*
* @param tokens List that collects indexed tokens; caller adds the
* dummy end token as the first entry before calling this method
*/
public abstract void indexTokens(List tokens);
/**
* Method called to add first positions of this model to given set.
*
* @param firstPos Set to add the positions to
*/
public abstract void addFirstPos(BitSet firstPos);
/**
* Method called to add last positions of this model to given set.
*
* @param firstPos Set to add the positions to; note that despite
* its name, the argument collects last (not first) positions
*/
public abstract void addLastPos(BitSet firstPos);
/**
* Method called to calculate follow positions for tokens of this
* model.
*
* @param followPosSets Array that has one set for each token, by
* token index; calculated follow positions are added to these sets
*/
public abstract void calcFollowPos(BitSet[] followPosSets);
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDSubsetImpl.java 0000644 0001750 0001750 00000043650 11745427074 023743 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.text.MessageFormat;
import java.util.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.NotationDeclaration;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.exc.WstxParsingException;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.DataUtil;
/**
 * The default implementation of {@link DTDSubset}
 */
public final class DTDSubsetImpl
    extends DTDSubset
{
    /**
     * Whether this subset is cachable. Only those external
     * subsets that do not refer to PEs defined by internal subsets (or
     * GEs via default attribute value expansion) are cachable.
     */
    final boolean mIsCachable;

    /**
     * Whether this subset has full validation information; and
     * consequently whether it will do actual validation, or just allow
     * access to type information, notations, entities, and add default
     * attribute values.
     */
    final boolean mFullyValidating;

    /**
     * Flag that indicates whether any of the declared elements
     * has any attribute default values for namespace pseudo-attributes.
     */
    final boolean mHasNsDefaults;

    /*
    //////////////////////////////////////////////////////
    // Entity information
    //////////////////////////////////////////////////////
     */

    /**
     * Map (name-to-EntityDecl) of general entity declarations (internal,
     * external) for this DTD subset.
     */
    final HashMap mGeneralEntities;

    /**
     * Lazily instantiated List that contains all general entities from
     * {@link #mGeneralEntities} (preferably in their declaration order;
     * depends on whether platform, ie. JDK version, has insertion-ordered
     * Maps available), used by DTD event Objects.
     */
    volatile transient List mGeneralEntityList = null;

    /**
     * Set of names of general entities referenced by this subset. Note
     * that only those GEs that are referenced by default attribute value
     * definitions count, since GEs in text content are only expanded
     * when reading documents, but attribute default values are expanded
     * when reading DTD subset itself.
     *<p>
     * Needed for determining if external subset materially depends on
     * definitions from internal subset; if so, such subset is not
     * cachable. This also means that information is not stored for
     * non-cachable instance.
     */
    final Set mRefdGEs;

    // // // Parameter entity info:

    /**
     * Map (name-to-WEntityDeclaration) that contains all parameter
     * entities defined by this subset. May be empty if such information
     * will not be needed for use; for example, external subset's
     * definitions are not needed, nor are combined DTD set's.
     */
    final HashMap mDefinedPEs;

    /**
     * Set of names of parameter entities referenced by this subset.
     *<p>
     * Needed for determining if external subset materially depends on
     * definitions from internal subset; if so, such subset is not
     * cachable. This also means that information is not stored for
     * non-cachable instance.
     */
    final Set mRefdPEs;

    /*
    //////////////////////////////////////////////////////
    // Notation definitions:
    //////////////////////////////////////////////////////
     */

    /**
     * Map (name-to-NotationDecl) that this subset has defined.
     */
    final HashMap mNotations;

    /**
     * Lazily instantiated List that contains all notations from
     * {@link #mNotations} (preferably in their declaration order; depends
     * on whether platform, ie. JDK version, has insertion-ordered
     * Maps available), used by DTD event Objects.
     */
    transient List mNotationList = null;

    /*
    //////////////////////////////////////////////////////
    // Element definitions:
    //////////////////////////////////////////////////////
     */

    final HashMap mElements;

    /*
    //////////////////////////////////////////////////////
    // Life-cycle
    //////////////////////////////////////////////////////
     */

    private DTDSubsetImpl(boolean cachable,
                          HashMap genEnt, Set refdGEs,
                          HashMap paramEnt, Set peRefs,
                          HashMap notations, HashMap elements,
                          boolean fullyValidating)
    {
        mIsCachable = cachable;
        mFullyValidating = fullyValidating;
        mGeneralEntities = genEnt;
        mRefdGEs = refdGEs;
        mDefinedPEs = paramEnt;
        mRefdPEs = peRefs;
        mNotations = notations;
        mElements = elements;

        // One element with namespace defaults is enough to set the flag
        boolean nsDefaultsFound = false;
        if (elements != null) {
            for (Iterator it = elements.values().iterator();
                 !nsDefaultsFound && it.hasNext(); ) {
                nsDefaultsFound = ((DTDElement) it.next()).hasNsDefaults();
            }
        }
        mHasNsDefaults = nsDefaultsFound;
    }

    /**
     * Factory method for constructing subset instances; arguments are
     * stored as is (no copies are made).
     */
    public static DTDSubsetImpl constructInstance(boolean cachable,
                                                  HashMap genEnt, Set refdGEs,
                                                  HashMap paramEnt, Set refdPEs,
                                                  HashMap notations, HashMap elements,
                                                  boolean fullyValidating)
    {
        return new DTDSubsetImpl(cachable, genEnt, refdGEs, paramEnt, refdPEs,
                                 notations, elements, fullyValidating);
    }

    /**
     * Method that will combine definitions from internal and external subsets,
     * producing a single DTD set. This instance is used as the internal
     * subset; its maps may get modified in the process, whereas maps of
     * the external subset are only read (or used as is, if this subset
     * has no entries of the kind in question).
     */
    public DTDSubset combineWithExternalSubset(InputProblemReporter rep, DTDSubset extSubset)
        throws XMLStreamException
    {
        /* Same pattern for all 3 kinds of definitions: if internal
         * subset has nothing, map of the external one can be used as
         * is; otherwise external definitions (if any) get merged into
         * the internal map. Latter is fine, as internal subset Objects
         * are never shared or reused (and by extension, neither are
         * objects they contain).
         */

        // General entities first
        HashMap entities = getGeneralEntityMap();
        HashMap extEntities = extSubset.getGeneralEntityMap();
        if (entities == null || entities.isEmpty()) {
            entities = extEntities;
        } else if (extEntities != null && !extEntities.isEmpty()) {
            combineMaps(entities, extEntities);
        }

        // then notations
        HashMap notations = getNotationMap();
        HashMap extNotations = extSubset.getNotationMap();
        if (notations == null || notations.isEmpty()) {
            notations = extNotations;
        } else if (extNotations != null && !extNotations.isEmpty()) {
            // It is an error to try to redefine notations, so collisions
            // need to be checked before merging
            checkNotations(notations, extNotations);
            combineMaps(notations, extNotations);
        }

        // and finally elements
        HashMap elements = getElementMap();
        HashMap extElements = extSubset.getElementMap();
        if (elements == null || elements.isEmpty()) {
            elements = extElements;
        } else if (extElements != null && !extElements.isEmpty()) {
            combineElements(rep, elements, extElements);
        }

        /* Combos are not cachable, and because of that, there's no point
         * in storing any PE info either.
         */
        return constructInstance(false, entities, null, null, null,
                                 notations, elements, mFullyValidating);
    }

    /*
    //////////////////////////////////////////////////////
    // XMLValidationSchema implementation
    //////////////////////////////////////////////////////
     */

    /**
     * @return A {@link DTDValidator} if this subset is fully validating;
     *   a {@link DTDTypingNonValidator} otherwise
     */
    public XMLValidator createValidator(ValidationContext ctxt)
        throws XMLStreamException
    {
        if (!mFullyValidating) {
            return new DTDTypingNonValidator(this, ctxt, mHasNsDefaults,
                                             getElementMap(), getGeneralEntityMap());
        }
        return new DTDValidator(this, ctxt, mHasNsDefaults,
                                getElementMap(), getGeneralEntityMap());
    }

    /*
    //////////////////////////////////////////////////////
    // DTDValidationSchema implementation
    //////////////////////////////////////////////////////
     */

    public int getEntityCount() {
        if (mGeneralEntities == null) {
            return 0;
        }
        return mGeneralEntities.size();
    }

    public int getNotationCount() {
        if (mNotations == null) {
            return 0;
        }
        return mNotations.size();
    }

    /*
    //////////////////////////////////////////////////////
    // Woodstox-specific public API
    //////////////////////////////////////////////////////
     */

    public boolean isCachable() {
        return mIsCachable;
    }

    public HashMap getGeneralEntityMap() {
        return mGeneralEntities;
    }

    /**
     * @return Unmodifiable List of general entity declarations of this
     *   subset, constructed on first call; never null
     */
    public List getGeneralEntityList()
    {
        // Not synchronized: the field that caches the result is volatile
        List result = mGeneralEntityList;
        if (result != null) {
            return result;
        }
        if (mGeneralEntities == null || mGeneralEntities.size() == 0) {
            result = Collections.EMPTY_LIST;
        } else {
            result = Collections.unmodifiableList(new ArrayList(mGeneralEntities.values()));
        }
        mGeneralEntityList = result;
        return result;
    }

    public HashMap getParameterEntityMap() {
        return mDefinedPEs;
    }

    public HashMap getNotationMap() {
        return mNotations;
    }

    /**
     * @return Unmodifiable List of notation declarations of this
     *   subset, constructed on first call; never null
     */
    public synchronized List getNotationList()
    {
        List result = mNotationList;
        if (result != null) {
            return result;
        }
        if (mNotations == null || mNotations.size() == 0) {
            result = Collections.EMPTY_LIST;
        } else {
            result = Collections.unmodifiableList(new ArrayList(mNotations.values()));
        }
        mNotationList = result;
        return result;
    }

    public HashMap getElementMap() {
        return mElements;
    }

    /**
     * Method used in determining whether cached external subset instance
     * can be used with specified internal subset. If ext. subset references
     * any parameter/general entities int subset (re-)defines, it can not;
     * otherwise it can be used.
     *
     * @return False if this (external) subset refers to a parameter or
     *   general entity defined in passed-in internal subset; true if
     *   there are no such references, and this subset can be reused.
     */
    public boolean isReusableWith(DTDSubset intSubset)
    {
        // Any referenced parameter entities the internal subset defines?
        Set peRefs = mRefdPEs;
        if (peRefs != null && peRefs.size() > 0) {
            HashMap definedPEs = intSubset.getParameterEntityMap();
            if (definedPEs != null && definedPEs.size() > 0
                && DataUtil.anyValuesInCommon(peRefs, definedPEs.keySet())) {
                return false;
            }
        }
        // How about general entities?
        Set geRefs = mRefdGEs;
        if (geRefs != null && geRefs.size() > 0) {
            HashMap definedGEs = intSubset.getGeneralEntityMap();
            if (definedGEs != null && definedGEs.size() > 0
                && DataUtil.anyValuesInCommon(geRefs, definedGEs.keySet())) {
                return false;
            }
        }
        // Nope, nothing this subset depends on gets overridden
        return true;
    }

    /*
    //////////////////////////////////////////////////////
    // Overridden default methods:
    //////////////////////////////////////////////////////
     */

    public String toString() {
        return "[DTDSubset: " + getEntityCount() + " general entities" + ']';
    }

    /*
    //////////////////////////////////////////////////////
    // Convenience methods used by other classes
    //////////////////////////////////////////////////////
     */

    /**
     * Method that always throws an exception that indicates that
     * a notation was redefined; exception location is that of the
     * new declaration.
     */
    public static void throwNotationException(NotationDeclaration oldDecl, NotationDeclaration newDecl)
        throws XMLStreamException
    {
        Object[] args = new Object[] {
            newDecl.getName(),
            oldDecl.getLocation().toString()
        };
        String msg = MessageFormat.format(ErrorConsts.ERR_DTD_NOTATION_REDEFD, args);
        throw new WstxParsingException(msg, newDecl.getLocation());
    }

    /**
     * Method that always throws an exception that indicates that
     * an element was redefined.
     *
     * @param oldElem Element that was declared earlier
     * @param loc Location to use for the exception
     */
    public static void throwElementException(DTDElement oldElem, Location loc)
        throws XMLStreamException
    {
        Object[] args = new Object[] {
            oldElem.getDisplayName(),
            oldElem.getLocation().toString()
        };
        String msg = MessageFormat.format(ErrorConsts.ERR_DTD_ELEM_REDEFD, args);
        throw new WstxParsingException(msg, loc);
    }

    /*
    //////////////////////////////////////////////////////
    // Internal methods
    //////////////////////////////////////////////////////
     */

    /**
     * Method that adds entries of the second Map into the first one,
     * without replacing any (non-null) values the first Map has.
     *<p>
     * Note: The first Map argument WILL be modified; second one
     * not. Caller needs to ensure this is acceptable.
     */
    private static void combineMaps(HashMap m1, HashMap m2)
    {
        for (Iterator it = m2.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry entry = (Map.Entry) it.next();
            Object key = entry.getKey();
            /* Collisions are assumed to be rare, so let's just add the
             * entry, and only undo that if there was an earlier value:
             * int. subset has precedence.
             */
            Object prev = m1.put(key, entry.getValue());
            if (prev != null) {
                m1.put(key, prev);
            }
        }
    }

    /**
     * Method that will try to merge in elements defined in the external
     * subset, into internal subset; it will also check for redeclarations
     * when doing this, as it's invalid to redeclare elements. Care has to
     * be taken to only check actual redeclarations: placeholders should
     * not cause problems.
     *
     * @param rep Reporter to use for reporting problems
     * @param intElems Elements of the internal subset; will be modified
     * @param extElems Elements of the external subset; neither the Map
     *   nor elements it contains are modified by this method
     */
    private void combineElements(InputProblemReporter rep, HashMap intElems, HashMap extElems)
        throws XMLStreamException
    {
        for (Iterator it = extElems.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry entry = (Map.Entry) it.next();
            Object name = entry.getKey();
            Object extValue = entry.getValue();
            Object intValue = intElems.get(name);

            // Only the external subset has it? Simple, just add
            if (intValue == null) {
                intElems.put(name, extValue);
                continue;
            }
            DTDElement extElem = (DTDElement) extValue;
            DTDElement intElem = (DTDElement) intValue;
            boolean extDefined = extElem.isDefined();
            boolean intDefined = intElem.isDefined();

            if (extDefined && intDefined) {
                // at most one of them may be an actual definition
                throwElementException(intElem, extElem.getLocation());
            } else if (extDefined) {
                /* Note: can/should not modify the external element (by
                 * for example adding attributes); external element may
                 * be cached and shared... so, need to do the reverse,
                 * define the one from internal subset.
                 */
                intElem.defineFrom(rep, extElem, mFullyValidating);
            } else if (intDefined) {
                intElem.mergeMissingAttributesFrom(rep, extElem, mFullyValidating);
            } else {
                /* ??? Should we warn about neither of them being really
                 *   declared?
                 */
                rep.reportProblem(intElem.getLocation(),
                                  ErrorConsts.WT_ENT_DECL,
                                  ErrorConsts.W_UNDEFINED_ELEM,
                                  extElem.getDisplayName(), null);
            }
        }
    }

    /**
     * Method that verifies that none of notations from the external
     * subset has the name of a notation from the internal subset; if
     * one does, an exception is thrown.
     */
    private static void checkNotations(HashMap fromInt, HashMap fromExt)
        throws XMLStreamException
    {
        /* Since it's external subset that would try to redefine things
         * defined in internal subset, let's traverse definitions in
         * the ext. subset first (even though that may not be the fastest
         * way), so that we have a chance of catching the first problem
         * (As long as Maps iterate in insertion order).
         */
        for (Iterator it = fromExt.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry entry = (Map.Entry) it.next();
            Object name = entry.getKey();
            if (fromInt.containsKey(name)) {
                throwNotationException((NotationDeclaration) fromInt.get(name),
                                       (NotationDeclaration) entry.getValue());
            }
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDEntitiesAttr.java 0000644 0001750 0001750 00000013110 11745427074 024257 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.StringTokenizer;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Specific attribute class for attributes of type ENTITIES; that is,
 * attributes whose value consists of one or more entity names,
 * separated by white space.
 */
public final class DTDEntitiesAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor; all arguments are passed as is to the
     * base class.
     */
    public DTDEntitiesAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
                           boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * @return New instance that is otherwise configured like this
     *   one, but uses the specified index
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDEntitiesAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType() {
        return TYPE_ENTITIES;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the {@link DTDValidatorBase}
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *
     * @param v Validator to use for looking up entities and for
     *   reporting problems
     * @param cbuf Buffer that contains the attribute value
     * @param start Offset of the first character of the value
     * @param end Offset after the last character of the value
     * @param normalize Whether the normalized value is to be returned
     *
     * @return When normalizing, entity names of the value, separated
     *   by single spaces; null when not normalizing. If a problem is
     *   found, whatever the problem reporting method returns.
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        /* Let's skip leading/trailing white space, even if we are not
         * to normalize visible attribute value. This allows for better
         * round-trip handling (no changes for physical value caller
         * gets), but still allows successful validation.
         */
        while (start < end && WstxInputData.isSpaceChar(cbuf[start])) {
            ++start;
        }

        // Empty value?
        if (start >= end) {
            return reportValidationProblem(v, "Empty ENTITIES value");
        }

        --end; // so that it now points to the last char
        while (end > start && WstxInputData.isSpaceChar(cbuf[end])) {
            --end;
        }

        // Ok; now start points to first, end to last char (both inclusive)
        String idStr = null; // name of the latest entity (or the only one)
        StringBuffer sb = null; // only created if there are multiple names

        while (start <= end) {
            // Ok, need to check char validity, and also calc hash code:
            char c = cbuf[start];
            if (!WstxInputData.isNameStartChar(c, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, c, "not valid as the first ENTITIES character");
            }
            int hash = (int) c;
            int i = start+1;
            for (; i <= end; ++i) {
                c = cbuf[i];
                if (WstxInputData.isSpaceChar(c)) {
                    break;
                }
                if (!WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
                    return reportInvalidChar(v, c, "not valid as an ENTITIES character");
                }
                hash = (hash * 31) + (int) c;
            }

            // (per original note, this only returns if entity was found)
            EntityDecl ent = findEntityDecl(v, cbuf, start, (i - start), hash);

            // Can skip the trailing space char (if there was one)
            start = i+1;

            /* When normalizing, we can possibly share name String, or
             * alternatively, compose normalized String if multiple
             */
            if (normalize) {
                if (idStr == null) { // first entity name
                    idStr = ent.getName();
                } else {
                    if (sb == null) {
                        sb = new StringBuffer(idStr);
                    }
                    idStr = ent.getName();
                    sb.append(' ');
                    sb.append(idStr);
                }
            }

            // Ok, any white space to skip?
            while (start <= end && WstxInputData.isSpaceChar(cbuf[start])) {
                ++start;
            }
        }

        if (normalize) {
            if (sb != null) {
                idStr = sb.toString();
            }
            return idStr;
        }

        return null;
    }

    /**
     * Method called by the validator object
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     *
     * @param rep Reporter to use for reporting problems; has to be
     *   a {@link MinimalDTDReader}, since it is also used for looking
     *   up entities
     * @param normalize Whether the default value is to be replaced
     *   with its normalized form
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        // Normalized form is always needed here, for tokenization below
        String normStr = validateDefaultNames(rep, true);
        if (normalize) {
            mDefValue.setValue(normStr);
        }

        // Ok, but were they declared?

        /* Performance really shouldn't be critical here (only called when
         * parsing DTDs, which get cached) -- let's just
         * tokenize using standard StringTokenizer
         */
        StringTokenizer st = new StringTokenizer(normStr);
        /* !!! 03-Dec-2004, TSa: This is rather ugly -- need to know we
         *   actually really get a DTD reader, and DTD reader needs
         *   to expose a special method... but it gets things done.
         */
        MinimalDTDReader dtdr = (MinimalDTDReader) rep;
        while (st.hasMoreTokens()) {
            String str = st.nextToken();
            EntityDecl ent = dtdr.findEntity(str);
            /* Needs to exist, and be an unparsed entity... and the name
             * to pass is that of the entity that was looked up, not the
             * whole list of names.
             * NOTE(review): assumes the second argument is only used for
             * identifying the entity in problem reports -- confirm
             */
            checkEntity(rep, str, ent);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/StructValidator.java 0000644 0001750 0001750 00000003225 11745427074 024444 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import com.ctc.wstx.util.PrefixedName;
/**
* Base class for validator Objects used to validate tree structure of an
* XML-document against DTD. An instance validates content of a single
* parent element: it is notified of each child (start) element, and of
* the end of the parent element's scope.
*/
public abstract class StructValidator
{
/**
* Method that should be called to get the actual usable validator
* instance, from the 'template' validator.
*
* @return Validator instance to use for validation
*/
public abstract StructValidator newInstance();
/**
* Method called when a new (start) element is encountered within the
* scope of parent element this validator monitors.
*
* @param elemName Name of the child element that was encountered
*
* @return Null if element is valid in its current position; error
* message if not.
*/
public abstract String tryToValidate(PrefixedName elemName);
/**
* Method called when the end element of the scope this validator
* validates is encountered. It should make sure that the content
* model is valid, and if not, construct an error message.
*
* @return Null if the content model for the element is valid; error
* message if not.
*/
public abstract String fullyValid();
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/ConcatModel.java 0000644 0001750 0001750 00000005754 11745427074 023513 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.*;
/**
 * Model class for a sequence of exactly 2 sub-models, which have to
 * be matched in order (left one first, then the right one).
 */
public class ConcatModel
    extends ModelNode
{
    ModelNode mLeftModel;

    ModelNode mRightModel;

    /** Sequence is nullable only if both of its sub-models are. */
    final boolean mNullable;

    /** Lazily calculated first/last position sets; null until needed. */
    BitSet mFirstPos, mLastPos;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public ConcatModel(ModelNode left, ModelNode right)
    {
        mLeftModel = left;
        mRightModel = right;
        mNullable = left.isNullable() && right.isNullable();
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * Method that creates a deep copy of this model, by cloning
     * both sub-models as well.
     */
    public ModelNode cloneModel() {
        ModelNode leftCopy = mLeftModel.cloneModel();
        ModelNode rightCopy = mRightModel.cloneModel();
        return new ConcatModel(leftCopy, rightCopy);
    }

    public boolean isNullable() {
        return mNullable;
    }

    public void indexTokens(List tokens)
    {
        // Left to right
        mLeftModel.indexTokens(tokens);
        mRightModel.indexTokens(tokens);
    }

    /**
     * Adds first positions of this sequence to given set: those of
     * the left sub-model and, if left sub-model is nullable, those of
     * the right sub-model too. Set is only calculated once, and then
     * reused by later calls.
     */
    public void addFirstPos(BitSet pos) {
        BitSet firsts = mFirstPos;
        if (firsts == null) {
            mFirstPos = firsts = new BitSet();
            mLeftModel.addFirstPos(firsts);
            if (mLeftModel.isNullable()) {
                mRightModel.addFirstPos(firsts);
            }
        }
        pos.or(firsts);
    }

    /**
     * Adds last positions of this sequence to given set: those of
     * the right sub-model and, if right sub-model is nullable, those of
     * the left sub-model too. Set is only calculated once, and then
     * reused by later calls.
     */
    public void addLastPos(BitSet pos) {
        BitSet lasts = mLastPos;
        if (lasts == null) {
            mLastPos = lasts = new BitSet();
            mRightModel.addLastPos(lasts);
            if (mRightModel.isNullable()) {
                mLeftModel.addLastPos(lasts);
            }
        }
        pos.or(lasts);
    }

    public void calcFollowPos(BitSet[] followPosSets)
    {
        // Sub-models handle their internal follow positions first
        mLeftModel.calcFollowPos(followPosSets);
        mRightModel.calcFollowPos(followPosSets);

        /* What remains is the boundary between the two: whatever can
         * start the right sub-model can follow anything that can end
         * the left one.
         */
        BitSet rightFirsts = new BitSet();
        mRightModel.addFirstPos(rightFirsts);
        BitSet leftLasts = new BitSet();
        mLeftModel.addLastPos(leftLasts);

        // Index 0 (the null entry) need not/can not be handled; hence
        // search starts from index 1
        for (int ix = leftLasts.nextSetBit(1); ix >= 0;
             ix = leftLasts.nextSetBit(ix + 1)) {
            followPosSets[ix].or(rightFirsts);
        }
    }

    public String toString()
    {
        return "(" + mLeftModel.toString() + ", " + mRightModel.toString() + ")";
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDId.java 0000644 0001750 0001750 00000011023 11745427074 022175 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.net.URI;
/**
 * Key class used for storing (external) DTDs in, and retrieving them
 * from, a cache. For two keys to match, the primary id of the DTD has
 * to match, as well as the configuration flags and xml version (1.0 vs
 * 1.1) used for parsing. The last two are needed because although DTDs
 * themselves do not deal with (or understand) namespaces, some of the
 * parsing done depends on settings such as namespace-awareness.
 *<p>
 * Primary id is the public id, if one was defined (compared using
 * String equality); otherwise it is the system id, expressed as
 * an {@link URI}.
 */
public final class DTDId
{
    protected final String mPublicId;

    protected final URI mSystemId;

    protected final int mConfigFlags;

    protected final boolean mXml11;

    /** Lazily calculated hash code; 0 means "not yet calculated". */
    protected int mHashCode = 0;

    /*
    ///////////////////////////////////////////////////////////////////////
    // Life-cycle:
    ///////////////////////////////////////////////////////////////////////
     */

    private DTDId(String publicId, URI systemId, int configFlags, boolean xml11)
    {
        mPublicId = publicId;
        mSystemId = systemId;
        mConfigFlags = configFlags;
        mXml11 = xml11;
    }

    /**
     * @throws IllegalArgumentException If public id is null or empty
     */
    public static DTDId constructFromPublicId(String publicId, int configFlags,
                                              boolean xml11)
    {
        boolean missing = (publicId == null) || (publicId.length() == 0);
        if (missing) {
            throw new IllegalArgumentException("Empty/null public id.");
        }
        return new DTDId(publicId, null, configFlags, xml11);
    }

    /**
     * @throws IllegalArgumentException If system id is null
     */
    public static DTDId constructFromSystemId(URI systemId, int configFlags,
                                              boolean xml11)
    {
        if (systemId != null) {
            return new DTDId(null, systemId, configFlags, xml11);
        }
        throw new IllegalArgumentException("Null system id.");
    }

    /**
     * Factory method that uses the public id, if one is given (not
     * null or empty), ignoring the system id; and the system id otherwise.
     *
     * @throws IllegalArgumentException If public id is null or empty,
     *   and system id is null
     */
    public static DTDId construct(String publicId, URI systemId, int configFlags, boolean xml11)
    {
        boolean hasPublicId = (publicId != null) && (publicId.length() > 0);
        if (!hasPublicId) {
            if (systemId == null) {
                throw new IllegalArgumentException("Illegal arguments; both public and system id null/empty.");
            }
            return new DTDId(null, systemId, configFlags, xml11);
        }
        return new DTDId(publicId, null, configFlags, xml11);
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Overridden standard methods
    ///////////////////////////////////////////////////////////////////////
     */

    public int hashCode() {
        int cached = mHashCode;
        if (cached != 0) {
            return cached;
        }
        // Exactly one of the ids is used, same as with equals()
        int idHash = (mPublicId != null) ? mPublicId.hashCode() : mSystemId.hashCode();
        int hash = mConfigFlags ^ idHash;
        if (mXml11) {
            hash ^= 1;
        }
        mHashCode = hash;
        return hash;
    }

    public String toString() {
        return "Public-id: " + mPublicId
            + ", system-id: " + mSystemId
            + " [config flags: 0x" + Integer.toHexString(mConfigFlags)
            + "], xml11: " + mXml11;
    }

    public boolean equals(Object o)
    {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        DTDId that = (DTDId) o;
        if (mConfigFlags != that.mConfigFlags || mXml11 != that.mXml11) {
            return false;
        }
        // No public id? Then it is the system ids that have to match
        if (mPublicId == null) {
            return mSystemId.equals(that.mSystemId);
        }
        return mPublicId.equals(that.mPublicId);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/FullDTDReader.java 0000644 0001750 0001750 00000371203 11745427074 023677 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.text.MessageFormat;
import java.util.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.NotationDeclaration;
import org.codehaus.stax2.validation.XMLValidationProblem;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.ent.*;
import com.ctc.wstx.evt.WNotationDeclaration;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.io.WstxInputSource;
import com.ctc.wstx.util.*;
/**
* Reader that reads in DTD information from internal or external subset.
*
* There are 2 main modes for DTDReader, depending on whether it is parsing
* internal or external subset. Parsing of internal subset is somewhat
* simpler, since no dependency checking is needed. For external subset,
* handling of parameter entities is bit more complicated, as care has to
* be taken to distinguish between using PEs defined in int. subset, and
* ones defined in ext. subset itself. This determines cachability of
* external subsets.
*
* Reader also implements simple stand-alone functionality for flattening
* DTD files (expanding all references to their eventual textual form);
* this is sometimes useful when optimizing modularized DTDs
* (which are more maintainable) into single monolithic DTDs (which in
* general can be more performant).
*
* @author Tatu Saloranta
*/
public class FullDTDReader
extends MinimalDTDReader
{
/**
* Flag that can be changed to enable or disable interning of shared
* names; shared names are used for enumerated values to reduce
* memory usage.
*/
final static boolean INTERN_SHARED_NAMES = false;
// // // Entity expansion types:
final static Boolean ENTITY_EXP_GE = Boolean.FALSE;
final static Boolean ENTITY_EXP_PE = Boolean.TRUE;
/*
//////////////////////////////////////////////////
// Configuration
//////////////////////////////////////////////////
*/
final int mConfigFlags;
// Extracted wstx-specific settings:
final boolean mCfgSupportDTDPP;
/**
* This flag indicates whether we should build a validating 'real'
* validator (true, the usual case),
* or a simpler pseudo-validator that can do all non-validation tasks
* that are based on DTD info (entity expansion, notation references,
* default attribute values). Latter is used in non-validating mode.
*
*/
final boolean mCfgFullyValidating;
/*
//////////////////////////////////////////////////
// Entity handling, parameter entities (PEs)
//////////////////////////////////////////////////
*/
/**
* Set of parameter entities defined so far in the currently parsed
* subset. Note: the first definition sticks, entities can not be
* redefined.
*
* Keys are entity name Strings; values are instances of EntityDecl
*/
HashMap mParamEntities;
/**
* Set of parameter entities already defined for the subset being
* parsed; namely, PEs defined in the internal subset passed when
* parsing matching external subset. Null when parsing internal
* subset.
*/
final HashMap mPredefdPEs;
/**
* Set of parameter entities (ids) that have been referenced by this
* DTD; only maintained for external subsets, and only as long as
* no pre-defined PE has been referenced.
*/
Set mRefdPEs;
/*
//////////////////////////////////////////////////
// Entity handling, general entities (GEs)
//////////////////////////////////////////////////
*/
/**
* Set of generic entities defined so far in this subset.
* As with parameter entities, the first definition sticks.
*
* Keys are entity name Strings; values are instances of EntityDecl
*
* Note: this Map only contains entities declared and defined in the
* subset being parsed; no previously defined values are passed.
*/
HashMap mGeneralEntities;
/**
* Set of general entities already defined for the subset being
* parsed; namely, GEs defined in the internal subset passed when
* parsing matching external subset. Null when parsing internal
* subset. Such entities are only needed directly for one purpose;
* to be expanded when reading attribute default value definitions.
*/
final HashMap mPredefdGEs;
/**
* Set of general entities (ids) that have been referenced by this
* DTD; only maintained for external subsets, and only as long as
* no pre-defined GEs have been referenced.
*/
Set mRefdGEs;
/*
//////////////////////////////////////////////////
// Entity handling, both PEs and GEs
//////////////////////////////////////////////////
*/
/**
* Flag used to keep track of whether current (external) subset
* has referenced at least one PE that was pre-defined.
*/
boolean mUsesPredefdEntities = false;
/*
//////////////////////////////////////////////////
// Notation settings
//////////////////////////////////////////////////
*/
/**
* Set of notations defined so far. Since it's illegal to (try to)
* redefine notations, there's no specific precedence.
*
* Keys are notation name Strings; values are instances of
* NotationDecl objects
*/
HashMap/*
* Note: see base class for some additional remarks about this
* method.
*/
/**
 * Looks up a general entity by name: pre-defined general entities (if
 * any were passed in) are consulted first, then the ones declared in
 * the subset being parsed.
 *
 * @return Matching declaration, or null if neither map has the name
 */
public EntityDecl findEntity(String entName)
{
    EntityDecl found = (mPredefdGEs == null)
        ? null : (EntityDecl) mPredefdGEs.get(entName);
    if (found == null) {
        found = (EntityDecl) mGeneralEntities.get(entName);
    }
    return found;
}
/*
//////////////////////////////////////////////////
// Main-level parsing methods
//////////////////////////////////////////////////
*/
/**
 * Main parsing loop for a DTD subset. Repeatedly skips white space and
 * dispatches on the next character: '%' expands a parameter entity,
 * '&lt;' parses a directive (flattened or not, depending on whether
 * a flatten writer is set), and ']' either ends the internal subset or
 * closes a conditional INCLUDE section. Once input is exhausted (external
 * subset) or the closing ']' is seen (internal subset), checks for
 * unclosed INCLUDE blocks and unresolved notation references, and then
 * constructs the resulting {@link DTDSubset}.
 *
 * @return Subset object constructed from the declarations read
 *
 * @throws XMLStreamException For unexpected characters, unexpected end
 *   of input, or problems reported by the called parsing methods
 */
protected DTDSubset parseDTD()
throws XMLStreamException
{
while (true) {
mCheckForbiddenPEs = false; // PEs are ok at this point
int i = getNextAfterWS();
if (i < 0) {
if (mIsExternal) { // ok for external DTDs
break;
}
// Error for internal subset
throwUnexpectedEOF(SUFFIX_IN_DTD_INTERNAL);
}
if (i == '%') { // parameter entity
expandPE();
continue;
}
/* First, let's keep track of start of the directive; needed for
* entity and notation declaration events.
*/
mTokenInputTotal = mCurrInputProcessed + mInputPtr;
mTokenInputRow = mCurrInputRow;
mTokenInputCol = mInputPtr - mCurrInputRowStart;
if (i == '<') {
// PEs not allowed within declarations, in the internal subset proper
mCheckForbiddenPEs = !mIsExternal && (mInput == mRootInput);
if (mFlattenWriter == null) {
parseDirective();
} else {
parseDirectiveFlattened();
}
continue;
}
if (i == ']') {
if (mIncludeCount == 0 && !mIsExternal) { // End of internal subset
break;
}
if (mIncludeCount > 0) { // active INCLUDE block(s) open?
// When flattening without conditionals, the closing marker must not be echoed
boolean suppress = (mFlattenWriter != null) && !mFlattenWriter.includeConditionals();
if (suppress) {
mFlattenWriter.flush(mInputBuffer, mInputPtr-1);
mFlattenWriter.disableOutput();
}
try {
// ]]> needs to be a token, can not come from PE:
char c = dtdNextFromCurr();
if (c == ']') {
c = dtdNextFromCurr();
if (c == '>') {
// Ok, fine, conditional include section ended.
--mIncludeCount;
continue;
}
}
throwDTDUnexpectedChar(c, "; expected ']]>' to close conditional include section");
} finally {
// Runs on the 'continue' path as well, so output is always re-enabled
if (suppress) {
mFlattenWriter.enableOutput(mInputPtr);
}
}
}
// otherwise will fall through, and give an error
}
if (mIsExternal) {
throwDTDUnexpectedChar(i, "; expected a '<' to start a directive");
}
throwDTDUnexpectedChar(i, "; expected a '<' to start a directive, or \"]>\" to end internal subset");
}
/* 05-Feb-2006, TSa: Not allowed to have unclosed INCLUDE/IGNORE
* blocks...
*/
if (mIncludeCount > 0) { // active INCLUDE block(s) open?
String suffix = (mIncludeCount == 1) ? "an INCLUDE block" : (""+mIncludeCount+" INCLUDE blocks");
throwUnexpectedEOF(getErrorMsg()+"; expected closing marker for "+suffix);
}
/* First check: have all notation references been resolved?
* (related to [WSTX-121])
*/
if (mNotationForwardRefs != null && mNotationForwardRefs.size() > 0) {
_reportUndefinedNotationRefs();
}
// Ok; time to construct and return DTD data object.
DTDSubset ss;
// There are more settings for ext. subsets:
if (mIsExternal) {
/* External subsets are cachable if they did not refer to any
* PEs or GEs defined in internal subset passed in (if any),
* nor to any notations.
* We don't care about PEs it defined itself, but need to pass
* in Set of PEs it refers to, to check if cached copy can be
* used with different int. subsets.
* We need not worry about notations referred, since they are
* not allowed to be re-defined.
*/
boolean cachable = !mUsesPredefdEntities && !mUsesPredefdNotations;
ss = DTDSubsetImpl.constructInstance(cachable,
mGeneralEntities, mRefdGEs,
null, mRefdPEs,
mNotations, mElements,
mCfgFullyValidating);
} else {
/* Internal subsets are not cachable (no unique way to refer
* to unique internal subsets), and there can be no references
* to pre-defined PEs, as none were passed.
*/
ss = DTDSubsetImpl.constructInstance(false, mGeneralEntities, null,
mParamEntities, null,
mNotations, mElements,
mCfgFullyValidating);
}
return ss;
}
/**
 * Parses a single directive after its opening '&lt;' has been read:
 * a processing instruction, a comment, a conditional section, or a
 * declaration starting with an upper-case keyword. All marker
 * characters are read from the current input source only.
 *
 * @throws XMLStreamException If the directive does not start with one
 *   of the recognized markers, or a called handler reports a problem
 */
protected void parseDirective()
    throws XMLStreamException
{
    char ch = dtdNextFromCurr();
    if (ch == '?') { // processing instruction (or xml declaration)
        readPI();
        return;
    }
    if (ch != '!') { // nothing valid
        throwDTDUnexpectedChar(ch, "; expected '!' to start a directive");
    }

    // Still within the same input section: conditional, comment or declaration
    ch = dtdNextFromCurr();
    if (ch == '[') {
        checkInclusion();
    } else if (ch == '-') { // plain comment; needs a second hyphen
        ch = dtdNextFromCurr();
        if (ch != '-') {
            throwDTDUnexpectedChar(ch, "; expected '-' for a comment");
        }
        final boolean reportComment =
            (mEventListener != null) && mEventListener.dtdReportComments();
        if (reportComment) {
            readComment(mEventListener);
        } else {
            skipComment();
        }
    } else if (ch >= 'A' && ch <= 'Z') {
        handleDeclaration(ch);
    } else {
        throwDTDUnexpectedChar(ch, ErrorConsts.ERR_DTD_MAINLEVEL_KEYWORD);
    }
}
/**
* Method similar to {@link #parseDirective}, but one that takes care
* to properly output dtd contents via {@link com.ctc.wstx.dtd.DTDWriter}
* as necessary.
* Separated to simplify both methods; otherwise would end up with
* 'if (... flatten...) ... else ...' spaghetti code.
*/
protected void parseDirectiveFlattened()
throws XMLStreamException
{
/* First, need to flush any flattened output there may be, at
* this point (except for opening lt char): and then need to
* temporarily disable more output until we know the type and
* whether it should be output or not:
*/
mFlattenWriter.flush(mInputBuffer, mInputPtr-1);
mFlattenWriter.disableOutput();
/* Let's determine type here, and call appropriate skip/parse
* methods.
*/
char c = dtdNextFromCurr();
if (c == '?') { // xml decl?
mFlattenWriter.enableOutput(mInputPtr);
mFlattenWriter.output("");
readPI();
//throwDTDUnexpectedChar(c, " expected '!' to start a directive");
return;
}
if (c != '!') { // nothing valid
throwDTDUnexpectedChar(c, ErrorConsts.ERR_DTD_MAINLEVEL_KEYWORD);
}
// ignore/include, comment, or directive
c = dtdNextFromCurr();
if (c == '-') { // plain comment
c = dtdNextFromCurr();
if (c != '-') {
throwDTDUnexpectedChar(c, "; expected '-' for a comment");
}
boolean comm = mFlattenWriter.includeComments();
if (comm) {
mFlattenWriter.enableOutput(mInputPtr);
mFlattenWriter.output("");
}
/**
 * Writes the opening of a processing instruction: the two-character
 * start marker, the target, and optionally a single separating space.
 *
 * @param target Target of the processing instruction
 * @param addSpace Whether to append a space after the target
 */
public final void writePIStart(String target, boolean addSpace)
    throws IOException
{
    fastWriteRaw('<', '?');
    fastWriteRaw(target);
    if (!addSpace) {
        return;
    }
    fastWriteRaw(' ');
}
/**
 * Writes the two-character marker that closes a processing instruction.
 */
public final void writePIEnd() throws IOException
{
    final char first = '?';
    final char second = '>';
    fastWriteRaw(first, second);
}
/*
////////////////////////////////////////////////
// Higher-level output methods, text output
////////////////////////////////////////////////
*/
/**
 * Writes given text as a CDATA section. If content checking is enabled
 * and the text contains the CDATA end marker, either returns the offset
 * of that marker (content fixing disabled) or writes the text split
 * into multiple sections (content fixing enabled).
 *
 * @param data Text to write inside the section
 *
 * @return -1 if the content was written; otherwise offset of the
 *   first end marker found in the content
 */
public int writeCData(String data)
    throws IOException
{
    if (mCheckContent) {
        int ix = verifyCDataContent(data);
        if (ix >= 0) {
            if (!mFixContent) { // can not fix, caller needs to deal with it
                return ix;
            }
            writeSegmentedCData(data, ix);
            return -1;
        }
    }
    // Start marker, content as is, end marker
    fastWriteRaw("<![CDATA[");
    writeRaw(data);
    fastWriteRaw("]]>");
    return -1;
}
/**
 * Writes given characters as a CDATA section. If content checking is
 * enabled and the range contains the CDATA end marker, either returns
 * the index of that marker (content fixing disabled) or writes the
 * text split into multiple sections (content fixing enabled).
 *
 * @param cbuf Buffer that contains the text
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 *
 * @return -1 if the content was written; otherwise index (within
 *   the buffer) of the first end marker found
 */
public int writeCData(char[] cbuf, int offset, int len)
    throws IOException
{
    if (mCheckContent) {
        // Verifier takes an end index (exclusive), not a length
        int ix = verifyCDataContent(cbuf, offset, offset + len);
        if (ix >= 0) {
            if (!mFixContent) { // can not fix, caller needs to deal with it
                return ix;
            }
            writeSegmentedCData(cbuf, offset, len, ix);
            return -1;
        }
    }
    // Start marker, content as is, end marker
    fastWriteRaw("<![CDATA[");
    writeRaw(cbuf, offset, len);
    fastWriteRaw("]]>");
    return -1;
}
/**
 * Writes given text as character data. If a custom text writer is
 * configured, escaping is delegated to it; otherwise '&lt;' and '&amp;'
 * are written as predefined entities, '&gt;' is escaped when it could
 * be part of a CDATA end marker, and characters that need quoting
 * (carriage return when so configured, control characters in xml 1.1
 * mode, characters at or above the encoding's limit) are written via
 * {@code writeAsEntity}.
 *
 * @param text Text to write
 */
public void writeCharacters(String text)
    throws IOException
{
    if (mOut == null) {
        return;
    }
    if (mTextWriter != null) { // custom escaping?
        mTextWriter.write(text);
        return;
    }
    int inPtr = 0;
    final int len = text.length();
    final int highChar = mEncHighChar;

    main_loop:
    while (true) {
        String ent = null;

        inner_loop:
        while (true) {
            if (inPtr >= len) {
                break main_loop;
            }
            char c = text.charAt(inPtr++);
            if (c <= HIGHEST_ENCODABLE_TEXT_CHAR) {
                if (c <= 0x0020) {
                    if (c != ' ' && c != '\n' && c != '\t') { // those are fine as is
                        if (c == '\r') {
                            if (mEscapeCR) {
                                break inner_loop;
                            }
                        } else {
                            if (!mXml11 || c == 0) {
                                c = handleInvalidChar(c); // throws an error usually
                            } else {
                                break inner_loop; // need quoting
                            }
                        }
                    }
                } else if (c == '<') {
                    ent = "&lt;";
                    break inner_loop;
                } else if (c == '&') {
                    ent = "&amp;";
                    break inner_loop;
                } else if (c == '>') {
                    /* Conservative: quote if first character (previous
                     * output is not known here), or if preceded by ']'
                     * and thus possibly part of "]]>"
                     */
                    if (inPtr < 2 || text.charAt(inPtr-2) == ']') {
                        ent = "&gt;";
                        break inner_loop;
                    }
                }
            } else if (c >= highChar) {
                break inner_loop;
            }
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        // Inner loop was exited for a character that needs escaping
        if (ent != null) {
            writeRaw(ent);
        } else {
            writeAsEntity(text.charAt(inPtr-1));
        }
    }
}
/**
 * Writes given characters as character data. If a custom text writer
 * is configured, escaping is delegated to it; otherwise runs of
 * characters that need no escaping are written as is, '&lt;' and
 * '&amp;' are written as predefined entities, '&gt;' is escaped when it
 * could be part of a CDATA end marker, and other characters needing
 * quoting are written via {@code writeAsEntity}.
 *
 * @param cbuf Buffer that contains the text
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 */
public void writeCharacters(char[] cbuf, int offset, int len)
    throws IOException
{
    if (mOut == null) {
        return;
    }
    if (mTextWriter != null) { // custom escaping?
        mTextWriter.write(cbuf, offset, len);
    } else { // nope, default:
        len += offset; // from here on, 'len' is the end index
        do {
            int c = 0;
            int highChar = mEncHighChar;
            int start = offset;
            String ent = null;

            // Scan forward to the first character that needs escaping
            for (; offset < len; ++offset) {
                c = cbuf[offset];
                if (c <= HIGHEST_ENCODABLE_TEXT_CHAR) {
                    if (c == '<') {
                        ent = "&lt;";
                        break;
                    } else if (c == '&') {
                        ent = "&amp;";
                        break;
                    } else if (c == '>') {
                        /* Conservative: quote if first character of the
                         * segment, or if preceded by ']' and thus
                         * possibly part of "]]>"
                         */
                        if ((offset == start) || cbuf[offset-1] == ']') {
                            ent = "&gt;";
                            break;
                        }
                    } else if (c < 0x0020) {
                        if (c == '\n' || c == '\t') { // fine as is
                            ;
                        } else if (c == '\r') {
                            if (mEscapeCR) {
                                break;
                            }
                        } else {
                            if (!mXml11 || c == 0) {
                                c = handleInvalidChar(c);
                                // Hmmh. This is very inefficient, but...
                                ent = String.valueOf((char) c);
                            }
                            break; // need quoting
                        }
                    }
                } else if (c >= highChar) {
                    break;
                }
                // otherwise ok
            }
            // First the clean segment before the problem character, if any
            int outLen = offset - start;
            if (outLen > 0) {
                writeRaw(cbuf, start, outLen);
            }
            if (ent != null) {
                writeRaw(ent);
                ent = null;
            } else if (offset < len) {
                writeAsEntity(c);
            }
        } while (++offset < len); // skips past the escaped character
    }
}
/**
 * Method that will try to output the content as specified. If
 * the content passed in has embedded "--" in it, it will either
 * add an intervening space between consecutive hyphens (if content
 * fixing is enabled), or return the offset of the first hyphen in
 * multi-hyphen sequence.
 *
 * @param data Contents of the comment
 *
 * @return -1 if the comment was written; otherwise offset of the
 *   problem found in the content
 */
public int writeComment(String data)
    throws IOException
{
    if (mCheckContent) {
        int ix = verifyCommentContent(data);
        if (ix >= 0) {
            if (!mFixContent) { // can not fix, caller needs to deal with it
                return ix;
            }
            writeSegmentedComment(data, ix);
            return -1;
        }
    }
    // Start marker, content as is, end marker
    fastWriteRaw("<!--");
    writeRaw(data);
    fastWriteRaw("-->");
    return -1;
}
/**
 * Writes a complete, caller-constructed document type declaration
 * as is, without any checks or escaping.
 *
 * @param data Full text of the declaration
 */
public void writeDTD(String data) throws IOException
{
    final String declaration = data;
    writeRaw(declaration);
}
public void writeDTD(String rootName, String systemId, String publicId,
String internalSubset)
throws IOException, XMLStreamException
{
fastWriteRaw(" 0) {
fastWriteRaw(' ', '[');
fastWriteRaw(internalSubset);
fastWriteRaw(']');
}
fastWriteRaw('>');
}
/**
 * Writes a general entity reference: ampersand, the name, semicolon.
 * The name is verified first if name checking is enabled.
 *
 * @param name Name of the entity to refer to
 */
public void writeEntityReference(String name)
    throws IOException, XMLStreamException
{
    final boolean verify = mCheckNames;
    if (verify) {
        verifyNameValidity(name, mNsAware);
    }
    final char refStart = '&';
    final char refEnd = ';';
    fastWriteRaw(refStart);
    fastWriteRaw(name);
    fastWriteRaw(refEnd);
}
public void writeXmlDeclaration(String version, String encoding, String standalone)
throws IOException
{
fastWriteRaw(" 0) {
fastWriteRaw(" encoding='");
fastWriteRaw(encoding);
fastWriteRaw('\'');
}
if (standalone != null) {
fastWriteRaw(" standalone='");
fastWriteRaw(standalone);
fastWriteRaw('\'');
}
fastWriteRaw('?', '>');
}
/**
 * Writes a processing instruction. Note that the start marker and
 * target are written before the data is checked.
 *
 * @param target Target of the instruction; verified if name checking
 *   is enabled
 * @param data Data of the instruction; skipped if null or empty
 *
 * @return -1 if the instruction was fully written; otherwise (only
 *   when content checking is enabled) offset of the end marker found
 *   within the data
 */
public int writePI(String target, String data)
    throws IOException, XMLStreamException
{
    if (mCheckNames) {
        // As per namespace specs, can not have colon(s)
        verifyNameValidity(target, mNsAware);
    }
    fastWriteRaw('<', '?');
    fastWriteRaw(target);

    final boolean hasData = (data != null) && (data.length() > 0);
    if (hasData) {
        if (mCheckContent) {
            final int endMarker = data.indexOf("?>");
            if (endMarker >= 0) {
                return endMarker;
            }
        }
        fastWriteRaw(' ');
        // Data may be longer, let's call regular writeRaw method
        writeRaw(data);
    }
    fastWriteRaw('?', '>');
    return -1;
}
/*
////////////////////////////////////////////////////
// Write methods, elements
////////////////////////////////////////////////////
*/
/**
 * Writes the opening angle bracket and name of a start tag. Copies
 * directly into the output buffer when the whole piece fits in the
 * remaining space, and goes through the raw-write methods otherwise.
 *
 * @param localName Name of the element; verified if name checking is
 *   enabled
 */
public void writeStartTagStart(String localName)
    throws IOException, XMLStreamException
{
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    int pos = mOutputPtr;
    final int available = mOutputBufLen - pos;

    if ((1 + nameLen) > available) { // would cross buffer boundary
        fastWriteRaw('<');
        fastWriteRaw(localName);
        return;
    }
    final char[] out = mOutputBuffer;
    out[pos++] = '<';
    localName.getChars(0, nameLen, out, pos);
    mOutputPtr = pos + nameLen;
}
/**
 * Writes the opening angle bracket and prefixed name of a start tag.
 * A null or empty prefix delegates to the single-name variant. Copies
 * directly into the output buffer when the whole piece fits in the
 * remaining space, and goes through the raw-write methods otherwise.
 *
 * @param prefix Namespace prefix of the element
 * @param localName Local name of the element
 */
public void writeStartTagStart(String prefix, String localName)
    throws IOException, XMLStreamException
{
    if (prefix == null || prefix.length() == 0) { // shouldn't happen
        writeStartTagStart(localName);
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    final int prefixLen = prefix.length();
    final int nameLen = localName.length();
    int pos = mOutputPtr;
    final int available = mOutputBufLen - pos;

    if ((2 + nameLen + prefixLen) > available) { // across buffer boundary, slow case
        fastWriteRaw('<');
        fastWriteRaw(prefix);
        fastWriteRaw(':');
        fastWriteRaw(localName);
        return;
    }
    // Fast case, all inlined
    final char[] out = mOutputBuffer;
    out[pos++] = '<';
    prefix.getChars(0, prefixLen, out, pos);
    pos += prefixLen;
    out[pos++] = ':';
    localName.getChars(0, nameLen, out, pos);
    mOutputPtr = pos + nameLen;
}
/**
 * Writes the closing angle bracket of a (non-empty) start tag.
 */
public void writeStartTagEnd() throws IOException
{
    final char tagClose = '>';
    fastWriteRaw(tagClose);
}
/**
 * Writes the end of an empty element tag: slash and closing bracket,
 * preceded by a space if so configured. The buffer is flushed first
 * if there may not be room for three characters; nothing is written
 * in that case if there is no underlying output.
 */
public void writeStartTagEmptyEnd()
    throws IOException
{
    int pos = mOutputPtr;
    if ((pos + 3) >= mOutputBufLen) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
        pos = mOutputPtr;
    }
    final char[] out = mOutputBuffer;
    if (mAddSpaceAfterEmptyElem) {
        out[pos++] = ' ';
    }
    out[pos] = '/';
    out[pos + 1] = '>';
    mOutputPtr = pos + 2;
}
/**
 * Writes a complete end tag for an element without a prefix. Copies
 * directly into the output buffer when the whole tag fits in the
 * remaining space, and goes through the raw-write methods otherwise.
 *
 * @param localName Name of the element to close
 */
public void writeEndTag(String localName)
    throws IOException
{
    final int nameLen = localName.length();
    int pos = mOutputPtr;
    final int available = mOutputBufLen - pos;

    if ((3 + nameLen) > available) {
        fastWriteRaw('<', '/');
        fastWriteRaw(localName);
        fastWriteRaw('>');
        return;
    }
    final char[] out = mOutputBuffer;
    out[pos++] = '<';
    out[pos++] = '/';
    localName.getChars(0, nameLen, out, pos);
    pos += nameLen;
    out[pos++] = '>';
    mOutputPtr = pos;
}
/**
 * Writes a complete end tag for a prefixed element. A null or empty
 * prefix delegates to the single-name variant. Names are not verified
 * here: the caller is assumed to have matched (and validated, as
 * necessary) the end tag against its start tag.
 *
 * @param prefix Namespace prefix of the element
 * @param localName Local name of the element
 */
public void writeEndTag(String prefix, String localName)
    throws IOException
{
    if (prefix == null || prefix.length() == 0) {
        writeEndTag(localName);
        return;
    }
    final int prefixLen = prefix.length();
    final int nameLen = localName.length();
    int pos = mOutputPtr;
    final int available = mOutputBufLen - pos;

    if ((4 + nameLen + prefixLen) > available) {
        fastWriteRaw('<', '/');
        fastWriteRaw(prefix);
        fastWriteRaw(':');
        fastWriteRaw(localName);
        fastWriteRaw('>');
        return;
    }
    final char[] out = mOutputBuffer;
    out[pos++] = '<';
    out[pos++] = '/';
    prefix.getChars(0, prefixLen, out, pos);
    pos += prefixLen;
    out[pos++] = ':';
    localName.getChars(0, nameLen, out, pos);
    pos += nameLen;
    out[pos++] = '>';
    mOutputPtr = pos;
}
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
 * Writes an attribute without a prefix: leading space, name, equals
 * sign and the double-quoted value. The value is escaped either by the
 * custom attribute value writer, if one is configured, or by the
 * default escaping. Does nothing if there is no underlying output.
 *
 * @param localName Name of the attribute
 * @param value Value of the attribute; null is written as empty
 */
public void writeAttribute(String localName, String value)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    final int available = mOutputBufLen - mOutputPtr;
    if ((3 + nameLen) > available) {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    }
    final int valueLen = (value == null) ? 0 : value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, valueLen);
        } else { // custom escaping
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes an attribute without a prefix, with the value given as a
 * character range: leading space, name, equals sign and the
 * double-quoted value. The value is escaped either by the custom
 * attribute value writer, if one is configured, or by the default
 * escaping. Does nothing if there is no underlying output.
 *
 * @param localName Name of the attribute
 * @param value Buffer that contains the value
 * @param offset Index of the first character of the value
 * @param vlen Length of the value
 */
public void writeAttribute(String localName, char[] value, int offset, int vlen)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    final int available = mOutputBufLen - mOutputPtr;
    if ((3 + nameLen) > available) {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    }
    if (vlen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, offset, vlen);
        } else { // custom escaping
            mAttrValueWriter.write(value, offset, vlen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes a prefixed attribute: leading space, qualified name, equals
 * sign and the double-quoted value. The prefix and separating colon
 * are only written if the prefix is not empty, regardless of whether
 * the name fits in the output buffer. The value is escaped either by
 * the custom attribute value writer, if one is configured, or by the
 * default escaping. Does nothing if there is no underlying output.
 *
 * @param prefix Namespace prefix of the attribute (not null)
 * @param localName Local name of the attribute
 * @param value Value of the attribute; null is written as empty
 */
public void writeAttribute(String prefix, String localName, String value)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    int len = prefix.length();
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) {
        fastWriteRaw(' ');
        if (len > 0) {
            fastWriteRaw(prefix);
            fastWriteRaw(':');
        }
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        // Same rule as the slow path: no lone colon for an empty prefix
        if (len > 0) {
            prefix.getChars(0, len, buf, ptr);
            ptr += len;
            buf[ptr++] = ':';
        }
        len = localName.length();
        localName.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    len = (value == null) ? 0 : value.length();
    if (len > 0) {
        if (mAttrValueWriter != null) { // custom escaping?
            mAttrValueWriter.write(value, 0, len);
        } else { // nope, default
            writeAttrValue(value, len);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes a prefixed attribute with the value given as a character
 * range: leading space, qualified name, equals sign and the
 * double-quoted value. The prefix and separating colon are only
 * written if the prefix is not empty, regardless of whether the name
 * fits in the output buffer. The value is escaped either by the custom
 * attribute value writer, if one is configured, or by the default
 * escaping. Does nothing if there is no underlying output.
 *
 * @param prefix Namespace prefix of the attribute (not null)
 * @param localName Local name of the attribute
 * @param value Buffer that contains the value
 * @param offset Index of the first character of the value
 * @param vlen Length of the value
 */
public void writeAttribute(String prefix, String localName, char[] value, int offset, int vlen)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    int len = prefix.length();
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) {
        fastWriteRaw(' ');
        if (len > 0) {
            fastWriteRaw(prefix);
            fastWriteRaw(':');
        }
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        // Same rule as the slow path: no lone colon for an empty prefix
        if (len > 0) {
            prefix.getChars(0, len, buf, ptr);
            ptr += len;
            buf[ptr++] = ':';
        }
        len = localName.length();
        localName.getChars(0, len, buf, ptr);
        ptr += len;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    if (vlen > 0) {
        if (mAttrValueWriter != null) { // custom escaping?
            mAttrValueWriter.write(value, offset, vlen);
        } else { // nope, default
            writeAttrValue(value, offset, vlen);
        }
    }
    fastWriteRaw('"');
}
/**
 * Writes an attribute value using default escaping: the configured
 * quote character is replaced with its entity, '&lt;' and '&amp;' with
 * the predefined entities, and characters that need quoting (tab,
 * linefeed, carriage return when so configured, control characters in
 * xml 1.1 mode, characters at or above the encoding's limit) are
 * written via {@code writeAsEntity}.
 *
 * @param value Value to write
 * @param len Number of characters of the value to write
 */
private final void writeAttrValue(String value, int len)
    throws IOException
{
    int inPtr = 0;
    final char qchar = mEncQuoteChar;
    final int highChar = mEncHighChar;

    main_loop:
    while (true) {
        String ent = null;

        inner_loop:
        while (true) {
            if (inPtr >= len) {
                break main_loop;
            }
            char c = value.charAt(inPtr++);
            if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char?
                if (c < 0x0020) { // tab, cr/lf need encoding too
                    if (c == '\r') {
                        if (mEscapeCR) {
                            break inner_loop; // quoting
                        }
                    } else if (c != '\n' && c != '\t'
                               && (!mXml11 || c == 0)) {
                        c = handleInvalidChar(c);
                    } else {
                        break inner_loop; // need quoting
                    }
                } else if (c == qchar) {
                    ent = mEncQuoteEntity;
                    break inner_loop;
                } else if (c == '<') {
                    ent = "&lt;";
                    break inner_loop;
                } else if (c == '&') {
                    ent = "&amp;";
                    break inner_loop;
                }
            } else if (c >= highChar) { // out of range, have to escape
                break inner_loop;
            }
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        // Inner loop was exited for a character that needs escaping
        if (ent != null) {
            writeRaw(ent);
        } else {
            writeAsEntity(value.charAt(inPtr-1));
        }
    }
}
/**
 * Writes an attribute value, given as a character range, using
 * default escaping: the configured quote character is replaced with
 * its entity, '&lt;' and '&amp;' with the predefined entities, and
 * characters that need quoting (tab, linefeed, carriage return when so
 * configured, control characters in xml 1.1 mode, characters at or
 * above the encoding's limit) are written via {@code writeAsEntity}.
 *
 * @param value Buffer that contains the value
 * @param offset Index of the first character of the value
 * @param len Length of the value
 */
private final void writeAttrValue(char[] value, int offset, int len)
    throws IOException
{
    len += offset; // from here on, 'len' is the end index
    final char qchar = mEncQuoteChar;
    final int highChar = mEncHighChar;

    main_loop:
    while (true) {
        String ent = null;

        inner_loop:
        while (true) {
            if (offset >= len) {
                break main_loop;
            }
            char c = value[offset++];
            if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char?
                if (c < 0x0020) { // tab, cr/lf need encoding too
                    if (c == '\r') {
                        if (mEscapeCR) {
                            break inner_loop; // quoting
                        }
                    } else if (c != '\n' && c != '\t'
                               && (!mXml11 || c == 0)) {
                        c = handleInvalidChar(c);
                    } else {
                        break inner_loop; // need quoting
                    }
                } else if (c == qchar) {
                    ent = mEncQuoteEntity;
                    break inner_loop;
                } else if (c == '<') {
                    ent = "&lt;";
                    break inner_loop;
                } else if (c == '&') {
                    ent = "&amp;";
                    break inner_loop;
                }
            } else if (c >= highChar) { // out of range, have to escape
                break inner_loop;
            }
            if (mOutputPtr >= mOutputBufLen) {
                flushBuffer();
            }
            mOutputBuffer[mOutputPtr++] = c;
        }
        // Inner loop was exited for a character that needs escaping
        if (ent != null) {
            writeRaw(ent);
        } else {
            writeAsEntity(value[offset-1]);
        }
    }
}
/*
////////////////////////////////////////////////
// Methods used by Typed Access API
////////////////////////////////////////////////
*/
/**
 * Writes the content produced by the given encoder directly into the
 * output buffer, flushing between rounds until the encoder reports
 * completion. Does nothing if there is no underlying output.
 *
 * @param enc Encoder that produces the characters to write
 */
public final void writeTypedElement(AsciiValueEncoder enc)
    throws IOException
{
    if (mOut == null) {
        return;
    }
    final int available = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    // Not completed means the buffer filled up: flush and go again
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    }
}
/**
 * Writes the content produced by the given encoder directly into the
 * output buffer, passing each encoded segment to the validator, and
 * flushing between rounds until the encoder reports completion. Does
 * nothing if there is no underlying output.
 *
 * @param enc Encoder that produces the characters to write
 * @param validator Validator to call with each segment written
 * @param copyBuffer Not used by this implementation
 */
public final void writeTypedElement(AsciiValueEncoder enc,
    XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    final int available = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    do {
        final int segmentStart = mOutputPtr;
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        // False -> can't be sure it's the whole remaining text
        validator.validateText(mOutputBuffer, segmentStart, mOutputPtr, false);
        if (enc.isCompleted()) {
            return;
        }
        flush();
    } while (true);
}
/**
 * Writes an attribute without a prefix, with the value produced by
 * the given encoder: leading space, name, equals sign and the
 * double-quoted value. Does nothing if there is no underlying output.
 *
 * @param localName Name of the attribute
 * @param enc Encoder that produces the characters of the value
 */
public void writeTypedAttribute(String localName, AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(localName, mNsAware);
    }
    final int nameLen = localName.length();
    if ((mOutputPtr + 3 + nameLen) > mOutputBufLen) {
        fastWriteRaw(' ');
        fastWriteRaw(localName);
        fastWriteRaw('=', '"');
    } else {
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    }
    final int available = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    // Not completed means the buffer filled up: flush and go again
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    }
    fastWriteRaw('"');
}
/**
 * Writes a prefixed attribute with the value produced by the given
 * encoder: leading space, qualified name (prefix and colon only if
 * the prefix is not empty), equals sign and the double-quoted value.
 * Does nothing if there is no underlying output.
 *
 * @param prefix Namespace prefix of the attribute (not null)
 * @param localName Local name of the attribute
 * @param enc Encoder that produces the characters of the value
 */
public void writeTypedAttribute(String prefix, String localName,
    AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (mCheckNames) {
        verifyNameValidity(prefix, mNsAware);
        verifyNameValidity(localName, mNsAware);
    }
    final int prefixLen = prefix.length();
    final int nameLen = localName.length();
    if ((mOutputPtr + 4 + prefixLen + nameLen) > mOutputBufLen) {
        writePrefixedName(prefix, localName);
        fastWriteRaw('=', '"');
    } else {
        final char[] out = mOutputBuffer;
        int pos = mOutputPtr;
        out[pos++] = ' ';
        if (prefixLen > 0) {
            prefix.getChars(0, prefixLen, out, pos);
            pos += prefixLen;
            out[pos++] = ':';
        }
        localName.getChars(0, nameLen, out, pos);
        pos += nameLen;
        out[pos++] = '=';
        out[pos++] = '"';
        mOutputPtr = pos;
    }
    final int available = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    // Not completed means the buffer filled up: flush and go again
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    }
    fastWriteRaw('"');
}
/**
 * Writes an attribute with the value produced by the given encoder,
 * and passes the complete value to the validator. Since validators
 * expect complete values, a value that does not fit into the output
 * buffer in one piece is also aggregated separately for validation.
 * Does nothing if there is no underlying output.
 *
 * @param prefix Namespace prefix of the attribute; null is taken as
 *   empty
 * @param localName Local name of the attribute
 * @param nsURI Namespace URI of the attribute; null is taken as empty
 * @param enc Encoder that produces the characters of the value
 * @param validator Validator to call with the attribute value
 * @param copyBuffer Not used by this implementation
 */
public void writeTypedAttribute(String prefix, String localName, String nsURI,
    AsciiValueEncoder enc,
    XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mOut == null) {
        return;
    }
    if (prefix == null) {
        prefix = "";
    }
    if (nsURI == null) {
        nsURI = "";
    }
    int plen = prefix.length();
    if (mCheckNames) {
        if (plen > 0) {
            verifyNameValidity(prefix, mNsAware);
        }
        verifyNameValidity(localName, mNsAware);
    }
    if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + plen)) < 0) {
        writePrefixedName(prefix, localName);
        fastWriteRaw('=', '"');
    } else {
        int ptr = mOutputPtr;
        char[] buf = mOutputBuffer;
        buf[ptr++] = ' ';
        if (plen > 0) {
            prefix.getChars(0, plen, buf, ptr);
            ptr += plen;
            buf[ptr++] = ':';
        }
        int llen = localName.length();
        localName.getChars(0, llen, buf, ptr);
        ptr += llen;
        buf[ptr++] = '=';
        buf[ptr++] = '"';
        mOutputPtr = ptr;
    }
    /* Tricky here is this: attributes to validate can not be
     * split (validators expect complete values). So, if value
     * won't fit as is, may need to aggregate using a separate buffer
     */
    int free = mOutputBufLen - mOutputPtr;
    if (enc.bufferNeedsFlush(free)) {
        flush();
    }
    int start = mOutputPtr;
    // First, let's see if one call is enough
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
    if (enc.isCompleted()) { // yup
        /* Validate straight from the output buffer, before writing
         * anything more: writing the quote may flush and reuse the
         * buffer positions the value occupies.
         */
        validator.validateAttribute(localName, nsURI, prefix, mOutputBuffer, start, mOutputPtr);
        fastWriteRaw('"'); // value still needs its closing quote
        return;
    }
    // If not, must combine first
    StringBuffer sb = new StringBuffer(mOutputBuffer.length << 1);
    sb.append(mOutputBuffer, start, mOutputPtr-start);
    while (true) {
        flush();
        start = mOutputPtr;
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBufLen);
        sb.append(mOutputBuffer, start, mOutputPtr-start);
        // All done?
        if (enc.isCompleted()) {
            break;
        }
    }
    fastWriteRaw('"');
    // Then validate
    String valueStr = sb.toString();
    validator.validateAttribute(localName, nsURI, prefix, valueStr);
}
/**
 * Writes a space followed by the qualified name; prefix and the
 * separating colon are left out if the prefix is empty.
 *
 * @param prefix Namespace prefix (not null)
 * @param localName Local name
 */
protected final void writePrefixedName(String prefix, String localName)
    throws IOException
{
    fastWriteRaw(' ');
    final boolean hasPrefix = prefix.length() > 0;
    if (hasPrefix) {
        fastWriteRaw(prefix);
        fastWriteRaw(':');
    }
    fastWriteRaw(localName);
}
/*
////////////////////////////////////////////////////
// Internal methods, buffering
////////////////////////////////////////////////////
*/
/**
 * Writes the buffered characters, if any, to the underlying output
 * and resets the output pointer; location offsets are adjusted by the
 * number of characters written out so that they stay in sync.
 */
private final void flushBuffer()
    throws IOException
{
    final int pending = mOutputPtr;
    if (pending <= 0 || mOutputBuffer == null) {
        return;
    }
    // Need to update location info, to keep it in sync
    mLocPastChars += pending;
    mLocRowStartOffset -= pending;
    mOutputPtr = 0;
    mOut.write(mOutputBuffer, 0, pending);
}
/**
 * Appends a single character to the output buffer as is, flushing
 * first if the buffer is full. If the buffer is full and there is no
 * underlying output, the character is dropped.
 */
private final void fastWriteRaw(char c)
    throws IOException
{
    final boolean bufferFull = (mOutputPtr >= mOutputBufLen);
    if (bufferFull) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr] = c;
    ++mOutputPtr;
}
/**
 * Appends two characters to the output buffer as is, flushing first
 * if there may not be room for both. If a flush would be needed and
 * there is no underlying output, the characters are dropped.
 */
private final void fastWriteRaw(char c1, char c2)
    throws IOException
{
    final boolean needsFlush = ((mOutputPtr + 1) >= mOutputBufLen);
    if (needsFlush) {
        if (mOut == null) {
            return;
        }
        flushBuffer();
    }
    final char[] out = mOutputBuffer;
    out[mOutputPtr++] = c1;
    out[mOutputPtr++] = c2;
}
/**
 * Appends a String to the output buffer as is. If it may not fit in
 * the remaining space, the buffer is flushed first; a String longer
 * than the whole buffer is handed to the regular raw-write method
 * instead. If it may not fit and there is no underlying output, the
 * String is dropped.
 */
private final void fastWriteRaw(String str)
    throws IOException
{
    final int strLen = str.length();
    int pos = mOutputPtr;
    if ((pos + strLen) >= mOutputBufLen) {
        if (mOut == null) {
            return;
        }
        // Longer than the buffer itself? Not likely, but possible
        if (strLen > mOutputBufLen) {
            writeRaw(str);
            return;
        }
        flushBuffer();
        pos = mOutputPtr;
    }
    str.getChars(0, strLen, mOutputBuffer, pos);
    mOutputPtr = pos + strLen;
}
/*
////////////////////////////////////////////////////
// Internal methods, content verification/fixing
////////////////////////////////////////////////////
*/
/**
 * Checks whether given content contains the CDATA end marker.
 *
 * @param content Content to check; may be null
 *
 * @return Index at which a problem was found, if any; -1 if there's
 *   no problem.
 */
protected int verifyCDataContent(String content)
{
    // Marker is 3 characters long, so shorter content can not contain it
    if (content == null || content.length() < 3) {
        return -1;
    }
    return content.indexOf("]]>");
}
/**
 * Checks whether given character range contains the CDATA end
 * marker. The scan inspects the position where a marker could end
 * and skips ahead by two characters where no marker can end at
 * either of the next two positions.
 *
 * @param c Buffer to check; may be null
 * @param start Index of the first character to check
 * @param end Index one past the last character to check
 *
 * @return Index at which the first end marker starts, if one was
 *   found; -1 otherwise
 */
protected int verifyCDataContent(char[] c, int start, int end)
{
    if (c == null) {
        return -1;
    }
    // Earliest position at which a 3-character marker could end
    int pos = start + 2;
    while (pos < end) {
        final char curr = c[pos];
        if (curr == ']') {
            // Marker could end at the very next position: move by one
            pos += 1;
        } else {
            if (curr == '>' && c[pos-1] == ']' && c[pos-2] == ']') {
                return pos - 2;
            }
            pos += 2;
        }
    }
    return -1;
}
/**
 * Checks whether given comment content is problematic: it must neither
 * contain "--", nor end with '-' (since the latter would produce the
 * invalid end marker '---&gt;').
 *
 * @param content Comment content to check (must not be null)
 *
 * @return Index of the first "--" found, if any; otherwise index of the
 *   last character if that character is a hyphen; -1 if there's no
 *   problem.
 */
protected int verifyCommentContent(String content)
{
    int ix = content.indexOf("--");
    if (ix < 0) {
        /* No double hyphen; but it's illegal to just end with '-' too,
         * no matter whether there are other (single) hyphens before it.
         */
        int last = content.length() - 1;
        if (last >= 0 && content.charAt(last) == '-') {
            return last;
        }
    }
    return ix;
}
/**
 * Writes given content as a sequence of CDATA sections, so that no
 * section contains the end marker "]]&gt;".
 *
 * @param content Content to write
 * @param index Index of the first "]]&gt;" within content (as returned
 *   by {@link #verifyCDataContent(String)}); negative if there is none
 */
protected void writeSegmentedCData(String content, int index)
    throws IOException
{
    /* It's actually fairly easy, just split "]]>" into 2 pieces;
     * for each ']]>'; first one containing "]]", second one ">"
     * (as long as necessary)
     */
    int start = 0;
    while (index >= 0) {
        fastWriteRaw("<![CDATA[");
        // segment ends with "]]"; the '>' starts the next segment
        fastWriteRaw(content.substring(start, index+2));
        fastWriteRaw("]]>");
        start = index+2;
        index = content.indexOf("]]>", start);
    }
    // Ok, then the last segment
    fastWriteRaw("<![CDATA[");
    fastWriteRaw(content.substring(start));
    fastWriteRaw("]]>");
}
/**
 * Writes given character range as a sequence of CDATA sections, so that
 * no section contains the end marker "]]&gt;": each marker is split so
 * that "]]" ends one section and "&gt;" starts the next one.
 *
 * @param c Buffer that contains the content
 * @param start Offset of the first character of the content
 * @param len Length of the content
 * @param index Index of the first "]]&gt;" within the range (as returned
 *   by {@link #verifyCDataContent(char[],int,int)}); negative if none
 */
protected void writeSegmentedCData(char[] c, int start, int len, int index)
    throws IOException
{
    int end = start + len;
    while (index >= 0) {
        fastWriteRaw("<![CDATA[");
        // segment ends with "]]"; the '>' starts the next segment
        fastWriteRaw(new String(c, start, (index+2) - start));
        fastWriteRaw("]]>");
        start = index+2;
        index = verifyCDataContent(c, start, end);
    }
    // Ok, then the last segment
    fastWriteRaw("<![CDATA[");
    fastWriteRaw(new String(c, start, end - start));
    fastWriteRaw("]]>");
}
/**
 * Writes given content as a comment, modifying it as necessary to make
 * the result well-formed: a space is injected after the first hyphen of
 * each "--" pair, and after a trailing hyphen.
 *
 * @param content Comment content to write
 * @param index Index of the first problem within content, as returned
 *   by {@link #verifyCommentContent}
 */
protected void writeSegmentedComment(String content, int index)
    throws IOException
{
    int len = content.length();
    // First the special case (last char is hyphen):
    if (index == (len-1)) {
        fastWriteRaw("<!--");
        fastWriteRaw(content);
        // we just need to inject one space in there
        fastWriteRaw(" -->");
        return;
    }
    /* Fixing comments is more difficult than that of CDATA segments';
     * this because CDATA can still contain embedded ']]'s, but
     * comment neither allows '--' nor ending with '-->'; which means
     * that it's impossible to just split segments. Instead we'll do
     * something more intrusive, and embed single spaces between all
     * '--' character pairs... it's intrusive, but comments are not
     * supposed to contain any data, so that should be fine (plus
     * at least result is valid, unlike contents as is)
     */
    fastWriteRaw("<!--");
    int start = 0;
    while (index >= 0) {
        // first, content prior to '--' and the first hyphen
        fastWriteRaw(content.substring(start, index+1));
        // and an obligatory trailing space to split the double hyphen
        fastWriteRaw(' ');
        // still need to handle the rest of consecutive '--'s, if any
        start = index+1;
        index = content.indexOf("--", start);
    }
    // Ok, then the last segment
    fastWriteRaw(content.substring(start));
    // ends with a hyphen? that needs to be fixed, too
    if (len > 0 && content.charAt(len-1) == '-') {
        fastWriteRaw(' ');
    }
    fastWriteRaw("-->");
}
/**
 * Method used to figure out which part of the Unicode char set the
 * encoding can natively support. Values returned are 7, 8 and 16,
 * to indicate (respectively) "ascii", "ISO-Latin" and "native Unicode".
 * These are just best guesses, but should work ok for the most common
 * encodings.
 *
 * @param enc Name of the encoding; null or empty is treated as UTF-8
 *
 * @return 7, 8 or 16, as explained above; 8 for names that are not
 *   recognized
 */
public static int guessEncodingBitSize(String enc)
{
    if (enc == null || enc.length() == 0) { // let's assume default is UTF-8...
        return 16;
    }
    // Let's see if we can find a normalized name, first:
    enc = CharsetNames.normalize(enc);

    /* Ok, first, do we have known ones; starting with most common.
     * Compared by value (not identity), so the result does not depend
     * on normalize() handing back the constant instances themselves.
     */
    if (CharsetNames.CS_UTF8.equals(enc)) {
        return 16; // meaning up to 2^16 can be represented natively
    }
    if (CharsetNames.CS_ISO_LATIN1.equals(enc)) {
        return 8;
    }
    if (CharsetNames.CS_US_ASCII.equals(enc)) {
        return 7;
    }
    if (CharsetNames.CS_UTF16.equals(enc)
        || CharsetNames.CS_UTF16BE.equals(enc)
        || CharsetNames.CS_UTF16LE.equals(enc)
        || CharsetNames.CS_UTF32BE.equals(enc)
        || CharsetNames.CS_UTF32LE.equals(enc)) {
        return 16;
    }
    /* Above and beyond well-recognized names, it might still be
     * good to have more heuristics for as-of-yet unhandled cases...
     * But, it's probably easier to only assume 8-bit clean (could
     * even make it just 7, let's see how this works out)
     */
    return 8;
}
/**
 * Appends given character to the output buffer in escaped form: as one
 * of the pre-defined entities (amp, lt, gt, apos, quot) where possible,
 * and otherwise as a hexadecimal character reference using the shortest
 * possible sequence of lower-case digits.
 *
 * @param c Character (code point) to write
 */
protected final void writeAsEntity(int c)
    throws IOException
{
    char[] out = mOutputBuffer;
    int pos = mOutputPtr;
    // Longest case: '&', '#', 'x', up to 6 hex digits, ';'
    if ((pos + 10) >= out.length) {
        flushBuffer();
        pos = mOutputPtr;
    }
    out[pos++] = '&';

    // Although not really mandatory, let's use pre-defined entities
    // where possible
    final String named;
    switch (c) {
    case '&':
        named = "amp";
        break;
    case '<':
        named = "lt";
        break;
    case '>':
        named = "gt";
        break;
    case '\'':
        named = "apos";
        break;
    case '"':
        named = "quot";
        break;
    default:
        named = null;
    }

    if (named != null) {
        final int n = named.length();
        named.getChars(0, n, out, pos);
        pos += n;
    } else {
        out[pos++] = '#';
        out[pos++] = 'x';
        if (c < 256) {
            // At most two digits; just one for tab, cr, lf
            if (c >= 16) {
                int digit = (c >> 4);
                out[pos++] = (char) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                c &= 0xF;
            }
        } else {
            // Leading zeroes are skipped: digits only get written once
            // the first non-zero one has been seen
            final int firstDigitPos = pos;
            for (int shift = 20; shift > 0; shift -= 4) {
                int digit = (c >> shift) & 0xF;
                if (digit > 0 || (pos != firstDigitPos)) {
                    out[pos++] = (char) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                }
            }
            c &= 0xF;
        }
        // Lowest digit is always written
        out[pos++] = (char) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
    }
    out[pos++] = ';';
    mOutputPtr = pos;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/sw/OutputElementBase.java 0000644 0001750 0001750 00000027513 11745427074 024603 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2005 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.ri.EmptyIterator;
import com.ctc.wstx.util.BijectiveNsMap;
/**
 * Class that encapsulates information about a specific element in virtual
 * output stack for namespace-aware writers.
 * It provides support for URI-to-prefix mappings as well as namespace
 * mapping generation.
 *
 * One noteworthy feature of the class is that it is designed to allow
 * "short-term recycling", ie. instances can be reused within context
 * of a simple document output. While reuse/recycling of such lightweight
 * object is often useless or even counter productive, here it may
 * be worth using, due to simplicity of the scheme (basically using
 * a very simple free-elements linked list).
 */
public abstract class OutputElementBase
    implements NamespaceContext
{
    /** Status: prefix is not bound to any namespace URI */
    public final static int PREFIX_UNBOUND = 0;

    /** Status: prefix is bound to the expected namespace URI */
    public final static int PREFIX_OK = 1;

    /** Status: prefix is bound, but to a different namespace URI */
    public final static int PREFIX_MISBOUND = 2;

    final static String sXmlNsPrefix = XMLConstants.XML_NS_PREFIX;
    final static String sXmlNsURI = XMLConstants.XML_NS_URI;

    /*
    ////////////////////////////////////////////
    // Namespace binding/mapping information
    ////////////////////////////////////////////
     */

    /**
     * Namespace context end application may have supplied, and that
     * (if given) should be used to augment explicitly defined bindings.
     */
    protected NamespaceContext mRootNsContext;

    /**
     * URI of the default namespace in effect for this element; empty
     * String for the virtual root element.
     */
    protected String mDefaultNsURI;

    /**
     * Mapping of namespace prefixes to URIs and back.
     */
    protected BijectiveNsMap mNsMapping;

    /**
     * True, if {@link #mNsMapping} is a shared copy from the parent;
     * false if a local copy was created (which happens when namespaces
     * get bound etc).
     */
    protected boolean mNsMapShared;

    /*
    ////////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////////
     */

    /**
     * Constructor for the virtual root element
     */
    protected OutputElementBase()
    {
        mNsMapping = null;
        mNsMapShared = false;
        mDefaultNsURI = "";
        mRootNsContext = null;
    }

    /**
     * Constructor for an element that has a parent; default namespace
     * and root namespace context are inherited from the parent.
     *
     * @param parent Element that encloses this element
     * @param ns Namespace mapping to start with (considered shared if
     *   not null); may be null
     */
    protected OutputElementBase(OutputElementBase parent, BijectiveNsMap ns)
    {
        mNsMapping = ns;
        mNsMapShared = (ns != null);
        mDefaultNsURI = parent.mDefaultNsURI;
        mRootNsContext = parent.mRootNsContext;
    }

    /**
     * Method called to reuse a pooled instance.
     *
     * @param parent Element that is the new parent of this instance;
     *   namespace information is (re)initialized from it
     */
    protected void relink(OutputElementBase parent)
    {
        mNsMapping = parent.mNsMapping;
        mNsMapShared = (mNsMapping != null);
        mDefaultNsURI = parent.mDefaultNsURI;
        mRootNsContext = parent.mRootNsContext;
    }

    protected abstract void setRootNsContext(NamespaceContext ctxt);

    /*
    ////////////////////////////////////////////
    // Public API, accessors
    ////////////////////////////////////////////
     */

    public abstract boolean isRoot();

    /**
     * @return String presentation of the fully-qualified name, in
     *   "prefix:localName" format (no URI). Useful for error and
     *   debugging messages.
     */
    public abstract String getNameDesc();

    public final String getDefaultNsUri() {
        return mDefaultNsURI;
    }

    /*
    ////////////////////////////////////////////
    // Public API, ns binding, checking
    ////////////////////////////////////////////
     */

    /**
     * Method similar to {@link #getPrefix}, but one that will not accept
     * the default namespace, only an explicit one. Usually used when
     * trying to find a prefix for attributes.
     *
     * @return Non-empty prefix bound to the URI, if one is found;
     *   null otherwise
     */
    public final String getExplicitPrefix(String uri)
    {
        if (mNsMapping != null) {
            String prefix = mNsMapping.findPrefixByUri(uri);
            if (prefix != null) {
                return prefix;
            }
        }
        if (mRootNsContext != null) {
            String prefix = mRootNsContext.getPrefix(uri);
            // Still can't use the default NS (empty prefix):
            if (prefix != null && prefix.length() > 0) {
                return prefix;
            }
            // ... should we try to find an explicit one?
        }
        return null;
    }

    /**
     * Method that verifies that passed-in prefix indeed maps to the specified
     * namespace URI; and depending on how it goes returns a status for
     * caller.
     *
     * @param prefix Prefix to check; null and empty String both mean
     *   "no prefix"
     * @param nsURI Namespace URI the prefix is expected to be bound to;
     *   null is handled as empty String
     * @param isElement If true, rules for the default NS are those of elements
     *   (ie. empty prefix can map to non-default namespace); if false,
     *   rules are those of attributes (only non-default prefix can map to
     *   a non-default namespace).
     *
     * @return PREFIX_OK, if passed-in prefix matches matched-in namespace URI
     *   in current scope; PREFIX_UNBOUND if it's not bound to anything,
     *   and PREFIX_MISBOUND if it's bound to another URI.
     *
     * @throws XMLStreamException If the pre-defined prefix 'xml' is used
     *   with a namespace URI other than the one it is always bound to
     */
    public final int isPrefixValid(String prefix, String nsURI,
                                   boolean isElement)
        throws XMLStreamException
    {
        // Hmmm.... caller shouldn't really pass null.
        if (nsURI == null) {
            nsURI = "";
        }

        /* First thing is to see if specified prefix is bound to a namespace;
         * and if so, verify it matches with data passed in:
         */

        // Checking default namespace?
        if (prefix == null || prefix.length() == 0) {
            if (isElement) {
                // It's fine for elements only if the URI actually matches:
                if (nsURI == mDefaultNsURI || nsURI.equals(mDefaultNsURI)) {
                    return PREFIX_OK;
                }
            } else {
                /* Attributes never use the default namespace: "no prefix"
                 * can only mean "no namespace"
                 */
                if (nsURI.length() == 0) {
                    return PREFIX_OK;
                }
            }
            return PREFIX_MISBOUND;
        }

        /* Need to handle 'xml' prefix and its associated
         *   URI; they are always declared by default
         */
        if (prefix.equals(sXmlNsPrefix)) {
            // Should we thoroughly verify its namespace matches...?
            // 01-Apr-2005, TSa: Yes, let's always check this
            if (!nsURI.equals(sXmlNsURI)) {
                throwOutputError("Namespace prefix '"+sXmlNsPrefix
                                 +"' can not be bound to non-default namespace ('"+nsURI+"'); has to be the default '"
                                 +sXmlNsURI+"'");
            }
            return PREFIX_OK;
        }

        // Nope checking some other namespace
        String act = (mNsMapping == null) ? null : mNsMapping.findUriByPrefix(prefix);
        if (act == null && mRootNsContext != null) {
            act = mRootNsContext.getNamespaceURI(prefix);
        }

        // Not (yet) bound...
        if (act == null) {
            return PREFIX_UNBOUND;
        }
        return (act == nsURI || act.equals(nsURI)) ?
            PREFIX_OK : PREFIX_MISBOUND;
    }

    /*
    ////////////////////////////////////////////
    // Public API, mutators
    ////////////////////////////////////////////
     */

    public abstract void setDefaultNsUri(String uri);

    /**
     * Adds a binding for given URI in the scope of this element, using
     * a generated prefix.
     *
     * @return Prefix as returned by the namespace mapping
     */
    public final String generateMapping(String prefixBase, String uri, int[] seqArr)
    {
        prepareLocalNsMapping();
        return mNsMapping.addGeneratedMapping(prefixBase, mRootNsContext,
                                              uri, seqArr);
    }

    /**
     * Binds given prefix to given URI in the scope of this element.
     */
    public final void addPrefix(String prefix, String uri)
    {
        prepareLocalNsMapping();
        mNsMapping.addMapping(prefix, uri);
    }

    /*
    //////////////////////////////////////////////////
    // NamespaceContext implementation
    //////////////////////////////////////////////////
     */

    /**
     * @throws IllegalArgumentException If prefix is null (as required
     *   by {@link NamespaceContext})
     */
    public final String getNamespaceURI(String prefix)
    {
        if (prefix == null) {
            throw new IllegalArgumentException("Illegal to pass null prefix");
        }
        if (prefix.length() == 0) { //default NS
            return mDefaultNsURI;
        }
        if (mNsMapping != null) {
            String uri = mNsMapping.findUriByPrefix(prefix);
            if (uri != null) {
                return uri;
            }
        }
        return (mRootNsContext != null) ?
            mRootNsContext.getNamespaceURI(prefix) : null;
    }

    public final String getPrefix(String uri)
    {
        if (mDefaultNsURI.equals(uri)) {
            return "";
        }
        if (mNsMapping != null) {
            String prefix = mNsMapping.findPrefixByUri(uri);
            if (prefix != null) {
                return prefix;
            }
        }
        return (mRootNsContext != null) ?
            mRootNsContext.getPrefix(uri) : null;
    }

    public final Iterator getPrefixes(String uri)
    {
        List l = null;

        if (mDefaultNsURI.equals(uri)) {
            l = new ArrayList();
            l.add("");
        }
        if (mNsMapping != null) {
            l = mNsMapping.getPrefixesBoundToUri(uri, l);
        }
        // How about the root namespace context? (if any)
        /* Note: it's quite difficult to properly resolve masking, when
         * combining these things (not impossible, just tricky); for now
         * let's do best effort without worrying about masking:
         */
        if (mRootNsContext != null) {
            Iterator it = mRootNsContext.getPrefixes(uri);
            while (it.hasNext()) {
                String prefix = (String) it.next();
                if (prefix.length() == 0) { // default NS already checked
                    continue;
                }
                // slow check... but what the heck
                if (l == null) {
                    l = new ArrayList();
                } else if (l.contains(prefix)) { // double-defined...
                    continue;
                }
                l.add(prefix);
            }
        }
        return (l == null) ? EmptyIterator.getInstance() :
            l.iterator();
    }

    /*
    ////////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////////
     */

    protected final void throwOutputError(String msg)
        throws XMLStreamException
    {
        throw new XMLStreamException(msg);
    }

    /**
     * Makes sure {@link #mNsMapping} is a mapping that is local to this
     * element, so that bindings can be added to it.
     */
    private void prepareLocalNsMapping()
    {
        if (mNsMapping == null) {
            // Didn't have a mapping yet? Need to create one...
            mNsMapping = BijectiveNsMap.createEmpty();
        } else if (mNsMapShared) {
            /* Was shared with parent(s)? Need to create a derivative, to
             * allow for nesting/scoping of new prefix
             */
            mNsMapping = mNsMapping.createChild();
            mNsMapShared = false;
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/sw/EncodingXmlWriter.java 0000644 0001750 0001750 00000067631 11745427074 024607 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.*;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.XmlConsts;
/**
* Intermediate base class used when outputting to streams that use
* an encoding that is compatible with 7-bit single-byte Ascii encoding.
* That means it can be used for UTF-8, ISO-Latin1 and pure Ascii.
*
* Implementation notes:
*
* Parts of surrogate handling are implemented here in the base class:
* storage for the first part of a split surrogate (only possible when
* character content is output split in multiple calls) is within
* base class. Also, simple checks for unmatched surrogate pairs are
* in
* TreeSet is used mostly because clearing it up is faster than
* clearing up HashSet, and the only access is done by
* adding entries and see if an value was already set.
*/
TreeSet mAttrNames;
/*
////////////////////////////////////////////////////
// Life-cycle (ctors)
////////////////////////////////////////////////////
*/
/**
 * @param xw Underlying writer, passed on to the super class
 * @param enc Encoding, passed on to the super class
 * @param cfg Configuration, passed on to the super class
 */
public NonNsStreamWriter(XmlWriter xw, String enc, WriterConfig cfg)
{
    super(xw, enc, cfg);
    // 32 is presumably the initial capacity of the element name stack
    this.mElements = new StringVector(32);
}
/*
////////////////////////////////////////////////////
// XMLStreamWriter API
////////////////////////////////////////////////////
*/
/**
 * @return Always the shared empty namespace context
 */
public NamespaceContext getNamespaceContext()
{
    return EmptyNamespaceContext.getInstance();
}
/**
 * @return Always null
 */
public String getPrefix(String uri)
{
    return null;
}
/**
 * Not supported by this writer: always reports an illegal argument.
 */
public void setDefaultNamespace(String uri)
    throws XMLStreamException
{
    final String msg = "Can not set default namespace for non-namespace writer.";
    reportIllegalArg(msg);
}
/**
 * Not supported by this writer: always reports an illegal argument.
 */
public void setNamespaceContext(NamespaceContext context)
{
    final String msg = "Can not set NamespaceContext for non-namespace writer.";
    reportIllegalArg(msg);
}
/**
 * Not supported by this writer: always reports an illegal argument.
 */
public void setPrefix(String prefix, String uri)
    throws XMLStreamException
{
    final String msg = "Can not set namespace prefix for non-namespace writer.";
    reportIllegalArg(msg);
}
/**
 * Writes an attribute for the currently open start element, after the
 * checks that are enabled (structure, attribute name uniqueness,
 * validation).
 *
 * @param localName Name of the attribute
 * @param value Value of the attribute
 */
public void writeAttribute(String localName, String value)
    throws XMLStreamException
{
    // No need to set mAnyOutput, nor close the element
    if (mCheckStructure && !mStartElementOpen) {
        reportNwfStructure(ErrorConsts.WERR_ATTR_NO_ELEM);
    }
    if (mCheckAttrs) {
        /* 11-Dec-2005, TSa: Should use a more efficient Set/Map value
         *   for this in future.
         */
        if (mAttrNames == null) {
            mAttrNames = new TreeSet();
        }
        final boolean firstTime = mAttrNames.add(localName);
        if (!firstTime) {
            reportNwfAttr("Trying to write attribute '"+localName+"' twice");
        }
    }
    /* No need to get the value normalized... even if validator does
     * normalize it, we don't use that for anything
     */
    if (mValidator != null) {
        mValidator.validateAttribute(localName, XmlConsts.ATTR_NO_NS_URI, XmlConsts.ATTR_NO_PREFIX, value);
    }

    try {
        mWriter.writeAttribute(localName, value);
    } catch (IOException ioe) {
        throwFromIOE(ioe);
    }
}
/**
 * Namespace URI is ignored; same as calling
 * {@link #writeAttribute(String,String)} with the local name and value.
 */
public void writeAttribute(String nsURI, String localName, String value)
    throws XMLStreamException
{
    this.writeAttribute(localName, value);
}
/**
 * Prefix and namespace URI are ignored; same as calling
 * {@link #writeAttribute(String,String)} with the local name and value.
 */
public void writeAttribute(String prefix, String nsURI,
                           String localName, String value)
    throws XMLStreamException
{
    this.writeAttribute(localName, value);
}
/**
 * Not supported by this writer: always reports an illegal method call.
 */
public void writeDefaultNamespace(String nsURI)
    throws XMLStreamException
{
    final String msg = "Can not call writeDefaultNamespace namespaces with non-namespace writer.";
    reportIllegalMethod(msg);
}
/**
 * Starts writing an element that is marked as being an empty element.
 *
 * @param localName Name of the element
 */
public void writeEmptyElement(String localName)
    throws XMLStreamException
{
    doWriteStartElement(localName);
    // flag is only set after the start tag has been started
    mEmptyElement = true;
}
/**
 * Namespace URI is ignored; same as calling
 * {@link #writeEmptyElement(String)} with the local name.
 */
public void writeEmptyElement(String nsURI, String localName)
    throws XMLStreamException
{
    this.writeEmptyElement(localName);
}
/**
 * Prefix and namespace URI are ignored; same as calling
 * {@link #writeEmptyElement(String)} with the local name.
 */
public void writeEmptyElement(String prefix, String localName, String nsURI)
    throws XMLStreamException
{
    this.writeEmptyElement(localName);
}
/**
 * Closes the current element, without checking its name; whether an
 * empty element may be written is determined by configuration
 * (<code>mCfgAutomaticEmptyElems</code>).
 */
public void writeEndElement()
    throws XMLStreamException
{
    final boolean allowEmpty = mCfgAutomaticEmptyElems;
    doWriteEndTag(null, allowEmpty);
}
/**
 * Not supported by this writer: always reports an illegal method call.
 */
public void writeNamespace(String prefix, String nsURI)
    throws XMLStreamException
{
    final String msg = "Can not set write namespaces with non-namespace writer.";
    reportIllegalMethod(msg);
}
/**
 * Starts writing an element that is not marked as an empty element.
 *
 * @param localName Name of the element
 */
public void writeStartElement(String localName)
    throws XMLStreamException
{
    doWriteStartElement(localName);
    // flag is only cleared after the start tag has been started
    mEmptyElement = false;
}
/**
 * Namespace URI is ignored; same as calling
 * {@link #writeStartElement(String)} with the local name.
 */
public void writeStartElement(String nsURI, String localName)
    throws XMLStreamException
{
    this.writeStartElement(localName);
}
/**
 * Prefix and namespace URI are ignored; same as calling
 * {@link #writeStartElement(String)} with the local name.
 */
public void writeStartElement(String prefix, String localName, String nsURI)
    throws XMLStreamException
{
    this.writeStartElement(localName);
}
/*
////////////////////////////////////////////////////
// Remaining XMLStreamWriter2 methods (StAX2)
////////////////////////////////////////////////////
*/
/**
 * Similar to {@link #writeEndElement}, but never allows implicit
 * creation of empty elements: a separate end tag is always written.
 */
public void writeFullEndElement()
    throws XMLStreamException
{
    final boolean allowEmpty = false;
    doWriteEndTag(null, allowEmpty);
}
/*
////////////////////////////////////////////////////
// Remaining ValidationContext methods (StAX2)
////////////////////////////////////////////////////
*/
/**
 * @return Name of the innermost open element, constructed from just
 *   the name stored in the element stack; null if no element is open
 */
public QName getCurrentElementName()
{
    return mElements.isEmpty() ? null : new QName(mElements.getLastString());
}
/**
 * @return Always null
 */
public String getNamespaceURI(String prefix)
{
    return null;
}
/*
////////////////////////////////////////////////////
// Package methods:
////////////////////////////////////////////////////
*/
/**
 * Writes the start element event, followed by all of its attributes;
 * only local parts of the element and attribute names are used.
 *
 * @param elem Event to write
 */
public void writeStartElement(StartElement elem)
    throws XMLStreamException
{
    writeStartElement(elem.getName().getLocalPart());

    for (Iterator it = elem.getAttributes(); it.hasNext(); ) {
        Attribute attr = (Attribute) it.next();
        writeAttribute(attr.getName().getLocalPart(), attr.getValue());
    }
}
/**
 * Method called by {@link javax.xml.stream.XMLEventWriter} implementation
 * (instead of the version that takes no argument), so that we can verify
 * the end element does match the start element, if necessary.
 *
 * @param name Name of the end element; its local part is only used
 *   when structure checking is enabled
 */
public void writeEndElement(QName name)
    throws XMLStreamException
{
    final String expected;
    if (mCheckStructure) {
        expected = name.getLocalPart();
    } else {
        expected = null;
    }
    doWriteEndTag(expected, mCfgAutomaticEmptyElems);
}
/**
 * Writes an attribute whose value is produced by the given encoder.
 * Prefix and namespace URI are not used by this writer.
 *
 * @param prefix Ignored
 * @param nsURI Ignored
 * @param localName Name of the attribute
 * @param enc Encoder that produces the value
 */
protected void writeTypedAttribute(String prefix, String nsURI, String localName,
                                   AsciiValueEncoder enc)
    throws XMLStreamException
{
    // note: mostly copied from the other writeAttribute() method..
    if (mCheckStructure && !mStartElementOpen) {
        reportNwfStructure(ErrorConsts.WERR_ATTR_NO_ELEM);
    }
    if (mCheckAttrs) { // doh. Not good, need to construct non-transient value...
        if (mAttrNames == null) {
            mAttrNames = new TreeSet();
        }
        final boolean firstTime = mAttrNames.add(localName);
        if (!firstTime) {
            reportNwfAttr("Trying to write attribute '"+localName+"' twice");
        }
    }

    try {
        if (mValidator != null) {
            // validator is handed over to the writer, along with a copy buffer
            mWriter.writeTypedAttribute(null, localName, null, enc, mValidator, getCopyBuffer());
        } else {
            mWriter.writeTypedAttribute(localName, enc);
        }
    } catch (IOException ioe) {
        throwFromIOE(ioe);
    }
}
/**
 * Method called to close an open start element, when another
 * main-level element (not namespace declaration or
 * attribute) is being output; except for end element which is
 * handled differently.
 *
 * @param emptyElem If true, the start tag is closed as an empty element,
 *   and the element is also removed from the element stack
 */
protected void closeStartElement(boolean emptyElem)
    throws XMLStreamException
{
    mStartElementOpen = false;
    if (mAttrNames != null) {
        mAttrNames.clear();
    }

    try {
        if (!emptyElem) {
            mWriter.writeStartTagEnd();
        } else {
            mWriter.writeStartTagEmptyEnd();
        }
    } catch (IOException ioe) {
        throwFromIOE(ioe);
    }

    final boolean validating = (mValidator != null);
    if (validating) {
        mVldContent = mValidator.validateElementAndAttributes();
    }

    if (!emptyElem) {
        return;
    }
    // Need bit more special handling for empty elements: they are
    // already fully done, so need to be popped and validated as ended
    String localName = mElements.removeLast();
    if (mElements.isEmpty()) {
        mState = STATE_EPILOG;
    }
    if (validating) {
        mVldContent = mValidator.validateElementEnd(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
    }
}
/**
 * Element copier method implementation suitable to be used with
 * non-namespace-aware writers. The only special thing here is that
 * the copier can convert namespace declarations to equivalent
 * attribute writes.
 *
 * @param elemStack Input element stack that is positioned at the
 *   element to copy
 * @param attrCollector Collector that contains attributes of the
 *   element to copy
 */
public void copyStartElement(InputElementStack elemStack,
                             AttributeCollector attrCollector)
    throws IOException, XMLStreamException
{
    String ln = elemStack.getLocalName();
    boolean nsAware = elemStack.isNamespaceAware();

    /* First, since we are not to output namespace stuff as is,
     * we just need to copy the element:
     */
    if (nsAware) { // but reader is ns-aware? Need to add prefix?
        String prefix = elemStack.getPrefix();
        if (prefix != null && prefix.length() > 0) { // yup
            ln = prefix + ":" + ln;
        }
    }
    writeStartElement(ln);

    /* However, if there are any namespace declarations, we probably
     * better output them just as 'normal' attributes:
     */
    if (nsAware) {
        int nsCount = elemStack.getCurrentNsCount();
        for (int i = 0; i < nsCount; ++i) {
            String prefix = elemStack.getLocalNsPrefix(i);
            String attrName;
            if (prefix == null || prefix.length() == 0) {
                /* default NS declaration: attribute name is "xmlns"
                 * (and not "xml", which is the pre-defined prefix)
                 */
                attrName = XMLConstants.XMLNS_ATTRIBUTE;
            } else {
                attrName = "xmlns:"+prefix;
            }
            writeAttribute(attrName, elemStack.getLocalNsURI(i));
        }
    }

    /* And then let's just output attributes, if any (whether to copy
     * implicit, aka "default" attributes, is configurable)
     */
    int attrCount = mCfgCopyDefaultAttrs ?
        attrCollector.getCount() :
        attrCollector.getSpecifiedCount();

    for (int i = 0; i < attrCount; ++i) {
        attrCollector.writeAttribute(i, mWriter);
    }
}
/**
 * @return Name of the innermost open element; "#root" if no element
 *   is open
 */
protected String getTopElementDesc()
{
    if (mElements.isEmpty()) {
        return "#root";
    }
    return mElements.getLastString();
}
/**
 * @return Prefix of the given name, as is (no validation is done)
 */
public String validateQNamePrefix(QName name)
{
    // Could either strip the prefix out, or return as is: latter is done
    final String prefix = name.getPrefix();
    return prefix;
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * Shared implementation for starting an element: closes a still-open
 * start tag if there is one, checks and updates the writer state,
 * calls validator (if any), pushes the name to the element stack and
 * writes the beginning of the start tag.
 *
 * @param localName Name of the element to start
 */
private void doWriteStartElement(String localName)
    throws XMLStreamException
{
    mAnyOutput = true;

    if (mStartElementOpen) {
        // Need to finish an open start element first
        closeStartElement(mEmptyElement);
    } else {
        switch (mState) {
        case STATE_PROLOG:
            // 20-Dec-2005, TSa: Does this match DOCTYPE declaration?
            verifyRootElement(localName, null);
            break;
        case STATE_EPILOG:
            if (mCheckStructure) {
                reportNwfStructure(ErrorConsts.WERR_PROLOG_SECOND_ROOT, localName);
            }
            // Outputting fragment? Better reset to tree, then...
            mState = STATE_TREE;
            break;
        default:
            break;
        }
    }

    /* Note: need not check for CONTENT_ALLOW_NONE here, since the
     * validator should handle this particular case...
     */
    if (mValidator != null) {
        mValidator.validateElementStart(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
    }

    mStartElementOpen = true;
    mElements.addString(localName);
    try {
        mWriter.writeStartTagStart(localName);
    } catch (IOException ioe) {
        throwFromIOE(ioe);
    }
}
/**
* Shared implementation for closing the innermost open element: either
* writes a separate end tag, or (if allowed, and the start tag is still
* open) closes the start tag as an empty element.
*
* Note: this method itself removes the element name from the element
* stack (see the call to <code>mElements.removeLast()</code>).
*
* @param expName Name that the closing element should have; null
* if whatever is in stack should be used
* @param allowEmpty If true, is allowed to create the empty element
* if the closing element was truly empty; if false, has to write
* the full end tag no matter what
*/
private void doWriteEndTag(String expName, boolean allowEmpty)
throws XMLStreamException
{
/* First of all, do we need to close up an earlier empty element?
* (open start element that was not created via call to
* writeEmptyElement gets handled later on)
*/
if (mStartElementOpen && mEmptyElement) {
mEmptyElement = false;
// note: this method guarantees proper updates to validation
closeStartElement(true);
}
// Better have something to close... (to figure out what to close)
if (mState != STATE_TREE) {
// Have to throw an exception always, don't know elem name
reportNwfStructure("No open start element, when trying to write end element");
}
/* Now, do we have an unfinished start element (created via
* writeStartElement() earlier)?
*/
// Element is popped here, before anything gets written
String localName = mElements.removeLast();
if (mCheckStructure) {
if (expName != null && !localName.equals(expName)) {
/* Only gets called when trying to output an XMLEvent... in
* which case names can actually be compared
*/
reportNwfStructure("Mismatching close element name, '"+localName+"'; expected '"+expName+"'.");
}
}
/* Can't yet validate, since we have two paths; one for empty
* elements, another for non-empty...
*/
// Got a half output start element to close?
if (mStartElementOpen) {
/* Can't/shouldn't call closeStartElement, but need to do same
* processing. Thus, this is almost identical to closeStartElement:
*/
if (mValidator != null) {
/* Note: return value is not of much use, since the
* element will be closed right away...
*/
mVldContent = mValidator.validateElementAndAttributes();
}
mStartElementOpen = false;
if (mAttrNames != null) {
mAttrNames.clear();
}
try {
// We could write an empty element, implicitly?
if (allowEmpty) {
mWriter.writeStartTagEmptyEnd();
if (mElements.isEmpty()) {
mState = STATE_EPILOG;
}
if (mValidator != null) {
mVldContent = mValidator.validateElementEnd(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
}
// Empty element written: no separate end tag is needed
return;
}
// Nah, need to close open elem, and then output close elem
mWriter.writeStartTagEnd();
} catch (IOException ioe) {
throwFromIOE(ioe);
}
}
try {
mWriter.writeEndTag(localName);
} catch (IOException ioe) {
throwFromIOE(ioe);
}
// Was that the last open element? If so, state changes to epilog
if (mElements.isEmpty()) {
mState = STATE_EPILOG;
}
// Ok, time to validate...
if (mValidator != null) {
mVldContent = mValidator.validateElementEnd(localName, XmlConsts.ELEM_NO_NS_URI, XmlConsts.ELEM_NO_PREFIX);
}
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/sw/XmlWriter.java 0000644 0001750 0001750 00000051531 11745427074 023130 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.text.MessageFormat;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.io.EscapingWriterFactory;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.InvalidCharHandler;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.api.WstxOutputProperties;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.OutputConfigFlags;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.io.WstxInputData;
/**
* This is the base class for actual physical xml outputters. These
* instances will only handle actual writing (possibly including
* encoding) of the serialized textual xml, and will in general
* not verify content being output. The exception are the
* character-by-character checks that are most efficiently done
* at encoding level (such as character escaping, and checks for
* illegal character combinations), which are handled at this
* level.
*
* Note that implementations can have different operating modes:
* specifically, when dealing with illegal content (such as "--"
* in a comment, "?>" in processing instruction, or "]]>" within
* CDATA section), implementations can do one of 3 things:
*
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*/
// Variant for elements that have no prefix; presumably writes the
// beginning of a start tag (up to and including the name) -- TODO confirm
public abstract void writeStartTagStart(String localName)
throws IOException, XMLStreamException;
/**
* Variant of {@link #writeStartTagStart(String)} that also takes the
* prefix of the element.
*
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param prefix Prefix of the element
* @param localName Local name of the element
*/
public abstract void writeStartTagStart(String prefix, String localName)
throws IOException, XMLStreamException;
// Presumably writes the end of a start tag of an element that is not
// an empty element (compare to writeStartTagEmptyEnd) -- TODO confirm
public abstract void writeStartTagEnd()
throws IOException;
// Presumably writes the end of a start tag so that the element
// becomes an empty element (compare to writeStartTagEnd) -- TODO confirm
public abstract void writeStartTagEmptyEnd()
throws IOException;
// Variant for elements that have no prefix; presumably writes the
// complete end tag for the named element -- TODO confirm
public abstract void writeEndTag(String localName)
throws IOException;
// Variant of writeEndTag(String) that also takes the prefix of the element
public abstract void writeEndTag(String prefix, String localName)
throws IOException;
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
* Writes an attribute that has no prefix, with value given as a String.
*
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param localName Name of the attribute
* @param value Value of the attribute
*/
public abstract void writeAttribute(String localName, String value)
throws IOException, XMLStreamException;
// Variant of writeAttribute(String,String) where the value is given as
// a range (offset, len) of a character array
public abstract void writeAttribute(String localName, char[] value, int offset, int len)
throws IOException, XMLStreamException;
/**
* Writes an attribute that has a prefix, with value given as a String.
*
* Note: can throw XMLStreamException, if name checking is enabled,
* and name is invalid (name check has to be in this writer, not
* caller, since it depends not only on xml limitations, but also
* on encoding limitations)
*
* @param prefix Prefix of the attribute
* @param localName Local name of the attribute
* @param value Value of the attribute
*/
public abstract void writeAttribute(String prefix, String localName, String value)
throws IOException, XMLStreamException;
public abstract void writeAttribute(String prefix, String localName, char[] value, int offset, int len)
throws IOException, XMLStreamException;
/*
////////////////////////////////////////////////////
// Write methods, Typed Access API support
////////////////////////////////////////////////////
*/
/**
 * Like {@link #writeRaw}, but caller guarantees that the contents
 * additionally are known to be in 7-bit ASCII range, and also
 * passes an encoder object that will encode values only when
 * being handed a buffer to append to.
 * This variant takes no validator argument.
 *
 * @param enc Encoder that will produce content
 */
public abstract void writeTypedElement(AsciiValueEncoder enc)
throws IOException;
/**
 * Like {@link #writeRaw}, but caller guarantees that the contents
 * additionally are known to be in 7-bit ASCII range, and also
 * passes an encoder object that will encode values only when
 * being handed a buffer to append to.
 * This variant additionally takes a validator for the serialized content.
 *
 * @param enc Encoder that will produce content
 * @param validator Validator to use for validating serialized textual
 *   content (can not be null)
 * @param copyBuffer Temporary buffer that writer can use for temporary
 *   copies as necessary
 */
public abstract void writeTypedElement(AsciiValueEncoder enc,
XMLValidator validator, char[] copyBuffer)
throws IOException, XMLStreamException;
/**
 * Method similar to {@link #writeAttribute(String,String,char[],int,int)}
 * but where the value is known not to require escaping.
 * No validation needs to be performed.
 *
 * @param localName Local name of the attribute (no prefix)
 * @param enc Encoder that will produce the attribute value
 */
public abstract void writeTypedAttribute(String localName, AsciiValueEncoder enc)
throws IOException, XMLStreamException;
/**
 * Method similar to {@link #writeAttribute(String,String,char[],int,int)}
 * but where the value is known not to require escaping.
 * No validation needs to be performed.
 *
 * @param prefix Prefix of the attribute name
 * @param localName Local name of the attribute
 * @param enc Encoder that will produce the attribute value
 */
public abstract void writeTypedAttribute(String prefix, String localName, AsciiValueEncoder enc)
throws IOException, XMLStreamException;
/**
 * Method similar to {@link #writeAttribute(String,String,char[],int,int)}
 * but where the value is known not to require escaping.
 * Validation of the attribute value must be done by calling given
 * validator appropriately.
 *
 * @param prefix Prefix of the attribute name
 * @param localName Local name of the attribute
 * @param nsURI Namespace URI of the attribute
 * @param enc Encoder that will produce the attribute value
 * @param validator Validator to call for the attribute value
 * @param copyBuffer NOTE(review): presumably a scratch buffer the writer
 *   may use for temporary copies, as in
 *   {@link #writeTypedElement(AsciiValueEncoder,XMLValidator,char[])} -- confirm
 */
public abstract void writeTypedAttribute(String prefix, String localName, String nsURI,
AsciiValueEncoder enc,
XMLValidator validator, char[] copyBuffer)
throws IOException, XMLStreamException;
/*
////////////////////////////////////////////////////
// Location information
////////////////////////////////////////////////////
*/
/**
 * Returns the output pointer value that {@link #getColumn} and
 * {@link #getAbsOffset} use when computing location information.
 * NOTE(review): presumably an offset within the current output buffer -- confirm.
 */
protected abstract int getOutputPtr();
/**
 * Returns the current value of the row counter (<code>mLocRowNr</code>).
 */
public int getRow() {
return mLocRowNr;
}
/**
 * Returns the current column: distance of the output pointer from the
 * recorded start offset of the current row, plus one.
 */
public int getColumn() {
    final int offsetInRow = getOutputPtr() - mLocRowStartOffset;
    return offsetInRow + 1;
}
/**
 * Returns the absolute offset: count of characters already accounted for
 * (<code>mLocPastChars</code>) plus the current output pointer.
 */
public int getAbsOffset() {
    final int alreadyCounted = mLocPastChars;
    final int pending = getOutputPtr();
    return alreadyCounted + pending;
}
/*
////////////////////////////////////////////////////
// Wrapper methods, semi-public
////////////////////////////////////////////////////
*/
/**
 * Helper assertion for verifying that the current element of the given
 * stream reader reports "no prefix".
 * A dedicated method makes sense, since earlier it was not clear
 * whether null or empty string (or perhaps both) would be the
 * right answer when there is no prefix.
 *<p>
 * Current thinking (early 2008) is that empty string is the
 * expected value; null and non-empty prefixes both cause a failure.
 *
 * @param sr Stream reader positioned at the element to check
 */
protected static void assertNoPrefix(XMLStreamReader sr)
    throws XMLStreamException
{
    final String elemPrefix = sr.getPrefix();
    if (elemPrefix == null) {
        fail("Expected \"\" to signify missing prefix (see XMLStreamReader#getPrefix() JavaDocs): got null");
        return;
    }
    if (elemPrefix.length() > 0) {
        fail("Current element should not have a prefix: got '"+elemPrefix+"'");
    }
}
/**
 * Helper assertion for checking that a value returned by an attribute
 * prefix accessor represents "no prefix".
 *<p>
 * Current thinking (early 2008) is that empty string is the
 * expected value here; null and non-empty values both cause a failure.
 *
 * @param attrPrefix Value returned by the attribute prefix accessor
 */
protected static void assertNoAttrPrefix(String attrPrefix)
    throws XMLStreamException
{
    if (attrPrefix == null) {
        fail("Attribute that does not have a prefix should be indicated with \"\", not null");
        return;
    }
    if (attrPrefix.length() > 0) {
        fail("Attribute should not have prefix (had '"+attrPrefix+"')");
    }
}
/**
 * Similar to {@link #assertNoPrefix}, but here we do know that unbound
 * namespace URI should be indicated as empty String: both null and a
 * non-empty URI cause a failure.
 *
 * @param sr Stream reader positioned at the element to check
 */
protected static void assertNoNsURI(XMLStreamReader sr)
    throws XMLStreamException
{
    final String nsURI = sr.getNamespaceURI();
    if (nsURI == null) {
        fail("Expected empty String to indicate \"no namespace\": got null");
        return;
    }
    if (nsURI.length() != 0) {
        fail("Expected empty String to indicate \"no namespace\": got '"+nsURI+"'");
    }
}
/**
 * Verifies that the given attribute namespace URI is the empty String
 * (that is, "no namespace"); null and non-empty values cause a failure.
 *
 * @param attrNsURI Value returned by the attribute namespace accessor
 */
protected static void assertNoAttrNamespace(String attrNsURI)
    throws XMLStreamException
{
    if (attrNsURI == null) {
        fail("Expected empty String to indicate \"no namespace\" (for attribute): got null");
        return;
    }
    if (attrNsURI.length() != 0) {
        fail("Expected empty String to indicate \"no namespace\" (for attribute): got '"+attrNsURI+"'");
    }
}
/**
 * Convenience assertion that checks both that the current element has
 * no prefix and that it has no namespace URI.
 *
 * @param sr Stream reader positioned at the element to check
 */
protected static void assertNoPrefixOrNs(XMLStreamReader sr)
throws XMLStreamException
{
// prefix is checked first, then the namespace URI
assertNoPrefix(sr);
assertNoNsURI(sr);
}
/**
 * Helper assertion that passes only if the given String is null or has
 * length zero; otherwise fails with a message showing the value and
 * its length.
 */
protected static void assertNullOrEmpty(String str)
{
    if (str == null || str.length() == 0) {
        return;
    }
    fail("Expected String to be empty or null; was '"+str+"' (length "
         +str.length()+")");
}
/*
//////////////////////////////////////////////////
// Debug/output helpers
//////////////////////////////////////////////////
*/
/**
 * Returns description registered for the given event type in
 * <code>mTokenTypes</code>, or the type code in square brackets if
 * no description is found.
 */
protected static String tokenTypeDesc(int tt)
{
    final String known = (String) mTokenTypes.get(new Integer(tt));
    return (known != null) ? known : ("["+tt+"]");
}
/**
 * Returns description of the event type of the given event object.
 */
protected static String tokenTypeDesc(XMLEvent evt)
{
    final int eventType = evt.getEventType();
    return tokenTypeDesc(eventType);
}
final static int MAX_DESC_TEXT_CHARS = 8;
/**
 * Returns description of the given event type; for textual events
 * (CHARACTERS, SPACE, CDATA) the description is followed by a quoted
 * snippet of the text the reader currently points to, truncated to
 * <code>MAX_DESC_TEXT_CHARS</code> characters.
 *<p>
 * Earlier version quoted the type description (not the text) for short
 * text, and dropped the type name altogether for textual events.
 *
 * @param tt Event type to describe
 * @param sr Stream reader positioned at the event; only accessed for
 *   textual event types
 *
 * @return Type description, with text snippet appended for textual events
 */
protected static String tokenTypeDesc(int tt, XMLStreamReader sr)
{
    String desc = tokenTypeDesc(tt);
    if (tt == CHARACTERS || tt == SPACE || tt == CDATA) {
        String str = sr.getText();
        String snippet;
        if (str == null) {
            // should not happen for textual events, but a debug helper
            // must not mask the actual failure with an NPE
            snippet = "[NULL]";
        } else if (str.length() > MAX_DESC_TEXT_CHARS) {
            // Let's show first 8 chars or so...
            snippet = "\""+str.substring(0, MAX_DESC_TEXT_CHARS) + "\"[...]";
        } else {
            snippet = "\"" + str + "\"";
        }
        desc = desc + " ("+snippet+")";
    }
    return desc;
}
/**
 * Returns printable description of a value: the value in double quotes,
 * or marker "[NULL]" for null.
 */
protected static String valueDesc(String value)
{
    return (value != null) ? ("\"" + value + "\"") : "[NULL]";
}
/**
 * Returns printable replacement for the given character, or null if the
 * character can be displayed as is. Linefeed, carriage return and tab
 * get backslash escapes, space is shown as underscore, and other
 * characters below 32 or above 127 are shown as a backslash-u escape
 * with at least 4 hex digits.
 */
protected static String printable(char ch)
{
    switch (ch) {
    case '\n':
        return "\\n";
    case '\r':
        return "\\r";
    case '\t':
        return "\\t";
    case ' ':
        return "_";
    default:
        break;
    }
    if (ch >= 32 && ch <= 127) { // displayable as is
        return null;
    }
    String digits = Integer.toHexString((int) ch);
    StringBuffer escaped = new StringBuffer(6);
    escaped.append("\\u");
    // left-pad with zeroes to 4 digits
    for (int missing = 4 - digits.length(); missing > 0; --missing) {
        escaped.append('0');
    }
    escaped.append(digits);
    return escaped.toString();
}
/**
 * Returns copy of the given String in which each character that has a
 * replacement according to {@link #printable(char)} is replaced by it.
 * Null and empty Strings are returned as is.
 */
protected static String printable(String str)
{
    if (str == null) {
        return str;
    }
    final int total = str.length();
    if (total == 0) {
        return str;
    }
    final StringBuffer out = new StringBuffer(total + 64);
    final char[] chars = str.toCharArray();
    for (int ix = 0; ix < total; ++ix) {
        final String replacement = printable(chars[ix]);
        if (replacement != null) {
            out.append(replacement);
        } else {
            out.append(chars[ix]);
        }
    }
    return out.toString();
}
/**
 * Returns the printable form of the given String in single quotes,
 * preceded by its length; null and empty Strings both yield "[0]''".
 */
protected static String quotedPrintable(String str)
{
    final boolean hasContent = (str != null) && (str.length() > 0);
    if (hasContent) {
        return "[len: "+str.length()+"] '"+printable(str)+"'";
    }
    return "[0]''";
}
/**
 * Prints a note to standard error telling that the given test method of
 * this class is skipped because the given property is not supported.
 *
 * @param method Name of the skipped test method
 * @param prop Name of the unsupported property
 */
protected void reportNADueToProperty(String method, String prop)
{
    /* 27-Sep-2005, TSa: Should probably use some other mechanism for
     *   reporting this. Does JUnit have something applicable?
     */
    final String testId = getClass().getName() + "#" + method;
    System.err.println("Skipping "+testId+": property '"
                       +prop+"' (or one of its values) not supported.");
}
/**
 * Reports that the given test method is skipped due to property
 * "IS_NAMESPACE_AWARE" not being supported.
 */
protected void reportNADueToNS(String method)
{
    final String property = "IS_NAMESPACE_AWARE";
    reportNADueToProperty(method, property);
}
/**
 * Reports that the given test method is skipped due to property
 * "IS_SUPPORTING_EXTERNAL_ENTITIES" not being supported.
 */
protected void reportNADueToExtEnt(String method)
{
    final String property = "IS_SUPPORTING_EXTERNAL_ENTITIES";
    reportNADueToProperty(method, property);
}
/**
 * Prints a note to standard error telling that the given test method is
 * skipped because entity expansion does not seem to work.
 *
 * @param method Name of the skipped test method
 * @param type Type of the next event, included in the note if positive
 */
protected void reportNADueToEntityExpansion(String method, int type)
{
    final String clsName = getClass().getName();
    String suffix = "";
    if (type > 0) {
        suffix = " (next event: "+tokenTypeDesc(type)+")";
    }
    System.err.println("Skipping "+clsName+"#"+method+": entity expansion does not seem to be functioning properly"+suffix+".");
}
/**
 * Prints given warning message to standard error, with prefix "WARN: ".
 */
protected void warn(String msg)
{
    // Hmmh. Should we add a dependency to log4j or j.u.l?
    // For now the console will have to do.
    final String line = "WARN: "+msg;
    System.err.println(line);
}
}
woodstox-4.1.3/src/test/org/codehaus/stax/test/stream/ 0000755 0001750 0001750 00000000000 11756143457 023211 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/codehaus/stax/test/stream/TestProperties.java 0000644 0001750 0001750 00000003335 11745427075 027053 0 ustar giovanni giovanni package org.codehaus.stax.test.stream;
import javax.xml.stream.*;
/**
* Unit tests that verify handling of XMLInputFactory properties.
* This includes:
*
* Note: this is directly based on XMLTest/SAXTest #735.
*/
public void testInvalidNestedCData()
throws XMLStreamException
{
String XML = "
* Note: should we test Ascii or ISO-Latin, or only UTF-8 (since that's
* the only encoding XML parsers HAVE to understand)? Most parsers handle
* them all. Also; is sub-optimal behaviour (blocking too early) really
* a bug, or just sub-standard implementation?
*/
public class TestStreaming
extends BaseStreamTest
{
/**
 * Constructs the test with the given name, which is passed on to the
 * superclass constructor.
 */
public TestStreaming(String name) {
super(name);
}
/** Runs the blocking check using encoding "US-ASCII". */
public void testAscii()
    throws XMLStreamException, UnsupportedEncodingException
{
    final String encoding = "US-ASCII";
    testWith(encoding);
}
/** Runs the blocking check using encoding "ISO-8859-1". */
public void testISOLatin()
    throws XMLStreamException, UnsupportedEncodingException
{
    final String encoding = "ISO-8859-1";
    testWith(encoding);
}
/** Runs the blocking check using encoding "UTF-8". */
public void testUTF8()
    throws XMLStreamException, UnsupportedEncodingException
{
    final String encoding = "UTF-8";
    testWith(encoding);
}
/*
////////////////////////////////////////
// Private methods, tests
////////////////////////////////////////
*/
/**
 * Reads the first event from a reader constructed on a blocking stream
 * that uses given encoding, and verifies both that the event is
 * START_ELEMENT and that the stream has not reported blocking by then.
 *
 * @param enc Name of the encoding to use for the stream
 */
private void testWith(String enc)
    throws XMLStreamException, UnsupportedEncodingException
{
    final BlockingStream input = getStream(enc);
    final XMLStreamReader reader = getReader(input);
    final int firstEvent = reader.next();
    assertTokenType(START_ELEMENT, firstEvent);
    if (!input.hasBlocked()) {
        return;
    }
    fail("Stream reader causes blocking before returning START_ELEMENT event that should be parsed before blocking");
}
/*
////////////////////////////////////////
// Private methods, other
////////////////////////////////////////
*/
private BlockingStream getStream(String enc)
throws XMLStreamException, UnsupportedEncodingException
{
String contents = "
* One thing to note, though, is that the StAX specs do not tell much
* anything about expected ways that the implementation is to deal with
* problems resulting from filtering END_DOCUMENT event and so forth.
*
* @author Tatu Saloranta
*/
public class TestFilteredReader
extends BaseStreamTest
{
/**
* Simplest possible test: let's only check that we can actually
* construct an instance with dummy filter that accepts everything,
* and that we can traverse through all the events as usual.
*/
public void testCreation()
throws XMLStreamException
{
XMLStreamReader sr = createFilteredReader(new MyFilter(), "
* Note: Not made 'final static', so that compiler won't inline
* it. Makes possible to do partial re-compilations.
* Note: Since it's only used as the default value, sub-classes
* can separately turn it off as necessary
*/
//protected static boolean DEF_PRINT_EXP_EXCEPTION = true;
protected static boolean DEF_PRINT_EXP_EXCEPTION = false;
/**
 * Per-instance flag checked by the <code>streamThroughFailing</code>
 * methods: when true, expected failures are printed to standard output.
 * Initialized from {@link #DEF_PRINT_EXP_EXCEPTION}.
 */
protected boolean PRINT_EXP_EXCEPTION = DEF_PRINT_EXP_EXCEPTION;
/** Default constructor; only calls the superclass constructor. */
protected BaseStreamTest() { super(); }
/** Constructor that passes the given name on to the superclass constructor. */
protected BaseStreamTest(String name) { super(name); }
/*
//////////////////////////////////////////////////
// Higher-level test methods
//////////////////////////////////////////////////
*/
/**
 * Method that will iterate through contents of an XML document
 * using specified stream reader; will also access some of data
 * to make sure reader reads most of lazy-loadable data.
 * Method is usually called to try to get an exception for invalid
 * content.
 *<p>
 * If a RuntimeException is thrown during traversal and an
 * XMLStreamException is found in its cause chain, that
 * XMLStreamException is thrown instead.
 *
 * @param sr Stream reader to traverse; must not be null
 *
 * @return Dummy value calculated on contents; used to make sure
 *   no dead code is eliminated
 */
protected int streamThrough(XMLStreamReader sr)
    throws XMLStreamException
{
    int checksum = 0;

    assertNotNull(sr);
    try {
        while (sr.hasNext()) {
            checksum += sr.next();
            if (sr.hasText()) {
                /* also does basic verification of text content, to
                 * see that all text accessors return the same content
                 */
                checksum += getAndVerifyText(sr).hashCode();
            }
            if (sr.hasName()) {
                QName name = sr.getName();
                assertNotNull(name);
                checksum += name.hashCode();
            }
        }
    } catch (RuntimeException rex) {
        // Is there a nested XMLStreamException? If so, throw that one
        for (Throwable cause = rex.getCause(); cause != null; cause = cause.getCause()) {
            if (cause instanceof XMLStreamException) {
                throw (XMLStreamException) cause;
            }
        }
        // Nope, just a runtime exception
        throw rex;
    }
    return checksum;
}
/**
 * Constructs a stream reader for given contents, streams through it, and
 * expects that to fail with an XMLStreamException (thrown directly, or
 * as the direct cause of another exception). Any other outcome is
 * reported as a test failure.
 *
 * @param f Factory to construct the reader with
 * @param contents Document contents to parse
 * @param msg Description of the expected problem, used in messages
 *
 * @return 0 when the expected failure occurred
 */
protected int streamThroughFailing(XMLInputFactory f, String contents,
                                   String msg)
{
    int checksum = 0;

    try {
        checksum = streamThrough(constructStreamReader(f, contents));
    } catch (XMLStreamException xse) { // good
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+xse.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (Exception other) { // may still be ok:
        if (!(other.getCause() instanceof XMLStreamException)) {
            fail("Expected an XMLStreamException (either direct, or getCause() of a primary exception) for "+msg
                 +", got: "+other);
        } else {
            if (PRINT_EXP_EXCEPTION) {
                System.out.println("Expected failure: '"+other.getMessage()+"' "
                                   +"(matching message: '"+msg+"')");
            }
            return 0;
        }
    }

    fail("Expected an exception for "+msg);
    return checksum; // never gets here
}
/**
 * Streams through given reader, expecting that to fail with an
 * XMLStreamException (thrown directly, or found by following the cause
 * chain of another exception). Any other outcome is reported as a test
 * failure.
 *
 * @param sr Stream reader to traverse
 * @param msg Description of the expected problem, used in messages
 *
 * @return 0 when the expected failure occurred
 */
protected int streamThroughFailing(XMLStreamReader sr, String msg)
{
    int checksum = 0;

    try {
        checksum = streamThrough(sr);
    } catch (XMLStreamException xse) { // good
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+xse.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (Exception other) { // ok; iff links to XMLStreamException
        // follow causes until an XMLStreamException or end of chain
        Throwable root = other;
        while (!(root instanceof XMLStreamException)) {
            Throwable next = root.getCause();
            if (next == null) {
                break;
            }
            root = next;
        }
        if (root instanceof XMLStreamException) {
            if (PRINT_EXP_EXCEPTION) {
                System.out.println("Expected failure: '"+other.getMessage()+"' "
                                   +"(matching message: '"+msg+"')");
            }
            return 0;
        }
        if (root == other) {
            fail("Expected an XMLStreamException (either direct, or getCause() of a primary exception) for "+msg
                 +", got: "+other);
        }
        fail("Expected an XMLStreamException (either direct, or getCause() of a primary exception) for "+msg
             +", got: "+other+" (root: "+root+")");
    }

    fail("Expected an exception for "+msg);
    return checksum; // never gets here
}
}
woodstox-4.1.3/src/test/org/codehaus/stax/test/stream/TestTextCoalescing.java 0000644 0001750 0001750 00000012356 11745427075 027636 0 ustar giovanni giovanni package org.codehaus.stax.test.stream;
import javax.xml.stream.*;
/**
* Unit test suite that tests that the stream reader does in fact
* coalesce adjacent text/CDATA segments when told to do so.
*/
public class TestTextCoalescing
extends BaseStreamTest
{
final static String VALID_XML = "
* One thing to note, though, is that the StAX specs do not tell much
* anything about expected ways that the implementation is to deal with
* problems resulting from filtering END_DOCUMENT event and so forth.
*
* @author Tatu Saloranta
*/
public class TestFilteredReader
extends BaseEventTest
{
/**
* Simplest possible test: let's only check that we can actually
* construct an instance with dummy filter that accepts everything,
* and that we can traverse through all the events as usual.
*/
public void testCreation()
throws XMLStreamException
{
XMLEventReader er = createFilteredReader(new MyFilter(), "
* Note: Not made 'final static', so that compiler won't inline
* it. Makes possible to do partial re-compilations.
* Note: Since it's only used as the default value, sub-classes
* can separately turn it off as necessary
*/
//protected static boolean DEF_PRINT_EXP_EXCEPTION = true;
protected static boolean DEF_PRINT_EXP_EXCEPTION = false;
/**
 * Per-instance flag checked by the <code>streamThroughFailing</code>
 * methods: when true, expected failures are printed to standard output.
 * Initialized from {@link #DEF_PRINT_EXP_EXCEPTION}.
 */
protected boolean PRINT_EXP_EXCEPTION = DEF_PRINT_EXP_EXCEPTION;
/*
///////////////////////////////////////////////////
// Lazy-loaded thingies
///////////////////////////////////////////////////
*/
/** Input factory; created on first call to {@link #getInputFactory}. */
XMLInputFactory2 mInputFactory = null;
/** Output factory; created on first call to {@link #getOutputFactory}. */
XMLOutputFactory2 mOutputFactory = null;
/** Event factory; created on first call to {@link #getEventFactory}. */
XMLEventFactory2 mEventFactory = null;
/*
//////////////////////////////////////////////////
// Factory methods
//////////////////////////////////////////////////
*/
/**
 * Returns the input factory of this test instance, creating it on first
 * call. Before creation, system property
 * "javax.xml.stream.XMLInputFactory" is set to the Woodstox
 * implementation class name.
 */
protected XMLInputFactory2 getInputFactory()
{
    if (mInputFactory != null) {
        return mInputFactory;
    }
    /* 29-Nov-2004, TSa: Better ensure we get the right
     *   implementation...
     */
    System.setProperty("javax.xml.stream.XMLInputFactory",
                       "com.ctc.wstx.stax.WstxInputFactory");
    mInputFactory = getNewInputFactory();
    return mInputFactory;
}
/**
 * Returns the event factory of this test instance, creating it on first
 * call. Before creation, system property
 * "javax.xml.stream.XMLEventFactory" is set to the Woodstox
 * implementation class name.
 */
protected XMLEventFactory2 getEventFactory()
{
    if (mEventFactory != null) {
        return mEventFactory;
    }
    System.setProperty("javax.xml.stream.XMLEventFactory",
                       "com.ctc.wstx.stax.WstxEventFactory");
    mEventFactory = (XMLEventFactory2) XMLEventFactory.newInstance();
    return mEventFactory;
}
/**
 * Returns the factory from {@link #getInputFactory}, cast to the
 * Woodstox implementation type.
 */
protected WstxInputFactory getWstxInputFactory() {
    final XMLInputFactory2 shared = getInputFactory();
    return (WstxInputFactory) shared;
}
/**
 * Creates a new input factory via <code>XMLInputFactory.newInstance()</code>
 * and returns it cast to the StAX2 factory type.
 */
protected static XMLInputFactory2 getNewInputFactory()
{
    final XMLInputFactory created = XMLInputFactory.newInstance();
    return (XMLInputFactory2) created;
}
/**
 * Returns the output factory of this test instance, creating it on first
 * call. Before creation, system property
 * "javax.xml.stream.XMLOutputFactory" is set to the Woodstox
 * implementation class name.
 */
protected XMLOutputFactory2 getOutputFactory()
{
    if (mOutputFactory != null) {
        return mOutputFactory;
    }
    System.setProperty("javax.xml.stream.XMLOutputFactory",
                       "com.ctc.wstx.stax.WstxOutputFactory");
    mOutputFactory = getNewOutputFactory();
    return mOutputFactory;
}
/**
 * Returns the factory from {@link #getOutputFactory}, cast to the
 * Woodstox implementation type.
 */
protected WstxOutputFactory getWstxOutputFactory() {
    final XMLOutputFactory2 shared = getOutputFactory();
    return (WstxOutputFactory) shared;
}
/**
 * Creates a new output factory via <code>XMLOutputFactory.newInstance()</code>
 * and returns it cast to the StAX2 factory type.
 */
protected static XMLOutputFactory2 getNewOutputFactory()
{
    final XMLOutputFactory created = XMLOutputFactory.newInstance();
    return (XMLOutputFactory2) created;
}
/**
 * Creates a stream reader for the given document content, using the
 * given factory as is.
 *
 * @param f Factory to create the reader with
 * @param content Document to read
 */
protected static XMLStreamReader2 constructStreamReader(XMLInputFactory f, String content)
    throws XMLStreamException
{
    final StringReader source = new StringReader(content);
    final XMLStreamReader reader = f.createXMLStreamReader(source);
    return (XMLStreamReader2) reader;
}
/**
 * Creates a stream reader for the given file, using the URL of the file
 * as the system id, and verifies that the reader initially points to
 * START_DOCUMENT.
 *<p>
 * The system id is built via <code>File.toURI()</code>, since the
 * deprecated <code>File.toURL()</code> does not escape characters that
 * are illegal in URLs (such as spaces in file names).
 * If the reader can not be constructed or verified, the underlying file
 * reader is closed before the problem is propagated.
 *
 * @param f Factory to create the reader with
 * @param filename Name of the file to read
 *
 * @throws IOException if the file can not be opened
 * @throws XMLStreamException if the reader can not be constructed
 */
protected static XMLStreamReader2 constructStreamReaderForFile(XMLInputFactory f, String filename)
    throws IOException, XMLStreamException
{
    File inf = new File(filename);
    String systemId = inf.toURI().toURL().toString();
    FileReader in = new FileReader(inf);
    boolean ok = false;
    try {
        XMLStreamReader sr = f.createXMLStreamReader(systemId, in);
        // expected value goes first, so failure messages read correctly
        assertEquals(START_DOCUMENT, sr.getEventType());
        XMLStreamReader2 result = (XMLStreamReader2) sr;
        ok = true;
        return result;
    } finally {
        if (!ok) {
            try {
                in.close();
            } catch (IOException ignored) {
                // the original problem is more interesting than a failed close
            }
        }
    }
}
/**
 * Creates an event reader for the given document content, using the
 * given factory as is.
 *
 * @param f Factory to create the reader with
 * @param content Document to read
 */
protected static XMLEventReader2 constructEventReader(XMLInputFactory f, String content)
    throws XMLStreamException
{
    final StringReader source = new StringReader(content);
    final XMLEventReader reader = f.createXMLEventReader(source);
    return (XMLEventReader2) reader;
}
/**
 * Creates a stream reader for the given document content, after enabling
 * namespace awareness on the shared input factory and setting coalescing
 * as specified.
 *
 * @param content Document to read
 * @param coal Value to pass to <code>setCoalescing</code>
 */
protected XMLStreamReader2 constructNsStreamReader(String content, boolean coal)
    throws XMLStreamException
{
    final XMLInputFactory factory = getInputFactory();
    setNamespaceAware(factory, true);
    setCoalescing(factory, coal);
    final StringReader source = new StringReader(content);
    return (XMLStreamReader2) factory.createXMLStreamReader(source);
}
/**
 * Creates a stream reader for the given input stream, after enabling
 * namespace awareness on the shared input factory and setting coalescing
 * as specified.
 *
 * @param in Stream to read the document from
 * @param coal Value to pass to <code>setCoalescing</code>
 */
protected XMLStreamReader2 constructNsStreamReader(InputStream in, boolean coal)
    throws XMLStreamException
{
    final XMLInputFactory factory = getInputFactory();
    setNamespaceAware(factory, true);
    setCoalescing(factory, coal);
    final XMLStreamReader reader = factory.createXMLStreamReader(in);
    return (XMLStreamReader2) reader;
}
/**
 * Creates a stream reader for the given document content, after disabling
 * namespace awareness on the shared input factory and setting coalescing
 * as specified.
 *
 * @param content Document to read
 * @param coal Value to pass to <code>setCoalescing</code>
 */
protected XMLStreamReader2 constructNonNsStreamReader(String content, boolean coal)
    throws XMLStreamException
{
    final XMLInputFactory factory = getInputFactory();
    setNamespaceAware(factory, false);
    setCoalescing(factory, coal);
    final StringReader source = new StringReader(content);
    return (XMLStreamReader2) factory.createXMLStreamReader(source);
}
/*
//////////////////////////////////////////////////
// Configuring input factory
//////////////////////////////////////////////////
*/
/**
 * Sets property <code>XMLInputFactory2.P_LAZY_PARSING</code> of the
 * given factory to the given state.
 */
protected static void setLazyParsing(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean flag = Boolean.valueOf(state);
    f.setProperty(XMLInputFactory2.P_LAZY_PARSING, flag);
}
/**
 * Sets property <code>WstxInputProperties.P_MIN_TEXT_SEGMENT</code> of
 * the given factory to the given length.
 */
protected static void setMinTextSegment(XMLInputFactory f, int len)
    throws XMLStreamException
{
    final Integer minLength = new Integer(len);
    f.setProperty(WstxInputProperties.P_MIN_TEXT_SEGMENT, minLength);
}
/*
//////////////////////////////////////////////////
// Configuring output factory
//////////////////////////////////////////////////
*/
/**
 * Sets property <code>XMLOutputFactory.IS_REPAIRING_NAMESPACES</code>
 * of the given factory to the given state.
 */
protected static void setRepairing(XMLOutputFactory f, boolean state)
{
    final Boolean flag = state ? Boolean.TRUE : Boolean.FALSE;
    f.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, flag);
}
/**
 * Sets property <code>WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE</code>
 * of the given factory to the given state.
 */
protected static void setValidateStructure(XMLOutputFactory f, boolean state)
{
    final Boolean flag = state ? Boolean.TRUE : Boolean.FALSE;
    f.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, flag);
}
/**
 * Sets property <code>WstxOutputProperties.P_OUTPUT_VALIDATE_CONTENT</code>
 * of the given factory to the given state.
 */
protected static void setValidateContent(XMLOutputFactory f, boolean state)
{
    final Boolean flag = state ? Boolean.TRUE : Boolean.FALSE;
    f.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_CONTENT, flag);
}
/**
 * Sets property <code>WstxOutputProperties.P_OUTPUT_VALIDATE_NAMES</code>
 * of the given factory to the given state.
 */
protected static void setValidateNames(XMLOutputFactory f, boolean state)
{
    final Boolean flag = state ? Boolean.TRUE : Boolean.FALSE;
    f.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_NAMES, flag);
}
/**
 * Sets structure, content and name validation of the given factory
 * (in that order) to the given state.
 */
protected static void setValidateAll(XMLOutputFactory f, boolean state)
{
    setValidateStructure(f, state);
    setValidateContent(f, state);
    setValidateNames(f, state);
}
/**
 * Sets property <code>WstxOutputProperties.P_OUTPUT_FIX_CONTENT</code>
 * of the given factory to the given state.
 */
protected static void setFixContent(XMLOutputFactory f, boolean state)
{
    final Boolean flag = state ? Boolean.TRUE : Boolean.FALSE;
    f.setProperty(WstxOutputProperties.P_OUTPUT_FIX_CONTENT, flag);
}
/*
//////////////////////////////////////////////////
// Higher-level test methods
//////////////////////////////////////////////////
*/
/**
 * Method that will iterate through contents of an XML document
 * using specified stream reader; will also access some of data
 * to make sure reader reads most of lazy-loadable data.
 * Method is usually called to try to get an exception for invalid
 * content.
 *
 * @param sr Stream reader to traverse
 *
 * @return Dummy value calculated on contents; used to make sure
 *   no dead code is eliminated
 */
protected int streamThrough(XMLStreamReader sr)
    throws XMLStreamException
{
    int checksum = 0;

    while (sr.hasNext()) {
        checksum += sr.next();
        if (sr.hasText()) {
            /* also does basic verification of text content, to
             * see that all text accessors return the same content
             */
            final String text = getAndVerifyText(sr);
            checksum += text.hashCode();
        }
        if (sr.hasName()) {
            checksum += sr.getName().hashCode();
        }
    }
    return checksum;
}
/**
 * Constructs a stream reader for given contents, streams through it, and
 * expects that to fail with an XMLStreamException or a RuntimeException.
 * Any other outcome (other throwable, or no problem at all) is reported
 * as a test failure.
 *
 * @param f Factory to construct the reader with
 * @param contents Document contents to parse
 * @param msg Description of the expected problem, used in messages
 *
 * @return 0 when the expected failure occurred
 */
protected int streamThroughFailing(XMLInputFactory f, String contents,
                                   String msg)
    throws XMLStreamException
{
    int checksum = 0;

    try {
        checksum = streamThrough(constructStreamReader(f, contents));
    } catch (XMLStreamException xse) { // good
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+xse.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (RuntimeException rex) { // ok
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+rex.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (Throwable unexpected) { // not so good
        fail("Expected an XMLStreamException or RuntimeException for "+msg
             +", got: "+unexpected);
    }

    fail("Expected an exception for "+msg);
    return checksum; // never gets here
}
/**
 * Streams through given reader, expecting that to fail with an
 * XMLStreamException or a RuntimeException. Any other outcome (other
 * throwable, or no problem at all) is reported as a test failure.
 *
 * @param sr Stream reader to traverse
 * @param msg Description of the expected problem, used in messages
 *
 * @return 0 when the expected failure occurred
 */
protected int streamThroughFailing(XMLStreamReader sr, String msg)
    throws XMLStreamException
{
    int checksum = 0;

    try {
        checksum = streamThrough(sr);
    } catch (XMLStreamException xse) { // good
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+xse.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (RuntimeException rex) { // ok
        if (PRINT_EXP_EXCEPTION) {
            System.out.println("Expected failure: '"+rex.getMessage()+"' "
                               +"(matching message: '"+msg+"')");
        }
        return 0;
    } catch (Throwable unexpected) { // not so good
        fail("Expected an XMLStreamException or RuntimeException for "+msg
             +", got: "+unexpected);
    }

    fail("Expected an exception for "+msg);
    return checksum; // never gets here
}
/*
//////////////////////////////////////////////////
// Assertions
//////////////////////////////////////////////////
*/
/**
 * Returns description registered for the given event type in
 * <code>mTokenTypes</code>, or the type code in square brackets if
 * no description is found; never returns null.
 */
protected static String tokenTypeDesc(int tt)
{
    final String known = (String) mTokenTypes.get(new Integer(tt));
    if (known != null) {
        return known;
    }
    return "["+tt+"]";
}
/**
 * Verifies that the actual event type equals the expected one; if not,
 * fails with a message that contains descriptions of both types.
 *<p>
 * Note: <code>tokenTypeDesc(int)</code> always returns a non-null
 * description (falling back to the bracketed type code), so no
 * null handling is needed here.
 *
 * @param expType Expected event type
 * @param actType Event type actually encountered
 */
protected static void assertTokenType(int expType, int actType)
{
    if (expType == actType) {
        return;
    }
    fail("Expected token "+tokenTypeDesc(expType)+"; got "+tokenTypeDesc(actType)+".");
}
/**
 * Helper assertion that passes only if the given String is null or has
 * length zero; otherwise fails with a message showing the value and
 * its length.
 */
protected static void assertNullOrEmpty(String str)
{
    if (str == null || str.length() == 0) {
        return;
    }
    fail("Expected String to be empty or null; was '"+str+"' (length "
         +str.length()+")");
}
/**
 * Helper assertion that fails if the given String is null or empty;
 * the failure message tells which of the two it was.
 */
protected static void assertNotNullOrEmpty(String str)
{
    if (str != null && str.length() != 0) {
        return;
    }
    final String shown = (str == null) ? "NULL" : "\"\"";
    fail("Expected String to be non-empty; got "
         +shown);
}
/**
 * Method that can be used to verify that the current element
 * pointed to by the stream reader has no prefix; that is, that the
 * prefix reported equals <code>XmlConsts.ELEM_NO_PREFIX</code>.
 *<p>
 * Comparison is done by value (null-safe) and not by reference: a
 * reader that returns an equal but distinct String instance reports
 * "no prefix" just as well, and an identity comparison would fail with
 * a message that shows two identical values.
 *
 * @param sr Stream reader positioned at the element to check
 */
protected static void assertNoElemPrefix(XMLStreamReader sr)
    throws XMLStreamException
{
    final String expected = XmlConsts.ELEM_NO_PREFIX;
    final String prefix = sr.getPrefix();
    final boolean matches = (expected == null) ? (prefix == null) : expected.equals(prefix);
    if (!matches) {
        fail("Element that does not have a prefix should be indicated with <"+XmlConsts.ELEM_NO_PREFIX+">, not <"+prefix+">");
    }
}
/**
 * Helper method for ensuring that the given return value for
 * attribute prefix accessor has returned a value that
 * represents "no prefix" value; that is, a value that equals
 * <code>XmlConsts.ATTR_NO_PREFIX</code>.
 *<p>
 * Current thinking (early 2008) is that empty string is the
 * expected value here.
 *<p>
 * Comparison is done by value (null-safe) and not by reference, so that
 * an equal but distinct String instance is accepted.
 *
 * @param attrPrefix Value returned by the attribute prefix accessor
 */
protected static void assertNoAttrPrefix(String attrPrefix)
    throws XMLStreamException
{
    final String expected = XmlConsts.ATTR_NO_PREFIX;
    final boolean matches = (expected == null) ? (attrPrefix == null) : expected.equals(attrPrefix);
    if (!matches) {
        fail("Attribute that does not have a prefix should be indicated with <"+XmlConsts.ATTR_NO_PREFIX+">, not <"+attrPrefix+">");
    }
}
/**
 * Method that can be used to verify that the current element
 * pointed to by the stream reader does not belong to a namespace.
 * "No namespace" must be indicated by the empty String: both null and
 * a non-empty URI cause a failure.
 *
 * @param sr Stream reader positioned at the element to check
 */
protected static void assertElemNotInNamespace(XMLStreamReader sr)
    throws XMLStreamException
{
    String uri = sr.getNamespaceURI();
    if (uri == null) {
        fail("Expected empty String to indicate \"no namespace\": got null");
    } else if (uri.length() != 0) {
        // message must not claim null is expected: null is rejected above
        fail("Expected empty String to indicate \"no namespace\": got '"+uri+"'");
    }
}
/**
 * Verifies that the given attribute namespace URI is the empty String
 * (that is, "no namespace"); null and non-empty values cause a failure.
 *
 * @param attrNsURI Value returned by the attribute namespace accessor
 */
protected static void assertNoAttrNamespace(String attrNsURI)
    throws XMLStreamException
{
    if (attrNsURI == null) {
        fail("Expected empty String to indicate \"no namespace\" (for attribute): got null");
        return;
    }
    if (attrNsURI.length() != 0) {
        fail("Expected empty String to indicate \"no namespace\" (for attribute): got '"+attrNsURI+"'");
    }
}
/**
 * Fails with a message that consists of the given prefix followed by
 * quoted printable forms of the expected and actual Strings.
 *
 * @param msg Start of the failure message
 * @param exp Expected value
 * @param act Actual value
 */
protected static void failStrings(String msg, String exp, String act)
{
    // !!! TODO: Indicate position where Strings differ
    final String expDesc = quotedPrintable(exp);
    final String actDesc = quotedPrintable(act);
    fail(msg+": expected "+expDesc+", got "
         +actDesc);
}
/**
 * Method that not only gets currently available text from the
 * reader, but also checks that it is consistently accessible using
 * different (basic) StAX methods: <code>getText()</code> must match
 * both <code>getTextLength()</code> and the section of
 * <code>getTextCharacters()</code> starting at <code>getTextStart()</code>.
 * For DTD and ENTITY_REFERENCE events only <code>getText()</code>
 * is called.
 *
 * @param sr Stream reader positioned at an event that has text
 *
 * @return Text of the current event, as returned by <code>getText()</code>
 */
protected static String getAndVerifyText(XMLStreamReader sr)
    throws XMLStreamException
{
    final int eventType = sr.getEventType();

    /* 05-Apr-2006, TSa: Although getText() is available for DTD
     *   and ENTITY_REFERENCE, getTextXxx() are not. Thus, can not
     *   do more checks for those types.
     */
    if (eventType == DTD || eventType == ENTITY_REFERENCE) {
        return sr.getText();
    }

    final int declaredLen = sr.getTextLength();
    /* Hmmh. It's only ok to return empty text for DTD event... well,
     * maybe also for CDATA, since empty CDATA blocks are legal?
     */
    /* !!! 01-Sep-2004, TSa:
     *  note: theoretically, in coalescing mode, it could be possible
     *  to have empty CDATA section(s) get converted to CHARACTERS,
     *  which would be empty... may need to enhance this to check that
     *  mode is not coalescing? Or something
     */
    if (eventType == CHARACTERS) {
        assertTrue("Stream reader should never return empty Strings.", (declaredLen > 0));
    }

    final String viaGetText = sr.getText();
    assertNotNull("getText() should never return null.", viaGetText);
    assertEquals("Expected text length of "+declaredLen+", got "+viaGetText.length(),
                 declaredLen, viaGetText.length());

    // same content must be available via the char array accessors
    final String viaBuffer = new String(sr.getTextCharacters(), sr.getTextStart(), declaredLen);
    assertEquals(viaGetText, viaBuffer);
    return viaGetText;
}
/*
//////////////////////////////////////////////////
// Debug/output helpers
//////////////////////////////////////////////////
*/
/**
 * Prints given warning message to standard error, with prefix "WARN: ".
 */
public static void warn(String msg)
{
    final String line = "WARN: "+msg;
    System.err.println(line);
}
/**
 * Returns printable replacement for the given character, or null if the
 * character can be displayed as is. Linefeed, carriage return and tab
 * get backslash escapes, space is shown as underscore, and other
 * characters below 32 or above 127 are shown as a backslash-u escape
 * with at least 4 hex digits.
 */
public static String printable(char ch)
{
    switch (ch) {
    case '\n':
        return "\\n";
    case '\r':
        return "\\r";
    case '\t':
        return "\\t";
    case ' ':
        return "_";
    default:
        break;
    }
    if (ch >= 32 && ch <= 127) { // displayable as is
        return null;
    }
    String digits = Integer.toHexString((int) ch);
    StringBuffer escaped = new StringBuffer(6);
    escaped.append("\\u");
    // left-pad with zeroes to 4 digits
    for (int missing = 4 - digits.length(); missing > 0; --missing) {
        escaped.append('0');
    }
    escaped.append(digits);
    return escaped.toString();
}
/**
 * Like {@link #printable(char)}, except that space is not replaced:
 * returns backslash escapes for linefeed, carriage return and tab, a
 * backslash-u escape with at least 4 hex digits for other characters
 * below 32 or above 127, and null for everything else.
 */
public static String printableWithSpaces(char ch)
{
    switch (ch) {
    case '\n':
        return "\\n";
    case '\r':
        return "\\r";
    case '\t':
        return "\\t";
    default:
        break;
    }
    if (ch >= 32 && ch <= 127) { // displayable as is (including space)
        return null;
    }
    String digits = Integer.toHexString((int) ch);
    StringBuffer escaped = new StringBuffer(6);
    escaped.append("\\u");
    // left-pad with zeroes to 4 digits
    for (int missing = 4 - digits.length(); missing > 0; --missing) {
        escaped.append('0');
    }
    escaped.append(digits);
    return escaped.toString();
}
/**
 * Returns copy of the given String in which each character that has a
 * replacement according to {@link #printable(char)} is replaced by it.
 * Null and empty Strings are returned as is.
 */
public static String printable(String str)
{
    if (str == null) {
        return str;
    }
    final int total = str.length();
    if (total == 0) {
        return str;
    }
    final StringBuffer out = new StringBuffer(total + 64);
    final char[] chars = str.toCharArray();
    for (int ix = 0; ix < total; ++ix) {
        final String replacement = printable(chars[ix]);
        if (replacement != null) {
            out.append(replacement);
        } else {
            out.append(chars[ix]);
        }
    }
    return out.toString();
}
/**
 * Returns copy of the given String in which each character that has a
 * replacement according to {@link #printableWithSpaces(char)} is
 * replaced by it. Null and empty Strings are returned as is.
 */
public static String printableWithSpaces(String str)
{
    if (str == null) {
        return str;
    }
    final int total = str.length();
    if (total == 0) {
        return str;
    }
    final StringBuffer out = new StringBuffer(total + 64);
    final char[] chars = str.toCharArray();
    for (int ix = 0; ix < total; ++ix) {
        final String replacement = printableWithSpaces(chars[ix]);
        if (replacement != null) {
            out.append(replacement);
        } else {
            out.append(chars[ix]);
        }
    }
    return out.toString();
}
/**
 * Returns the printable form of the given String in single quotes,
 * preceded by its length; null and empty Strings both yield "[0]''".
 */
protected static String quotedPrintable(String str)
{
    final boolean hasContent = (str != null) && (str.length() > 0);
    if (hasContent) {
        return "[len: "+str.length()+"] '"+printable(str)+"'";
    }
    return "[0]''";
}
}
woodstox-4.1.3/src/test/wstxtest/empty and spaces.dtd 0000644 0001750 0001750 00000000000 11745427076 023106 0 ustar giovanni giovanni woodstox-4.1.3/src/test/wstxtest/stream/ 0000755 0001750 0001750 00000000000 11756143457 020576 5 ustar giovanni giovanni woodstox-4.1.3/src/test/wstxtest/stream/TestEncodingDetection.java 0000644 0001750 0001750 00000011551 11745427076 025671 0 ustar giovanni giovanni package wstxtest.stream;
import java.io.*;
import javax.xml.stream.*;
/**
* This set on unit tests checks that woodstox-specific invariants
* regarding automatic input encoding detection are maintained. Some
* of these might be required by stax specification too, but it is not
* quite certain, thus tests are included in woodstox-specific packages.
*/
public class TestEncodingDetection
extends BaseStreamTest
{
// NOTE(review): presumably prefixes of EBCDIC encoding names as given on
// input ("cp") and as reported back by the reader ("IBM"); the code that
// uses them is outside this view -- confirm.
final static String ENC_EBCDIC_IN_PREFIX = "cp";
final static String ENC_EBCDIC_OUT_PREFIX = "IBM";
/**
 * Verifies that a document without xml declaration or any other
 * encoding indication is detected as UTF-8: no declared encoding
 * scheme is reported, and the detected encoding is "UTF-8".
 *<p>
 * The document must contain a root element: an empty String is not a
 * well-formed document, and the test expects a START_ELEMENT event.
 */
public void testUtf8()
    throws IOException, XMLStreamException
{
    /* Default is, in absence of any other indications, UTF-8...
     * let's check a minimal legal doc:
     */
    String XML = "<root />";
    byte[] b = XML.getBytes("UTF-8");
    XMLStreamReader sr = getReader(b);
    assertTokenType(START_DOCUMENT, sr.getEventType());
    // no xml declaration, so no declared encoding
    assertNull(sr.getCharacterEncodingScheme());
    assertEquals("UTF-8", sr.getEncoding());
    // let's iterate just for fun though
    assertTokenType(START_ELEMENT, sr.next());
    sr.close();
}
public void testUtf16()
throws XMLStreamException
{
// Should be able to figure out encoding...
String XML = ".
* Note: one more implicit assumption tested: not only is the ordering
* of explicit attributes fixed, but so is that of defaulted attributes.
* Latter always come after explicit ones, and in the same order as
* they were declared in DTD.
*/
public void testNestedAttrsNs()
throws Exception
{
// Walks through NESTED_XML with a validating reader (second argument
// true; NOTE(review): presumably "namespace aware" -- confirm against
// getValidatingReader) and checks count, names and values of the
// attributes of each start element, in index order.
XMLStreamReader sr = getValidatingReader(NESTED_XML, true);
assertTokenType(DTD, sr.next());
// root elem:
assertTokenType(START_ELEMENT, sr.next());
assertEquals(3, sr.getAttributeCount());
assertEquals("123", sr.getAttributeValue(0)); // explicit
assertEquals("rootValue", sr.getAttributeValue(1)); // default
assertEquals("xyz", sr.getAttributeValue(2)); // default
// 1st branch:
assertTokenType(START_ELEMENT, sr.next());
assertEquals(2, sr.getAttributeCount());
assertEquals("a", sr.getAttributePrefix(0));
assertEquals("b", sr.getAttributeLocalName(0));
assertEquals("ns", sr.getAttributeNamespace(0));
assertEquals("ab", sr.getAttributeValue(0)); // explicit
assertEquals("branchValue", sr.getAttributeValue(1)); // default
// and how about what should NOT be found?
assertNull(sr.getAttributeValue(null, "xyz"));
// 2nd branch:
assertTokenType(START_ELEMENT, sr.next());
assertEquals(5, sr.getAttributeCount());
assertEquals("a", sr.getAttributeLocalName(0));
assertEquals("value", sr.getAttributeValue(0)); // explicit
assertEquals("xyz", sr.getAttributeLocalName(1));
assertEquals("456", sr.getAttributeValue(1)); // explicit
assertEquals("c", sr.getAttributeLocalName(2));
assertEquals("1", sr.getAttributeValue(2)); // explicit
assertEquals("f", sr.getAttributeLocalName(3));
assertEquals("", sr.getAttributeValue(3)); // explicit
assertEquals("a", sr.getAttributePrefix(4));
assertEquals("ns", sr.getAttributeNamespace(4));
assertEquals("b", sr.getAttributeLocalName(4));
assertEquals("xyz", sr.getAttributeValue(4)); // default
// 1st leaf:
assertTokenType(START_ELEMENT, sr.next());
assertEquals(2, sr.getAttributeCount());
assertEquals("leafValue", sr.getAttributeValue(0)); // default
assertEquals("123", sr.getAttributeValue(1)); // default
// and how about what should not be found?
assertNull(sr.getAttributeValue(null, "foo"));
assertNull(sr.getAttributeValue(null, "a"));
assertNull(sr.getAttributeValue(null, "c"));
assertNull(sr.getAttributeValue(null, "f"));
assertNull(sr.getAttributeValue(null, "xyz"));
// close leaf
assertTokenType(END_ELEMENT, sr.next());
// close 2nd branch
assertTokenType(END_ELEMENT, sr.next());
// close 1st branch
assertTokenType(END_ELEMENT, sr.next());
// close root
assertTokenType(END_ELEMENT, sr.next());
// nothing but end of document may follow the root element
assertTokenType(END_DOCUMENT, sr.next());
}
/**
* This tests handling of ATTLIST declarations for "xmlns:xx" and "xmlns"
* attributes (that is, namespace declarations). Some legacy DTDs
* (most notably, XHTML dtd) do this, and Woodstox had some problems with
* this concept...
*/
public void testNsAttr()
throws XMLStreamException
{
String XML =
"\n"
+"\n"
+"]>
* Please note that due to arbitrary nature of GC and its interactions
* with soft reference, as well as the way JUnit may run its unit
* tests, these tests may not be as robust as they should be.
*/
public class TestBufferRecycling
extends BaseStreamTest
{
final static String DOC = "
* Note that although this test should really be part of StAX2 test
* suite, currently there is no standard way to define properties that
* would make it more likely that the parser may return partial
* text segments; but Woodstox does. So, for now we can at least
* test that Woodstox is conformant... ;-)
*/
public class TestStreaming
extends BaseStreamTest
{
public void testTextStreaming()
throws IOException, XMLStreamException
{
String CONTENT_IN =
"Some content\nthat will be "
+""streamed" & sliced"
+" and\nprocessed...";
;
String CONTENT_OUT =
"Some content\nthat will be "
+"\"streamed\" & sliced"
+" and\nprocessed...";
;
/* Let's also add trailing CDATA, to ensure no coalescing is done
* when not requested
*/
String XML = "
* Note: Here we count on the fact that the current stream writer
* does not (and actually, can not!) verify whether the entity
* has been properly declared.
*/
public void testValidEntityNames()
    throws Exception
{
    /* Four combinations: bit 0 clear -> namespace-aware, bit 1 clear ->
     * name validation enabled. Every valid name must be accepted in
     * each of them.
     */
    int mode = 0;
    while (mode < 4) {
        final boolean nsAware = (mode & 1) == 0;
        final boolean validating = (mode & 2) == 0;

        // No colons allowed in namespace-aware mode, hence separate lists
        final String[] names;
        if (nsAware) {
            names = VALID_NS_NAMES;
        } else {
            names = VALID_NON_NS_NAMES;
        }

        XMLStreamWriter writer = startDoc(validating, nsAware);
        // Entity references need an enclosing (dummy) root element
        writer.writeStartElement("dummy");
        for (int ix = 0, len = names.length; ix < len; ++ix) {
            writer.writeEntityRef(names[ix]);
        }
        writer.writeEndElement();
        closeDoc(writer);

        ++mode;
    }
}
public void testInvalidEntityNames()
    throws XMLStreamException
{
    /* Name validation is always enabled here; only namespace awareness
     * varies (first pass namespace-aware, second pass not).
     */
    for (int mode = 0; mode < 2; ++mode) {
        final boolean nsAware = (mode & 1) == 0;
        final String[] badNames;
        if (nsAware) {
            badNames = INVALID_NS_ROOT_NAMES;
        } else {
            badNames = INVALID_NON_NS_NAMES;
        }

        for (int ix = 0; ix < badNames.length; ++ix) {
            final String name = badNames[ix];
            // A fresh document per name, so one failure can not mask another
            XMLStreamWriter2 sw = (XMLStreamWriter2)startDoc(true, nsAware);
            sw.writeStartElement("dummy");

            boolean rejected;
            try {
                sw.writeEntityRef(name);
                rejected = false;
            } catch (XMLStreamException expected) {
                rejected = true; // good
            }
            if (!rejected) {
                fail("Failed to catch an invalid entity name (ns = "+nsAware+") '"
                     +name+"'.");
            }
            // Writer has to remain usable after rejecting the name
            sw.writeEndElement();
            closeDoc(sw);
        }
    }
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * Obtains the output factory from {@code getOutputFactory()} and
 * configures it for the requested name-validation and
 * namespace-awareness modes, with repairing always switched off.
 *
 * @param validateNames value handed to {@code setValidateNames}
 * @param ns value handed to {@code setNamespaceAware}
 * @return the configured factory
 */
private XMLOutputFactory getFactory(boolean validateNames, boolean ns)
    throws XMLStreamException
{
    final XMLOutputFactory factory = getOutputFactory();

    setValidateNames(factory, validateNames);
    setNamespaceAware(factory, ns);
    // Repairing mode is never wanted by these tests
    setRepairing(factory, false);

    return factory;
}
/**
 * Creates a stream writer over a throw-away {@link StringWriter}, using
 * a factory configured by {@code getFactory(validateNames, ns)}, and
 * writes the start of the document.
 *
 * @return writer on which {@code writeStartDocument()} has already been called
 */
private XMLStreamWriter startDoc(boolean validateNames, boolean ns)
    throws XMLStreamException
{
    // Output itself is never inspected, only the writer's behavior is
    XMLStreamWriter writer =
        getFactory(validateNames, ns).createXMLStreamWriter(new StringWriter());
    writer.writeStartDocument();
    return writer;
}
/**
* Finishes the document on the given writer (writeEndDocument) and then
* closes the writer.
*
* @param sw writer to finish and close
*/
private void closeDoc(XMLStreamWriter sw)
throws XMLStreamException
{
sw.writeEndDocument();
sw.close();
}
}
woodstox-4.1.3/src/test/wstxtest/wstream/TestOptions.java 0000644 0001750 0001750 00000004267 11745427076 024134 0 ustar giovanni giovanni package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import com.ctc.wstx.api.WstxOutputProperties;
/**
* This unit test suite verifies Woodstox-specific writer-side options
*/
public class TestOptions
extends BaseWriterTest
{
public void testEmptyElemSpaces()
throws IOException, XMLStreamException
{
/* Need to test both with and without space; as well as
* using Writer and using an OutputStream (since backends
* for the two are very different).
*/
for (int i = 0; i < 6; ++i) {
boolean space = ((i & 1) == 0);
String str;
boolean writer = (i < 2);
StringWriter strw = new StringWriter();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
XMLStreamWriter sw;
if (writer) {
sw = getWriter(space, strw, null, null);
} else {
sw = getWriter(space, null, bos, (i < 4) ? "UTF-8" : "ISO-8859--1");
}
sw.writeStartDocument();
sw.writeEmptyElement("root");
sw.writeEndDocument();
sw.close();
// Should have a space!
if (writer) {
str = strw.toString();
} else {
str = new String(bos.toByteArray(), "UTF-8");
}
if (space) {
if (str.indexOf("
* Since this functionality is not required (or even suggested from what
* I can tell) by Stax 1.0 specs (and Stax2 does not change definitions
* of core API), this is in woodstox-specific section of tests.
*/
public void testExplicitNsWrites()
throws XMLStreamException
{
// Writes a namespace declaration on the root element before any element
// uses it, then parses the output back to verify that the declaration is
// emitted exactly once (on the root) and reused by the child element.
final String URI = "http://bar";
XMLOutputFactory f = getFactory();
StringWriter strw = new StringWriter();
XMLStreamWriter sw = f.createXMLStreamWriter(strw);
sw.writeStartDocument();
/* root in no namespace, no attributes; but want to add
* an 'early' ns declaration for ns prefix 'foo',
* with URI 'http://bar'
*/
sw.writeStartElement("", "root");
sw.writeNamespace("foo", URI);
// leaf in that namespace, then:
sw.writeStartElement(URI, "leaf");
sw.writeEndElement();
sw.writeEndElement();
sw.writeEndDocument();
sw.close();
String result = strw.toString();
// Ok, so let's parse and verify:
XMLStreamReader sr = constructNsStreamReader(result, false);
assertTokenType(START_ELEMENT, sr.next());
assertEquals("root", sr.getLocalName());
assertElemNotInNamespace(sr);
int nsCount = sr.getNamespaceCount();
assertEquals("Expected one (and only one) namespace declaration, got "+nsCount, 1, nsCount);
assertEquals("foo", sr.getNamespacePrefix(0));
assertEquals(URI, sr.getNamespaceURI(0));
// And then the branch should have no ns decls:
assertTokenType(START_ELEMENT, sr.next());
assertEquals("leaf", sr.getLocalName());
assertEquals(URI, sr.getNamespaceURI());
assertEquals(0, sr.getNamespaceCount());
assertTokenType(END_ELEMENT, sr.next());
assertEquals("leaf", sr.getLocalName());
// fine, rest is ok
sr.close();
}
/**
* Similar to {@link #testExplicitNsWrites}, but tests behavior
* of calls to
* As of Woodstox 4.0, we will be actually using {@link XMLReporter2}
* interface, both to test that the improved interface works, and
* to get access to more accurate information.
*/
public class TestXMLReporter
extends BaseStreamTest
{
/**
* Basic unit test for verifying that XMLReporter gets validation
* errors reported.
*/
public void testValidationError()
throws XMLStreamException
{
String XML =
"\n"
+"]>
*
*/
public class WstxOutputFactory
extends XMLOutputFactory2
implements OutputConfigFlags
{
/*
///////////////////////////////////////////////////////////
// Actual storage of configuration settings
///////////////////////////////////////////////////////////
*/
protected final WriterConfig mConfig;
/*
///////////////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////////////
*/
/**
* Creates a factory whose configuration is initialized from
* WriterConfig.createFullDefaults().
*/
public WstxOutputFactory() {
mConfig = WriterConfig.createFullDefaults();
}
/*
///////////////////////////////////////////////////////////
// XMLOutputFactory API
///////////////////////////////////////////////////////////
*/
/**
* Creates an event writer for the given stream without an explicit
* encoding; delegates to the two-argument overload, passing null as
* the encoding.
*/
public XMLEventWriter createXMLEventWriter(OutputStream out)
throws XMLStreamException
{
return createXMLEventWriter(out, null);
}
/**
 * Creates an event writer that outputs to the given stream, using the
 * given encoding.
 *
 * @param out stream to write to; must not be null
 * @param enc encoding to use; passed as-is to the internal stream writer
 *   factory method
 * @throws IllegalArgumentException if <code>out</code> is null
 */
public XMLEventWriter createXMLEventWriter(OutputStream out, String enc)
    throws XMLStreamException
{
    if (out != null) {
        // Event writer is a thin layer over a regular stream writer
        XMLStreamWriter2 streamWriter = createSW(out, null, enc, false);
        return new Stax2EventWriterImpl(streamWriter);
    }
    throw new IllegalArgumentException("Null OutputStream is not a valid argument");
}
/**
* Creates an event writer for the given Result by wrapping the stream
* writer that createSW(Result) constructs for it.
*/
public XMLEventWriter createXMLEventWriter(javax.xml.transform.Result result)
throws XMLStreamException
{
return new Stax2EventWriterImpl(createSW(result));
}
/**
 * Creates an event writer that outputs using the given Writer; no
 * explicit encoding is passed along.
 *
 * @param w Writer to output to; must not be null
 * @throws IllegalArgumentException if <code>w</code> is null
 */
public XMLEventWriter createXMLEventWriter(Writer w)
    throws XMLStreamException
{
    if (w != null) {
        XMLStreamWriter2 streamWriter = createSW(null, w, null, false);
        return new Stax2EventWriterImpl(streamWriter);
    }
    throw new IllegalArgumentException("Null Writer is not a valid argument");
}
/**
* Creates a stream writer for the given stream without an explicit
* encoding; delegates to the two-argument overload, passing null as
* the encoding.
*/
public XMLStreamWriter createXMLStreamWriter(OutputStream out)
throws XMLStreamException
{
return createXMLStreamWriter(out, null);
}
/**
 * Creates a stream writer that outputs to the given stream, using the
 * given encoding.
 *
 * @param out stream to write to; must not be null
 * @param enc encoding to use; passed as-is to the internal factory method
 * @throws IllegalArgumentException if <code>out</code> is null
 */
public XMLStreamWriter createXMLStreamWriter(OutputStream out, String enc)
    throws XMLStreamException
{
    if (out != null) {
        return createSW(out, null, enc, false);
    }
    throw new IllegalArgumentException("Null OutputStream is not a valid argument");
}
/**
* Creates a stream writer for the given Result; all of the work is
* done by createSW(Result).
*/
public XMLStreamWriter createXMLStreamWriter(javax.xml.transform.Result result)
throws XMLStreamException
{
return createSW(result);
}
/**
 * Creates a stream writer that outputs using the given Writer; no
 * explicit encoding is passed along.
 *
 * @param w Writer to output to; must not be null
 * @throws IllegalArgumentException if <code>w</code> is null
 */
public XMLStreamWriter createXMLStreamWriter(Writer w)
    throws XMLStreamException
{
    if (w != null) {
        return createSW(null, w, null, false);
    }
    throw new IllegalArgumentException("Null Writer is not a valid argument");
}
/**
* Returns the value of the named property, as reported by the
* factory's configuration object.
*/
public Object getProperty(String name)
{
return mConfig.getProperty(name);
}
/**
* Tells whether the named property is supported; the answer comes
* from the factory's configuration object.
*/
public boolean isPropertySupported(String name) {
return mConfig.isPropertySupported(name);
}
/**
* Sets the named property on the factory's configuration object.
* Writers created earlier work on their own configuration copies
* (see createNonShared() calls in the createSW methods).
*/
public void setProperty(String name, Object value)
{
mConfig.setProperty(name, value);
}
/*
///////////////////////////////////////////////////////////
// Stax2 extensions
///////////////////////////////////////////////////////////
*/
// // // Stax2 additional (encoding-aware) factory methods
/**
 * Creates an event writer that outputs using the given Writer, with
 * the encoding the caller knows the Writer to use.
 *
 * @param w Writer to output to; must not be null
 * @param enc encoding of the Writer, if known; may be null
 * @throws IllegalArgumentException if <code>w</code> is null
 */
public XMLEventWriter createXMLEventWriter(Writer w, String enc)
    throws XMLStreamException
{
    /* Same guard as the single-argument overload: without it a null
     * Writer would make the internal factory method take its
     * OutputStream branch, with a null stream.
     */
    if (w == null) {
        throw new IllegalArgumentException("Null Writer is not a valid argument");
    }
    return new Stax2EventWriterImpl(createSW(null, w, enc, false));
}
/**
 * Creates an event writer on top of an existing stream writer. The
 * stream writer is first passed through
 * {@code Stax2WriterAdapter.wrapIfNecessary}, so that the event writer
 * always works against the Stax2 writer interface.
 *
 * @param sw stream writer that events are to be written through
 */
public XMLEventWriter createXMLEventWriter(XMLStreamWriter sw)
    throws XMLStreamException
{
    return new Stax2EventWriterImpl(Stax2WriterAdapter.wrapIfNecessary(sw));
}
/**
 * Creates a stream writer that outputs using the given Writer, with
 * the encoding the caller knows the Writer to use.
 *
 * @param w Writer to output to; must not be null
 * @param enc encoding of the Writer, if known; may be null
 * @throws IllegalArgumentException if <code>w</code> is null
 */
public XMLStreamWriter2 createXMLStreamWriter(Writer w, String enc)
    throws XMLStreamException
{
    /* Same guard as the single-argument overload: without it a null
     * Writer would make the internal factory method take its
     * OutputStream branch, with a null stream.
     */
    if (w == null) {
        throw new IllegalArgumentException("Null Writer is not a valid argument");
    }
    return createSW(null, w, enc, false);
}
// // // Stax2 "Profile" mutators
/**
* Applies the "xml conformance" profile by delegating to the
* configuration object's method of the same name.
*/
public void configureForXmlConformance()
{
mConfig.configureForXmlConformance();
}
/**
* Applies the "robustness" profile by delegating to the
* configuration object's method of the same name.
*/
public void configureForRobustness()
{
mConfig.configureForRobustness();
}
/**
* Applies the "speed" profile by delegating to the configuration
* object's method of the same name.
*/
public void configureForSpeed()
{
mConfig.configureForSpeed();
}
/*
///////////////////////////////////////////////////////////
// Woodstox-specific configuration access
///////////////////////////////////////////////////////////
*/
/**
* Returns the factory's own configuration object (not a copy).
*/
public WriterConfig getConfig() {
return mConfig;
}
/*
///////////////////////////////////////////////////////////
// Internal methods:
///////////////////////////////////////////////////////////
*/
/**
 * Bottleneck factory method used internally; needs to take care of passing
 * proper settings to stream writer.
 *<p>
 * If a Writer is given it is used (and the stream argument is ignored);
 * otherwise an encoder is chosen for the stream based on the encoding.
 *
 * @param out Stream to output to; only used if <code>w</code> is null
 * @param w Writer to output to, if any
 * @param enc Encoding to use (stream case), or encoding the Writer is
 *   known to use (writer case); may be null
 * @param requireAutoClose Whether this result will always require
 *   auto-close be enabled (true); or only if application has
 *   requested it (false)
 */
private XMLStreamWriter2 createSW(OutputStream out, Writer w, String enc,
    boolean requireAutoClose)
    throws XMLStreamException
{
    /* Writer gets a private copy of the configuration, so that later
     * changes made via factory can not become visible half-way
     * through output.
     */
    WriterConfig cfg = mConfig.createNonShared();
    final boolean autoCloseOutput = requireAutoClose || mConfig.willAutoCloseOutput();
    XmlWriter xw;

    if (w != null) {
        // Writer given: we may still be able to figure out the encoding
        if (enc == null) {
            enc = CharsetNames.findEncodingFor(w);
        }
        try {
            xw = new BufferingXmlWriter(w, cfg, enc, autoCloseOutput, null, -1);
        } catch (IOException ex) {
            throw new XMLStreamException(ex);
        }
        return createSW(enc, cfg, xw);
    }

    // No Writer: have to encode into the stream ourselves
    if (enc == null) {
        enc = WstxOutputProperties.DEFAULT_OUTPUT_ENCODING;
    } else {
        /* Identity comparisons are intentional: canonical names are
         * interned, so a match means name is already normalized.
         */
        final boolean alreadyCanonical = (enc == CharsetNames.CS_UTF8)
            || (enc == CharsetNames.CS_ISO_LATIN1)
            || (enc == CharsetNames.CS_US_ASCII);
        if (!alreadyCanonical) {
            enc = CharsetNames.normalize(enc);
        }
    }

    try {
        if (enc == CharsetNames.CS_UTF8) {
            Writer utf8 = new UTF8Writer(cfg, out, autoCloseOutput);
            xw = new BufferingXmlWriter(utf8, cfg, enc, autoCloseOutput, out, 16);
        } else if (enc == CharsetNames.CS_ISO_LATIN1) {
            xw = new ISOLatin1XmlWriter(out, cfg, autoCloseOutput);
        } else if (enc == CharsetNames.CS_US_ASCII) {
            xw = new AsciiXmlWriter(out, cfg, autoCloseOutput);
        } else {
            // Anything else is left for JDK's own encoders to handle
            Writer generic = new OutputStreamWriter(out, enc);
            xw = new BufferingXmlWriter(generic, cfg, enc, autoCloseOutput, out, -1);
        }
    } catch (IOException ex) {
        throw new XMLStreamException(ex);
    }
    return createSW(enc, cfg, xw);
}
/**
 * Called by {@link #createSW(OutputStream, Writer, String, boolean)} after all of the necessary configuration
 * logic is complete; chooses the stream writer implementation based on
 * the namespace settings of the configuration.
 *
 * @param enc encoding handed to the constructed writer
 * @param cfg (non-shared) configuration the writer is to use
 * @param xw underlying low-level xml writer
 */
protected XMLStreamWriter2 createSW(String enc, WriterConfig cfg, XmlWriter xw) {
    // No namespace support at all?
    if (!cfg.willSupportNamespaces()) {
        return new NonNsStreamWriter(xw, enc, cfg);
    }
    // Namespace-aware: either plain, or with automatic namespaces
    if (!cfg.automaticNamespacesEnabled()) {
        return new SimpleNsStreamWriter(xw, enc, cfg);
    }
    return new RepairingNsStreamWriter(xw, enc, cfg);
}
/**
 * Internal factory method used for constructing a stream writer for
 * a generic {@link Result}. Supported types are Stax2Result,
 * StreamResult, SAXResult (only via its system id) and DOMResult.
 *<p>
 * Auto-closing of the underlying output is forced whenever the caller
 * has no access to the stream or writer being constructed (Stax2Result,
 * or output opened from a system id).
 *
 * @param res result object to construct the writer for; must not be null
 *
 * @throws IllegalArgumentException if <code>res</code> is null or of
 *   an unrecognized type
 * @throws XMLStreamException if no output destination can be accessed
 *   via the result, or if opening the output fails
 */
private XMLStreamWriter2 createSW(Result res)
    throws XMLStreamException
{
    /* Explicit check, in line with null checks of the other factory
     * methods; otherwise caller would get a bare NullPointerException
     * from the type dispatch below.
     */
    if (res == null) {
        throw new IllegalArgumentException("Null Result is not a valid argument");
    }

    OutputStream out = null;
    Writer w = null;
    // none of the result types handled below provides an encoding
    final String encoding = null;
    boolean requireAutoClose;
    String sysId = null;

    if (res instanceof Stax2Result) {
        Stax2Result sr = (Stax2Result) res;
        try {
            out = sr.constructOutputStream();
            if (out == null) {
                w = sr.constructWriter();
            }
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
        // yes, it's required since caller has no access to stream/writer:
        requireAutoClose = true;
    } else if (res instanceof StreamResult) {
        StreamResult sr = (StreamResult) res;
        out = sr.getOutputStream();
        sysId = sr.getSystemId();
        if (out == null) {
            w = sr.getWriter();
        }
        /* Caller owns it, only auto-close if requested to do so:
         * (except that for system-id-only, it'll still be required,
         * see code below)
         */
        requireAutoClose = false;
    } else if (res instanceof SAXResult) {
        SAXResult sr = (SAXResult) res;
        sysId = sr.getSystemId();
        if (sysId == null || sysId.length() == 0) {
            throw new XMLStreamException("Can not create a stream writer for a SAXResult that does not have System Id (support for using SAX input source not implemented)");
        }
        requireAutoClose = true;
    } else if (res instanceof DOMResult) {
        // DOM output does not go through the stream/writer based path at all
        return WstxDOMWrappingWriter.createFrom(mConfig.createNonShared(), (DOMResult) res);
    } else {
        throw new IllegalArgumentException("Can not instantiate a writer for XML result type "+res.getClass()+" (unrecognized type)");
    }

    // Preference order: stream, then writer, then system id
    if (out != null) {
        return createSW(out, null, encoding, requireAutoClose);
    }
    if (w != null) {
        return createSW(null, w, encoding, requireAutoClose);
    }
    if (sysId != null && sysId.length() > 0) {
        /* 26-Dec-2008, TSa: If we must construct URL from system id,
         *   it means caller will not have access to resulting
         *   stream, thus we will force auto-closing.
         */
        requireAutoClose = true;
        try {
            out = URLUtil.outputStreamFromURL(URLUtil.urlFromSystemId(sysId));
        } catch (IOException ioe) {
            throw new WstxIOException(ioe);
        }
        return createSW(out, null, encoding, requireAutoClose);
    }
    throw new XMLStreamException("Can not create Stax writer for passed-in Result -- neither writer, output stream or system id was accessible");
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/stax/WstxInputFactory.java 0000644 0001750 0001750 00000071142 11745427074 025036 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.stax;
import java.io.*;
import java.net.URL;
import javax.xml.stream.*;
import javax.xml.stream.util.XMLEventAllocator;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import org.xml.sax.InputSource;
import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.stax2.XMLStreamReader2;
import org.codehaus.stax2.io.Stax2Source;
import org.codehaus.stax2.io.Stax2ByteArraySource;
import org.codehaus.stax2.ri.Stax2FilteredStreamReader;
import org.codehaus.stax2.ri.Stax2ReaderAdapter;
import org.codehaus.stax2.ri.evt.Stax2EventReaderAdapter;
import org.codehaus.stax2.ri.evt.Stax2FilteredEventReader;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.cfg.InputConfigFlags;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.dtd.DTDId;
import com.ctc.wstx.dtd.DTDSubset;
import com.ctc.wstx.dom.WstxDOMWrappingReader;
import com.ctc.wstx.evt.DefaultEventAllocator;
import com.ctc.wstx.evt.WstxEventReader;
import com.ctc.wstx.exc.WstxIOException;
import com.ctc.wstx.io.*;
import com.ctc.wstx.sr.ValidatingStreamReader;
import com.ctc.wstx.sr.ReaderCreator;
import com.ctc.wstx.util.DefaultXmlSymbolTable;
import com.ctc.wstx.util.SimpleCache;
import com.ctc.wstx.util.SymbolTable;
import com.ctc.wstx.util.URLUtil;
/**
* Factory for creating various Stax objects (stream/event reader,
* writer).
*
*makeChild
method
*/
SymbolTable mSymbols = mRootSymbols;
/*
///////////////////////////////////////////////////////////
// Life-cycle:
///////////////////////////////////////////////////////////
*/
/**
* Creates a factory whose configuration is initialized from
* ReaderConfig.createFullDefaults().
*/
public WstxInputFactory() {
mConfig = ReaderConfig.createFullDefaults();
}
/*
///////////////////////////////////////////////////////////
// ReaderCreator implementation
///////////////////////////////////////////////////////////
*/
// // // Configuration access methods:
/**
 * Method readers created by this factory call, if DTD caching is
 * enabled, to see if an external DTD (subset) has been parsed
 * and cached earlier.
 *
 * @param id identifier of the DTD subset to look for
 * @return cached subset found with the id; null if nothing is found,
 *   or if there is no cache to look in
 */
public synchronized DTDSubset findCachedDTD(DTDId id)
{
    if (mDTDCache == null) { // no cache, nothing to find
        return null;
    }
    return (DTDSubset) mDTDCache.find(id);
}
// // // Callbacks for updating shared information
/**
* Method individual parsers call to pass back symbol table that
* they updated, which may be useful for other parser to reuse, instead
* of previous base symbol table.
*close()
* is called. Will be true for input sources that are automatically
* managed by stream reader (input streams created for
* {@link java.net.URL} and {@link java.io.File} arguments, or when
* configuration settings indicate auto-closing is to be enabled
* (the default value is false as per Stax 1.0 specs).
*/
/**
 * Actual bottleneck method for constructing stream readers: bootstraps
 * the input, constructs the document input source and hands these to
 * the validating stream reader's factory method.
 *
 * @param cfg configuration for the reader; xml 1.1 handling gets enabled
 *   on it if the bootstrapper reports that the document declared xml 1.1
 * @param systemId system id of the document, if known
 * @param bs bootstrapper used for reading the beginning of the document
 * @param src URL passed on to the input source as the document's location
 * @param forER passed as-is to the stream reader's factory method
 * @param autoCloseInput true if input must be closed with the reader;
 *   if false, the configuration decides
 */
private XMLStreamReader2 doCreateSR(ReaderConfig cfg, String systemId, InputBootstrapper bs,
    URL src, boolean forER,
    boolean autoCloseInput)
    throws XMLStreamException
{
    /* Automatic closing of input will happen always for some input
     * types (ones application has no direct access to); but it can
     * also be explicitly enabled via configuration.
     */
    final boolean closeInput = autoCloseInput || cfg.willAutoCloseInput();

    Reader bootstrapped;
    try {
        bootstrapped = bs.bootstrapInput(cfg, true, XmlConsts.XML_V_UNKNOWN);
        if (bs.declaredXml11()) {
            cfg.enableXml11(true);
        }
    } catch (IOException ie) {
        throw new WstxIOException(ie);
    }

    // null -> no public id available
    BranchingReaderSource input = InputSourceFactory.constructDocumentSource
        (cfg, bs, null, systemId, src, bootstrapped, closeInput);

    return ValidatingStreamReader.createValidatingStreamReader(input, this, cfg, bs, forER);
}
/**
* Method that is eventually called to create a (full) stream read
* instance.
*close()
* is called. Will be true for input sources that are automatically
* managed by stream reader (input streams created for
* {@link java.net.URL} and {@link java.io.File} arguments, or when
* configuration settings indicate auto-closing is to be enabled
* (the default value is false as per Stax 1.0 specs).
*/
/**
 * Resolves the context URL for the document (the configured base URL,
 * or failing that one derived from the system id) and then constructs
 * the stream reader.
 *
 * @param cfg configuration for the reader
 * @param systemId system id of the document; may be null or empty
 * @param bs bootstrapper for the input
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 */
public XMLStreamReader2 createSR(ReaderConfig cfg, String systemId, InputBootstrapper bs,
    boolean forER,
    boolean autoCloseInput)
    throws XMLStreamException
{
    // 16-Aug-2004, TSa: Maybe we have a context?
    URL context = cfg.getBaseURL();

    if (context == null) {
        // If not, maybe we can derive it from system id?
        final boolean hasSystemId = (systemId != null) && (systemId.length() != 0);
        if (hasSystemId) {
            try {
                context = URLUtil.urlFromSystemId(systemId);
            } catch (IOException ie) {
                throw new WstxIOException(ie);
            }
        }
    }
    return doCreateSR(cfg, systemId, bs, context, forER, autoCloseInput);
}
/**
 * Constructs a stream reader for an input stream. Without an explicit
 * encoding the stream is handed to a stream bootstrapper as-is;
 * with one, the stream is first wrapped in a Reader for that encoding.
 *
 * @param systemId system id of the document, if known
 * @param in stream to read the document from; must not be null
 * @param enc explicit encoding to use; null or empty if none
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 *
 * @throws IllegalArgumentException if <code>in</code> is null
 */
protected XMLStreamReader2 createSR(String systemId, InputStream in, String enc,
    boolean forER,
    boolean autoCloseInput)
    throws XMLStreamException
{
    // sanity check:
    if (in == null) {
        throw new IllegalArgumentException("Null InputStream is not a valid argument");
    }
    ReaderConfig cfg = createPrivateConfig();
    InputBootstrapper bootstrapper;

    if (enc != null && enc.length() > 0) {
        /* !!! 17-Feb-2006, TSa: We don't yet know if it's xml 1.0 or 1.1;
         *   so have to specify 1.0 (which is less restrictive WRT input
         *   streams). Would be better to let bootstrapper deal with it
         *   though:
         */
        Reader decoded = DefaultInputResolver.constructOptimizedReader(cfg, in, false, enc);
        bootstrapper = ReaderBootstrapper.getInstance(null, systemId, decoded, enc);
    } else {
        bootstrapper = StreamBootstrapper.getInstance(null, systemId, in);
    }
    return createSR(cfg, systemId, bootstrapper, forER, autoCloseInput);
}
/**
 * Constructs a stream reader for a document located by a URL: opens
 * the stream for the URL and delegates to the stream-taking variant.
 *
 * @param cfg configuration for the reader
 * @param src location of the document
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 */
protected XMLStreamReader2 createSR(ReaderConfig cfg, URL src, boolean forER, boolean autoCloseInput)
    throws XMLStreamException
{
    InputStream in;
    try {
        in = URLUtil.inputStreamFromURL(src);
    } catch (IOException ioe) {
        throw new WstxIOException(ioe);
    }
    return createSR(cfg, src, in, forER, autoCloseInput);
}
/**
 * Constructs a stream reader for a stream that was opened for the
 * given URL; the URL's external form is used as the system id.
 *
 * @param cfg configuration for the reader
 * @param src URL the stream was opened for
 * @param in stream to read the document from
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 */
private XMLStreamReader2 createSR(ReaderConfig cfg, URL src, InputStream in,
    boolean forER,
    boolean autoCloseInput)
    throws XMLStreamException
{
    final String systemId = src.toExternalForm();
    InputBootstrapper bootstrapper = StreamBootstrapper.getInstance(null, systemId, in);
    return doCreateSR(cfg, systemId, bootstrapper, src, forER, autoCloseInput);
}
/**
 * Constructs a stream reader for a Reader; no encoding information is
 * passed along, and a private copy of the configuration is used.
 *
 * @param systemId system id of the document, if known
 * @param r Reader to read the document from; must not be null
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 *
 * @throws IllegalArgumentException if <code>r</code> is null
 */
protected XMLStreamReader2 createSR(String systemId, Reader r,
    boolean forER,
    boolean autoCloseInput)
    throws XMLStreamException
{
    /* sanity check, same as with InputStream-taking variant: fail
     * right away instead of passing a null Reader to the bootstrapper.
     */
    if (r == null) {
        throw new IllegalArgumentException("Null Reader is not a valid argument");
    }
    return createSR(createPrivateConfig(), systemId,
                    ReaderBootstrapper.getInstance
                    (null, systemId, r, null), forER, autoCloseInput);
}
/**
 * Constructs a stream reader for a File. A relative File is resolved
 * against the configured base URL, if there is one; otherwise the
 * File is opened directly.
 *
 * @param f file to read the document from
 * @param forER passed on as-is
 * @param autoCloseInput passed on as-is
 *
 * @throws XMLStreamException (a WstxIOException) if the file can not
 *   be opened, or no valid URL can be constructed for it
 */
protected XMLStreamReader2 createSR(File f, boolean forER, boolean autoCloseInput)
    throws XMLStreamException
{
    ReaderConfig cfg = createPrivateConfig();
    try {
        /* 18-Nov-2008, TSa: If P_BASE_URL is set, and File reference is
         *   relative, let's resolve against base...
         */
        if (!f.isAbsolute()) {
            URL base = cfg.getBaseURL();
            if (base != null) {
                URL src = new URL(base, f.getPath());
                return createSR(cfg, src, URLUtil.inputStreamFromURL(src), forER, autoCloseInput);
            }
        }
        /* File.toURL() is deprecated because it does not escape
         * characters that are illegal in URLs (spaces, for example);
         * going via URI produces a properly escaped URL.
         */
        return createSR(cfg, f.toURI().toURL(), new FileInputStream(f), forER, autoCloseInput);
    } catch (IOException ie) {
        throw new WstxIOException(ie);
    }
}
/**
* Another internal factory method, used when dealing with a generic
* Source base type. One thing worth noting is that 'auto-closing'
* will be enabled if the input source or Reader is constructed (and
* thus owned) by Woodstox.
*<p>
* Recognized types are Stax2Source (with a dedicated path for
* Stax2ByteArraySource), StreamSource, SAXSource (only its InputSource
* and system id are used) and DOMSource. When a source gives neither
* stream nor reader, its system id is used for opening the input.
*
* @param src Source to construct the reader for
* @param forER True, if the reader is being constructed to be used
* by an event reader; false if it is not (or the purpose is not known)
*
* @throws IllegalArgumentException if the source is of an unrecognized type
* @throws XMLStreamException if no input can be accessed via the source,
* or if opening the input fails
*/
protected XMLStreamReader2 createSR(javax.xml.transform.Source src,
boolean forER)
throws XMLStreamException
{
ReaderConfig cfg = createPrivateConfig();
Reader r = null;
InputStream in = null;
String pubId = null;
String sysId = null;
String encoding = null;
boolean autoCloseInput;
// only set directly by the byte array source branch below
InputBootstrapper bs = null;
if (src instanceof Stax2Source) {
Stax2Source ss = (Stax2Source) src;
sysId = ss.getSystemId();
pubId = ss.getPublicId();
encoding = ss.getEncoding();
try {
/* 11-Nov-2008, TSa: Let's add optimized handling for byte-block
* source
*/
if (src instanceof Stax2ByteArraySource) {
Stax2ByteArraySource bas = (Stax2ByteArraySource) src;
bs = StreamBootstrapper.getInstance(pubId, sysId, bas.getBuffer(), bas.getBufferStart(), bas.getBufferEnd());
} else {
// stream is preferred; reader is only constructed if there is no stream
in = ss.constructInputStream();
if (in == null) {
r = ss.constructReader();
}
}
} catch (IOException ioe) {
throw new WstxIOException(ioe);
}
/* Caller has no direct access to stream/reader, Woodstox
* owns it and thus has to close too
*/
autoCloseInput = true;
} else if (src instanceof StreamSource) {
StreamSource ss = (StreamSource) src;
sysId = ss.getSystemId();
pubId = ss.getPublicId();
in = ss.getInputStream();
if (in == null) {
r = ss.getReader();
}
/* Caller still has access to stream/reader; no need to
* force auto-close-input
*/
autoCloseInput = cfg.willAutoCloseInput();
} else if (src instanceof SAXSource) {
SAXSource ss = (SAXSource) src;
/* 28-Jan-2006, TSa: Not a complete implementation, but maybe
* even this might help...
*/
sysId = ss.getSystemId();
InputSource isrc = ss.getInputSource();
if (isrc != null) {
encoding = isrc.getEncoding();
in = isrc.getByteStream();
if (in == null) {
r = isrc.getCharacterStream();
}
}
/* Caller still has access to stream/reader; no need to
* force auto-close-input
*/
autoCloseInput = cfg.willAutoCloseInput();
} else if (src instanceof DOMSource) {
DOMSource domSrc = (DOMSource) src;
// SymbolTable not used by the DOM-based 'reader':
return WstxDOMWrappingReader.createFrom(domSrc, cfg);
} else {
throw new IllegalArgumentException("Can not instantiate Stax reader for XML source type "+src.getClass()+" (unrecognized type)");
}
if (bs == null) { // may have already created bootstrapper...
// Preference order here: reader, then stream, then system id
if (r != null) {
bs = ReaderBootstrapper.getInstance(pubId, sysId, r, encoding);
} else if (in != null) {
bs = StreamBootstrapper.getInstance(pubId, sysId, in);
} else if (sysId != null && sysId.length() > 0) {
/* 26-Dec-2008, TSa: If we must construct URL from system id,
* it means caller will not have access to resulting
* stream, thus we will force auto-closing.
*/
autoCloseInput = true;
try {
return createSR(cfg, URLUtil.urlFromSystemId(sysId),
forER, autoCloseInput);
} catch (IOException ioe) {
throw new WstxIOException(ioe);
}
} else {
throw new XMLStreamException("Can not create Stax reader for the Source passed -- neither reader, input stream nor system id was accessible; can not use other types of sources (like embedded SAX streams)");
}
}
return createSR(cfg, sysId, bs, forER, autoCloseInput);
}
/**
 * Returns the event allocator to use: a new instance of the explicitly
 * configured allocator if there is one; otherwise one of the two
 * standard allocators, depending on whether location information
 * is to be preserved.
 */
protected XMLEventAllocator createEventAllocator()
{
    // Explicitly set allocator?
    if (mAllocator != null) {
        return mAllocator.newInstance();
    }
    /* Complete or fast one? Note: standard allocator is designed
     * in such a way that newInstance() need not be called (calling
     * it wouldn't do anything, anyway)
     */
    if (mConfig.willPreserveLocation()) {
        return DefaultEventAllocator.getDefaultInstance();
    }
    return DefaultEventAllocator.getFastInstance();
}
/**
* Method called to construct a copy of the factory's configuration
* object, such that two will be unlinked (changes to one are not
* reflect in the other).
*javax.xml.transform.dom.DOMResult
.
*
*
*
* @author Tatu Saloranta
* @author Dan Diephouse
*/
public class WstxDOMWrappingWriter
extends DOMWrappingWriter
{
/*
///////////////////////////////////////////////////////////
// Constants
///////////////////////////////////////////////////////////
*/
final protected static String ERR_NSDECL_WRONG_STATE =
"Trying to write a namespace declaration when there is no open start element.";
/*
///////////////////////////////////////////////////////////
// Configuration
///////////////////////////////////////////////////////////
*/
protected final WriterConfig mConfig;
/*
///////////////////////////////////////////////////////////
// State
///////////////////////////////////////////////////////////
*/
/**
* This element is the current context element, under which
* all other nodes are added, until matching end element
* is output. Null outside of the main element tree.
*<p>
* Note: an explicitly written empty element (one written using
* <code>writeEmptyElement</code>) will never become
* current element.
*/
protected DOMOutputElement mCurrElem;
/**
* This element is non-null right after a call to
* either <code>writeStartElement</code> or
* <code>writeEmptyElement</code>, and can be used to
* add attributes and namespace declarations.
*<p>
* Note: an empty element (one written explicitly using
* <code>writeEmptyElement</code>) will
* become open element but NOT current element. Conversely,
* regular elements will remain current element when
* non elements are written (text, comments, PI), but
* not the open element.
*/
protected DOMOutputElement mOpenElement;
/**
* for NsRepairing mode
*/
protected int[] mAutoNsSeq;
protected String mSuggestedDefNs = null;
protected String mAutomaticNsPrefix;
/**
* Map that contains URI-to-prefix entries that point out suggested
* prefixes for URIs. These are populated by calls to
* {@link #setPrefix}, and they are only used as hints for binding;
* if there are conflicts, repairing writer can just use some other
* prefix.
*/
HashMap mSuggestedPrefixes = null;
/*
///////////////////////////////////////////////////////////
// Life-cycle
///////////////////////////////////////////////////////////
*/
/**
 * Constructs a writer that adds output under the given DOM node.
 *
 * @param cfg configuration for the writer; also the source of the
 *   namespace settings passed to the base class
 * @param treeRoot node to output under: a document, a document fragment
 *   or an element
 *
 * @throws XMLStreamException if the node is of any other type
 */
private WstxDOMWrappingWriter(WriterConfig cfg, Node treeRoot)
    throws XMLStreamException
{
    super(treeRoot, cfg.willSupportNamespaces(), cfg.automaticNamespacesEnabled());
    mConfig = cfg;
    mAutoNsSeq = null;
    // prefix for automatic namespaces is only needed in repairing mode
    if (mNsRepairing) {
        mAutomaticNsPrefix = mConfig.getAutomaticNsPrefix();
    } else {
        mAutomaticNsPrefix = null;
    }

    /* Ok; we need a document node; or an element node; or a document
     * fragment node.
     */
    final short rootType = treeRoot.getNodeType();

    if (rootType == Node.DOCUMENT_NODE || rootType == Node.DOCUMENT_FRAGMENT_NODE) {
        // both are ok, but no open element
        mCurrElem = DOMOutputElement.createRoot(treeRoot);
        mOpenElement = null;
    } else if (rootType == Node.ELEMENT_NODE) { // can make sub-tree... ok
        // still need a virtual root node as parent
        DOMOutputElement virtualRoot = DOMOutputElement.createRoot(treeRoot);
        DOMOutputElement wrapped = virtualRoot.createChild((Element) treeRoot);
        mCurrElem = wrapped;
        mOpenElement = wrapped;
    } else { // other Nodes not usable
        throw new XMLStreamException("Can not create an XMLStreamWriter for a DOM node of type "+treeRoot.getClass());
    }
}
/**
 * Factory method for constructing a writer that outputs under the
 * node of the given DOM result.
 *
 * @param cfg configuration for the writer
 * @param dst result whose node (as per <code>getNode()</code>) is to
 *   receive the output
 */
public static WstxDOMWrappingWriter createFrom(WriterConfig cfg, DOMResult dst)
    throws XMLStreamException
{
    return new WstxDOMWrappingWriter(cfg, dst.getNode());
}
/*
///////////////////////////////////////////////////////////
// XMLStreamWriter API (Stax 1.0)
///////////////////////////////////////////////////////////
*/
//public void close() { }
//public void flush() { }
/**
 * Returns the namespace context in effect: the current element when
 * namespace-aware, the shared empty context otherwise.
 */
public NamespaceContext getNamespaceContext()
{
    if (mNsAware) {
        return mCurrElem;
    }
    return EmptyNamespaceContext.getInstance();
}
/**
 * Finds the prefix bound to the given namespace URI. The namespace
 * context held in <code>mNsContext</code>, if any, is consulted first;
 * after that, the current element.
 *
 * @param uri namespace URI to find a prefix for
 * @return prefix found; always null when not namespace-aware
 */
public String getPrefix(String uri)
{
    if (!mNsAware) {
        return null;
    }
    String fromContext = (mNsContext == null) ? null : mNsContext.getPrefix(uri);
    if (fromContext == null) {
        return mCurrElem.getPrefix(uri);
    }
    return fromContext;
}
/**
* Returns the value of the named property, as reported by the
* writer's configuration object.
*/
public Object getProperty(String name) {
return mConfig.getProperty(name);
}
/**
 * Records the URI suggested for use as the default namespace; a null
 * or empty URI clears the suggestion.
 *
 * @param uri namespace URI to suggest
 */
public void setDefaultNamespace(String uri) {
    if (uri != null && uri.length() != 0) {
        mSuggestedDefNs = uri;
    } else {
        mSuggestedDefNs = null;
    }
}
//public void setNamespaceContext(NamespaceContext context)
/**
 * Records a suggested prefix for the given namespace URI. An empty
 * prefix is treated as a request to set the default namespace.
 * Bindings that would (mis)declare the reserved "xml" and "xmlns"
 * prefixes or their URIs are reported as output errors, as mandated
 * by the XML Namespaces specification.
 *
 * @param prefix Prefix to suggest; must not be null
 * @param uri Namespace URI; must not be null unless prefix is empty
 *
 * @throws NullPointerException If prefix is null, or if uri is null
 *   for a non-empty prefix
 * @throws XMLStreamException For invalid use of reserved prefixes/URIs
 */
public void setPrefix(String prefix, String uri)
    throws XMLStreamException
{
    if (prefix == null) {
        throw new NullPointerException("Can not pass null 'prefix' value");
    }
    // Empty prefix really means the default namespace
    if (prefix.length() == 0) {
        setDefaultNamespace(uri);
        return;
    }
    if (uri == null) {
        throw new NullPointerException("Can not pass null 'uri' value");
    }

    if (prefix.equals("xmlns")) {
        if (!uri.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            throwOutputError(ErrorConsts.ERR_NS_REDECL_XMLNS, uri);
        }
        // Even when valid, this binding is never recorded
        return;
    }
    if (prefix.equals("xml")) {
        if (!uri.equals(XMLConstants.XML_NS_URI)) {
            throwOutputError(ErrorConsts.ERR_NS_REDECL_XML, uri);
        }
    } else if (uri.equals(XMLConstants.XML_NS_URI)) {
        // ordinary prefix, but one of the reserved URIs
        throwOutputError(ErrorConsts.ERR_NS_REDECL_XML_URI, prefix);
    } else if (uri.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
        throwOutputError(ErrorConsts.ERR_NS_REDECL_XMLNS_URI, prefix);
    }

    // Map is created lazily, on first suggestion
    if (mSuggestedPrefixes == null) {
        mSuggestedPrefixes = new HashMap(16);
    }
    mSuggestedPrefixes.put(uri, prefix);
}
/**
 * Writes an attribute with no namespace and no prefix on the
 * currently open start element; delegates to outputAttribute.
 */
public void writeAttribute(String localName, String value)
throws XMLStreamException
{
outputAttribute(null, null, localName, value);
}
/**
 * Writes an attribute in the given namespace, without specifying
 * a prefix (null is passed as the prefix to outputAttribute).
 */
public void writeAttribute(String nsURI, String localName, String value)
throws XMLStreamException
{
outputAttribute(nsURI, null, localName, value);
}
/**
 * Writes an attribute with explicit prefix and namespace URI.
 * Note that outputAttribute takes the URI before the prefix, so
 * the two arguments are swapped in the call.
 */
public void writeAttribute(String prefix, String nsURI, String localName, String value)
throws XMLStreamException
{
outputAttribute(nsURI, prefix, localName, value);
}
//public void writeCData(String data)
//public void writeCharacters(char[] text, int start, int len)
//public void writeCharacters(String text)
//public void writeComment(String data)
/**
 * Writes a default namespace declaration ("xmlns" attribute) on the
 * currently open start element, and records the URI as the suggested
 * default namespace.
 *
 * @param nsURI URI to declare as the default namespace
 *
 * @throws IllegalStateException If there is no open start element
 */
public void writeDefaultNamespace(String nsURI)
{
    final DOMOutputElement target = mOpenElement;
    if (target == null) {
        throw new IllegalStateException("No currently open START_ELEMENT, cannot write attribute");
    }
    setDefaultNamespace(nsURI);
    target.addAttribute(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, "xmlns", nsURI);
}
//public void writeDTD(String dtd)
/**
 * Writes an empty element that has no namespace; delegates to the
 * two-argument variant with null namespace URI.
 */
public void writeEmptyElement(String localName)
    throws XMLStreamException
{
    writeEmptyElement(null, localName);
}
/**
 * Writes an empty element in the given namespace, with no explicit
 * prefix.
 */
public void writeEmptyElement(String nsURI, String localName)
    throws XMLStreamException
{
    /* Can not simply call writeStartElement(): an empty element
     * only becomes the open element, never the current (parent) one.
     */
    createStartElem(nsURI, null, localName, true);
}
/**
 * Writes an empty element with explicit prefix and namespace URI.
 */
public void writeEmptyElement(String prefix, String localName, String nsURI)
    throws XMLStreamException
{
    // null would mean "dont care" when repairing; use "" (no prefix) instead
    final String actualPrefix = (prefix == null) ? "" : prefix;
    createStartElem(nsURI, actualPrefix, localName, true);
}
/**
 * Marks the end of output by clearing both the open element and the
 * current element references.
 */
public void writeEndDocument()
{
    mOpenElement = null;
    mCurrElem = null;
}
/**
 * Closes the current element by moving up to its parent.
 *
 * @throws IllegalStateException If there is no element to close
 *   (no current element, or current element is the root)
 */
public void writeEndElement()
{
    final boolean nothingToClose = (mCurrElem == null) || mCurrElem.isRoot();
    if (nothingToClose) {
        throw new IllegalStateException("No open start element to close");
    }
    // Step up one level; whatever start element was open is now closed
    mCurrElem = mCurrElem.getParent();
    mOpenElement = null;
}
//public void writeEntityRef(String name)
/**
 * Writes a namespace declaration on the currently open start element
 * and records the binding in that element's scope. A null or empty
 * prefix is handled as a default namespace declaration.
 *
 * @param prefix Prefix to bind; null or empty means default namespace
 * @param nsURI Namespace URI to bind the prefix to
 *
 * @throws XMLStreamException If the writer is not namespace-aware
 * @throws IllegalStateException If there is no open start element
 *   (thrown by outputAttribute / writeDefaultNamespace)
 */
public void writeNamespace(String prefix, String nsURI) throws XMLStreamException
{
    if (prefix == null || prefix.length() == 0) {
        writeDefaultNamespace(nsURI);
        return;
    }
    if (!mNsAware) {
        throwOutputError("Can not write namespaces with non-namespace writer.");
    }
    outputAttribute(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, "xmlns", prefix, nsURI);
    /* The binding must be scoped to the element that actually carries
     * the xmlns attribute, which is the open element. The current
     * element is a different one (the parent) for empty elements and
     * while a start element is still being constructed; binding there
     * would make the prefix look declared for later siblings.
     * outputAttribute() has already verified that an open element exists.
     */
    mOpenElement.addPrefix(prefix, nsURI);
}
//public void writeProcessingInstruction(String target)
//public void writeProcessingInstruction(String target, String data)
//public void writeStartDocument()
//public void writeStartDocument(String version)
//public void writeStartDocument(String encoding, String version)
/**
 * Writes a start element that has no namespace; delegates to the
 * two-argument variant with null namespace URI.
 */
public void writeStartElement(String localName)
throws XMLStreamException
{
writeStartElement(null, localName);
}
/**
 * Writes a start element in the given namespace without an explicit
 * prefix (null is passed as the prefix to createStartElem).
 */
public void writeStartElement(String nsURI, String localName)
throws XMLStreamException
{
createStartElem(nsURI, null, localName, false);
}
/**
 * Writes a start element with explicit prefix and namespace URI.
 * Unlike the matching writeEmptyElement variant, a null prefix is
 * passed through to createStartElem as is.
 */
public void writeStartElement(String prefix, String localName, String nsURI)
throws XMLStreamException
{
createStartElem(nsURI, prefix, localName, false);
}
/*
///////////////////////////////////////////////////////////
// XMLStreamWriter2 API (Stax2 v3.0):
// additional accessors
///////////////////////////////////////////////////////////
*/
//public XMLStreamLocation2 getLocation()
//public String getEncoding()
/**
 * Checks whether the named property is supported, by asking the
 * configuration object.
 *
 * @param name Name of the property to check
 */
public boolean isPropertySupported(String name)
{
// !!! TBI: not all these properties are really supported
return mConfig.isPropertySupported(name);
}
/**
 * Sets the named property by delegating to the configuration object.
 *
 * @param name Name of the property to set
 * @param value Value to set
 * @return Whatever the configuration's setProperty returns
 */
public boolean setProperty(String name, Object value)
{
/* Note: can not call local method, since it'll return false for
* recognized but non-mutable properties
*/
return mConfig.setProperty(name, value);
}
/*
///////////////////////////////////////////////////////////
// XMLStreamWriter2 API (Stax2 v2.0):
// extended write methods
///////////////////////////////////////////////////////////
*/
//public void writeCData(char[] text, int start, int len)
/**
 * DTD output is not supported by this writer: the call either fails
 * the state check below or ends in reportUnsupported(). None of the
 * arguments are used.
 *
 * @throws IllegalStateException If a current element is set
 */
public void writeDTD(String rootName, String systemId, String publicId,
String internalSubset)
throws XMLStreamException
{
/* Alas: although we can create a DocumentType object, there
* doesn't seem to be a way to attach it in DOM-2!
*/
// NOTE(review): the constructor assigns mCurrElem on every non-throwing
// path, so this check looks like it passes only after writeEndDocument()
// has nulled it -- confirm whether a "no real element yet" test was meant.
if (mCurrElem != null) {
throw new IllegalStateException("Operation only allowed to the document before adding root element");
}
reportUnsupported("writeDTD()");
}
//public void writeFullEndElement() throws XMLStreamException
//public void writeSpace(char[] text, int start, int len)
//public void writeSpace(String text)
//public void writeStartDocument(String version, String encoding, boolean standAlone)
/*
///////////////////////////////////////////////////////////
// XMLStreamWriter2 API (Stax2 v2.0): validation
///////////////////////////////////////////////////////////
*/
//public XMLValidator validateAgainst(XMLValidationSchema schema)
//public XMLValidator stopValidatingAgainst(XMLValidationSchema schema)
//public XMLValidator stopValidatingAgainst(XMLValidator validator)
//public ValidationProblemHandler setValidationProblemHandler(ValidationProblemHandler h)
/*
///////////////////////////////////////////////////////////
// Impls of abstract methods from base class
///////////////////////////////////////////////////////////
*/
/**
 * Appends the given node under the current element, and then marks
 * the start element (if any was open) as no longer open.
 *
 * @param n Node to append
 */
protected void appendLeaf(Node n)
throws IllegalStateException
{
mCurrElem.appendNode(n);
// only cleared once the append has succeeded
mOpenElement = null;
}
/*
///////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////
*/
/* Note: copied from regular RepairingNsStreamWriter#writeStartOrEmpty
* (and its non-repairing counterpart).
*/
/**
 * Method called by all start element write methods. Creates the DOM
 * element, attaches it under the current element, and makes it the
 * open element; unless it is an empty element, it also becomes the
 * current element.
 *
 * @param nsURI Namespace URI to use: null and empty String denote 'no namespace'
 * @param prefix Prefix to use; null means "not specified", empty
 *   String means "no prefix"
 * @param localName Local name of the element
 * @param isEmpty True for an empty element (which will not become
 *   the parent of following content)
 *
 * @throws XMLStreamException If a namespace URI is given in
 *   non-namespace mode, or (non-repairing mode) no prefix can be
 *   found for the namespace
 */
protected void createStartElem(String nsURI, String prefix, String localName, boolean isEmpty)
    throws XMLStreamException
{
    DOMOutputElement elem;

    if (!mNsAware) {
        if(nsURI != null && nsURI.length() > 0) {
            throwOutputError("Can not specify non-empty uri/prefix in non-namespace mode");
        }
        elem = mCurrElem.createAndAttachChild(mDocument.createElement(localName));
    } else {
        if (mNsRepairing) {
            String actPrefix = validateElemPrefix(prefix, nsURI, mCurrElem);
            if (actPrefix != null) { // fine, an existing binding we can use:
                if (actPrefix.length() != 0) {
                    elem = mCurrElem.createAndAttachChild(mDocument.createElementNS(nsURI, actPrefix+":"+localName));
                } else {
                    elem = mCurrElem.createAndAttachChild(mDocument.createElementNS(nsURI, localName));
                }
            } else { // nah, need to create a new binding...
                /* Need to ensure that we'll pass "" as prefix, not null,
                 * so it is understood as "I want to use the default NS",
                 * not as "whatever prefix, I don't care"
                 */
                if (prefix == null) {
                    prefix = "";
                }
                actPrefix = generateElemPrefix(prefix, nsURI, mCurrElem);
                boolean hasPrefix = (actPrefix.length() != 0);
                if (hasPrefix) {
                    localName = actPrefix + ":" + localName;
                }
                elem = mCurrElem.createAndAttachChild(mDocument.createElementNS(nsURI, localName));
                /* Hmmh. writeNamespace method requires open element
                 * to be defined. So we'll need to set it first
                 * (will be set again at a later point -- would be
                 * good to refactor this method into separate
                 * sub-classes or so)
                 */
                mOpenElement = elem;
                // Need to add new ns declaration as well
                if (hasPrefix) {
                    writeNamespace(actPrefix, nsURI);
                    elem.addPrefix(actPrefix, nsURI);
                } else {
                    writeDefaultNamespace(nsURI);
                    elem.setDefaultNsUri(nsURI);
                }
            }
        } else {
            /* Non-repairing; if non-null prefix (including "" to
             * indicate "no prefix") passed, use as is, otherwise
             * try to locate the prefix if got namespace.
             */
            if (prefix == null && nsURI != null && nsURI.length() > 0) {
                /* nsURI is known to be non-null and non-empty here, so
                 * no null-to-empty normalization is needed before lookup.
                 */
                prefix = (mSuggestedPrefixes == null) ? null : (String) mSuggestedPrefixes.get(nsURI);
                if (prefix == null) {
                    throwOutputError("Can not find prefix for namespace \""+nsURI+"\"");
                }
            }
            if (prefix != null && prefix.length() != 0) {
                localName = prefix + ":" +localName;
            }
            elem = mCurrElem.createAndAttachChild(mDocument.createElementNS(nsURI, localName));
        }
    }
    /* Got the element; need to make it the open element, and
     * if it's not an (explicit) empty element, current element as well
     */
    mOpenElement = elem;
    if (!isEmpty) {
        mCurrElem = elem;
    }
}
/**
 * Adds an attribute to the currently open start element. In
 * namespace-repairing mode the prefix is first resolved (or created)
 * for the namespace; a non-empty prefix is then prepended to the
 * local name to form the qualified name.
 *
 * @param nsURI Namespace URI of the attribute, if any
 * @param prefix Prefix of the attribute, if any
 * @param localName Local name of the attribute
 * @param value Value of the attribute
 *
 * @throws IllegalStateException If there is no open start element
 */
protected void outputAttribute(String nsURI, String prefix, String localName, String value)
    throws XMLStreamException
{
    if (mOpenElement == null) {
        throw new IllegalStateException("No currently open START_ELEMENT, cannot write attribute");
    }
    if (mNsAware && mNsRepairing) {
        prefix = findOrCreateAttrPrefix(prefix, nsURI, mOpenElement);
    }
    // Qualified name is built the same way in both modes
    final String qname;
    if (prefix != null && prefix.length() > 0) {
        qname = prefix + ":" + localName;
    } else {
        qname = localName;
    }
    if (mNsAware) {
        mOpenElement.addAttribute(nsURI, qname, value);
    } else { // non-ns, simple
        mOpenElement.addAttribute(qname, value);
    }
}
/**
 * Checks whether the given prefix can be used as is for an element
 * in the given namespace, within the scope of the given element.
 *
 * @return Prefix to use ("" for no prefix) if the existing bindings
 *   are fine; null if a new binding needs to be created
 */
private final String validateElemPrefix(String prefix, String nsURI,
        DOMOutputElement elem)
    throws XMLStreamException
{
    /* 06-Feb-2005, TSa: The "empty" (or missing) namespace needs
     *   special care: it is only usable if no non-empty default
     *   namespace is currently in effect.
     */
    final boolean noNamespace = (nsURI == null) || (nsURI.length() == 0);
    if (noNamespace) {
        String defaultUri = elem.getDefaultNsUri();
        boolean defaultIsEmpty = (defaultUri == null) || (defaultUri.length() == 0);
        // if not empty, default namespace has to be re-bound by caller
        return defaultIsEmpty ? "" : null;
    }
    if (elem.isPrefixValid(prefix, nsURI, true) != DOMOutputElement.PREFIX_OK) {
        return null;
    }
    return prefix;
}
/*
///////////////////////////////////////////////////////////
// Internal methods
///////////////////////////////////////////////////////////
*/
/**
 * Method called to find an existing prefix for the given namespace,
 * if any exists in the scope. If one is found, it's returned (including
 * "" for the current default namespace); if not, null is returned.
 *
 * @param nsURI URI of namespace for which we need a prefix
 * @param elem Element whose default namespace is checked when the
 *   namespace URI is empty
 */
protected final String findElemPrefix(String nsURI, DOMOutputElement elem)
    throws XMLStreamException
{
    final boolean emptyNs = (nsURI == null) || (nsURI.length() == 0);
    if (!emptyNs) {
        // NOTE(review): lookup is done via mCurrElem, not the 'elem'
        // argument -- confirm callers always pass the current element.
        return mCurrElem.getPrefix(nsURI);
    }
    /* Empty namespace URI can only be bound to the empty prefix; if
     * a non-empty default namespace is in effect, it has to be
     * re-bound (but not here).
     */
    String defaultUri = elem.getDefaultNsUri();
    if (defaultUri == null || defaultUri.length() == 0) {
        return "";
    }
    return null;
}
/**
 * Method called when no usable existing binding was found for an
 * element's namespace, to choose the prefix to bind. The binding
 * itself is left for the caller to make.
 *
 * @param suggPrefix Prefix caller would like to use, if any
 * @param nsURI URI of namespace for which we need a prefix
 * @param elem Element used for generating a new mapping, if needed
 *
 * @return Prefix to use; "" for the empty or default namespace
 */
protected final String generateElemPrefix(String suggPrefix, String nsURI,
        DOMOutputElement elem)
    throws XMLStreamException
{
    // The empty namespace can only ever go with the empty prefix
    if (nsURI == null || nsURI.length() == 0) {
        return "";
    }
    /* With elements the preferred prefix can ALWAYS be used, since
     * it can mask preceding bindings.
     */
    if (suggPrefix != null) {
        return suggPrefix;
    }
    // Was this URI suggested as the default namespace?
    if (mSuggestedDefNs != null && mSuggestedDefNs.equals(nsURI)) {
        return "";
    }
    // Or has a prefix been suggested for it?
    String preferred = null;
    if (mSuggestedPrefixes != null) {
        preferred = (String) mSuggestedPrefixes.get(nsURI);
    }
    if (preferred != null) {
        return preferred;
    }
    /* 16-Oct-2005, TSa: Two choices here: elements could always try
     *   to override the default namespace, or a new prefix can be
     *   generated. The latter is done, for now.
     */
    if (mAutoNsSeq == null) {
        mAutoNsSeq = new int[] { 1 };
    }
    return elem.generateMapping(mAutomaticNsPrefix, nsURI, mAutoNsSeq);
}
/**
 * Method called to somehow find a prefix for given namespace, to be
 * used for an attribute of the open start element; either use an
 * existing one, or generate a new one. If a new mapping needs to be
 * generated, it will also be automatically bound, and necessary
 * namespace declaration output.
 *
 * @param suggPrefix Suggested prefix to bind, if any; may be null
 * to indicate "no preference"
 * @param nsURI URI of namespace for which we need a prefix
 * @param elem Currently open start element, on which the attribute
 * will be added.
 *
 * @return Prefix to use for the attribute; null when the namespace
 * URI is null or empty (attribute is in no namespace)
 */
protected final String findOrCreateAttrPrefix(String suggPrefix, String nsURI,
DOMOutputElement elem)
throws XMLStreamException
{
if (nsURI == null || nsURI.length() == 0) {
/* Attributes never use the default namespace; missing
* prefix always leads to the empty ns... so nothing
* special is needed here.
*/
return null;
}
// Maybe the suggested prefix is properly bound?
if (suggPrefix != null) {
int status = elem.isPrefixValid(suggPrefix, nsURI, false);
if (status == OutputElementBase.PREFIX_OK) {
return suggPrefix;
}
/* Otherwise, if the prefix is unbound, let's just bind
* it -- if caller specified a prefix, it probably prefers
* binding that prefix even if another prefix already existed?
* The remaining case (already bound to another URI) we don't
* want to touch, at least not yet: it may or not be safe
* to change binding, so let's just not try it.
*/
if (status == OutputElementBase.PREFIX_UNBOUND) {
elem.addPrefix(suggPrefix, nsURI);
writeNamespace(suggPrefix, nsURI);
return suggPrefix;
}
}
// If not, perhaps there's another existing binding available?
String prefix = elem.getExplicitPrefix(nsURI);
if (prefix != null) { // already had a mapping for the URI... cool.
return prefix;
}
/* Nope, need to create one. First, let's see if there's a
* preference...
*/
if (suggPrefix != null) {
prefix = suggPrefix;
} else if (mSuggestedPrefixes != null) {
prefix = (String) mSuggestedPrefixes.get(nsURI);
// note: def ns is never added to suggested prefix map
}
if (prefix != null) {
/* Can not use default namespace for attributes.
* Also, re-binding is tricky for attributes; can't
* re-bind anything that's bound on this scope... or
* used in this scope. So, to simplify life, let's not
* re-bind anything for attributes.
*/
if (prefix.length() == 0
|| (elem.getNamespaceURI(prefix) != null)) {
prefix = null;
}
}
if (prefix == null) {
// sequence counter for generated prefixes is created lazily
if (mAutoNsSeq == null) {
mAutoNsSeq = new int[1];
mAutoNsSeq[0] = 1;
}
// NOTE(review): mapping is generated via mCurrElem rather than 'elem';
// the two differ when the open element is an empty element -- confirm
// this is intended.
prefix = mCurrElem.generateMapping(mAutomaticNsPrefix, nsURI,
mAutoNsSeq);
}
// Ok; so far so good: let's now bind and output the namespace:
elem.addPrefix(prefix, nsURI);
writeNamespace(prefix, nsURI);
return prefix;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/ 0000755 0001750 0001750 00000000000 11756143457 020303 5 ustar giovanni giovanni woodstox-4.1.3/src/java/com/ctc/wstx/io/ReaderBootstrapper.java 0000644 0001750 0001750 00000031075 11745427074 024761 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
import java.io.*;
import java.text.MessageFormat;
import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.XMLValidationProblem;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.ParsingErrorMsgs;
import com.ctc.wstx.exc.*;
import com.ctc.wstx.util.StringUtil;
/**
* Input bootstrap class used when input comes from a Reader; in this case,
* encoding is already known, and thus encoding from XML declaration (if
* any) is only double-checked, not really used.
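* (Text missing here: the remainder of this comment and the code that
* followed it were lost; what remains is the tail of the description of
* the byte pointer field below, which referred to mByteBufferEnd.)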
*/
protected int mBytePtr;
/**
* Pointer to the end marker, that is, position one after the last
* valid available byte.
*/
protected int mByteBufferEnd;
/**
* Flag that indicates whether the read buffer is to be recycled
* when Reader is closed or not.
*/
private final boolean mRecycleBuffer;
/*
////////////////////////////////////////
// Life-cycle
////////////////////////////////////////
*/
/**
 * Stores the input stream, the byte buffer and its pointers for use
 * by the decoding sub-class.
 *
 * @param cfg Reader configuration; used by freeBuffers() for
 *   returning the buffer
 * @param in Stream to read bytes from
 * @param buf Byte buffer to use
 * @param ptr Initial value of the byte pointer
 * @param len Initial value of the buffer end marker (note: stored
 *   as is, that is, as an end position rather than a length)
 * @param recycleBuffer Whether the buffer is to be recycled when
 *   buffers are freed
 */
protected BaseReader(ReaderConfig cfg, InputStream in, byte[] buf, int ptr, int len,
        boolean recycleBuffer)
{
    // input source and configuration
    mIn = in;
    mConfig = cfg;
    // buffer state
    mByteBuffer = buf;
    mByteBufferEnd = len;
    mBytePtr = ptr;
    mRecycleBuffer = recycleBuffer;
}
/*
////////////////////////////////////////
// Configuration
////////////////////////////////////////
*/
/**
* Method that can be called to indicate the xml conformance used
* when reading content using this reader. Some of the character
* validity checks need to be done at reader level, and sometimes
* they depend on xml level (for example, xml 1.1 has new linefeeds
* and both more and less restricted characters).
*
* @param xmlVersion Indicator of the xml version to comply with;
*   the accepted values are defined by implementing sub-classes
*/
public abstract void setXmlCompliancy(int xmlVersion);
/**
* Method that can be used to see if we can actually modify the
* underlying buffer. This is the case if we are managing the buffer,
* but not if it was just given to us.
*
* @return Value of the buffer recycling flag given to the constructor
*/
protected final boolean canModifyBuffer()
{
return mRecycleBuffer;
}
/*
////////////////////////////////////////
// Reader API
////////////////////////////////////////
*/
/**
 * Closes the underlying stream, after releasing buffers. Calls made
 * after the stream reference has been cleared do nothing.
 *
 * @throws IOException If closing the underlying stream fails
 */
public void close()
    throws IOException
{
    final InputStream stream = mIn;
    if (stream == null) { // already closed
        return;
    }
    // clear the reference first, so that repeated calls are no-ops
    mIn = null;
    freeBuffers();
    stream.close();
}
char[] mTmpBuf = null;
/**
 * Single-character read, implemented on top of the array-based read
 * using a lazily allocated one-element buffer. Not expected to be
 * called by Woodstox code itself, but kept reasonably efficient
 * just in case.
 *
 * @return Character read, or -1 if none could be read
 */
public int read()
    throws IOException
{
    char[] single = mTmpBuf;
    if (single == null) {
        single = new char[1];
        mTmpBuf = single;
    }
    final int got = read(single, 0, 1);
    if (got >= 1) {
        return single[0];
    }
    return -1;
}
/*
////////////////////////////////////////
// Internal/package methods:
////////////////////////////////////////
*/
/** Accessor for the underlying input stream; null once close() has cleared it. */
protected final InputStream getStream() { return mIn; }
/**
 * Method for reading as many bytes from the underlying stream as
 * possible (that fit in the buffer), to the beginning of the buffer.
 * Buffer pointers are reset before the read is attempted.
 *
 * @return Value returned by the stream's read; -1 if there is no
 *   stream to read from
 */
protected final int readBytes()
    throws IOException
{
    mBytePtr = 0;
    mByteBufferEnd = 0;
    if (mIn == null) {
        return -1;
    }
    final int count = mIn.read(mByteBuffer, 0, mByteBuffer.length);
    // end marker is only moved if something was actually read
    if (count > 0) {
        mByteBufferEnd = count;
    }
    return count;
}
/**
 * Method for reading as many bytes from the underlying stream as
 * possible (that fit in the buffer considering offset), to the
 * specified offset. The byte pointer is not modified (it is assumed
 * to be 'offset'); the end marker is advanced by the amount read.
 *
 * @param offset Position in the buffer to read bytes to
 *
 * @return Number of bytes read, if any; -1 to indicate none available
 *   (that is, end of input)
 */
protected final int readBytesAt(int offset)
    throws IOException
{
    if (mIn == null) {
        return -1;
    }
    final int room = mByteBuffer.length - offset;
    final int count = mIn.read(mByteBuffer, offset, room);
    if (count > 0) {
        mByteBufferEnd += count;
    }
    return count;
}
/**
 * This method should be called along with (or instead of) normal
 * close. After calling this method, no further reads should be tried.
 * If buffer recycling is enabled, the byte buffer is detached and
 * handed back to the configuration object (if there is one).
 */
public final void freeBuffers()
{
    if (!mRecycleBuffer) {
        return;
    }
    /* 11-Apr-2005, TSa: The buffer can be released now, to be
     *   recycled by the next stream reader instantiated by this
     *   thread (if any).
     */
    final byte[] released = mByteBuffer;
    if (released == null) { // nothing (left) to free
        return;
    }
    mByteBuffer = null;
    if (mConfig != null) {
        mConfig.freeFullBBuffer(released);
    }
}
/**
 * Reports read arguments that fall outside of the target array.
 *
 * @param cbuf Target array passed to read
 * @param start Start offset passed to read
 * @param len Length passed to read
 *
 * @throws ArrayIndexOutOfBoundsException Always
 */
protected void reportBounds(char[] cbuf, int start, int len)
    throws IOException
{
    final String desc = "read(buf,"+start+","+len+"), cbuf["+cbuf.length+"]";
    throw new ArrayIndexOutOfBoundsException(desc);
}
/**
 * Reports an input stream that returned zero bytes from a read.
 *
 * @throws IOException Always
 */
protected void reportStrangeStream()
    throws IOException
{
    final String msg = "Strange I/O stream, returned 0 bytes on read";
    throw new IOException(msg);
}
/**
 * Reports a character that xml 1.1 only allows via character entities.
 *
 * @param value Code of the offending character
 * @param bytePos Byte position to include in the message
 * @param charPos Character position to include in the message
 *
 * @throws CharConversionException Always
 */
protected void reportInvalidXml11(int value, int bytePos, int charPos)
    throws IOException
{
    final String hex = Integer.toHexString(value);
    final String msg = "Invalid character 0x" + hex
        +", can only be included in xml 1.1 using character entities (at char #"+charPos+", byte #"+bytePos+")";
    throw new CharConversionException(msg);
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/WstxInputLocation.java 0000644 0001750 0001750 00000011734 11745427074 024630 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
import java.io.Serializable;
import javax.xml.stream.Location;
import org.codehaus.stax2.XMLStreamLocation2;
import com.ctc.wstx.util.StringUtil;
/**
 * Basic implementation of {@link Location}, used by Wstx readers.
 * Instances are immutable, apart from the lazily built (and
 * transient) description String returned by {@link #toString}.
 */
public class WstxInputLocation
    implements Serializable, XMLStreamLocation2
{
    private static final long serialVersionUID = 1L;

    private final static WstxInputLocation sEmptyLocation
        = new WstxInputLocation(null, "", "", -1, -1, -1);

    /**
     * Enclosing (parent) input location; location from which current
     * location is derived.
     */
    final protected WstxInputLocation mContext;

    final protected String mPublicId, mSystemId;

    final protected int mCharOffset;
    final protected int mCol, mRow;

    /** Lazily constructed description; see {@link #toString}. */
    transient protected String mDesc = null;

    /**
     * @param ctxt Enclosing input location, if any
     * @param pubId Public id of the input source, if known
     * @param sysId System id of the input source, if known
     * @param charOffset Character offset; negative values are taken
     *   to be the result of an overflow
     * @param row Row (line) number
     * @param col Column number
     */
    public WstxInputLocation(WstxInputLocation ctxt,
                             String pubId, String sysId,
                             int charOffset, int row, int col)
    {
        mContext = ctxt;
        mPublicId = pubId;
        mSystemId = sysId;
        /* Overflow? Can obviously only handle limited range of overflows,
         * but let's do that at least?
         */
        mCharOffset = (charOffset < 0) ? Integer.MAX_VALUE : charOffset;
        mCol = col;
        mRow = row;
    }

    /** Returns the shared placeholder location (empty ids, row and column -1). */
    public static WstxInputLocation getEmptyLocation() {
        return sEmptyLocation;
    }

    public int getCharacterOffset() { return mCharOffset; }
    public int getColumnNumber() { return mCol; }
    public int getLineNumber() { return mRow; }

    public String getPublicId() { return mPublicId; }
    public String getSystemId() { return mSystemId; }

    /*
    ////////////////////////////////////////////////////////
    // StAX 2 API:
    ////////////////////////////////////////////////////////
     */

    public XMLStreamLocation2 getContext() { return mContext; }

    /*
    ////////////////////////////////////////////////////////
    // Overridden standard methods
    ////////////////////////////////////////////////////////
     */

    /**
     * Returns a description of this location (and of its enclosing
     * locations, if any); built on first call and then cached.
     */
    public String toString()
    {
        if (mDesc == null) {
            StringBuffer sb;
            if (mContext != null) {
                sb = new StringBuffer(200);
            } else {
                sb = new StringBuffer(80);
            }
            appendDesc(sb);
            mDesc = sb.toString();
        }
        return mDesc;
    }

    /**
     * Hash code is calculated from exactly the properties that
     * {@link #equals} compares (character offset, public and system
     * ids), so that equal locations always have equal hash codes.
     * Null and empty ids both contribute zero, matching the way
     * equals treats them as the same.
     */
    public int hashCode() {
        int hash = mCharOffset;
        if (mPublicId != null) {
            hash ^= mPublicId.hashCode();
        }
        if (mSystemId != null) {
            hash ^= 31 * mSystemId.hashCode();
        }
        return hash;
    }

    /**
     * Two locations are considered equal if they have the same
     * character offset, public id and system id; null and empty ids
     * are considered the same. Row and column are not compared.
     */
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Location)) {
            return false;
        }
        Location other = (Location) o;
        // char offset should be good enough, without row/col:
        if (other.getCharacterOffset() != getCharacterOffset()) {
            return false;
        }
        return sameId(other.getPublicId(), mPublicId)
            && sameId(other.getSystemId(), mSystemId);
    }

    /*
    ////////////////////////////////////////////////////////
    // Internal methods:
    ////////////////////////////////////////////////////////
     */

    /**
     * Compares two ids, with null normalized to empty String on BOTH
     * sides (normalizing just one side would make a location with a
     * null id unequal even to itself).
     */
    private static boolean sameId(String id1, String id2)
    {
        if (id1 == null) {
            id1 = "";
        }
        if (id2 == null) {
            id2 = "";
        }
        return id1.equals(id2);
    }

    /**
     * Appends description of this location, followed by descriptions
     * of the enclosing locations (if any), each on its own line.
     */
    private void appendDesc(StringBuffer sb)
    {
        String srcId;

        // system id is preferred over public id for identifying the source
        if (mSystemId != null) {
            sb.append("[row,col,system-id]: ");
            srcId = mSystemId;
        } else if (mPublicId != null) {
            sb.append("[row,col,public-id]: ");
            srcId = mPublicId;
        } else {
            sb.append("[row,col {unknown-source}]: ");
            srcId = null;
        }
        sb.append('[');
        sb.append(mRow);
        sb.append(',');
        sb.append(mCol);

        // Uncomment for testing, to see the char offset:
        //sb.append(" #").append(mCharOffset);
        //sb.append("{").append(System.identityHashCode(this)).append("}");

        if (srcId != null) {
            sb.append(',');
            sb.append('"');
            sb.append(srcId);
            sb.append('"');
        }
        sb.append(']');
        if (mContext != null) {
            StringUtil.appendLF(sb);
            sb.append(" from ");
            mContext.appendDesc(sb);
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/ReaderSource.java 0000644 0001750 0001750 00000014166 11745427074 023537 0 ustar giovanni giovanni package com.ctc.wstx.io;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.exc.WstxException;
/**
 * Input source that reads its character data via a {@link Reader},
 * using a character buffer allocated through the reader configuration.
 */
public class ReaderSource
    extends BaseInputSource
{
    final ReaderConfig mConfig;

    /**
     * Underlying Reader to read character data from
     */
    Reader mReader;

    /**
     * If true, will close the underlying Reader when this source is closed;
     * if false will leave it open.
     */
    final boolean mDoRealClose;

    // Initial location offsets; see setInputOffsets()
    int mInputProcessed = 0;
    int mInputRow = 1;
    int mInputRowStart = 0;

    public ReaderSource(ReaderConfig cfg, WstxInputSource parent, String fromEntity,
                        String pubId, String sysId, URL src,
                        Reader r, boolean realClose)
    {
        super(parent, fromEntity, pubId, sysId, src);
        mConfig = cfg;
        mReader = r;
        mDoRealClose = realClose;
        mBuffer = cfg.allocFullCBuffer(cfg.getInputBufferLength());
    }

    /**
     * Method called to change the default offsets this source has.
     * Generally done when the underlying Reader had been partially read
     * earlier (like reading the xml declaration before starting real
     * parsing).
     */
    public void setInputOffsets(int proc, int row, int rowStart)
    {
        mInputRowStart = rowStart;
        mInputRow = row;
        mInputProcessed = proc;
    }

    /**
     * Copies the initial offsets of this source (defaults, or the
     * values given via {@link #setInputOffsets}) to the reader.
     */
    protected void doInitInputLocation(WstxInputData reader)
    {
        reader.mCurrInputRowStart = mInputRowStart;
        reader.mCurrInputRow = mInputRow;
        reader.mCurrInputProcessed = mInputProcessed;
    }

    /**
     * Hard-coded for now: this source is only ever created for
     * external entities.
     */
    public boolean fromInternalEntity() {
        return false;
    }

    /**
     * Reads a new buffer-full of content, replacing whatever the
     * reader had.
     *
     * @return Number of characters read; -1 at end of input, or if
     *   this source has already been closed
     */
    public int readInto(WstxInputData reader)
        throws IOException, XMLStreamException
    {
        /* Reads after closing should not really be tried, but it may
         * be easier for caller not to have to track closure.
         */
        if (mBuffer == null) {
            return -1;
        }
        final int got = mReader.read(mBuffer, 0, mBuffer.length);
        if (got >= 1) {
            reader.mInputBuffer = mBuffer;
            reader.mInputPtr = 0;
            mInputLast = got;
            reader.mInputEnd = got;
            return got;
        }
        // Nothing read: first make sure caller can not access stale data
        mInputLast = 0;
        reader.mInputPtr = 0;
        reader.mInputEnd = 0;
        if (got == 0) {
            // Sanity check; never happens with correctly written Readers
            throw zeroCharsProblem(mBuffer.length);
        }
        return -1;
    }

    /**
     * Tries to make at least the given number of characters available,
     * first moving any unread content to the beginning of the buffer.
     *
     * @return True if at least <code>minAmount</code> characters are
     *   now available; false if input ended before that (or this
     *   source has already been closed)
     */
    public boolean readMore(WstxInputData reader, int minAmount)
        throws IOException, XMLStreamException
    {
        // As with readInto(), tolerate calls after closing
        if (mBuffer == null) {
            return false;
        }

        final int consumed = reader.mInputPtr;
        int available = mInputLast - consumed;

        /* The 'consumed' characters get dropped from the buffer, so
         * they count as processed; and since all offsets shift down
         * by the same amount, so does the row start marker.
         */
        reader.mCurrInputProcessed += consumed;
        reader.mCurrInputRowStart -= consumed;

        // Unread content is moved to the front
        if (available > 0) {
            System.arraycopy(mBuffer, consumed, mBuffer, 0, available);
            minAmount -= available;
        }
        reader.mInputBuffer = mBuffer;
        reader.mInputPtr = 0;
        mInputLast = available;

        while (minAmount > 0) {
            final int room = mBuffer.length - available;
            final int got = mReader.read(mBuffer, available, room);
            if (got < 1) {
                if (got == 0) { // sanity check:
                    throw zeroCharsProblem(room);
                }
                mInputLast = available;
                reader.mInputEnd = available;
                return false;
            }
            available += got;
            minAmount -= got;
        }
        mInputLast = available;
        reader.mInputEnd = available;
        return true;
    }

    /**
     * Releases the buffer, and closes the underlying Reader if this
     * source was configured to really close it. Safe to call more
     * than once.
     */
    public void close()
        throws IOException
    {
        // Buffer is nulled by the first close; later calls are no-ops
        if (mBuffer == null) {
            return;
        }
        closeAndRecycle(mDoRealClose);
    }

    /**
     * Releases the buffer (if not yet released) and always closes the
     * underlying Reader. Safe to call more than once.
     */
    public void closeCompletely()
        throws IOException
    {
        /* Checking the Reader, not the buffer: buffer may already
         * have been released by an earlier call to close()
         */
        if (mReader == null) {
            return;
        }
        closeAndRecycle(true);
    }

    /**
     * Hands the buffer back for recycling, lets the Reader free its
     * buffers if it is a {@link BaseReader}, and optionally closes
     * the Reader.
     */
    private void closeAndRecycle(boolean fullClose)
        throws IOException
    {
        final char[] released = mBuffer;
        if (released != null) {
            mBuffer = null;
            mConfig.freeFullCBuffer(released);
        }
        final Reader r = mReader;
        if (r == null) {
            return;
        }
        if (r instanceof BaseReader) {
            ((BaseReader) r).freeBuffers();
        }
        if (fullClose) {
            mReader = null;
            r.close();
        }
    }

    /**
     * Builds the exception used when the Reader returns zero
     * characters from a read.
     */
    private WstxException zeroCharsProblem(int requested)
    {
        return new WstxException("Reader (of type "+mReader.getClass().getName()+") returned 0 characters, even when asked to read up to "+requested, getLocation());
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/io/WstxInputData.java 0000644 0001750 0001750 00000037550 11745427074 023735 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
import com.ctc.wstx.util.XmlChars;
/**
* Base class used by readers (specifically, by
* {@link com.ctc.wstx.sr.StreamScanner}, and its sub-classes)
* to encapsulate input buffer portion of the class. Philosophically
* this should probably be done via containment (composition), not
* sub-classing but for performance reason, this "core" class is generally
* extended from instead.
* ThreadLocal member of the owning class pointing to
* instance of this class through a SoftReference. The
* end result is a low-overhead GC-cleanable recycling: hopefully
* ideal for use by stream readers.
*
*
*/
public class MergedNsContext
extends BaseNsContext
{
final NamespaceContext mParentCtxt;
/**
* List of {@link Namespace} instances.
*/
final List mNamespaces;
Map mNsByPrefix = null;
Map mNsByURI = null;
/**
 * @param parentCtxt Context to delegate to for bindings not found
 *   locally; may be null
 * @param localNs Locally declared namespaces; null is taken to mean
 *   an empty list
 */
protected MergedNsContext(NamespaceContext parentCtxt, List localNs)
{
    mParentCtxt = parentCtxt;
    if (localNs != null) {
        mNamespaces = localNs;
    } else {
        mNamespaces = Collections.EMPTY_LIST;
    }
}
public static BaseNsContext construct(NamespaceContext parentCtxt,
List localNs)
{
return new MergedNsContext(parentCtxt, localNs);
}
/*
/////////////////////////////////////////////
// NamespaceContext API
/////////////////////////////////////////////
*/
public String doGetNamespaceURI(String prefix)
{
// Note: base class checks for 'known' problems and prefixes:
if (mNsByPrefix == null) {
mNsByPrefix = buildByPrefixMap();
}
Namespace ns = (Namespace) mNsByPrefix.get(prefix);
if (ns == null && mParentCtxt != null) {
return mParentCtxt.getNamespaceURI(prefix);
}
return (ns == null) ? null : ns.getNamespaceURI();
}
public String doGetPrefix(String nsURI)
{
// Note: base class checks for 'known' problems and prefixes:
if (mNsByURI == null) {
mNsByURI = buildByNsURIMap();
}
Namespace ns = (Namespace) mNsByURI.get(nsURI);
if (ns == null && mParentCtxt != null) {
return mParentCtxt.getPrefix(nsURI);
}
return (ns == null) ? null : ns.getPrefix();
}
public Iterator doGetPrefixes(String nsURI)
{
// Note: base class checks for 'known' problems and prefixes:
ArrayList l = null;
for (int i = 0, len = mNamespaces.size(); i < len; ++i) {
Namespace ns = (Namespace) mNamespaces.get(i);
String uri = ns.getNamespaceURI();
if (uri == null) {
uri = "";
}
if (uri.equals(nsURI)) {
if (l == null) {
l = new ArrayList();
}
String prefix = ns.getPrefix();
l.add((prefix == null) ? "" : prefix);
}
}
if (mParentCtxt != null) {
Iterator it = mParentCtxt.getPrefixes(nsURI);
if (l == null) {
return it;
}
while (it.hasNext()) {
l.add(it.next());
}
}
return (l == null) ? EmptyIterator.getInstance() : l.iterator();
}
/*
/////////////////////////////////////////////
// Extended API
/////////////////////////////////////////////
*/
/**
* Method that returns information about namespace definition declared
* in this scope; not including ones declared in outer scopes.
*/
public Iterator getNamespaces()
{
return mNamespaces.iterator();
}
public void outputNamespaceDeclarations(Writer w) throws IOException
{
for (int i = 0, len = mNamespaces.size(); i < len; ++i) {
Namespace ns = (Namespace) mNamespaces.get(i);
w.write(' ');
w.write(XMLConstants.XMLNS_ATTRIBUTE);
if (!ns.isDefaultNamespaceDeclaration()) {
w.write(':');
w.write(ns.getPrefix());
}
w.write("=\"");
w.write(ns.getNamespaceURI());
w.write('"');
}
}
/**
* Method called by the matching start element class to
* output all namespace declarations active in current namespace
* scope, if any.
*/
public void outputNamespaceDeclarations(XMLStreamWriter w) throws XMLStreamException
{
for (int i = 0, len = mNamespaces.size(); i < len; ++i) {
Namespace ns = (Namespace) mNamespaces.get(i);
if (ns.isDefaultNamespaceDeclaration()) {
w.writeDefaultNamespace(ns.getNamespaceURI());
} else {
w.writeNamespace(ns.getPrefix(), ns.getNamespaceURI());
}
}
}
/*
/////////////////////////////////////////////
// Private methods:
/////////////////////////////////////////////
*/
private Map buildByPrefixMap()
{
int len = mNamespaces.size();
if (len == 0) {
return Collections.EMPTY_MAP;
}
LinkedHashMap m = new LinkedHashMap(1 + len + (len>>1));
for (int i = 0; i < len; ++i) {
Namespace ns = (Namespace) mNamespaces.get(i);
String prefix = ns.getPrefix();
if (prefix == null) { // shouldn't happen but...
prefix = "";
}
m.put(prefix, ns);
}
return m;
}
private Map buildByNsURIMap()
{
int len = mNamespaces.size();
if (len == 0) {
return Collections.EMPTY_MAP;
}
LinkedHashMap m = new LinkedHashMap(1 + len + (len>>1));
for (int i = 0; i < len; ++i) {
Namespace ns = (Namespace) mNamespaces.get(i);
String uri = ns.getNamespaceURI();
if (uri == null) { // shouldn't happen but...
uri = "";
}
m.put(uri, ns);
}
return m;
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/evt/DefaultEventAllocator.java 0000644 0001750 0001750 00000025101 11745427074 025561 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.evt;
import java.util.*;
import javax.xml.namespace.QName;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.*;
import javax.xml.stream.events.XMLEvent;
import javax.xml.stream.util.XMLEventAllocator;
import javax.xml.stream.util.XMLEventConsumer;
import org.codehaus.stax2.*;
import org.codehaus.stax2.ri.evt.*;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.dtd.DTDSubset;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.exc.WstxException;
import com.ctc.wstx.sr.ElemAttrs;
import com.ctc.wstx.sr.ElemCallback;
import com.ctc.wstx.sr.StreamReaderImpl;
import com.ctc.wstx.util.BaseNsContext;
/**
* Straight-forward implementation of {@link XMLEventAllocator}, to be
* used with Woodstox' event reader.
*mUndeclaredEntity
*/
private String mValue = null;
/**
* For now, let's only keep track of the first undeclared entity:
* can be extended if necessary.
*/
private UndeclaredEntity mUndeclaredEntity = null;
/*
////////////////////////////////////////////////////
// Life-cycle (creation, configuration)
////////////////////////////////////////////////////
*/
/**
 * Private constructor; instances are handed out via the static
 * <code>constructXxx</code> factory methods.
 *
 * @param defValueType Type of the default declaration; stored as is
 */
private DefaultAttrValue(int defValueType)
{
mDefValueType = defValueType;
}
/**
 * @return The shared <code>sImplied</code> instance (no new object is created)
 */
public static DefaultAttrValue constructImplied() { return sImplied; }
/**
 * @return The shared <code>sRequired</code> instance (no new object is created)
 */
public static DefaultAttrValue constructRequired() { return sRequired; }
/**
 * @return A new instance with default value type <code>DEF_FIXED</code>;
 *   the value itself is to be assigned later via {@link #setValue}
 */
public static DefaultAttrValue constructFixed() {
return new DefaultAttrValue(DEF_FIXED);
}
/**
 * @return A new instance with default value type <code>DEF_DEFAULT</code>;
 *   the value itself is to be assigned later via {@link #setValue}
 */
public static DefaultAttrValue constructOptional() {
return new DefaultAttrValue(DEF_DEFAULT);
}
/**
 * Replaces the stored default value String.
 *
 * @param v New value; stored as is, without any checks
 */
public void setValue(String v) {
mValue = v;
}
/**
 * Records a reference to an undeclared entity, flagged as a PE
 * (passes <code>true</code> as the "is PE" flag).
 *
 * @param name Name of the undeclared entity
 * @param loc Location to associate with the reference
 */
public void addUndeclaredPE(String name, Location loc)
{
addUndeclaredEntity(name, loc, true);
}
/**
 * Records a reference to an undeclared entity, flagged as a GE
 * (passes <code>false</code> as the "is PE" flag).
 *
 * @param name Name of the undeclared entity
 * @param loc Location to associate with the reference
 */
public void addUndeclaredGE(String name, Location loc)
{
addUndeclaredEntity(name, loc, false);
}
/**
 * Reports the recorded undeclared entity as a validation problem.
 *<p>
 * Note: the recorded entity is dereferenced without a null check, so
 * this must only be called when {@link #hasUndeclaredEntities} returns
 * true; otherwise a NullPointerException results.
 *
 * @param ctxt Context through which the problem is reported
 * @param dtd Validator to mark as the reporter of the problem
 */
public void reportUndeclared(ValidationContext ctxt, XMLValidator dtd)
throws XMLStreamException
{
mUndeclaredEntity.reportUndeclared(ctxt, dtd);
}
/*
////////////////////////////////////////////////////
// Accessors:
////////////////////////////////////////////////////
*/
/**
 * @return True if an undeclared entity reference has been recorded
 */
public boolean hasUndeclaredEntities() {
return (mUndeclaredEntity != null);
}
/**
 * @return Currently stored default value String (may be null if none
 *   has been set), regardless of whether undeclared entities were recorded
 */
public String getValue() {
return mValue;
}
/**
 * Accessor that only exposes the default value when no undeclared
 * entity was recorded for it.
 *
 * @return Stored default value String if there were no problems
 *   (no undeclared entities); null to indicate there were problems,
 *   in which case caller is to figure out the exact type of the problem
 *   and report it appropriately to the application.
 */
public String getValueIfOk()
{
    if (mUndeclaredEntity != null) {
        return null;
    }
    return mValue;
}
/**
 * @return True only for the shared <code>sRequired</code> instance
 *   (identity comparison)
 */
public boolean isRequired() {
return (this == sRequired);
}
/**
 * @return True if the default value type is <code>DEF_FIXED</code>
 */
public boolean isFixed() {
return (mDefValueType == DEF_FIXED);
}
/**
 * @return True if the default value type is either
 *   <code>DEF_DEFAULT</code> or <code>DEF_FIXED</code>
 */
public boolean hasDefaultValue() {
    final int type = mDefValueType;
    if (type == DEF_DEFAULT) {
        return true;
    }
    return (type == DEF_FIXED);
}
/**
 * Method used by the element to figure out if attribute needs "special"
 * checking; basically if it's required, and/or has a default value.
 * In both cases missing the attribute has specific consequences, either
 * exception or addition of a default value.
 *
 * @return False only for the shared <code>sImplied</code> instance;
 *   true for every other instance
 */
public boolean isSpecial() {
    // #IMPLIED is the one and only non-special case
    if (this == sImplied) {
        return false;
    }
    return true;
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * Records the first undeclared entity reference; any later ones are
 * silently ignored since only one is tracked.
 */
private void addUndeclaredEntity(String name, Location loc, boolean isPe)
{
    if (mUndeclaredEntity != null) { // already have one, keep the first
        return;
    }
    mUndeclaredEntity = new UndeclaredEntity(name, loc, isPe);
}
/*
////////////////////////////////////////////////////
// Helper class(es):
////////////////////////////////////////////////////
*/
/**
 * Simple immutable container for information about a reference to an
 * undeclared entity: its name, whether it was a parameter entity (PE)
 * or a general entity (GE), and the location to report.
 */
final static class UndeclaredEntity
{
    final String mName;
    final boolean mIsPe;
    final Location mLocation;

    UndeclaredEntity(String name, Location loc, boolean isPe)
    {
        mName = name;
        mIsPe = isPe;
        mLocation = loc;
    }

    /**
     * Constructs a fatal validation problem describing this undeclared
     * entity, and reports it through the given context.
     *
     * @param ctxt Context through which the problem is reported
     * @param dtd Validator that is set as the reporter of the problem
     */
    public void reportUndeclared(ValidationContext ctxt, XMLValidator dtd)
        throws XMLStreamException
    {
        /* The flag distinguishes parameter entities from general ones;
         * "parsed" would be misleading here, since general entities can
         * be parsed entities too.
         */
        String entityKind = mIsPe ? "parameter" : "general";
        String msg = MessageFormat.format(ErrorConsts.ERR_DTD_UNDECLARED_ENTITY,
                new Object[] { entityKind, mName });
        XMLValidationProblem prob = new XMLValidationProblem
            (mLocation, msg, XMLValidationProblem.SEVERITY_FATAL);
        prob.setReporter(dtd);
        ctxt.reportProblem(prob);
    }
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDEntityAttr.java 0000644 0001750 0001750 00000007045 11745427074 023761 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Specific attribute class for attributes of type ENTITY; that is,
 * attributes whose value has to be the name of an entity.
 */
public final class DTDEntityAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor; simply passes all arguments to the base class.
     */
    public DTDEntityAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
                         boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * @return A new instance that shares name, default value and settings
     *   with this one, but uses the given index
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDEntityAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType() {
        return TYPE_ENTITY;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the {@link DTDValidatorBase}
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *
     * @param v Validator through which problems are reported and the
     *   entity is looked up
     * @param cbuf Buffer that contains the attribute value
     * @param start Offset of the first character of the value
     * @param end Offset one past the last character of the value
     * @param normalize Whether the normalized value is to be returned
     *
     * @return Name of the matching entity if <code>normalize</code> is
     *   true and the entity was found; null otherwise
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        // Leading white space is not part of the name
        while (start < end && WstxInputData.isSpaceChar(cbuf[start])) {
            ++start;
        }

        // Empty value?
        if (start >= end) {
            return reportValidationProblem(v, "Empty ENTITY value");
        }
        --end; // so that it now points to the last char
        while (end > start && WstxInputData.isSpaceChar(cbuf[end])) {
            --end;
        }

        // Ok, need to check char validity, and also calc hash code:
        char c = cbuf[start];
        if (!WstxInputData.isNameStartChar(c, mCfgNsAware, mCfgXml11) && c != ':') {
            return reportInvalidChar(v, c, "not valid as the first ENTITY name character");
        }
        int hash = (int) c;

        for (int i = start+1; i <= end; ++i) {
            c = cbuf[i];
            if (!WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, c, "not valid as an ENTITY name character");
            }
            hash = (hash * 31) + (int) c;
        }

        EntityDecl ent = findEntityDecl(v, cbuf, start, (end - start + 1), hash);
        /* NOTE(review): findEntityDecl is not defined in this class;
         * presumably it reports a problem for unknown entities. Other
         * reporting calls above return a value instead of necessarily
         * throwing, so guard against a null result rather than fail
         * with a NullPointerException.
         */
        if (ent == null) {
            return null;
        }
        return normalize ? ent.getName() : null;
    }

    /**
     * Method called by the validator object
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     *
     * @param rep Reporter for problems; has to be a
     *   <code>MinimalDTDReader</code>, since it is cast to one to look
     *   up the entity
     * @param normalize Whether the stored default value is to be replaced
     *   with the validated name
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        String normStr = validateDefaultName(rep, normalize);
        if (normalize) {
            mDefValue.setValue(normStr);
        }

        // Ok, but was it declared?

        /* 03-Dec-2004, TSa: This is rather ugly -- need to know we
         *   actually really get a DTD reader, and DTD reader needs
         *   to expose a special method... but it gets things done.
         */
        EntityDecl ent = ((MinimalDTDReader) rep).findEntity(normStr);
        checkEntity(rep, normStr, ent);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DFAValidator.java 0000644 0001750 0001750 00000004436 11745427074 023557 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.StringUtil;
/**
 * Validator class that is based on a DFA constructed from DTD content
 * specification.
 */
public final class DFAValidator
    extends StructValidator
{
    /**
     * For root validator instance, the start state of DFA; for other
     * instances, current state.
     */
    DFAState mState;

    public DFAValidator(DFAState initialState) {
        mState = initialState;
    }

    /**
     * @return A new validator that starts from the state this
     *   instance is currently in
     */
    public StructValidator newInstance() {
        return new DFAValidator(mState);
    }

    /**
     * Tries to advance the DFA with the given element name.
     *
     * @return Null if there was a transition for the name (in which case
     *   state was advanced); otherwise a description of what would have
     *   been expected instead
     */
    public String tryToValidate(PrefixedName elemName)
    {
        DFAState target = mState.findNext(elemName);
        // Do we have a follow state with that key? If so, we are done
        if (target != null) {
            mState = target;
            return null;
        }

        // Nope; let's show what we'd have expected instead...
        TreeSet expected = mState.getNextNames();
        if (expected.isEmpty()) { // expected end tag?
            return "Expected $END";
        }
        StringBuffer msg = new StringBuffer("Expected <");
        if (mState.isAcceptingState()) { // either end tag, or another tag
            msg.append(StringUtil.concatEntries(expected, ">, <", null));
            msg.append("> or $END");
        } else {
            msg.append(StringUtil.concatEntries(expected, ">, <", "> or <"));
            msg.append(">");
        }
        return msg.toString();
    }

    /**
     * @return Null if the current state is an accepting state; otherwise
     *   a description of the elements that could still follow
     */
    public String fullyValid()
    {
        if (!mState.isAcceptingState()) {
            TreeSet expected = mState.getNextNames();
            String list = StringUtil.concatEntries(expected, ">, <", "> or <");
            return "Expected <" + list + ">";
        }
        return null;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDCdataAttr.java 0000644 0001750 0001750 00000003345 11745427074 023520 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import org.codehaus.stax2.validation.XMLValidationException;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Simple {@link DTDAttribute} sub-class used for plain vanilla CDATA
 * valued attributes. Although base class implements most of the methods,
 * it's better designwise to keep that base class abstract and have
 * separate CDATA type as well.
 */
public final class DTDCdataAttr
    extends DTDAttribute
{
    /**
     * Sole constructor; passes all arguments to the base class as is.
     */
    public DTDCdataAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
                        boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * @return A new instance that shares name, default value and settings
     *   with this one, but uses the given index
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        DTDCdataAttr copy = new DTDCdataAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
        return copy;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * No-op for CDATA: nothing is checked or changed.
     *
     * @return Always null
     */
    // @Override
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLValidationException
    {
        return null; // any value is fine for pure CDATA
    }

    /**
     * No-op for CDATA: all default values are acceptable.
     */
    // @Override
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws javax.xml.stream.XMLStreamException
    {
        // intentionally empty
    }

    /**
     * No-op for CDATA: nothing is normalized.
     *
     * @return Always null
     */
    // @Override
    public String normalize(DTDValidatorBase v, char[] cbuf, int start, int end)
    {
        return null; // no changes for pure CDATA
    }

    /**
     * No-op for CDATA: default value is left as is.
     */
    // @Override
    public void normalizeDefault()
    {
        // intentionally empty
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDNmTokenAttr.java 0000644 0001750 0001750 00000005704 11745427074 024060 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Specific attribute class for attributes of type NMTOKEN; that is,
 * attributes whose value consists of a single name token.
 */
public final class DTDNmTokenAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor.
     */
    public DTDNmTokenAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
                          boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * @return A new instance that shares name, default value and settings
     *   with this one, but uses the given index
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDNmTokenAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType() {
        return TYPE_NMTOKEN;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *
     * @return Trimmed value, if <code>normalize</code> is true and
     *   surrounding white space was actually removed; null otherwise
     *   (or whatever the problem reporting call returns, on problems)
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        final int rawLength = end - start;
        int first = start;
        int last = end;

        // Leading white space goes first
        while (first < last && WstxInputData.isSpaceChar(cbuf[first])) {
            ++first;
        }
        // Nothing but white space (or nothing at all)?
        if (first >= last) {
            return reportValidationProblem(v, "Empty NMTOKEN value");
        }

        // From now on 'last' is inclusive; then trailing white space
        --last;
        while (last > first && WstxInputData.isSpaceChar(cbuf[last])) {
            --last;
        }

        // Each and every remaining char has to be a name char
        for (int ix = first; ix <= last; ++ix) {
            char ch = cbuf[ix];
            if (!WstxInputData.isNameChar(ch, mCfgNsAware, mCfgXml11)) {
                return reportInvalidChar(v, ch, "not valid NMTOKEN character");
            }
        }

        if (!normalize) {
            return null;
        }
        // Only need to create the String if something was trimmed
        final int trimmedLength = (last - first) + 1;
        return (trimmedLength == rawLength) ? null : new String(cbuf, first, trimmedLength);
    }

    /**
     * Method called by the validator
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        String checked = validateDefaultNmToken(rep, normalize);
        if (!normalize) {
            return;
        }
        mDefValue.setValue(checked);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/OptionalModel.java 0000644 0001750 0001750 00000002750 11745427074 024062 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.*;
/**
 * Content specification class that represents an optional specification.
 * Optional specifications are generally a result of '?' arity marker,
 * and are created when {@link ContentSpec#rewrite} is called
 * on a specification with '?' arity modifier.
 */
public class OptionalModel
    extends ModelNode
{
    /**
     * Model that is being made optional; all position calculations
     * are delegated to it.
     */
    ModelNode mModel;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    public OptionalModel(ModelNode model) {
        mModel = model;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * Method that has to create a deep copy of the model, without
     * sharing any of existing Objects.
     */
    public ModelNode cloneModel() {
        ModelNode innerCopy = mModel.cloneModel();
        return new OptionalModel(innerCopy);
    }

    /**
     * @return Always true; an optional model can always match nothing
     */
    public boolean isNullable() {
        return true;
    }

    /** Delegates to the wrapped model */
    public void indexTokens(List tokens) {
        mModel.indexTokens(tokens);
    }

    /** Delegates to the wrapped model */
    public void addFirstPos(BitSet pos) {
        mModel.addFirstPos(pos);
    }

    /** Delegates to the wrapped model */
    public void addLastPos(BitSet pos) {
        mModel.addLastPos(pos);
    }

    /**
     * Delegates to the wrapped model; nothing is added at this level.
     */
    public void calcFollowPos(BitSet[] followPosSets)
    {
        mModel.calcFollowPos(followPosSets);
    }

    /**
     * @return Description of the wrapped model, followed by "[?]"
     */
    public String toString() {
        StringBuffer desc = new StringBuffer();
        desc.append(mModel);
        desc.append("[?]");
        return desc.toString();
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/ChoiceContentSpec.java 0000644 0001750 0001750 00000015512 11745427074 024654 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import java.util.*;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.PrefixedName;
/**
 * Content specification that defines content model that has
 * multiple alternative elements; including mixed content model.
 */
public class ChoiceContentSpec
    extends ContentSpec
{
    final boolean mNsAware;

    /**
     * Whether this is a mixed content model; mostly affects String
     * representation
     */
    final boolean mHasMixed;

    /**
     * Alternatives of this choice.
     */
    final ContentSpec[] mContentSpecs;

    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    private ChoiceContentSpec(boolean nsAware, char arity, boolean mixed,
                              ContentSpec[] specs)
    {
        super(arity);
        mNsAware = nsAware;
        mHasMixed = mixed;
        mContentSpecs = specs;
    }

    private ChoiceContentSpec(boolean nsAware, char arity, boolean mixed, Collection specs)
    {
        super(arity);
        mNsAware = nsAware;
        mHasMixed = mixed;
        mContentSpecs = new ContentSpec[specs.size()];
        specs.toArray(mContentSpecs);
    }

    /**
     * Factory method for constructing a regular (non-mixed) choice.
     */
    public static ChoiceContentSpec constructChoice(boolean nsAware, char arity,
                                                    Collection specs)
    {
        return new ChoiceContentSpec(nsAware, arity, false, specs);
    }

    /**
     * Factory method for constructing a mixed content model; arity
     * is always '*' for these.
     */
    public static ChoiceContentSpec constructMixed(boolean nsAware, Collection specs)
    {
        return new ChoiceContentSpec(nsAware, '*', true, specs);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    /**
     * @return Simple name-set based validator if all alternatives are
     *   leaves (or this is a mixed model); null if a DFA is needed
     */
    public StructValidator getSimpleValidator()
    {
        /* Can we create a simple validator? Yes, if the sub-specs are
         * all simple (leaves == element tokens with no arity modifier);
         * this is always true for mixed.
         */
        ContentSpec[] specs = mContentSpecs;
        int len = specs.length;
        int i;

        if (mHasMixed) {
            i = len;
        } else {
            i = 0;
            for (; i < len; ++i) {
                if (!specs[i].isLeaf()) {
                    break;
                }
            }
        }

        if (i == len) { // all leaves, kewl
            PrefixedNameSet keyset = namesetFromSpecs(mNsAware, specs);
            return new Validator(mArity, keyset);
        }

        // Nah, need a DFA...
        return null;
    }

    /**
     * Converts this specification (and its alternatives) to model nodes,
     * wrapping the resulting choice as necessary to express the arity.
     */
    public ModelNode rewrite()
    {
        // First, need to convert sub-specs:
        ContentSpec[] specs = mContentSpecs;
        int len = specs.length;
        ModelNode[] models = new ModelNode[len];
        for (int i = 0; i < len; ++i) {
            models[i] = specs[i].rewrite();
        }
        ChoiceModel model = new ChoiceModel(models);

        // and then resolve arity modifiers, if necessary:
        if (mArity == '*') {
            return new StarModel(model);
        }
        if (mArity == '?') {
            return new OptionalModel(model);
        }
        if (mArity == '+') {
            // one-or-more is expressed as: model, followed by (copy of model)*
            return new ConcatModel(model,
                                   new StarModel(model.cloneModel()));
        }
        return model;
    }

    public String toString()
    {
        StringBuffer sb = new StringBuffer();

        if (mHasMixed) {
            sb.append("(#PCDATA | ");
        } else {
            sb.append('(');
        }
        for (int i = 0; i < mContentSpecs.length; ++i) {
            if (i > 0) {
                sb.append(" | ");
            }
            sb.append(mContentSpecs[i].toString());
        }
        sb.append(')');

        if (mArity != ' ') {
            sb.append(mArity);
        }
        return sb.toString();
    }

    /*
    ///////////////////////////////////////////////////
    // Package methods
    ///////////////////////////////////////////////////
     */

    /**
     * Builds a name set out of given specifications, all of which
     * have to be {@link TokenContentSpec}s (they are cast to that type).
     */
    protected static PrefixedNameSet namesetFromSpecs(boolean nsAware, ContentSpec[] specs)
    {
        int len = specs.length;
        PrefixedName[] nameArray = new PrefixedName[len];
        for (int i = 0; i < len; ++i) {
            nameArray[i] = ((TokenContentSpec)specs[i]).getName();
        }

        if (len < 5) { // 4 or fewer elements -> small
            return new SmallPrefixedNameSet(nsAware, nameArray);
        }
        return new LargePrefixedNameSet(nsAware, nameArray);
    }

    /*
    ///////////////////////////////////////////////////
    // Validator class that can be used for simple
    // choices (including mixed content)
    ///////////////////////////////////////////////////
     */

    final static class Validator
        extends StructValidator
    {
        final char mArity;
        final PrefixedNameSet mNames;

        /**
         * Number of elements accepted so far.
         */
        int mCount = 0;

        public Validator(char arity, PrefixedNameSet names)
        {
            mArity = arity;
            mNames = names;
        }

        /**
         * Rules for reuse are simple: if we can have any number of
         * repetitions, we can just use a shared root instance. Although
         * its count variable will get updated this doesn't really
         * matter as it won't be used. Otherwise a new instance has to
         * be created always, to keep track of instance counts.
         */
        public StructValidator newInstance() {
            return (mArity == '*') ? this : new Validator(mArity, mNames);
        }

        /**
         * @return Null if the element is acceptable at this point;
         *   otherwise a description of what was expected
         */
        public String tryToValidate(PrefixedName elemName)
        {
            if (!mNames.contains(elemName)) {
                if (mNames.hasMultiple()) {
                    return "Expected one of ("+mNames.toString(" | ")+")";
                }
                return "Expected <"+mNames.toString("")+">";
            }
            // At most one element allowed for '?' and default arity
            if (++mCount > 1 && (mArity == '?' || mArity == ' ')) {
                if (mNames.hasMultiple()) {
                    return "Expected $END (already had one of ["
                        +mNames.toString(" | ")+"])";
                }
                return "Expected $END (already had one <"
                    +mNames.toString("")+">)";
            }
            return null;
        }

        /**
         * @return Null if enough elements have been seen for the arity;
         *   otherwise a description of what is missing
         */
        public String fullyValid()
        {
            switch (mArity) {
            case '*':
            case '?':
                return null;
            case '+': // need at least one (and multiples checked earlier)
            case ' ':
                if (mCount > 0) {
                    return null;
                }
                return "Expected "+(mArity == '+' ? "at least one" : "one")
                    +" of elements ("+mNames+")";
            }
            // should never happen:
            ExceptionUtil.throwGenericInternal();
            return null;
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/EmptyValidator.java 0000644 0001750 0001750 00000002471 11745427074 024260 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import com.ctc.wstx.util.PrefixedName;
/**
 * Simple content model validator that accepts no elements, ever; this
 * is true for pure #PCDATA content model as well as EMPTY content model.
 * Can be used as a singleton, since all info needed for diagnostics
 * is passed via methods.
 */
public class EmptyValidator
    extends StructValidator
{
    final static EmptyValidator sPcdataInstance = new EmptyValidator("No elements allowed in pure #PCDATA content model");

    final static EmptyValidator sEmptyInstance = new EmptyValidator("No elements allowed in EMPTY content model");

    /**
     * Message returned for every element that is encountered.
     */
    final String mErrorMsg;

    private EmptyValidator(String errorMsg) {
        mErrorMsg = errorMsg;
    }

    /**
     * @return Shared instance whose error message refers to the pure
     *   #PCDATA content model
     */
    public static EmptyValidator getPcdataInstance() { return sPcdataInstance; }

    /**
     * @return Shared instance whose error message refers to the EMPTY
     *   content model
     */
    public static EmptyValidator getEmptyInstance() { return sEmptyInstance; }

    /**
     * Simple; can always (re)use instance itself; no state information
     * is kept.
     */
    public StructValidator newInstance() {
        return this;
    }

    /**
     * @return Always the error message of this instance, since no
     *   element is ever acceptable
     */
    public String tryToValidate(PrefixedName elemName)
    {
        return mErrorMsg;
    }

    /**
     * If we ever get as far as element closing, things are all good;
     * can just return null.
     */
    public String fullyValid()
    {
        return null;
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDNmTokensAttr.java 0000644 0001750 0001750 00000014345 11745427074 024244 0 ustar giovanni giovanni package com.ctc.wstx.dtd;
import javax.xml.stream.XMLStreamException;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.PrefixedName;
/**
 * Specific attribute class for attributes of type NMTOKENS; that is,
 * attributes whose value is a white space separated list of one or
 * more name tokens.
 */
public final class DTDNmTokensAttr
    extends DTDAttribute
{
    /*
    ///////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////
     */

    /**
     * Main constructor.
     */
    public DTDNmTokensAttr(PrefixedName name, DefaultAttrValue defValue, int specIndex,
                           boolean nsAware, boolean xml11)
    {
        super(name, defValue, specIndex, nsAware, xml11);
    }

    /**
     * @return A new instance that shares name, default value and settings
     *   with this one, but uses the given index
     */
    public DTDAttribute cloneWith(int specIndex)
    {
        return new DTDNmTokensAttr(mName, mDefValue, specIndex, mCfgNsAware, mCfgXml11);
    }

    /*
    ///////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////
     */

    public int getValueType() {
        return TYPE_NMTOKENS;
    }

    /*
    ///////////////////////////////////////////////////
    // Public API, validation
    ///////////////////////////////////////////////////
     */

    /**
     * Method called by the validator
     * to let the attribute do necessary normalization and/or validation
     * for the value.
     *
     * @return When <code>normalize</code> is true, the value with tokens
     *   separated by single spaces and no surrounding white space; null
     *   when not normalizing (or whatever the problem reporting call
     *   returns, on problems)
     */
    public String validate(DTDValidatorBase v, char[] cbuf, int start, int end, boolean normalize)
        throws XMLStreamException
    {
        /* First things first; let's ensure value is not empty (all
         * white space)...
         */
        while (start < end && WstxInputData.isSpaceChar(cbuf[start])) {
            ++start;
        }
        // Empty value?
        if (start >= end) {
            return reportValidationProblem(v, "Empty NMTOKENS value");
        }

        /* Then, let's have separate handling for normalizing and
         * non-normalizing case, since latter is trivially easy case:
         */
        if (!normalize) {
            for (; start < end; ++start) {
                char c = cbuf[start];
                if (!WstxInputData.isSpaceChar(c)
                    && !WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
                    return reportInvalidChar(v, c, "not valid as NMTOKENS character");
                }
            }
            return null; // ok, all good
        }

        --end; // so that it now points to the last char
        // Wouldn't absolutely have to trim trailing... but is easy to do
        while (end > start && WstxInputData.isSpaceChar(cbuf[end])) {
            --end;
        }

        /* Ok, now, need to check we only have valid chars, and maybe
         * also coalesce multiple spaces, if any.
         */
        StringBuffer sb = null;

        while (start <= end) {
            // Find the end of the current token, checking chars as we go
            int i = start;
            for (; i <= end; ++i) {
                char c = cbuf[i];
                if (WstxInputData.isSpaceChar(c)) {
                    break;
                }
                if (!WstxInputData.isNameChar(c, mCfgNsAware, mCfgXml11)) {
                    return reportInvalidChar(v, c, "not valid as an NMTOKENS character");
                }
            }

            if (sb == null) {
                sb = new StringBuffer(end - start + 1);
            } else {
                sb.append(' ');
            }
            sb.append(cbuf, start, (i - start));

            start = i + 1;
            // Ok, any white space to skip?
            while (start <= end && WstxInputData.isSpaceChar(cbuf[start])) {
                ++start;
            }
        }

        /* 27-Nov-2005, TSa: Could actually optimize trimming, and often
         *   avoid using StringBuffer... but let's only do it if it turns
         *   out dealing with NMTOKENS normalization shows up on profiling...
         */
        return sb.toString();
    }

    /**
     * Method called by the validator
     * to ask attribute to verify that the default it has (if any) is
     * valid for such type.
     *<p>
     * The default value is split into white space separated tokens;
     * each token is checked separately, and if <code>normalize</code>
     * is true, the stored default is replaced with the tokens joined
     * by single spaces.
     */
    public void validateDefault(InputProblemReporter rep, boolean normalize)
        throws XMLStreamException
    {
        String defValue = mDefValue.getValue();
        int len = defValue.length();

        // Then code similar to actual value validation:
        StringBuffer sb = null;
        int count = 0;
        int start = 0;

        while (start < len) {
            // Ok, any white space to skip?
            while (start < len && WstxInputData.isSpaceChar(defValue.charAt(start))) {
                ++start;
            }
            if (start >= len) { // only trailing white space was left
                break;
            }

            /* Token has at least the char at 'start'; it extends up to
             * (but not including) the next white space char, or the end
             * of the value. Every char after the first has to be checked,
             * so that single-char tokens are handled properly.
             */
            int i = start + 1;
            while (i < len && !WstxInputData.isSpaceChar(defValue.charAt(i))) {
                ++i;
            }
            ++count;
            String token = defValue.substring(start, i);
            int illegalIx = WstxInputData.findIllegalNmtokenChar(token, mCfgNsAware, mCfgXml11);
            if (illegalIx >= 0) {
                /* NOTE(review): index is presumably relative to the token
                 * that was passed; convert it to an offset within the
                 * whole default value, since that is what gets quoted
                 * in the message.
                 */
                int valueIx = start + illegalIx;
                reportValidationProblem(rep, "Invalid default value '"+defValue
                                        +"'; character #"+valueIx+" ("
                                        +WstxInputData.getCharDesc(defValue.charAt(valueIx))
                                        +") not a valid NMTOKENS character");
            }

            if (normalize) {
                if (sb == null) {
                    sb = new StringBuffer(i - start + 32);
                } else {
                    sb.append(' ');
                }
                sb.append(token);
            }
            start = i+1;
        }

        if (count == 0) {
            reportValidationProblem(rep, "Invalid default value '"+defValue
                                    +"'; empty String is not a valid NMTOKENS value");
            return;
        }

        if (normalize) {
            mDefValue.setValue(sb.toString());
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/package.html 0000644 0001750 0001750 00000000123 11745427074 022722 0 ustar giovanni giovanni
*
*/
public final class LargePrefixedNameSet
extends PrefixedNameSet
{
/**
* Let's not bother creating tiny hash areas; should seldom be a problem
* as smaller sets are usually created using different impl. class.
*/
final static int MIN_HASH_AREA = 8;
final boolean mNsAware;
/**
* Primary hash area in which NameKeys are added. Sized to be the smallest
* power of two that can hold all the entries with about 1/8 slack; but
* never smaller than {@link #MIN_HASH_AREA} (it doesn't make sense to
* create smaller arrays)
*/
final PrefixedName[] mNames;
/**
* Secondary (spill) area, in which keys whose hash values collide
* with primary ones are added. Number of buckets is 1/4 of number
* of primary entries. Allocated lazily; null if there were no collisions.
*/
final Bucket[] mBuckets;
/**
 * Constructs the set by hashing given names into the primary area;
 * names whose primary slot is already taken are chained into the
 * lazily allocated spill area instead.
 *
 * @param nsAware Stored as is
 * @param names Names to add; entries are not expected to be null
 */
public LargePrefixedNameSet(boolean nsAware, PrefixedName[] names)
{
    mNsAware = nsAware;

    final int count = names.length;
    // Let's reserve 1/8 slack (88% fill rate) on top of the entry count
    final int minSize = count + ((count + 7) >> 3);
    // ... while never going below the minimum hash area size
    int tableSize = MIN_HASH_AREA;
    while (tableSize < minSize) {
        tableSize <<= 1;
    }
    mNames = new PrefixedName[tableSize];

    // Spill area is 1/4 of the primary one; only created if needed
    Bucket[] spill = null;
    final int mask = tableSize - 1;

    for (int i = 0; i < count; ++i) {
        PrefixedName name = names[i];
        int slot = name.hashCode() & mask;
        if (mNames[slot] == null) { // free primary slot, no collision
            mNames[slot] = name;
            continue;
        }
        // collision: prepend to the bucket chain
        if (spill == null) {
            spill = new Bucket[tableSize >> 2];
        }
        int bucketIx = slot >> 2;
        spill[bucketIx] = new Bucket(name, spill[bucketIx]);
    }
    mBuckets = spill;
}
public boolean hasMultiple() { return true; }
/**
* @return True if the set contains specified name; false if not.
*/
public boolean contains(PrefixedName name)
{
PrefixedName[] hashArea = mNames;
int index = name.hashCode() & (hashArea.length - 1);
PrefixedName res = hashArea[index];
if (res != null && res.equals(name)) {
return true;
}
Bucket[] buckets = mBuckets;
if (buckets != null) {
for (Bucket bucket = buckets[index >> 2]; bucket != null;
bucket = bucket.getNext()) {
res = bucket.getName();
if (res.equals(name)) {
return true;
}
}
}
return false;
}
/**
* Method called by debug/error handling code, to get a list of
* all names contained.
*/
public void appendNames(StringBuffer sb, String sep)
{
// Let's first get the alphabetized list of all names from main hash
TreeSet ts = new TreeSet();
for (int i = 0; i < mNames.length; ++i) {
PrefixedName name = mNames[i];
if (name != null) {
ts.add(name);
}
}
// then spill area
if (mBuckets != null) {
for (int i = 0; i < (mNames.length >> 2); ++i) {
Bucket b = mBuckets[i];
while (b != null) {
ts.add(b.getName());
b = b.getNext();
}
}
}
// And then append them:
Iterator it = ts.iterator();
boolean first = true;
while (it.hasNext()) {
if (first) {
first = false;
} else {
sb.append(sep);
}
sb.append(it.next().toString());
}
}
/*
///////////////////////////////////////////////////////////
// Helper class(es)
///////////////////////////////////////////////////////////
*/
private final static class Bucket
{
final PrefixedName mName;
final Bucket mNext;
public Bucket(PrefixedName name, Bucket next) {
mName = name;
mNext = next;
}
public PrefixedName getName() { return mName; }
public Bucket getNext() { return mNext; }
public boolean contains(PrefixedName n) {
return mName.equals(n);
}
}
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDValidatorBase.java 0000644 0001750 0001750 00000041642 11745427074 024373 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.text.MessageFormat;
import java.util.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.*;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.sr.NsDefaultProvider;
import com.ctc.wstx.sr.InputElementStack;
import com.ctc.wstx.util.DataUtil;
import com.ctc.wstx.util.ElementIdMap;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.PrefixedName;
/**
* Shared abstract base class for Woodstox implementations
* of {@link XMLValidator} for DTD validation.
* Since there are 2 sub-types -- full actual DTD validator, and a dummy
* one that only adds type information and default values, with no actual
* validation -- common functionality was refactored into this base
* class.
*/
public abstract class DTDValidatorBase
    extends XMLValidator
    implements NsDefaultProvider // for namespace attr defaults
{
    /*
    /////////////////////////////////////////////////////
    // Constants
    /////////////////////////////////////////////////////
     */

    /**
     * Estimated maximum depth of typical documents; used to allocate
     * the array for element stack
     */
    final static int DEFAULT_STACK_SIZE = 16;

    /**
     * Estimated maximum number of attributes for a single element
     */
    final static int EXP_MAX_ATTRS = 16;

    /**
     * Let's actually just reuse a local Map...
     */
    protected final static HashMap EMPTY_MAP = new HashMap();

    /*
    ///////////////////////////////////////
    // Configuration
    ///////////////////////////////////////
     */

    /**
     * Flag that indicates whether any of the elements declared has default
     * attribute values for namespace declaration pseudo-attributes.
     */
    final boolean mHasNsDefaults;

    /**
     * DTD schema ({@link DTDSubsetImpl}) object that created this validator
     * instance.
     */
    final DTDSubset mSchema;

    /**
     * Validation context (owner) for this validator. Needed for adding
     * default attribute values, for example.
     */
    final ValidationContext mContext;

    /**
     * Map that contains element specifications from DTD; an empty map
     * if none were passed to the constructor.
     */
    final Map mElemSpecs;

    /**
     * General entities defined in DTD subsets; needed for validating
     * ENTITY/ENTITIES attributes.
     */
    final Map mGeneralEntities;

    /**
     * Flag that indicates whether parser wants the attribute values
     * to be normalized (according to XML specs) or not (which may be
     * more efficient, although not compliant with the specs)
     */
    protected boolean mNormAttrs;

    /*
    ///////////////////////////////////////////
    // Element def/spec/validator stack, state
    ///////////////////////////////////////////
     */

    /**
     * This is the element that is currently being validated; valid
     * during <code>validateElementStart</code>,
     * <code>validateAttribute</code> and
     * <code>validateElementAndAttributes</code> calls.
     */
    protected DTDElement mCurrElem = null;

    /**
     * Stack of element definitions matching the current active element stack.
     * Instances are elements definitions read from DTD.
     */
    protected DTDElement[] mElems = null;

    /**
     * Number of elements in {@link #mElems}.
     */
    protected int mElemCount = 0;

    /**
     * Attribute definitions for attributes the current element may have
     */
    protected HashMap mCurrAttrDefs = null;

    /**
     * List of attribute declarations/specifications, one for each
     * attribute of the current element, for which there is a matching
     * value (either explicitly defined, or assigned via defaulting).
     * Individual slots may be null (see {@link #doAddDefaultValue}).
     */
    protected DTDAttribute[] mAttrSpecs = new DTDAttribute[EXP_MAX_ATTRS];

    /**
     * Number of attribute specification Objects in
     * {@link #mAttrSpecs}; needed to store in case type information
     * is requested later on.
     */
    protected int mAttrCount = 0;

    /**
     * Index of the attribute of type ID, within current element's
     * attribute list. Track of this is kept separate from other
     * attribute since id attributes often need to be used for resolving
     * cross-references. Value -2 is treated by {@link #getIdAttrIndex}
     * as "not yet resolved"; -1 means there is no such attribute.
     */
    protected int mIdAttrIndex = -1;

    /*
    ///////////////////////////////////////
    // Temporary helper objects
    ///////////////////////////////////////
     */

    protected final transient PrefixedName mTmpKey = new PrefixedName(null, null);

    /**
     * Temporary buffer attribute instances can share for validation
     * purposes
     */
    char[] mTmpAttrValueBuffer = null;

    /*
    ///////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////
     */

    /**
     * @param schema DTD subset that created this validator
     * @param ctxt Validation context used for locations, problem
     *   reporting and default attribute handling
     * @param hasNsDefaults Whether any element has namespace declaration
     *   defaults
     * @param elemSpecs Element specifications; null or empty is replaced
     *   with an empty map
     * @param genEntities General entities; stored as-is (may be null)
     */
    public DTDValidatorBase(DTDSubset schema, ValidationContext ctxt, boolean hasNsDefaults,
                            Map elemSpecs, Map genEntities)
    {
        mSchema = schema;
        mContext = ctxt;
        mHasNsDefaults = hasNsDefaults;
        mElemSpecs = (elemSpecs == null || elemSpecs.size() == 0) ?
            Collections.EMPTY_MAP : elemSpecs;
        mGeneralEntities = genEntities;
        // By default, let's assume attrs are to be normalized (fully xml compliant)
        mNormAttrs = true;
        mElems = new DTDElement[DEFAULT_STACK_SIZE];
    }

    /*
    ///////////////////////////////////////
    // Configuration
    ///////////////////////////////////////
     */

    /**
     * Method that allows enabling/disabling attribute value normalization.
     * In general, readers by default enable normalization (to be fully xml
     * compliant),
     * whereas writers do not (since there is usually little to gain, if
     * anything -- it is even possible value may be written before validation
     * is called in some cases)
     */
    public void setAttrValueNormalization(boolean state) {
        mNormAttrs = state;
    }

    /**
     * @return True for validator object that actually do validate
     *   content; false for objects that only use DTD type information.
     */
    public abstract boolean reallyValidating();

    /*
    ///////////////////////////////////////
    // XMLValidator implementation
    ///////////////////////////////////////
     */

    public final XMLValidationSchema getSchema() {
        return mSchema;
    }

    /**
     * Method called to update information about the newly encountered (start)
     * element. At this point namespace information has been resolved, but
     * no DTD validation has been done. Validator is to do these validations,
     * including checking for attribute value (and existence) compatibility.
     */
    public abstract void validateElementStart(String localName, String uri, String prefix)
        throws XMLStreamException;

    public abstract String validateAttribute(String localName, String uri,
                                             String prefix, String value)
        throws XMLStreamException;

    public abstract String validateAttribute(String localName, String uri,
                                             String prefix,
                                             char[] valueChars, int valueStart,
                                             int valueEnd)
        throws XMLStreamException;

    public abstract int validateElementAndAttributes()
        throws XMLStreamException;

    /**
     * @return Validation state that should be effective for the parent
     *   element state
     */
    public abstract int validateElementEnd(String localName, String uri, String prefix)
        throws XMLStreamException;

    public void validateText(String text, boolean lastTextSegment)
        throws XMLStreamException
    {
        /* This method is a NOP, since basic DTD has no mechanism for
         * validating textual content.
         */
    }

    public void validateText(char[] cbuf, int textStart, int textEnd,
                             boolean lastTextSegment)
        throws XMLStreamException
    {
        /* This method is a NOP, since basic DTD has no mechanism for
         * validating textual content.
         */
    }

    public abstract void validationCompleted(boolean eod)
        throws XMLStreamException;

    /*
    ///////////////////////////////////////
    // Attribute info access
    ///////////////////////////////////////
     */

    // // // Access to type info

    /**
     * @return Type string of the attribute specification stored at the
     *   given index, or {@link WstxInputProperties#UNKNOWN_ATTR_TYPE} if
     *   that slot holds no specification
     */
    public String getAttributeType(int index)
    {
        DTDAttribute attr = mAttrSpecs[index];
        return (attr == null) ? WstxInputProperties.UNKNOWN_ATTR_TYPE :
            attr.getValueTypeString();
    }

    /**
     * Method for finding out the index of the attribute (collected using
     * the attribute collector; having DTD-derived info in same order)
     * that is of type ID. DTD explicitly specifies that at most one
     * attribute can have this type for any element.
     *
     * @return Index of the attribute with type ID, in the current
     *   element, if one exists: -1 otherwise
     */
    public int getIdAttrIndex()
    {
        // Let's figure out the index only when needed
        int ix = mIdAttrIndex;
        if (ix == -2) {
            ix = -1;
            if (mCurrElem != null) {
                DTDAttribute idAttr = mCurrElem.getIdAttribute();
                if (idAttr != null) {
                    DTDAttribute[] attrs = mAttrSpecs;
                    /* Only the first mAttrCount slots belong to the current
                     * element; anything beyond may be left over from an
                     * earlier element and must not be matched.
                     */
                    final int len = Math.min(mAttrCount, attrs.length);
                    for (int i = 0; i < len; ++i) {
                        if (attrs[i] == idAttr) {
                            ix = i;
                            break;
                        }
                    }
                }
            }
            mIdAttrIndex = ix; // cache until reset to -2
        }
        return ix;
    }

    /**
     * Method for finding out the index of the attribute (collected using
     * the attribute collector; having DTD-derived info in same order)
     * that is of type NOTATION. DTD explicitly specifies that at most one
     * attribute can have this type for any element.
     *
     * @return Index of the attribute with type NOTATION, in the current
     *   element, if one exists: -1 otherwise
     */
    public int getNotationAttrIndex()
    {
        /* If necessary, we could find this index when resolving the
         * element, could avoid linear search. But who knows how often
         * it's really needed...
         */
        for (int i = 0, len = mAttrCount; i < len; ++i) {
            // Slots may legitimately be null (no spec known for the attribute)
            DTDAttribute attr = mAttrSpecs[i];
            if (attr != null && attr.typeIsNotation()) {
                return i;
            }
        }
        return -1;
    }

    /*
    /////////////////////////////////////////////////////
    // NsDefaultProvider interface
    /////////////////////////////////////////////////////
     */

    /**
     * Calling this method before {@link #checkNsDefaults} is necessary
     * to pass information regarding the current element; although
     * it will become available later on (via normal XMLValidator interface),
     * that's too late (after namespace binding and resolving).
     */
    public boolean mayHaveNsDefaults(String elemPrefix, String elemLN)
    {
        mTmpKey.reset(elemPrefix, elemLN);
        DTDElement elem = (DTDElement) mElemSpecs.get(mTmpKey);
        mCurrElem = elem;
        return (elem != null) && elem.hasNsDefaults();
    }

    /**
     * Adds a namespace binding for every defaulted namespace declaration
     * of the current element whose prefix is not declared locally on the
     * given stack.
     */
    public void checkNsDefaults(InputElementStack nsStack)
        throws XMLStreamException
    {
        // We only get called if mCurrElem != null, and has defaults
        HashMap m = mCurrElem.getNsDefaults();
        if (m != null) {
            Iterator it = m.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry me = (Map.Entry) it.next();
                String prefix = (String) me.getKey();
                if (!nsStack.isPrefixLocallyDeclared(prefix)) {
                    DTDAttribute attr = (DTDAttribute) me.getValue();
                    String uri = attr.getDefaultValue(mContext, this);
                    nsStack.addNsBinding(prefix, uri);
                }
            }
        }
    }

    /*
    ///////////////////////////////////////
    // Package methods, accessors
    ///////////////////////////////////////
     */

    /**
     * Name of current element on the top of the element stack.
     */
    PrefixedName getElemName() {
        DTDElement elem = mElems[mElemCount-1];
        return elem.getName();
    }

    Location getLocation() {
        return mContext.getValidationLocation();
    }

    protected abstract ElementIdMap getIdMap();

    Map getEntityMap() {
        return mGeneralEntities;
    }

    /**
     * @return Shared scratch buffer of at least <code>neededLength</code>
     *   chars (and never shorter than 100); re-allocated only when the
     *   current one is missing or too short
     */
    char[] getTempAttrValueBuffer(int neededLength)
    {
        if (mTmpAttrValueBuffer == null
            || mTmpAttrValueBuffer.length < neededLength) {
            int size = (neededLength < 100) ? 100 : neededLength;
            mTmpAttrValueBuffer = new char[size];
        }
        return mTmpAttrValueBuffer;
    }

    public boolean hasNsDefaults() {
        return mHasNsDefaults;
    }

    /*
    ///////////////////////////////////////
    // Package methods, error handling
    ///////////////////////////////////////
     */

    /**
     * Method called to report validity problems; depending on mode, will
     * either throw an exception, or add a problem notification to the
     * list of problems.
     */
    void reportValidationProblem(String msg)
        throws XMLStreamException
    {
        doReportValidationProblem(msg, null);
    }

    void reportValidationProblem(String msg, Location loc)
        throws XMLStreamException
    {
        doReportValidationProblem(msg, loc);
    }

    void reportValidationProblem(String format, Object arg)
        throws XMLStreamException
    {
        doReportValidationProblem(MessageFormat.format(format, new Object[] { arg }),
                                  null);
    }

    void reportValidationProblem(String format, Object arg1, Object arg2)
        throws XMLStreamException
    {
        doReportValidationProblem(MessageFormat.format(format, new Object[] { arg1, arg2 }),
                                  null);
    }

    /*
    ///////////////////////////////////////
    // Private/sub-class methods
    ///////////////////////////////////////
     */

    /**
     * Wraps the message into an error-severity {@link XMLValidationProblem}
     * (using the current validation location when none is given) and
     * hands it to the validation context.
     */
    protected void doReportValidationProblem(String msg, Location loc)
        throws XMLStreamException
    {
        if (loc == null) {
            loc = getLocation();
        }
        XMLValidationProblem prob = new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR);
        prob.setReporter(this);
        mContext.reportProblem(prob);
    }

    /**
     * Asks the validation context to add the default value of given
     * attribute, and records the attribute specification at the index
     * the context returns.
     */
    protected void doAddDefaultValue(DTDAttribute attr)
        throws XMLStreamException
    {
        /* If we get here, we should have a non-null (possibly empty) default
         * value:
         */
        String def = attr.getDefaultValue(mContext, this);
        if (def == null) {
            ExceptionUtil.throwInternal("null default attribute value");
        }
        PrefixedName an = attr.getName();
        // Ok, do we need to find the URI?
        String prefix = an.getPrefix();
        String uri = "";
        if (prefix != null && prefix.length() > 0) {
            uri = mContext.getNamespaceURI(prefix);
            // Can not map to empty NS!
            if (uri == null || uri.length() == 0) {
                /* Hmmh. This is a weird case where we do have to
                 * throw a validity exception; even though it really
                 * is more a ns-well-formedness error...
                 */
                reportValidationProblem("Unbound namespace prefix \"{0}\" for default attribute \"{1}\"", prefix, attr);
                // May continue if we don't throw errors, just collect them to a list
                uri = "";
            }
        }
        int defIx = mContext.addDefaultAttribute(an.getLocalName(), uri, prefix, def);
        if (defIx < 0) {
            /* 13-Dec-2005, Tatus: Hmmh. For readers this is an error
             *   condition, but writers may just indicate they are not
             *   interested in defaults. So let's let context report
             *   problem(s) if it has any regarding the request.
             */
            // nop
        } else {
            while (defIx >= mAttrSpecs.length) {
                mAttrSpecs = (DTDAttribute[]) DataUtil.growArrayBy50Pct(mAttrSpecs);
            }
            /* Any intervening empty slots? (can happen if other
             * validators add default attributes...)
             */
            while (mAttrCount < defIx) {
                mAttrSpecs[mAttrCount++] = null;
            }
            mAttrSpecs[defIx] = attr;
            mAttrCount = defIx+1;
        }
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/dtd/DTDElement.java 0000644 0001750 0001750 00000045365 11745427074 023252 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.dtd;
import java.util.*;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.PrefixedName;
import com.ctc.wstx.util.WordResolver;
/**
* Class that contains element definitions from DTD.
*writeAscii
method, since it is the most convenient
* place to catch cases where a text segment ends with an unmatched
* surrogate pair half.
*/
public abstract class EncodingXmlWriter
extends XmlWriter
{
/**
* Let's use a typical default to have a compromise between large
* enough chunks to output, and minimizing memory overhead.
* 4k should be close enough to a physical page to work out
* acceptably, without causing excessive (if temporary) memory usage.
*/
// Size requested for the output byte buffer in the constructor
final static int DEFAULT_BUFFER_SIZE = 4000;
// Single-byte forms of the ASCII punctuation used when emitting markup
final static byte BYTE_SPACE = (byte) ' ';
final static byte BYTE_COLON = (byte) ':';
final static byte BYTE_SEMICOLON = (byte) ';';
final static byte BYTE_LBRACKET = (byte) '[';
final static byte BYTE_RBRACKET = (byte) ']';
final static byte BYTE_QMARK = (byte) '?';
final static byte BYTE_EQ = (byte) '=';
final static byte BYTE_SLASH = (byte) '/';
final static byte BYTE_HASH = (byte) '#';
final static byte BYTE_HYPHEN = (byte) '-';
final static byte BYTE_LT = (byte) '<';
final static byte BYTE_GT = (byte) '>';
final static byte BYTE_AMP = (byte) '&';
final static byte BYTE_QUOT = (byte) '"';
final static byte BYTE_APOS = (byte) '\'';
// Single-byte forms of lower-case ASCII letters
// (presumably for spelling out entity names such as "amp", "lt", "quot" -- TODO confirm)
final static byte BYTE_A = (byte) 'a';
final static byte BYTE_G = (byte) 'g';
final static byte BYTE_L = (byte) 'l';
final static byte BYTE_M = (byte) 'm';
final static byte BYTE_O = (byte) 'o';
final static byte BYTE_P = (byte) 'p';
final static byte BYTE_Q = (byte) 'q';
final static byte BYTE_S = (byte) 's';
final static byte BYTE_T = (byte) 't';
final static byte BYTE_U = (byte) 'u';
final static byte BYTE_X = (byte) 'x';
/*
////////////////////////////////////////////////
// Output state, buffering
////////////////////////////////////////////////
*/
/**
* Actual output stream to use for outputting encoded content as
* bytes.
*/
private final OutputStream mOut;
protected byte[] mOutputBuffer;
protected int mOutputPtr;
/**
* In case a split surrogate pair is output (which can only successfully
* occur with either <code>writeRaw</code> or
* <code>writeCharacters</code>), the first part is temporarily stored
* within this member variable.
*/
protected int mSurrogate = 0;
/*
////////////////////////////////////////////////
//
////////////////////////////////////////////////
*/
/**
 * Creates a writer that collects encoded bytes in an internal buffer
 * (obtained from the given config) before passing them to
 * <code>out</code>.
 *
 * @param out Stream that receives the encoded bytes
 * @param cfg Writer configuration; also the source of the byte buffer
 * @param encoding Passed through to the super class
 * @param autoclose Passed through to the super class
 */
public EncodingXmlWriter(OutputStream out, WriterConfig cfg, String encoding,
                         boolean autoclose)
    throws IOException
{
    super(cfg, encoding, autoclose);
    // Nothing buffered yet
    mOutputPtr = 0;
    mOutputBuffer = cfg.allocFullBBuffer(DEFAULT_BUFFER_SIZE);
    mOut = out;
}
/**
* This method is needed by the super class, to calculate hard
* byte/char offsets.
*/
protected int getOutputPtr()
{
    // Number of bytes currently held in the output buffer
    final int buffered = mOutputPtr;
    return buffered;
}
/*
////////////////////////////////////////////////
// Partial API implementation
////////////////////////////////////////////////
*/
/**
 * @return The underlying stream this writer outputs encoded bytes to
 */
final protected OutputStream getOutputStream() {
    final OutputStream target = mOut;
    return target;
}
/**
 * @return Always null: output goes through an OutputStream, so there
 *   is no Writer to expose
 */
final protected Writer getWriter() {
    return null;
}
/**
 * Flushes pending output, hands the output buffer back to the
 * configuration object, and closes the underlying stream if either
 * requested explicitly or auto-closing is enabled.
 *
 * @param forceRealClose If true, underlying stream is closed regardless
 *   of the auto-close setting
 */
public void close(boolean forceRealClose)
    throws IOException
{
    flush();

    // Release the buffer (if we still own one)
    final byte[] released = mOutputBuffer;
    if (released != null) {
        mOutputBuffer = null;
        mConfig.freeFullBBuffer(released);
    }

    final boolean closeUnderlying = forceRealClose || mAutoCloseOutput;
    if (closeUnderlying) {
        /* 14-Nov-2008, TSa: Wrt [WSTX-163]; no need to
         *   check whether mOut implements CompletelyCloseable
         *   (unlike with BufferingXmlWriter)
         */
        mOut.close();
    }
}
/**
 * Writes out any buffered bytes and then flushes the underlying stream.
 */
public final void flush()
    throws IOException
{
    flushBuffer();
    getOutputStream().flush();
}
/**
 * Outputs <code>len</code> characters of <code>cbuf</code>, starting at
 * <code>offset</code>; the encoding itself is left to sub-classes.
 */
public abstract void writeRaw(char[] cbuf, int offset, int len)
throws IOException;
/**
 * Outputs <code>len</code> characters of <code>str</code>, starting at
 * <code>offset</code>; the encoding itself is left to sub-classes.
 */
public abstract void writeRaw(String str, int offset, int len)
throws IOException;
/*
//////////////////////////////////////////////////
// "Trusted" low-level output methods (that do not
// need to verify validity of input)
//////////////////////////////////////////////////
*/
/**
 * Writes the marker that opens a CDATA section.
 */
public final void writeCDataStart()
    throws IOException
{
    // An empty literal here would emit nothing at all
    writeAscii("<![CDATA[");
}
/**
 * Writes the marker that opens a comment.
 */
public final void writeCommentStart()
    throws IOException
{
    // An empty literal here would emit nothing at all
    writeAscii("<!--");
}
/**
 * Writes the opening of a processing instruction: the start marker,
 * the target and, optionally, one trailing space.
 *
 * @param target Processing instruction target; output as-is via writeRaw
 * @param addSpace Whether to append a single space after the target
 */
public final void writePIStart(String target, boolean addSpace)
    throws IOException
{
    writeAscii(BYTE_LT, BYTE_QMARK);
    writeRaw(target);
    if (!addSpace) {
        return;
    }
    writeAscii(BYTE_SPACE);
}
/**
 * Writes the two-byte marker that closes a processing instruction.
 */
public final void writePIEnd() throws IOException
{
    writeAscii(BYTE_QMARK, BYTE_GT);
}
/*
////////////////////////////////////////////////
// Higher-level output methods, text output
////////////////////////////////////////////////
*/
public int writeCData(String data)
throws IOException
{
writeAscii("= 0) {
return ix;
}
writeAscii("]]>");
return -1;
}
public int writeCData(char[] cbuf, int offset, int len)
throws IOException
{
writeAscii("= 0) {
return ix;
}
writeAscii("]]>");
return -1;
}
/**
 * Writes text content, using the custom text writer if one has been
 * configured and the default text output otherwise.
 */
public final void writeCharacters(String data)
    throws IOException
{
    // Note: may get second part of a surrogate
    if (mTextWriter == null) { // default escaping
        writeTextContent(data);
        return;
    }
    mTextWriter.write(data); // custom escaping
}

/**
 * Writes text content from given character array segment, using the
 * custom text writer if one has been configured and the default text
 * output otherwise.
 */
public final void writeCharacters(char[] cbuf, int offset, int len)
    throws IOException
{
    // Note: may get second part of a surrogate
    if (mTextWriter == null) { // default escaping
        writeTextContent(cbuf, offset, len);
        return;
    }
    mTextWriter.write(cbuf, offset, len); // custom escaping
}
/**
* Method that will try to output the content as specified. If
* the content passed in has embedded "--" in it, it will either
* add an intervening space between consequtive hyphens (if content
* fixing is enabled), or return the offset of the first hyphen in
* multi-hyphen sequence.
*/
public int writeComment(String data)
    throws IOException
{
    writeAscii("<!--");
    // NOTE(review): helper name reconstructed (the original statement was
    // lost along with the markers); confirm against the declaration
    // elsewhere in this class
    int ix = writeCommentContent(data);
    if (ix >= 0) { // content could not be written as a valid comment
        return ix;
    }
    writeAscii("-->");
    return -1;
}
public void writeDTD(String data)
throws IOException
{
if (mSurrogate != 0) {
throwUnpairedSurrogate();
}
writeRaw(data, 0, data.length());
}
public void writeDTD(String rootName, String systemId, String publicId,
String internalSubset)
throws IOException, XMLStreamException
{
writeAscii(" 0) {
writeAscii(BYTE_SPACE, BYTE_LBRACKET);
writeRaw(internalSubset, 0, internalSubset.length());
writeAscii(BYTE_RBRACKET);
}
writeAscii(BYTE_GT);
}
/**
 * Writes a general entity reference: an ampersand, the entity name
 * and a terminating semicolon.
 */
public void writeEntityReference(String name)
    throws IOException, XMLStreamException
{
    // A pending first half of a surrogate pair can not be followed by markup
    final boolean pendingSurrogate = (mSurrogate != 0);
    if (pendingSurrogate) {
        throwUnpairedSurrogate();
    }
    writeAscii(BYTE_AMP);
    writeName(name);
    writeAscii(BYTE_SEMICOLON);
}
public void writeXmlDeclaration(String version, String encoding, String standalone)
throws IOException
{
writeAscii(" 0) {
writeAscii(" encoding='");
// Should be ascii, but let's play it safe:
writeRaw(encoding, 0, encoding.length());
writeAscii(BYTE_APOS);
}
if (standalone != null) {
writeAscii(" standalone='");
writeAscii(standalone);
writeAscii(BYTE_APOS);
}
writeAscii(BYTE_QMARK, BYTE_GT);
}
/**
 * Writes a complete processing instruction.
 *
 * @param target Target name of the processing instruction
 * @param data Optional data; ignored if null or empty
 *
 * @return -1 if the processing instruction was written completely;
 *   otherwise the non-negative index reported when writing the data,
 *   in which case the end marker is not written
 */
public int writePI(String target, String data)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_LT, BYTE_QMARK);
    writeName(target);
    final boolean hasData = (data != null) && (data.length() > 0);
    if (hasData) {
        writeAscii(BYTE_SPACE);
        final int problemIx = writePIData(data);
        if (problemIx >= 0) { // embedded "?>"?
            return problemIx;
        }
    }
    writeAscii(BYTE_QMARK, BYTE_GT);
    return -1;
}
/*
////////////////////////////////////////////////////
// Write methods, elements
////////////////////////////////////////////////////
*/
/**
 * Writes the beginning of a start tag for an element without a prefix:
 * the opening angle bracket followed by the name.
 */
public void writeStartTagStart(String localName)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_LT);
    writeName(localName);
}

/**
 * Writes the beginning of a start tag for a possibly prefixed element;
 * a null or empty prefix is handled as if there was no prefix.
 */
public void writeStartTagStart(String prefix, String localName)
    throws IOException, XMLStreamException
{
    final boolean hasPrefix = (prefix != null) && (prefix.length() != 0);
    if (hasPrefix) {
        writeAscii(BYTE_LT);
        writeName(prefix);
        writeAscii(BYTE_COLON);
        writeName(localName);
    } else {
        writeStartTagStart(localName);
    }
}
/**
 * Writes the closing angle bracket of a (non-empty) start tag.
 */
public void writeStartTagEnd() throws IOException
{
    writeAscii(BYTE_GT);
}
/**
 * Writes the end of an empty element tag; includes a leading space
 * if so configured.
 */
public void writeStartTagEmptyEnd()
    throws IOException
{
    if (!mAddSpaceAfterEmptyElem) {
        writeAscii(BYTE_SLASH, BYTE_GT);
        return;
    }
    writeAscii(" />");
}
/**
 * Writes an end tag for an element without a prefix. The name is
 * written without checks: caller is assumed to have matched it
 * against the (already validated) start tag.
 */
public void writeEndTag(String localName)
    throws IOException
{
    writeAscii(BYTE_LT, BYTE_SLASH);
    // No validation here; see method comment
    writeNameUnchecked(localName);
    writeAscii(BYTE_GT);
}

/**
 * Writes an end tag for a possibly prefixed element; prefix (and the
 * colon) is only written if prefix is neither null nor empty. Names are
 * written without checks: caller is assumed to have matched them
 * against the (already validated) start tag.
 */
public void writeEndTag(String prefix, String localName)
    throws IOException
{
    writeAscii(BYTE_LT, BYTE_SLASH);
    final boolean skipPrefix = (prefix == null) || (prefix.length() <= 0);
    if (!skipPrefix) {
        writeNameUnchecked(prefix);
        writeAscii(BYTE_COLON);
    }
    writeNameUnchecked(localName);
    writeAscii(BYTE_GT);
}
/*
////////////////////////////////////////////////////
// Write methods, attributes/ns
////////////////////////////////////////////////////
*/
/**
 * Writes an attribute without prefix: a space, the name, and the value
 * within double quotes. Non-empty values go through the custom attribute
 * value writer if one is configured, default value output otherwise.
 */
public void writeAttribute(String localName, String value)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    final int valueLen = value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value);
        } else { // custom escaping
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    writeAscii(BYTE_QUOT);
}

/**
 * Same as {@link #writeAttribute(String, String)}, but with the value
 * given as a character array segment.
 */
public void writeAttribute(String localName, char[] value, int offset, int len)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    if (len > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, offset, len);
        } else { // custom escaping
            mAttrValueWriter.write(value, offset, len);
        }
    }
    writeAscii(BYTE_QUOT);
}

/**
 * Writes a prefixed attribute: a space, prefix, colon, local name and
 * the value within double quotes. Non-empty values go through the custom
 * attribute value writer if one is configured, default value output
 * otherwise.
 */
public void writeAttribute(String prefix, String localName, String value)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    final int valueLen = value.length();
    if (valueLen > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value);
        } else { // custom escaping
            mAttrValueWriter.write(value, 0, valueLen);
        }
    }
    writeAscii(BYTE_QUOT);
}

/**
 * Same as {@link #writeAttribute(String, String, String)}, but with the
 * value given as a character array segment.
 */
public void writeAttribute(String prefix, String localName, char[] value, int offset, int len)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    if (len > 0) {
        if (mAttrValueWriter == null) { // default escaping
            writeAttrValue(value, offset, len);
        } else { // custom escaping
            mAttrValueWriter.write(value, offset, len);
        }
    }
    writeAscii(BYTE_QUOT);
}
/*
////////////////////////////////////////////////
// Methods used by Typed Access API
////////////////////////////////////////////////
*/
/**
* Non-validating version of typed write method
*/
public final void writeTypedElement(AsciiValueEncoder enc)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    // Let the encoder decide whether remaining room is too small to start with
    final int room = mOutputBuffer.length - mOutputPtr;
    if (enc.bufferNeedsFlush(room)) {
        flush();
    }
    // Encode straight into the output buffer, flushing between rounds
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    }
}

/**
 * Validating variant of the typed write method: content is encoded into
 * <code>copyBuffer</code> chunk by chunk, and each chunk is first passed
 * to the validator and then written out.
 */
public final void writeTypedElement(AsciiValueEncoder enc,
                                    XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    /* Direct-to-bytes encoding is of no use to the validator, which
     * needs characters; hence the intermediate copy buffer. Buffer is
     * assumed to be big enough, so there is no up-front size check.
     */
    final int bufferEnd = copyBuffer.length;
    for (;;) {
        final int encodedEnd = enc.encodeMore(copyBuffer, 0, bufferEnd);
        // False -> can't be sure it's the whole remaining text
        validator.validateText(copyBuffer, 0, encodedEnd, false);
        writeRawAscii(copyBuffer, 0, encodedEnd);
        if (enc.isCompleted()) {
            break;
        }
    }
}
/**
 * Writes an attribute without prefix, with the value produced by given
 * encoder directly into the output buffer (no validation).
 */
public void writeTypedAttribute(String localName, AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    final int room = mOutputBuffer.length - mOutputPtr;
    if (enc.bufferNeedsFlush(room)) {
        flush();
    }
    // Encode in rounds, flushing whenever encoder has more to produce
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    }
    writeAscii(BYTE_QUOT);
}

/**
 * Writes a prefixed attribute, with the value produced by given
 * encoder directly into the output buffer (no validation).
 */
public void writeTypedAttribute(String prefix, String localName,
                                AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    writeAscii(BYTE_SPACE);
    writeName(prefix);
    writeAscii(BYTE_COLON);
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);
    final int room = mOutputBuffer.length - mOutputPtr;
    if (enc.bufferNeedsFlush(room)) {
        flush();
    }
    // Encode in rounds, flushing whenever encoder has more to produce
    mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    while (!enc.isCompleted()) {
        flush();
        mOutputPtr = enc.encodeMore(mOutputBuffer, mOutputPtr, mOutputBuffer.length);
    }
    writeAscii(BYTE_QUOT);
}
/**
 * Validating variant of typed attribute write: writes the attribute
 * (with prefix, if non-empty) and passes the complete encoded value to
 * the validator.
 *
 * @param prefix Attribute prefix; null or empty means no prefix is written
 * @param localName Local name of the attribute
 * @param nsURI Namespace URI passed to the validator; null is replaced
 *   with empty String
 * @param enc Encoder that produces the attribute value
 * @param validator Validator to call with the complete value
 * @param copyBuffer Scratch buffer the value is encoded into
 */
public void writeTypedAttribute(String prefix, String localName, String nsURI,
                                AsciiValueEncoder enc,
                                XMLValidator validator, char[] copyBuffer)
    throws IOException, XMLStreamException
{
    boolean hasPrefix = (prefix != null && prefix.length() > 0);
    if (nsURI == null) {
        nsURI = "";
    }
    writeAscii(BYTE_SPACE);
    if (hasPrefix) {
        writeName(prefix);
        writeAscii(BYTE_COLON);
    }
    writeName(localName);
    writeAscii(BYTE_EQ, BYTE_QUOT);

    /* Ok, this gets trickier: can't use efficient direct-to-bytes
     * encoding since validator won't be able to use it. Instead
     * have to use temporary copy buffer.
     * In addition, attributes to validate can not be
     * split (validators expect complete values). So, if value
     * won't fit as is, may need to aggregate using StringBuffer
     */
    final int copyBufferLen = copyBuffer.length;

    // First, let's see if one call is enough
    int last = enc.encodeMore(copyBuffer, 0, copyBufferLen);
    writeRawAscii(copyBuffer, 0, last);
    if (enc.isCompleted()) {
        // Value is complete: must close the quote on this path as well
        writeAscii(BYTE_QUOT);
        validator.validateAttribute(localName, nsURI, prefix, copyBuffer, 0, last);
        return;
    }

    // If not, must combine first
    StringBuffer sb = new StringBuffer(copyBufferLen << 1);
    sb.append(copyBuffer, 0, last);
    do {
        last = enc.encodeMore(copyBuffer, 0, copyBufferLen);
        writeRawAscii(copyBuffer, 0, last);
        sb.append(copyBuffer, 0, last);
    } while (!enc.isCompleted());
    writeAscii(BYTE_QUOT);

    // Then validate
    String valueStr = sb.toString();
    validator.validateAttribute(localName, nsURI, prefix, valueStr);
}
/*
////////////////////////////////////////////////
// Methods for sub-classes to use
////////////////////////////////////////////////
*/
/**
 * Writes buffered bytes (if any, and if the buffer has not been
 * released) to the underlying stream, without flushing the stream
 * itself. The buffer pointer is reset before the write is attempted.
 */
protected final void flushBuffer()
    throws IOException
{
    final int pending = mOutputPtr;
    if (pending <= 0 || mOutputBuffer == null) {
        return;
    }
    mOutputPtr = 0;
    mOut.write(mOutputBuffer, 0, pending);
}
/**
 * Appends a single byte to the output buffer, flushing first if the
 * buffer is full.
 *
 * @param b Byte to append
 * @throws IOException if there is a pending (unpaired) surrogate, or
 *   flushing fails
 */
protected final void writeAscii(byte b)
    throws IOException
{
    // A dangling first surrogate can not be followed by other output
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final boolean bufferFull = (mOutputPtr >= mOutputBuffer.length);
    if (bufferFull) {
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = b;
}
/**
 * Appends two bytes to the output buffer, flushing first if fewer than
 * two free slots remain.
 *
 * @param b1 First byte to append
 * @param b2 Second byte to append
 * @throws IOException if there is a pending (unpaired) surrogate, or
 *   flushing fails
 */
protected final void writeAscii(byte b1, byte b2)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int room = mOutputBuffer.length - mOutputPtr;
    if (room < 2) {
        flushBuffer();
    }
    mOutputBuffer[mOutputPtr++] = b1;
    mOutputBuffer[mOutputPtr++] = b2;
}
/**
 * Appends the characters of the given String to the output buffer, each
 * narrowed to a single byte. Strings longer than the whole buffer are
 * handed to <code>writeRaw</code> instead.
 *
 * @param str String to output
 * @throws IOException if there is a pending (unpaired) surrogate, or
 *   flushing fails
 */
protected final void writeAscii(String str)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int count = str.length();
    int dst = mOutputPtr;
    final byte[] out = mOutputBuffer;
    if ((dst + count) >= out.length) {
        // Longer than the buffer itself? Unlikely, but then use the full method
        if (count > out.length) {
            writeRaw(str, 0, count);
            return;
        }
        flushBuffer();
        dst = mOutputPtr;
    }
    // Pointer is advanced up front; bytes are then filled in
    mOutputPtr += count;
    for (int i = 0; i < count; ++i) {
        out[dst + i] = (byte) str.charAt(i);
    }
}
/**
 * Appends a section of the given character array to the output buffer,
 * each character narrowed to a single byte. Sections longer than the
 * whole buffer are handed to <code>writeRaw</code> instead.
 *
 * @param buf Array that contains the characters to output
 * @param offset Index of the first character to output
 * @param len Number of characters to output
 * @throws IOException if there is a pending (unpaired) surrogate, or
 *   flushing fails
 */
public final void writeRawAscii(char[] buf, int offset, int len)
    throws IOException
{
    if (mSurrogate != 0) {
        throwUnpairedSurrogate();
    }
    int outPtr = mOutputPtr;
    final byte[] out = mOutputBuffer;
    if ((outPtr + len) >= out.length) {
        if (len > out.length) {
            writeRaw(buf, offset, len);
            return;
        }
        flushBuffer();
        outPtr = mOutputPtr;
    }
    mOutputPtr += len;
    int inPtr = offset;
    for (int remaining = len; remaining > 0; --remaining) {
        out[outPtr++] = (byte) buf[inPtr++];
    }
}
/**
 * Entity writing can be optimized quite nicely, since it only
 * needs to output ascii characters.
 *<p>
 * Writes the given character either as one of the five pre-defined
 * entities (amp, lt, gt, apos, quot) or as a hexadecimal character
 * reference using the fewest digits needed, directly into the output
 * buffer.
 *
 * @param c Character code to write as an entity
 *
 * @return New value of mOutputPtr
 */
protected final int writeAsEntity(int c)
throws IOException
{
byte[] buf = mOutputBuffer;
int ptr = mOutputPtr;
// Longest output is '&', '#', 'x', 6 hex digits and ';' (10 bytes)
if ((ptr + 10) >= buf.length) { // [up to 6 hex digits] ;
flushBuffer();
ptr = mOutputPtr;
}
buf[ptr++] = BYTE_AMP;
// Can use more optimal notation for 8-bit ascii stuff:
if (c < 256) {
/* Also; although not really mandatory, let's also
* use pre-defined entities where possible.
*/
if (c == '&') {
buf[ptr++] = BYTE_A;
buf[ptr++] = BYTE_M;
buf[ptr++] = BYTE_P;
} else if (c == '<') {
buf[ptr++] = BYTE_L;
buf[ptr++] = BYTE_T;
} else if (c == '>') {
buf[ptr++] = BYTE_G;
buf[ptr++] = BYTE_T;
} else if (c == '\'') {
buf[ptr++] = BYTE_A;
buf[ptr++] = BYTE_P;
buf[ptr++] = BYTE_O;
buf[ptr++] = BYTE_S;
} else if (c == '"') {
buf[ptr++] = BYTE_Q;
buf[ptr++] = BYTE_U;
buf[ptr++] = BYTE_O;
buf[ptr++] = BYTE_T;
} else {
buf[ptr++] = BYTE_HASH;
buf[ptr++] = BYTE_X;
// Can use shortest quoting for tab, cr, lf:
// (values below 16 get one hex digit, others below 256 get two)
if (c >= 16) {
int digit = (c >> 4);
buf[ptr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
c &= 0xF;
}
buf[ptr++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
}
} else {
buf[ptr++] = BYTE_HASH;
buf[ptr++] = BYTE_X;
// Ok, let's write the shortest possible sequence then:
int shift = 20;
int origPtr = ptr;
// Loop emits the upper five nibbles (bits 23..4), skipping leading
// zeroes: a zero digit is only written once something has been output
do {
int digit = (c >> shift) & 0xF;
if (digit > 0 || (ptr != origPtr)) {
buf[ptr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
}
shift -= 4;
} while (shift > 0);
// Lowest nibble is always written
c &= 0xF;
buf[ptr++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
}
buf[ptr++] = BYTE_SEMICOLON;
mOutputPtr = ptr;
return ptr;
}
/**
 * Outputs the given name as is, after first verifying its validity if
 * name checking is enabled.
 *
 * @param name Name to output
 */
protected final void writeName(String name)
    throws IOException, XMLStreamException
{
    if (mCheckNames) {
        verifyNameValidity(name, mNsAware);
    }
    // TODO: maybe we could reuse some previously encoded names?
    final int nameLength = name.length();
    writeRaw(name, 0, nameLength);
}
/**
 * Outputs the given name as is, without any validity checks.
 *
 * @param name Name to output
 */
protected final void writeNameUnchecked(String name)
    throws IOException
{
    final int nameLength = name.length();
    writeRaw(name, 0, nameLength);
}
/**
 * Combines the pending first surrogate (which is cleared by this call)
 * with the given second surrogate into a single character code.
 *
 * @param secondSurr Second half of the surrogate pair
 * @return Combined character code
 * @throws IOException if either half is outside its legal range, or
 *   the combined code is above the legal XML character range
 */
protected final int calcSurrogate(int secondSurr)
    throws IOException
{
    final int high = mSurrogate;
    mSurrogate = 0;

    // First half must be within the first-surrogate range...
    final boolean highValid = (high >= SURR1_FIRST) && (high <= SURR1_LAST);
    if (!highValid) {
        throwUnpairedSurrogate(high);
    }
    // ... and second half within the second-surrogate range
    final boolean lowValid = (secondSurr >= SURR2_FIRST) && (secondSurr <= SURR2_LAST);
    if (!lowValid) {
        throwUnpairedSurrogate(secondSurr);
    }
    final int combined = 0x10000 + ((high - SURR1_FIRST) << 10) + (secondSurr - SURR2_FIRST);
    if (combined > XmlConsts.MAX_UNICODE_CHAR) {
        throw new IOException("Illegal surrogate character pair, resulting code 0x"+Integer.toHexString(combined)+" above legal XML character range");
    }
    return combined;
}
/**
 * Clears the pending surrogate and reports it as unpaired.
 *
 * @throws IOException always
 */
protected final void throwUnpairedSurrogate()
    throws IOException
{
    final int pending = mSurrogate;
    // Cleared before reporting, so the state is reset even though we throw
    mSurrogate = 0;
    throwUnpairedSurrogate(pending);
}
/**
 * Flushes the output and then reports the given code as an unpaired
 * surrogate character.
 *
 * @param code Offending surrogate value, included (as hex) in the message
 * @throws IOException always
 */
protected final void throwUnpairedSurrogate(int code)
    throws IOException
{
    // Flushing first makes it easier to see where in the output we failed
    flush();
    final String hex = Integer.toHexString(code);
    throw new IOException("Unpaired surrogate character (0x"+hex+")");
}
/*
////////////////////////////////////////////////
// Abstract methods for sub-classes to define
////////////////////////////////////////////////
*/
// Content of an attribute value, passed as a String.
// NOTE(review): presumably implementations handle escaping/encoding of
// the value; not visible from these declarations -- confirm in sub-classes.
protected abstract void writeAttrValue(String data)
throws IOException;
// Content of an attribute value, passed as a section of a char array.
protected abstract void writeAttrValue(char[] value, int offset, int len)
throws IOException;
// Content of a CDATA section, passed as a String.
// NOTE(review): meaning of the int return value is not defined here;
// presumably an offset of problematic content or a negative value -- confirm.
protected abstract int writeCDataContent(String data)
throws IOException;
// Content of a CDATA section, passed as a section of a char array.
protected abstract int writeCDataContent(char[] cbuf, int start, int len)
throws IOException;
// Content of a comment; int return value is not defined here (see note above).
protected abstract int writeCommentContent(String data)
throws IOException;
// Data part of a processing instruction; unlike the other content
// methods this one is also declared to throw XMLStreamException.
protected abstract int writePIData(String data)
throws IOException, XMLStreamException;
// Textual (character data) content, passed as a String.
protected abstract void writeTextContent(String data)
throws IOException;
// Textual (character data) content, passed as a section of a char array.
protected abstract void writeTextContent(char[] cbuf, int start, int len)
throws IOException;
}
woodstox-4.1.3/src/java/com/ctc/wstx/sw/NonNsStreamWriter.java 0000644 0001750 0001750 00000043747 11745427074 024611 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.sr.AttributeCollector;
import com.ctc.wstx.sr.InputElementStack;
import com.ctc.wstx.util.EmptyNamespaceContext;
import com.ctc.wstx.util.StringVector;
/**
* Implementation of {@link XMLStreamWriter} used when namespace support
* is not enabled. This means that only local names are used for elements
* and attributes; and if rudimentary namespace declarations need to be
* output, they are output using attribute writing methods.
*/
public class NonNsStreamWriter
extends TypedStreamWriter
{
/*
////////////////////////////////////////////////////
// State information
////////////////////////////////////////////////////
*/
/**
* Stack of currently open start elements; only local names
* are included.
*/
final StringVector mElements;
/**
* Container for attribute names for current element; used only
* if uniqueness of attribute names is to be enforced.
*
*
*/
public abstract class XmlWriter
{
    // // // Boundaries of the first and second surrogate ranges

    protected final static int SURR1_FIRST = 0xD800;
    protected final static int SURR1_LAST = 0xDBFF;
    protected final static int SURR2_FIRST = 0xDC00;
    protected final static int SURR2_LAST = 0xDFFF;

    protected final static char DEFAULT_QUOTE_CHAR = '"';

    protected final WriterConfig mConfig;
    protected final String mEncoding;

    // // // Operating mode: base class needs to know whether
    // // // namespaces are supported (for entity/PI target validation)

    protected final boolean mNsAware;
    protected final boolean mCheckStructure;
    protected final boolean mCheckContent;
    protected final boolean mCheckNames;
    protected final boolean mFixContent;

    /**
     * Whether to escape CR (\r) character.
     */
    final boolean mEscapeCR;

    /**
     * Whether to add a space after empty element (before closing "/>")
     * or not.
     */
    final boolean mAddSpaceAfterEmptyElem;

    /**
     * Flag that defines whether close() on this writer should call
     * close on the underlying output object (stream, writer)
     */
    protected final boolean mAutoCloseOutput;

    /**
     * Optional escaping writer used for escaping characters like '&lt;'
     * '&amp;' and '&gt;' in textual content.
     * Constructed if calling code has
     * installed a special escaping writer factory for text content.
     * Null if the default escaper is to be used.
     */
    protected Writer mTextWriter;

    /**
     * Optional escaping writer used for escaping characters like '"'
     * '&amp;' and '&lt;' in attribute values.
     * Constructed if calling code has
     * installed a special escaping writer factory for attribute values.
     * Null if the default escaper is to be used.
     */
    protected Writer mAttrValueWriter;

    /**
     * Indicates which xml version the output is to be compliant with:
     * if false, xml 1.0; if true, xml 1.1.
     */
    protected boolean mXml11 = false;

    /**
     * Lazy-constructed wrapper object, which will route all calls to
     * Writer API, to matching <code>writeRaw</code> methods of this
     * XmlWriter instance.
     */
    protected XmlWriterWrapper mRawWrapper = null;

    /**
     * Lazy-constructed wrapper object, which will route all calls to
     * Writer API, to matching <code>writeCharacters</code> methods of this
     * XmlWriter instance.
     */
    protected XmlWriterWrapper mTextWrapper = null;

    /*
    ///////////////////////////////////////////////////////
    // Output location info
    ///////////////////////////////////////////////////////
     */

    /**
     * Number of characters output prior to currently buffered output
     */
    protected int mLocPastChars = 0;

    protected int mLocRowNr = 1;

    /**
     * Offset of the first character on this line. May be negative, if
     * the offset was in a buffer that has been flushed out.
     */
    protected int mLocRowStartOffset = 0;

    /*
    ///////////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////////
     */

    /**
     * Stores the configuration, decodes the configuration flags into the
     * individual boolean settings, and creates the optional custom
     * escaping writers if matching factories have been configured.
     *
     * @param cfg Configuration to read flags and escaper factories from
     * @param encoding Output encoding; if null or empty, the default
     *   output encoding is passed to the escaper factories instead
     * @param autoclose Value for {@link #mAutoCloseOutput}
     */
    protected XmlWriter(WriterConfig cfg, String encoding, boolean autoclose)
        throws IOException
    {
        mConfig = cfg;
        mEncoding = encoding;
        mAutoCloseOutput = autoclose;

        final int cfgFlags = cfg.getConfigFlags();
        mNsAware = (cfgFlags & OutputConfigFlags.CFG_ENABLE_NS) != 0;
        mCheckStructure = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_STRUCTURE) != 0;
        mCheckContent = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_CONTENT) != 0;
        mCheckNames = (cfgFlags & OutputConfigFlags.CFG_VALIDATE_NAMES) != 0;
        mFixContent = (cfgFlags & OutputConfigFlags.CFG_FIX_CONTENT) != 0;
        mEscapeCR = (cfgFlags & OutputConfigFlags.CFG_ESCAPE_CR) != 0;
        mAddSpaceAfterEmptyElem = (cfgFlags & OutputConfigFlags.CFG_ADD_SPACE_AFTER_EMPTY_ELEM) != 0;

        // Encoding name to hand to custom escaper factories, if any
        final boolean noEncoding = (mEncoding == null) || (mEncoding.length() == 0);
        final String escaperEncoding = noEncoding ?
            WstxOutputProperties.DEFAULT_OUTPUT_ENCODING : mEncoding;

        // Has caller requested any custom text or attr value escaping?
        final EscapingWriterFactory textFactory = mConfig.getTextEscaperFactory();
        if (textFactory != null) {
            mTextWriter = textFactory.createEscapingWriterFor(wrapAsRawWriter(), escaperEncoding);
        } else {
            mTextWriter = null;
        }

        final EscapingWriterFactory attrFactory = mConfig.getAttrValueEscaperFactory();
        if (attrFactory != null) {
            mAttrValueWriter = attrFactory.createEscapingWriterFor(wrapAsRawWriter(), escaperEncoding);
        } else {
            mAttrValueWriter = null;
        }
    }

    /*
    ////////////////////////////////////////////////////
    // Extra configuration
    ////////////////////////////////////////////////////
     */

    /**
     * Switches this writer to xml 1.1 mode (see {@link #mXml11}).
     */
    public void enableXml11() {
        this.mXml11 = true;
    }

    /*
    ////////////////////////////////////////////////////
    // Access to underlying physical output destinations
    ////////////////////////////////////////////////////
     */

    /**
     * @return Underlying OutputStream used for physical output,
     *   if the writer was constructed using one
     */
    protected abstract OutputStream getOutputStream();

    /**
     * @return Underlying Writer used for physical output,
     *   if the writer was constructed with one, or one was
     *   created to be used with an OutputStream.
     */
    protected abstract Writer getWriter();

    /*
    ////////////////////////////////////////////////////
    // Basic methods for communicating with underlying
    // stream or writer
    ////////////////////////////////////////////////////
     */

    /**
     * Method called to flush the buffer(s), and close the output
     * sink (stream or writer) if enabled (auto-closing) or
     * forced.
     */
    public abstract void close(boolean forceRealClose) throws IOException;

    public abstract void flush()
        throws IOException;

    public abstract void writeRaw(String str, int offset, int len)
        throws IOException;

    /**
     * Convenience variant that outputs the whole String, by calling
     * {@link #writeRaw(String,int,int)} with offset 0 and the full length.
     */
    public void writeRaw(String str)
        throws IOException
    {
        final int fullLength = str.length();
        writeRaw(str, 0, fullLength);
    }

    public abstract void writeRaw(char[] cbuf, int offset, int len)
        throws IOException;

    /**
     * Like {@link #writeRaw}, but caller guarantees that the contents
     * additionally are known to be in 7-bit ascii range.
     */
    public abstract void writeRawAscii(char[] cbuf, int offset, int len)
        throws IOException;

    /*
    ////////////////////////////////////////////////////
    // Raw, non-verifying write methods; used when
    // directly copying trusted content
    ////////////////////////////////////////////////////
     */

    public abstract void writeCDataStart()
        throws IOException;

    public abstract void writeCDataEnd()
        throws IOException;

    public abstract void writeCommentStart()
        throws IOException;

    public abstract void writeCommentEnd()
        throws IOException;

    public abstract void writePIStart(String target, boolean addSpace)
        throws IOException;

    public abstract void writePIEnd()
        throws IOException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, textual:
    ////////////////////////////////////////////////////
     */

    /**
     * @param data Contents of the CDATA section to write out
     * @return offset of the (first) illegal content segment ("]]&gt;") in
     *   passed content and not in repairing mode; or -1 if none or is
     *   repairing
     */
    public abstract int writeCData(String data)
        throws IOException, XMLStreamException;

    public abstract int writeCData(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    public abstract void writeCharacters(String data)
        throws IOException;

    public abstract void writeCharacters(char[] cbuf, int offset, int len)
        throws IOException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, non-textual, non-elem/attr:
    ////////////////////////////////////////////////////
     */

    /**
     * Method that will try to output the content as specified. If
     * the content passed in has embedded "--" in it, it will either
     * add an intervening space between consecutive hyphens (if content
     * fixing is enabled), or return the offset of the first hyphen in
     * multi-hyphen sequence.
     */
    public abstract int writeComment(String data)
        throws IOException, XMLStreamException;

    /**
     * Older "legacy" output method for outputting DOCTYPE declaration.
     * Assumes that the passed-in String contains a complete DOCTYPE
     * declaration properly quoted.
     */
    public abstract void writeDTD(String data)
        throws IOException, XMLStreamException;

    public abstract void writeDTD(String rootName,
                                  String systemId, String publicId,
                                  String internalSubset)
        throws IOException, XMLStreamException;

    public abstract void writeEntityReference(String name)
        throws IOException, XMLStreamException;

    public abstract int writePI(String target, String data)
        throws IOException, XMLStreamException;

    public abstract void writeXmlDeclaration(String version, String enc, String standalone)
        throws IOException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, elements
    ////////////////////////////////////////////////////
     */

    /**
     * Returns the (lazily constructed) Writer wrapper that routes calls
     * to the <code>writeRaw</code> methods of this instance.
     */
    public final Writer wrapAsRawWriter()
    {
        if (mRawWrapper != null) {
            return mRawWrapper;
        }
        mRawWrapper = XmlWriterWrapper.wrapWriteRaw(this);
        return mRawWrapper;
    }

    /**
     * Returns the (lazily constructed) Writer wrapper that routes calls
     * to the <code>writeCharacters</code> methods of this instance.
     */
    public final Writer wrapAsTextWriter()
    {
        if (mTextWrapper != null) {
            return mTextWrapper;
        }
        mTextWrapper = XmlWriterWrapper.wrapWriteCharacters(this);
        return mTextWrapper;
    }

    /*
    ////////////////////////////////////////////////////
    // Helper methods for sub-classes
    ////////////////////////////////////////////////////
     */

    /**
     * Method called to verify that the name is a legal XML name.
     * Problems are reported via {@link #reportNwfName}.
     *
     * @param name Name to check
     * @param checkNs Passed on to the illegal-character lookup
     */
    public final void verifyNameValidity(String name, boolean checkNs)
        throws XMLStreamException
    {
        /* No empty names... caller must have dealt with optional arguments
         * prior to calling this method
         */
        final boolean emptyName = (name == null) || (name.length() == 0);
        if (emptyName) {
            reportNwfName(ErrorConsts.WERR_NAME_EMPTY);
        }
        final int badIndex = WstxInputData.findIllegalNameChar(name, checkNs, mXml11);
        if (badIndex < 0) { // all characters are fine
            return;
        }
        // Illegal first character gets its own message
        if (badIndex == 0) {
            reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_FIRST_CHAR,
                          WstxInputData.getCharDesc(name.charAt(0)));
        }
        reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_CHAR,
                      WstxInputData.getCharDesc(name.charAt(badIndex)));
    }

    /**
     * This is the method called when an output method call violates
     * name well-formedness checks
     * and {@link WstxOutputProperties#P_OUTPUT_VALIDATE_NAMES}
     * is enabled.
     */
    protected void reportNwfName(String msg)
        throws XMLStreamException
    {
        this.throwOutputError(msg);
    }

    /**
     * Variant of {@link #reportNwfName(String)} that takes a message
     * format and a single argument for it.
     */
    protected void reportNwfName(String msg, Object arg)
        throws XMLStreamException
    {
        this.throwOutputError(msg, arg);
    }

    /**
     * Reports a content well-formedness problem; delegates to
     * {@link #throwOutputError(String)}.
     */
    protected void reportNwfContent(String msg)
        throws XMLStreamException
    {
        this.throwOutputError(msg);
    }

    /**
     * Flushes pending output and then throws an
     * {@link XMLStreamException} with the given message. If the flush
     * itself fails, the I/O problem is thrown instead, wrapped.
     */
    protected void throwOutputError(String msg)
        throws XMLStreamException
    {
        // First, let's flush any output we may have, to make debugging easier
        try {
            flush();
        } catch (IOException flushProblem) {
            throw new WstxIOException(flushProblem);
        }
        throw new XMLStreamException(msg);
    }

    /**
     * Formats the message using {@link MessageFormat} with the single
     * argument, and passes it to {@link #throwOutputError(String)}.
     */
    protected void throwOutputError(String format, Object arg)
        throws XMLStreamException
    {
        throwOutputError(MessageFormat.format(format, new Object[] { arg }));
    }

    /**
     * Method called to handle invalid character in textual content requested
     * to be output. Content may be part of textual events (CHARACTER, CDATA),
     * attribute value, COMMENT content or PROCESSING_INSTRUCTION data.
     * The default behavior is to just throw an exception, but this can
     * be configured via property {@link WstxOutputProperties#P_OUTPUT_INVALID_CHAR_HANDLER}.
     */
    protected char handleInvalidChar(int c)
        throws IOException
    {
        // First, let's flush any output we may have, to make debugging easier
        flush();
        final InvalidCharHandler configured = mConfig.getInvalidCharHandler();
        final InvalidCharHandler handler = (configured != null) ?
            configured : InvalidCharHandler.FailingHandler.getInstance();
        return handler.convertInvalidChar(c);
    }
}
woodstox-4.1.3/src/java/com/ctc/wstx/sw/TypedStreamWriter.java 0000644 0001750 0001750 00000025710 11745427074 024631 0 ustar giovanni giovanni /* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.sw;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamException;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.Base64Variants;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import org.codehaus.stax2.ri.typed.ValueEncoderFactory;
import org.codehaus.stax2.validation.XMLValidator;
import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.exc.WstxIOException;
/**
 * Intermediate base class that implements Typed Access API (Stax2 v3)
 * for all (repairing, non-repairing, non-namespace) native stream
 * writer implementations.
 */
public abstract class TypedStreamWriter
    extends BaseStreamWriter
{
    /**
     * Factory for the value encoders used by the typed write methods;
     * constructed on first use by {@link #valueEncoderFactory}.
     */
    protected ValueEncoderFactory mValueEncoderFactory;

    /*
    ////////////////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////////////////
     */

    protected TypedStreamWriter(XmlWriter xw, String enc, WriterConfig cfg)
    {
        super(xw, enc, cfg);
    }

    /**
     * Accessor for the encoder factory; creates it on the first call.
     */
    protected final ValueEncoderFactory valueEncoderFactory()
    {
        ValueEncoderFactory factory = mValueEncoderFactory;
        if (factory == null) {
            factory = new ValueEncoderFactory();
            mValueEncoderFactory = factory;
        }
        return factory;
    }

    /*
    /////////////////////////////////////////////////
    // TypedXMLStreamWriter2 implementation
    // (Typed Access API, Stax v3.0)
    /////////////////////////////////////////////////
     */

    // // // Typed element content write methods

    public void writeBoolean(boolean value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value));
    }

    public void writeInt(int value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value));
    }

    public void writeLong(long value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value));
    }

    public void writeFloat(float value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value));
    }

    public void writeDouble(double value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value));
    }

    public void writeInteger(BigInteger value)
        throws XMLStreamException
    {
        /* No really efficient method exposed by JDK, keep it simple
         * (esp. considering that length is actually not bound)
         */
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getScalarEncoder(value.toString()));
    }

    public void writeDecimal(BigDecimal value)
        throws XMLStreamException
    {
        /* No really efficient method exposed by JDK, keep it simple
         * (esp. considering that length is actually not bound)
         */
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getScalarEncoder(value.toString()));
    }

    public void writeQName(QName name)
        throws XMLStreamException
    {
        /* Can't use AsciiValueEncoder, since QNames can contain
         * non-ascii characters
         */
        final String serialized = serializeQName(name);
        writeCharacters(serialized);
    }

    public final void writeIntArray(int[] value, int from, int length)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value, from, length));
    }

    public void writeLongArray(long[] value, int from, int length)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value, from, length));
    }

    public void writeFloatArray(float[] value, int from, int length)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value, from, length));
    }

    public void writeDoubleArray(double[] value, int from, int length)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(value, from, length));
    }

    /**
     * Writes binary content using the default Base64 variant.
     */
    public void writeBinary(byte[] value, int from, int length)
        throws XMLStreamException
    {
        final Base64Variant variant = Base64Variants.getDefaultVariant();
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(variant, value, from, length));
    }

    public void writeBinary(Base64Variant v, byte[] value, int from, int length)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedElement(factory.getEncoder(v, value, from, length));
    }

    /**
     * Shared implementation of the typed element content writers: closes
     * a still-open start element, runs structural and validity checks,
     * and then has the underlying writer serialize the encoded value
     * (passing the validator along when text content is validatable).
     *
     * @param enc Encoder that produces the content to write
     */
    protected final void writeTypedElement(AsciiValueEncoder enc)
        throws XMLStreamException
    {
        if (mStartElementOpen) {
            closeStartElement(mEmptyElement);
        }
        // How about well-formedness?
        if (mCheckStructure && inPrologOrEpilog()) {
            reportNwfStructure(ErrorConsts.WERR_PROLOG_NONWS_TEXT);
        }
        // Or validity?
        if (mVldContent <= XMLValidator.CONTENT_ALLOW_WS) {
            reportInvalidContent(CHARACTERS);
        }
        // So far so good: let's serialize
        final XMLValidator textValidator;
        if (mVldContent == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT) {
            textValidator = mValidator;
        } else {
            textValidator = null;
        }
        try {
            if (textValidator != null) {
                mWriter.writeTypedElement(enc, textValidator, getCopyBuffer());
            } else {
                mWriter.writeTypedElement(enc);
            }
        } catch (IOException ioProblem) {
            throw new WstxIOException(ioProblem);
        }
    }

    // // // Typed attribute value write methods

    public void writeBooleanAttribute(String prefix, String nsURI, String localName, boolean value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName, factory.getEncoder(value));
    }

    public void writeIntAttribute(String prefix, String nsURI, String localName, int value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName, factory.getEncoder(value));
    }

    public void writeLongAttribute(String prefix, String nsURI, String localName, long value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName, factory.getEncoder(value));
    }

    public void writeFloatAttribute(String prefix, String nsURI, String localName, float value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName, factory.getEncoder(value));
    }

    public void writeDoubleAttribute(String prefix, String nsURI, String localName, double value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName, factory.getEncoder(value));
    }

    public void writeIntegerAttribute(String prefix, String nsURI, String localName, BigInteger value)
        throws XMLStreamException
    {
        // not optimal, but has to do:
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getScalarEncoder(value.toString()));
    }

    public void writeDecimalAttribute(String prefix, String nsURI, String localName, BigDecimal value)
        throws XMLStreamException
    {
        // not optimal, but has to do:
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getScalarEncoder(value.toString()));
    }

    public void writeQNameAttribute(String prefix, String nsURI, String localName, QName name)
        throws XMLStreamException
    {
        /* Can't use AsciiValueEncoder, since QNames can contain
         * non-ascii characters
         */
        final String serialized = serializeQName(name);
        writeAttribute(prefix, nsURI, localName, serialized);
    }

    public void writeIntArrayAttribute(String prefix, String nsURI, String localName, int[] value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(value, 0, value.length));
    }

    public void writeLongArrayAttribute(String prefix, String nsURI, String localName, long[] value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(value, 0, value.length));
    }

    public void writeFloatArrayAttribute(String prefix, String nsURI, String localName, float[] value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(value, 0, value.length));
    }

    public void writeDoubleArrayAttribute(String prefix, String nsURI, String localName, double[] value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(value, 0, value.length));
    }

    /**
     * Writes a binary attribute value using the default Base64 variant.
     */
    public void writeBinaryAttribute(String prefix, String nsURI, String localName, byte[] value)
        throws XMLStreamException
    {
        final Base64Variant variant = Base64Variants.getDefaultVariant();
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(variant, value, 0, value.length));
    }

    public void writeBinaryAttribute(Base64Variant v, String prefix, String nsURI, String localName, byte[] value)
        throws XMLStreamException
    {
        final ValueEncoderFactory factory = valueEncoderFactory();
        writeTypedAttribute(prefix, nsURI, localName,
                            factory.getEncoder(v, value, 0, value.length));
    }

    /**
     * Method that will write attribute with value that is known not to
     * require additional escaping.
     */
    protected abstract void writeTypedAttribute(String prefix, String nsURI,
                                                String localName,
                                                AsciiValueEncoder enc)
        throws XMLStreamException;

    /**
     * Builds the lexical form of a QName: the local part alone if the
     * validated prefix is null or empty, otherwise "prefix:local".
     */
    private String serializeQName(QName name)
        throws XMLStreamException
    {
        final String validPrefix = validateQNamePrefix(name);
        final String localPart = name.getLocalPart();
        final boolean noPrefix = (validPrefix == null) || (validPrefix.length() == 0);
        // Not efficient... but should be ok
        return noPrefix ? localPart : (validPrefix + ":" + localPart);
    }
}
woodstox-4.1.3/src/test/ 0000755 0001750 0001750 00000000000 11745427076 015376 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/ 0000755 0001750 0001750 00000000000 11745427075 016164 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/codehaus/ 0000755 0001750 0001750 00000000000 11745427075 017757 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/codehaus/stax/ 0000755 0001750 0001750 00000000000 11745427075 020736 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/codehaus/stax/test/ 0000755 0001750 0001750 00000000000 11756143457 021716 5 ustar giovanni giovanni woodstox-4.1.3/src/test/org/codehaus/stax/test/BaseStaxTest.java 0000644 0001750 0001750 00000052317 11745427075 025142 0 ustar giovanni giovanni package org.codehaus.stax.test;
import java.io.*;
import java.util.HashMap;
import junit.framework.TestCase;
import javax.xml.stream.*;
import javax.xml.stream.events.XMLEvent;
/* Latest updates:
*
* - 07-Sep-2007, TSa: Updating based on latest understanding of
* the proper use of null and Empty String wrt. "no prefix" and
* "no namespace" cases.
*/
/**
* Base class for all StaxTest unit test classes. Contains shared
* functionality for many common set up tasks, as well as for
* outputting diagnostics.
*
* @author Tatu Saloranta
*/
public abstract class BaseStaxTest
extends TestCase
implements XMLStreamConstants
{
/**
 * This is the de facto standard property that enables accurate reporting
 * of CDATA events.
 */
final static String PROP_REPORT_CDATA = "http://java.sun.com/xml/stream/properties/report-cdata-event";

/**
 * Lookup table from event type code (as Integer) to a printable name.
 */
final static HashMap mTokenTypes = new HashMap();
static {
    // Parallel arrays: code at index i is described by name at index i
    final int[] typeCodes = {
        START_ELEMENT, END_ELEMENT, START_DOCUMENT, END_DOCUMENT,
        CHARACTERS, CDATA, COMMENT, PROCESSING_INSTRUCTION,
        DTD, SPACE, ENTITY_REFERENCE, NAMESPACE,
        NOTATION_DECLARATION, ENTITY_DECLARATION
    };
    final String[] typeNames = {
        "START_ELEMENT", "END_ELEMENT", "START_DOCUMENT", "END_DOCUMENT",
        "CHARACTERS", "CDATA", "COMMENT", "PROCESSING_INSTRUCTION",
        "DTD", "SPACE", "ENTITY_REFERENCE", "NAMESPACE_DECLARATION",
        "NOTATION_DECLARATION", "ENTITY_DECLARATION"
    };
    for (int i = 0; i < typeCodes.length; ++i) {
        mTokenTypes.put(new Integer(typeCodes[i]), typeNames[i]);
    }
}
/*
///////////////////////////////////////////////////
// Consts for expected values
///////////////////////////////////////////////////
*/
/**
 * Expected return value for streamReader.getNamespaceURI() in
 * non-namespace-aware mode.
 */
protected final String DEFAULT_URI_NON_NS = "";
/**
 * Counterpart of {@link #DEFAULT_URI_NON_NS}; also the empty String.
 * NOTE(review): presumably the expected "no namespace" URI in
 * namespace-aware mode -- confirm against the tests that use it.
 */
protected final String DEFAULT_URI_NS = "";
/*
///////////////////////////////////////////////////
// Other consts
///////////////////////////////////////////////////
*/
/*
///////////////////////////////////////////////////
// Cached instances
///////////////////////////////////////////////////
*/
// Lazily created by getInputFactory()
XMLInputFactory mInputFactory;
// Lazily created by getOutputFactory()
XMLOutputFactory mOutputFactory;
// Lazily created by getEventFactory()
XMLEventFactory mEventFactory;
// Constructors just pass the (optional) test name on to the super class
protected BaseStaxTest(String name) { super(name); }
protected BaseStaxTest() { super(); }
/*
//////////////////////////////////////////////////
// Factory methods
//////////////////////////////////////////////////
*/
/**
 * Returns the cached input factory of this test instance, creating it
 * via {@link #getNewInputFactory} on the first call.
 */
protected XMLInputFactory getInputFactory()
{
    if (mInputFactory != null) {
        return mInputFactory;
    }
    mInputFactory = getNewInputFactory();
    return mInputFactory;
}
/**
 * Creates a fresh input factory using the standard StAX factory lookup.
 */
protected static XMLInputFactory getNewInputFactory()
{
    final XMLInputFactory fresh = XMLInputFactory.newInstance();
    return fresh;
}
/**
 * Returns the cached output factory of this test instance, creating it
 * via {@link #getNewOutputFactory} on the first call.
 */
protected XMLOutputFactory getOutputFactory()
{
    if (mOutputFactory != null) {
        return mOutputFactory;
    }
    mOutputFactory = getNewOutputFactory();
    return mOutputFactory;
}
/**
 * Creates a fresh output factory using the standard StAX factory lookup.
 */
protected static XMLOutputFactory getNewOutputFactory()
{
    final XMLOutputFactory fresh = XMLOutputFactory.newInstance();
    return fresh;
}
/**
 * Returns the cached event factory of this test instance, creating it
 * with the standard StAX factory lookup on the first call.
 */
protected XMLEventFactory getEventFactory()
{
    if (mEventFactory != null) {
        return mEventFactory;
    }
    mEventFactory = XMLEventFactory.newInstance();
    return mEventFactory;
}
/**
 * Creates a stream reader that reads the given content from a byte
 * stream containing its UTF-8 encoding.
 *
 * @param f Factory to create the reader with
 * @param content Document content
 */
protected static XMLStreamReader constructUtf8StreamReader(XMLInputFactory f, String content)
    throws XMLStreamException
{
    final byte[] utf8;
    try {
        utf8 = content.getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
        // Checked exception is converted to an unchecked one
        throw new RuntimeException(e);
    }
    return f.createXMLStreamReader(new ByteArrayInputStream(utf8));
}
protected static XMLStreamReader constructCharStreamReader(XMLInputFactory f, String content)
    throws XMLStreamException
{
    // Character-based input: no byte encoding/decoding involved
    final StringReader charSource = new StringReader(content);
    return f.createXMLStreamReader(charSource);
}
protected static XMLStreamReader constructStreamReader(XMLInputFactory f, String content)
    throws XMLStreamException
{
    /* Two ways to feed a String to the parser: directly as characters
     * (constructCharStreamReader) or as UTF-8 encoded bytes. The
     * byte-based variant is the one currently in use.
     */
    final XMLStreamReader reader = constructUtf8StreamReader(f, content);
    return reader;
}
protected static XMLStreamReader constructStreamReader(XMLInputFactory f, byte[] b)
    throws XMLStreamException
{
    // Raw bytes are handed to the factory as-is, wrapped in a stream
    final ByteArrayInputStream byteSource = new ByteArrayInputStream(b);
    return f.createXMLStreamReader(byteSource);
}
/**
 * Constructs a stream reader for the given file, using the file's URL
 * as the system id, and verifies that the reader initially points to
 * START_DOCUMENT.
 *
 * @param f Factory to construct the reader with
 * @param filename Path of the file to read
 *
 * @return Reader positioned at START_DOCUMENT
 *
 * @throws IOException If the file can not be opened, or its URL can
 *   not be constructed
 * @throws XMLStreamException If the factory fails to create the reader
 */
protected static XMLStreamReader constructStreamReaderForFile(XMLInputFactory f, String filename)
    throws IOException, XMLStreamException
{
    File inf = new File(filename);
    /* File.toURL() is deprecated as it does not escape characters that
     * are illegal in URLs (spaces, for example); going through URI
     * produces a properly escaped system id.
     */
    String systemId = inf.toURI().toURL().toString();
    FileReader in = new FileReader(inf);
    boolean handedOver = false;
    try {
        XMLStreamReader sr = f.createXMLStreamReader(systemId, in);
        assertEquals(START_DOCUMENT, sr.getEventType());
        handedOver = true;
        return sr;
    } finally {
        // Do not leak the file handle if construction or the check failed
        if (!handedOver) {
            try {
                in.close();
            } catch (IOException ignored) {
                // the original failure is the one worth reporting
            }
        }
    }
}
protected XMLStreamReader constructNsStreamReader(String content)
    throws XMLStreamException
{
    /* Uses the shared (cached) factory, switched to namespace-aware
     * mode; whether the setting actually took effect is not checked.
     */
    final XMLInputFactory nsFactory = getInputFactory();
    setNamespaceAware(nsFactory, true);
    final StringReader charSource = new StringReader(content);
    return nsFactory.createXMLStreamReader(charSource);
}
protected XMLStreamReader constructNsStreamReader(String content, boolean coal)
    throws XMLStreamException
{
    /* Same as the single-argument variant, but additionally sets
     * (and verifies) the coalescing mode on the shared factory.
     */
    final XMLInputFactory nsFactory = getInputFactory();
    setNamespaceAware(nsFactory, true);
    setCoalescing(nsFactory, coal);
    final StringReader charSource = new StringReader(content);
    return nsFactory.createXMLStreamReader(charSource);
}
/*
//////////////////////////////////////////////////
// Configuring input factory
//////////////////////////////////////////////////
*/
protected static boolean isCoalescing(XMLInputFactory f)
    throws XMLStreamException
{
    // Property value is expected to be a Boolean; anything else fails the cast
    final Object value = f.getProperty(XMLInputFactory.IS_COALESCING);
    final Boolean flag = (Boolean) value;
    return flag.booleanValue();
}
protected static void setCoalescing(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    f.setProperty(XMLInputFactory.IS_COALESCING, Boolean.valueOf(state));
    // Read the value back to make sure the factory accepted it
    final boolean actual = isCoalescing(f);
    assertEquals(state, actual);
}
protected static boolean isValidating(XMLInputFactory f)
    throws XMLStreamException
{
    // Property value is expected to be a Boolean; anything else fails the cast
    final Object value = f.getProperty(XMLInputFactory.IS_VALIDATING);
    final Boolean flag = (Boolean) value;
    return flag.booleanValue();
}
protected static void setValidating(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean wanted = Boolean.valueOf(state);
    try {
        f.setProperty(XMLInputFactory.IS_VALIDATING, wanted);
    } catch (IllegalArgumentException iae) {
        // Rejected property/value is reported as a test failure
        fail("Could not set DTD validating mode to "+state+": "+iae);
    }
    // And then verify that the value did stick
    final boolean actual = isValidating(f);
    assertEquals(state, actual);
}
protected static boolean isNamespaceAware(XMLInputFactory f)
    throws XMLStreamException
{
    // Property value is expected to be a Boolean; anything else fails the cast
    final Object value = f.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE);
    final Boolean flag = (Boolean) value;
    return flag.booleanValue();
}
/**
 * Tries to change the namespace-awareness setting of the factory.
 *
 * @return True if the property could be set and reads back as the
 *   requested value; false if the factory rejected the property (or
 *   value), or did not retain it.
 */
protected static boolean setNamespaceAware(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean wanted = Boolean.valueOf(state);
    try {
        f.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, wanted);
        /* 07-Sep-2005, TSa: No assertion here; some implementations
         *   may silently ignore the setting (at least for 'false'),
         *   so just report whether the value was retained.
         */
        final boolean actual = isNamespaceAware(f);
        return actual == state;
    } catch (IllegalArgumentException e) {
        // Property (or this specific value) taken to be unsupported
        return false;
    }
}
protected static void setReplaceEntities(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean wanted = Boolean.valueOf(state);
    f.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, wanted);
    // Value must read back exactly as it was set
    final Object actual = f.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES);
    assertEquals(wanted, actual);
}
protected static void setSupportDTD(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean wanted = Boolean.valueOf(state);
    f.setProperty(XMLInputFactory.SUPPORT_DTD, wanted);
    // Value must read back exactly as it was set
    final Object actual = f.getProperty(XMLInputFactory.SUPPORT_DTD);
    assertEquals(wanted, actual);
}
/**
 * Tries to change the external entity support setting of the factory.
 *
 * @return True if the property could be set and reads back as a Boolean
 *   with the requested value; false otherwise.
 */
protected static boolean setSupportExternalEntities(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    final Boolean wanted = Boolean.valueOf(state);
    try {
        f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, wanted);
        final Object actual = f.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
        if (!(actual instanceof Boolean)) {
            return false;
        }
        return ((Boolean) actual).booleanValue() == state;
    } catch (IllegalArgumentException e) {
        // Property (or this specific value) taken to be unsupported
        return false;
    }
}
/**
 * Installs the given resolver on the factory, via the standard
 * {@link XMLInputFactory#RESOLVER} property.
 */
protected static void setResolver(XMLInputFactory f, XMLResolver resolver)
    throws XMLStreamException
{
    final String propertyName = XMLInputFactory.RESOLVER;
    f.setProperty(propertyName, resolver);
}
/**
 * Sets the CDATA reporting property, if the factory supports it.
 *
 * @return True if the property is supported (and was set); false if not.
 */
protected static boolean setReportCData(XMLInputFactory f, boolean state)
    throws XMLStreamException
{
    if (!f.isPropertySupported(PROP_REPORT_CDATA)) {
        return false;
    }
    f.setProperty(PROP_REPORT_CDATA, Boolean.valueOf(state));
    return true;
}
/*
//////////////////////////////////////////////////
// Stream reader accessors
//////////////////////////////////////////////////
*/
/**
 * Accessor that returns the text of the current event, and along the
 * way cross-checks that the different StAX text accessors agree with
 * each other.
 *
 * @return Text of the current event; never null (an empty String is
 *   substituted for a null DTD / ENTITY_REFERENCE text)
 */
protected static String getAndVerifyText(XMLStreamReader sr)
    throws XMLStreamException
{
    String text = sr.getText();
    /* 05-Apr-2006, TSa: getText() works for DTD and ENTITY_REFERENCE,
     *   but getTextXxx() methods do not; so for those two there is
     *   nothing more that can be verified.
     */
    int type = sr.getEventType();
    if (type == ENTITY_REFERENCE || type == DTD) {
        // not sure if null is legal for these, but let's spare the caller an NPE
        return (text == null) ? "" : text;
    }

    assertNotNull("getText() should never return null.", text);
    int expLen = sr.getTextLength();
    /* Only CDATA may be empty (empty blocks are legal).
     *
     * !!! 01-Sep-2004, TSa: in theory, coalescing mode could convert
     *   empty CDATA section(s) into an empty CHARACTERS event; this
     *   check may need to take the coalescing mode into account.
     */
    if (sr.getEventType() == CHARACTERS && expLen == 0) {
        fail("Stream reader should never return empty Strings.");
    }
    assertEquals("Expected text length of "+expLen+", got "+text.length(),
                 expLen, text.length());
    // And the buffer-based accessors need to expose the very same text
    char[] buffer = sr.getTextCharacters();
    int offset = sr.getTextStart();
    String fromBuffer = new String(buffer, offset, expLen);
    assertEquals("Expected getText() and getTextCharacters() to return same value for event of type ("+tokenTypeDesc(sr.getEventType())+")", text, fromBuffer);
    return text;
}
/**
 * Collects text of the current and all immediately following textual
 * events (CHARACTERS, SPACE, CDATA); on return the reader points to
 * the first non-textual event.
 */
protected static String getAllText(XMLStreamReader sr)
    throws XMLStreamException
{
    StringBuffer collected = new StringBuffer();
    for (int tt = sr.getEventType();
         tt == CHARACTERS || tt == SPACE || tt == CDATA;
         tt = sr.getEventType()) {
        collected.append(getAndVerifyText(sr));
        sr.next();
    }
    return collected.toString();
}
/**
 * Collects text of the current and all immediately following CDATA
 * events; on return the reader points to the first other event.
 */
protected static String getAllCData(XMLStreamReader sr)
    throws XMLStreamException
{
    StringBuffer collected = new StringBuffer();
    /* Note: CDATA sections CAN be reported as CHARACTERS, but
     * not as SPACE
     */
    for (int tt = sr.getEventType();
         tt == CHARACTERS || tt == CDATA;
         tt = sr.getEventType()) {
        collected.append(getAndVerifyText(sr));
        sr.next();
    }
    return collected.toString();
}
/*
//////////////////////////////////////////////////
// Derived assert/fail methods
//////////////////////////////////////////////////
*/
/**
 * Fails the test, with a descriptive message, unless the two event
 * types are the same.
 */
protected static void assertTokenType(int expType, int actType)
{
    if (expType != actType) {
        fail("Expected token "+tokenTypeDesc(expType)
             +"; got "+tokenTypeDesc(actType)+".");
    }
}
/**
 * Fails the test, with a descriptive message, unless the two event
 * types are the same; the reader is passed along for describing the
 * actual token.
 */
protected static void assertTokenType(int expType, int actType,
                                      XMLStreamReader sr)
{
    if (expType != actType) {
        fail("Expected token "+tokenTypeDesc(expType)
             +"; got "+tokenTypeDesc(actType, sr)+".");
    }
}
/**
 * Fails the test unless the given event type is one of the textual
 * ones (CHARACTERS, SPACE or CDATA).
 */
protected static void assertTextualTokenType(int actType)
{
    final boolean textual = (actType == CHARACTERS)
        || (actType == SPACE) || (actType == CDATA);
    if (!textual) {
        fail("Expected textual token (CHARACTERS, SPACE or CDATA)"
             +"; got "+tokenTypeDesc(actType)+".");
    }
}
/**
 * Fails the test with a message that shows both the expected and the
 * actual String, in quoted printable form.
 */
protected static void failStrings(String msg, String exp, String act)
{
    // !!! TODO: Indicate position where Strings differ
    final String expDesc = quotedPrintable(exp);
    final String actDesc = quotedPrintable(act);
    fail(msg+": expected "+expDesc+", got "
         +actDesc);
}
/**
* Helper method for ensuring that the current element
* (START_ELEMENT, END_ELEMENT) has no prefix
*
*
*
* @author Tatu Saloranta
*/
public class TestProperties
    extends BaseStreamTest
{
    /**
     * Checks defaults of the entity handling properties of a freshly
     * created input factory.
     */
    public void testDefaultEntitySettings()
    {
        final XMLInputFactory factory = getNewInputFactory();
        assertEquals(Boolean.TRUE, factory.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES));
        // For external entities only the type of the value is checked
        final Object extEntities = factory.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
        if (extEntities instanceof Boolean) {
            return;
        }
        fail("Property value for XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES not of type Boolean, but "+((extEntities == null) ? "[null]" : extEntities.getClass().getName()));
    }

    /**
     * Checks defaults of validation-related properties.
     */
    public void testDefaultValidationSettings()
    {
        final XMLInputFactory factory = getNewInputFactory();
        final Object validating = factory.getProperty(XMLInputFactory.IS_VALIDATING);
        assertEquals(Boolean.FALSE, validating);
        // A few impls might not support this, but it is the default...
        final Object supportDtd = factory.getProperty(XMLInputFactory.SUPPORT_DTD);
        assertEquals(Boolean.TRUE, supportDtd);
    }

    /**
     * Checks defaults of the remaining standard properties.
     */
    public void testDefaultMiscSettings()
    {
        final XMLInputFactory factory = getNewInputFactory();
        final Object nsAware = factory.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE);
        assertEquals(Boolean.TRUE, nsAware);
        final Object coalescing = factory.getProperty(XMLInputFactory.IS_COALESCING);
        assertEquals(Boolean.FALSE, coalescing);
        // Shouldn't have default handler objects either
        assertNull(factory.getProperty(XMLInputFactory.REPORTER));
        assertNull(factory.getProperty(XMLInputFactory.RESOLVER));
        assertNull(factory.getProperty(XMLInputFactory.ALLOCATOR));
    }
}
woodstox-4.1.3/src/test/org/codehaus/stax/test/stream/TestGetSegmentedText.java 0000644 0001750 0001750 00000023603 11745427075 030137 0 ustar giovanni giovanni package org.codehaus.stax.test.stream;
import java.io.*;
import java.util.Random;
import javax.xml.stream.*;
/**
* Unit test suite that ensures that the 'segmented' text accessors
* (multi-argument getTextCharacters) works as expected, with various
* combinations of access lengths, and orderings.
*
* @author Tatu Saloranta
*/
public class TestGetSegmentedText
extends BaseStreamTest
{
static String sXmlInput = null;
static String sExpResult = null;
public void testCoalescingAutoEntity()
    throws Exception
{
    // First non-ns, then ns-aware
    final boolean[] nsAwareModes = { false, true };
    for (int i = 0; i < nsAwareModes.length; ++i) {
        doTest(nsAwareModes[i], true, true);
    }
}
public void testNonCoalescingAutoEntity()
    throws Exception
{
    // First non-ns, then ns-aware
    final boolean[] nsAwareModes = { false, true };
    for (int i = 0; i < nsAwareModes.length; ++i) {
        doTest(nsAwareModes[i], false, true);
    }
}
public void testCoalescingNonAutoEntity()
    throws Exception
{
    // First non-ns, then ns-aware
    final boolean[] nsAwareModes = { false, true };
    for (int i = 0; i < nsAwareModes.length; ++i) {
        doTest(nsAwareModes[i], true, false);
    }
}
public void testNonCoalescingNonAutoEntity()
    throws Exception
{
    // First non-ns, then ns-aware
    final boolean[] nsAwareModes = { false, true };
    for (int i = 0; i < nsAwareModes.length; ++i) {
        doTest(nsAwareModes[i], false, false);
    }
}
public void testSegmentedGetCharacters()
throws XMLStreamException
{
final String TEXT = "Let's just add some content in here ('') to fill some of the parser buffers, to test multi-argument getTextCharacters() method";
final String XML = "
*
*
* @author Tatu Saloranta
*/
public class TestProperties
    extends BaseWriterTest
{
    /**
     * A freshly created output factory is expected to have namespace
     * repairing disabled.
     */
    public void testDefaultSettings()
    {
        final XMLOutputFactory factory = getNewOutputFactory();
        final Object repairing = factory.getProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES);
        assertEquals(Boolean.FALSE, repairing);
    }
}
woodstox-4.1.3/src/test/org/codehaus/stax/test/wstream/TestOutputEncoding.java 0000644 0001750 0001750 00000007130 11745427075 030052 0 ustar giovanni giovanni package org.codehaus.stax.test.wstream;
import javax.xml.stream.*;
import java.io.*;
/**
* Set of unit tests for verifying operation of {@link XMLStreamWriter}
* when outputting text nodes that contain characters that should
* be quoted.
*
* @author Tatu Saloranta
*/
public class TestOutputEncoding
extends BaseWriterTest
{
final String ISO_LATIN_ENCODING = "ISO-8859-1";
final String UTF8_ENCODING = "UTF-8";
public void testSimpleContentQuoting()
throws IOException, XMLStreamException
{
String TEXT = "
"
+"Input : {"+printableWithSpaces(input)+"}\n
"
+"Output: {"+printableWithSpaces(result)+"}\n
"
+"Exp. : {"+printableWithSpaces(expOutput)+"}\n
");
}
return count;
}
/**
 * Streams through the given document three times, each time with a
 * reader constructed in a different way, and returns the sum of the
 * values the per-reader streamAndSkip() calls return.
 */
protected int streamAndSkip(XMLInputFactory f, InputConfigIterator it,
                            String input)
    throws XMLStreamException, UnsupportedEncodingException
{
    int total = 0;
    // Let's loop couple of input methods
    for (int method = 0; method < 3; ++method) {
        final XMLStreamReader sr;
        if (method == 0) {
            // via the shared constructStreamReader() helper
            sr = constructStreamReader(f, input);
        } else if (method == 1) {
            // via InputStream and auto-detection
            ByteArrayInputStream bytes = new ByteArrayInputStream
                (input.getBytes("UTF-8"));
            sr = f.createXMLStreamReader(bytes);
        } else if (method == 2) {
            // explicit UTF-8 stream
            ByteArrayInputStream bytes = new ByteArrayInputStream
                (input.getBytes("UTF-8"));
            Reader utf8Reader = new InputStreamReader(bytes, "UTF-8");
            sr = f.createXMLStreamReader(utf8Reader);
        } else {
            throw new Error("Internal error");
        }
        total += streamAndSkip(sr, it, input);
    }
    return total;
}
/**
 * Reads through all remaining events of the reader without accessing
 * their contents.
 *
 * @return Sum of the event type codes encountered
 */
protected int streamAndSkip(XMLStreamReader sr, InputConfigIterator it,
                            String input)
    throws XMLStreamException
{
    int total = 0;
    while (sr.hasNext()) {
        final int eventType = sr.next();
        total += eventType;
    }
    return total;
}
protected void generateData(Random r, StringBuffer input,
StringBuffer output, boolean autoEnt)
{
final String PREAMBLE =
""
+"\n"
+" \n"
+" \n"
+"]>";
/* Ok; template will use '*' chars as placeholders, to be replaced
* by pseudo-randomly selected choices.
*/
final String TEMPLATE =
"XMLStreamWriter.writeDefaultNamespace
*/
public void testExplicitDefaultNsWrite()
    throws XMLStreamException
{
    final String rootUri = "http://foo";
    final String defaultUri = "http://bar";
    XMLOutputFactory factory = getFactory();
    StringWriter buffer = new StringWriter();
    XMLStreamWriter writer = factory.createXMLStreamWriter(buffer);

    writer.writeStartDocument();
    /* Root gets an explicit prefix, and on top of that the default
     * namespace is declared on it:
     */
    writer.writeStartElement("myns", "root", rootUri);
    writer.writeDefaultNamespace(defaultUri);
    // leaf then goes in the (default) namespace just declared
    writer.writeStartElement(defaultUri, "leaf");
    writer.writeEndElement();
    writer.writeEndElement();
    writer.writeEndDocument();
    writer.close();

    // Parse the output back, to see what actually got written
    final String xml = buffer.toString();
    XMLStreamReader reader = constructNsStreamReader(xml, false);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());
    assertEquals("myns", reader.getPrefix());
    assertEquals(rootUri, reader.getNamespaceURI());
    final int declCount = reader.getNamespaceCount();
    assertEquals("Expected two namespace declarations, got "+declCount, 2, declCount);

    // Leaf is to reuse root's default namespace, without declarations
    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("leaf", reader.getLocalName());
    assertNoElemPrefix(reader);
    assertEquals(defaultUri, reader.getNamespaceURI());
    assertEquals(0, reader.getNamespaceCount());

    assertTokenType(END_ELEMENT, reader.next());
    assertEquals("leaf", reader.getLocalName());
    // no need to check the rest
    reader.close();
}
/**
 * Test for verifying that a prefix explicitly passed by the caller
 * wins over reuse of a prefix that is already bound to the same
 * namespace URI.
 */
public void testExplicitDupNsWrite()
    throws XMLStreamException
{
    final String sharedUri = "http://bar";
    XMLOutputFactory factory = getFactory();
    StringWriter buffer = new StringWriter();
    XMLStreamWriter writer = factory.createXMLStreamWriter(buffer);

    // Element and attribute share the URI, but ask for distinct prefixes
    writer.writeStartDocument();
    writer.writeStartElement("ns", "root", sharedUri);
    writer.writeAttribute("attrns", sharedUri, "attr", "value");
    writer.writeEndElement();
    writer.writeEndDocument();
    writer.close();

    // Parse the output back, to see what actually got written
    final String xml = buffer.toString();
    XMLStreamReader reader = constructNsStreamReader(xml, false);

    assertTokenType(START_ELEMENT, reader.next());
    assertEquals("root", reader.getLocalName());
    assertEquals(sharedUri, reader.getNamespaceURI());
    assertEquals(1, reader.getAttributeCount());
    assertEquals("attr", reader.getAttributeLocalName(0));
    assertEquals(sharedUri, reader.getAttributeNamespace(0));
    // names and URIs match; but the prefix must be the one caller asked for
    assertEquals("attrns", reader.getAttributePrefix(0));
    assertEquals(2, reader.getNamespaceCount());
    // no need to check the rest
    reader.close();
}
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * @return The shared output factory, configured to be namespace-aware
 *   and to repair namespaces
 */
private XMLOutputFactory getFactory()
    throws XMLStreamException
{
    final XMLOutputFactory repairingFactory = getOutputFactory();
    setNamespaceAware(repairingFactory, true);
    setRepairing(repairingFactory, true);
    return repairingFactory;
}
}
woodstox-4.1.3/src/test/wstxtest/wstream/TestContentValidation.java 0000644 0001750 0001750 00000030127 11745427076 026120 0 ustar giovanni giovanni package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import org.codehaus.stax2.*;
/**
* This unit test suite verifies that output-side content validation
* works as expected, when enabled.
*/
public class TestContentValidation
extends BaseWriterTest
{
final String COMMENT_CONTENT_IN = "can not have -- in there";
final String COMMENT_CONTENT_OUT = "can not have - - in there";
final String CDATA_CONTENT_IN = "CData in: ";
final String CDATA_CONTENT_OUT = "CData in: ";
final String PI_CONTENT_IN = "this should end PI: ?> shouldn't it?";
final String PI_CONTENT_OUT = "this should end PI: ?> shouldn't it?";
/*
////////////////////////////////////////////////////
// Main test methods
////////////////////////////////////////////////////
*/
/**
 * Verifies that with content checking enabled, but fixing disabled,
 * trying to write a comment that contains "--" results in an
 * {@link XMLStreamException}; for all three factory types (non-ns,
 * ns non-repairing, ns repairing) and for Writer-backed as well as
 * OutputStream-backed (UTF-8, ISO-8859-1) stream writers.
 */
public void testCommentChecking()
    throws XMLStreamException
{
    for (int i = 0; i <= 2; ++i) {
        XMLOutputFactory2 f = getFactory(i, true, false);
        for (int enc = 0; enc < 3; ++enc) {
            XMLStreamWriter2 sw;
            if (enc == 0) {
                StringWriter strw = new StringWriter();
                sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
            } else {
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                String encStr = (enc == 1) ? "UTF-8" : "ISO-8859-1";
                sw = (XMLStreamWriter2) f.createXMLStreamWriter(bos, encStr);
            }
            sw.writeStartDocument();
            sw.writeStartElement("root");
            try {
                sw.writeComment(COMMENT_CONTENT_IN);
                fail("Expected an XMLStreamException for illegal comment content (contains '--') in checking + non-fixing mode (type "+i+")");
            } catch (XMLStreamException sex) {
                // good
            } catch (Exception t) {
                /* Only Exceptions are caught here (as with the other
                 * checking tests): catching Throwable would also trap
                 * the failure signalled by fail() above, and re-report
                 * it with a misleading "got: ..." message.
                 */
                fail("Expected an XMLStreamException for illegal comment content (contains '--') in checking + non-fixing mode; got: "+t);
            }
        }
    }
}
/**
 * Verifies that with both content checking and fixing enabled, a
 * comment containing "--" gets written in a form that parses back to
 * the expected (fixed) content.
 */
public void testCommentFixing()
    throws Exception
{
    for (int i = 0; i <= 2; ++i) {
        XMLOutputFactory2 factory = getFactory(i, true, true);
        /* 24-Aug-2006, TSa: Output stream-based writers may use
         *   different code, so they are covered as well
         */
        for (int enc = 0; enc < 3; ++enc) {
            final boolean charBased = (enc == 0);
            StringWriter chars = null;
            ByteArrayOutputStream bytes = null;
            String encoding = null;
            XMLStreamWriter2 sw;
            if (charBased) {
                chars = new StringWriter();
                sw = (XMLStreamWriter2) factory.createXMLStreamWriter(chars);
            } else {
                bytes = new ByteArrayOutputStream();
                encoding = (enc == 1) ? "UTF-8" : "ISO-8859-1";
                sw = (XMLStreamWriter2) factory.createXMLStreamWriter(bytes, encoding);
            }

            sw.writeStartDocument();
            sw.writeStartElement("root");
            /* writing is fine now; output is either a single padded
             * comment, or 2 separate ones
             */
            sw.writeComment(COMMENT_CONTENT_IN);
            sw.writeEndElement();
            sw.writeEndDocument();
            sw.close();

            final String output = (chars != null)
                ? chars.toString()
                : new String(bytes.toByteArray(), encoding);

            // Writing worked; but the result has to be parseable, too:
            XMLStreamReader sr = getReader(output);
            assertTokenType(START_ELEMENT, sr.next());
            assertTokenType(COMMENT, sr.next());
            StringBuffer seen = new StringBuffer();
            seen.append(getAndVerifyText(sr));
            // There may be more than one comment...
            int next = sr.next();
            while (next == COMMENT) {
                seen.append(getAndVerifyText(sr));
                next = sr.next();
            }
            /* Content should be about the same, apart from added
             * spaces; and as this is wstx-specific, the exact expected
             * result is simply hard-coded:
             */
            final String actual = seen.toString();
            if (!COMMENT_CONTENT_OUT.equals(actual)) {
                failStrings("Failed to properly quote comment content (type "+i+")",
                            COMMENT_CONTENT_OUT, actual);
            }
            assertTokenType(END_ELEMENT, next);
        }
    }
}
/**
 * Verifies that with content checking enabled, but fixing disabled,
 * writing illegal CDATA content results in an
 * {@link XMLStreamException}; both via the String and the char array
 * based write method.
 */
public void testCDataChecking()
    throws Exception
{
    for (int i = 0; i <= 2; ++i) {
        for (int itype = 0; itype < 2; ++itype) {
            XMLOutputFactory2 factory = getFactory(i, true, false);
            /* 24-Aug-2006, TSa: Output stream-based writers may use
             *   different code, so they are covered as well
             */
            for (int enc = 0; enc < 3; ++enc) {
                XMLStreamWriter2 sw;
                if (enc != 0) {
                    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
                    String encoding = (enc == 1) ? "UTF-8" : "ISO-8859-1";
                    sw = (XMLStreamWriter2) factory.createXMLStreamWriter
                        (bytes, encoding);
                } else {
                    StringWriter chars = new StringWriter();
                    sw = (XMLStreamWriter2) factory.createXMLStreamWriter(chars);
                }
                sw.writeStartDocument();
                sw.writeStartElement("root");
                try {
                    if (itype != 0) {
                        char[] content = CDATA_CONTENT_IN.toCharArray();
                        sw.writeCData(content, 0, content.length);
                    } else {
                        sw.writeCData(CDATA_CONTENT_IN);
                    }
                    fail("Expected an XMLStreamException for illegal CDATA content (contains ']]>') in checking + non-fixing mode (type "+i+", itype "+itype+")");
                } catch (XMLStreamException expected) {
                    // as it should be
                } catch (Exception t) {
                    fail("Expected an XMLStreamException for illegal CDATA content (contains ']]>') in checking + non-fixing mode; got: "+t);
                }
            }
        }
    }
}
/**
 * Verifies that with both content checking and fixing enabled,
 * problematic CDATA content gets written in a form that parses back
 * to the expected content; both via the String and the char array
 * based write method.
 */
public void testCDataFixing()
    throws Exception
{
    for (int i = 0; i <= 2; ++i) {
        for (int itype = 0; itype < 2; ++itype) {
            XMLOutputFactory2 factory = getFactory(i, true, true);
            /* 24-Aug-2006, TSa: Output stream-based writers may use
             *   different code, so they are covered as well
             */
            for (int enc = 0; enc < 3; ++enc) {
                final boolean charBased = (enc == 0);
                StringWriter chars = null;
                ByteArrayOutputStream bytes = null;
                String encoding = null;
                XMLStreamWriter2 sw;
                if (charBased) {
                    chars = new StringWriter();
                    sw = (XMLStreamWriter2) factory.createXMLStreamWriter(chars);
                } else {
                    bytes = new ByteArrayOutputStream();
                    encoding = (enc == 1) ? "UTF-8" : "ISO-8859-1";
                    sw = (XMLStreamWriter2) factory.createXMLStreamWriter(bytes, encoding);
                }

                sw.writeStartDocument();
                sw.writeStartElement("root");
                /* writing is fine now; expecting two separate CDATA
                 * segments as the result
                 */
                if (itype != 0) {
                    char[] content = CDATA_CONTENT_IN.toCharArray();
                    sw.writeCData(content, 0, content.length);
                } else {
                    sw.writeCData(CDATA_CONTENT_IN);
                }
                sw.writeEndElement();
                sw.writeEndDocument();
                sw.close();

                final String output = (chars != null)
                    ? chars.toString()
                    : new String(bytes.toByteArray(), encoding);

                // Writing worked; but the result has to be parseable, too:
                XMLStreamReader sr = getReader(output);
                assertTokenType(START_ELEMENT, sr.next());
                int next = sr.next();
                assertTokenType(CDATA, next);
                StringBuffer seen = new StringBuffer();
                seen.append(getAndVerifyText(sr));
                // One or more segments are to be expected...
                next = sr.next();
                while (next == CDATA) {
                    seen.append(getAndVerifyText(sr));
                    next = sr.next();
                }
                final String actual = seen.toString();
                if (!CDATA_CONTENT_OUT.equals(actual)) {
                    failStrings("Failed to properly quote CDATA content (type "+i+", itype "+itype+")",
                                CDATA_CONTENT_OUT, actual);
                }
                assertTokenType(END_ELEMENT, next);
            }
        }
    }
}
/**
 * Verifies that with content checking enabled, but fixing disabled,
 * trying to write illegal processing instruction content results in
 * an {@link XMLStreamException}; for all three factory types and for
 * Writer-backed as well as OutputStream-backed stream writers.
 */
public void testPIChecking()
    throws Exception
{
    for (int i = 0; i <= 2; ++i) {
        XMLOutputFactory2 f = getFactory(i, true, false);
        /* 24-Aug-2006, TSa: Let's also test with output stream-based
         * output... writers may use different code
         */
        for (int enc = 0; enc < 3; ++enc) {
            XMLStreamWriter2 sw;
            if (enc == 0) {
                StringWriter strw = new StringWriter();
                sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
            } else {
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                sw = (XMLStreamWriter2) f.createXMLStreamWriter
                    (bos, (enc == 1) ? "UTF-8" : "ISO-8859-1");
            }
            sw.writeStartDocument();
            sw.writeStartElement("root");
            try {
                sw.writeProcessingInstruction("target", PI_CONTENT_IN);
                /* "type" refers to the factory type (as with the other
                 * checking tests), so report the factory index, not
                 * the encoding index.
                 */
                fail("Expected an XMLStreamException for illegal PI content (contains '?>') in checking + non-fixing mode (type "+i+")");
            } catch (XMLStreamException sex) {
                // good
            } catch (Exception t) {
                fail("Expected an XMLStreamException for illegal PI content (contains '?>') in checking + non-fixing mode; got: "+t);
            }
        }
    }
}
// // Note: no way (currently?) to fix PI content; thus, no test:
/*
////////////////////////////////////////////////////
// Internal methods
////////////////////////////////////////////////////
*/
/**
 * Configures and returns the shared output factory.
 *
 * @param type 0 for non-namespace-aware, 1 for namespace-aware and
 *   non-repairing, 2 for namespace-aware and repairing
 * @param checkAll Value passed to setValidateAll()
 * @param fixAll Value passed to setFixContent()
 */
private XMLOutputFactory2 getFactory(int type, boolean checkAll, boolean fixAll)
    throws XMLStreamException
{
    final boolean nsAware = (type > 0);
    final boolean repairing = (type > 1);
    final XMLOutputFactory2 factory = getOutputFactory();
    setNamespaceAware(factory, nsAware);
    setRepairing(factory, repairing);
    setValidateAll(factory, checkAll);
    setFixContent(factory, fixAll);
    return factory;
}
/**
 * Constructs a non-coalescing reader for the given document, using
 * the shared input factory.
 */
private XMLStreamReader getReader(String content)
    throws XMLStreamException
{
    final XMLInputFactory2 factory = getInputFactory();
    setCoalescing(factory, false);
    final StringReader charSource = new StringReader(content);
    return factory.createXMLStreamReader(charSource);
}
}
woodstox-4.1.3/src/test/wstxtest/wstream/TestAttrValidation.java 0000644 0001750 0001750 00000001743 11745427076 025422 0 ustar giovanni giovanni package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import org.codehaus.stax2.*;
/**
* This unit test suite verifies that output-side content validation
* works as expected, when enabled.
*/
public class TestAttrValidation
    extends BaseWriterTest
{
    /**
     * Verifies that trying to write an attribute value that contains
     * a null character results in an {@link XMLStreamException}.
     */
    public void testSimpleAttrs()
        throws Exception
    {
        final XMLOutputFactory2 factory = getOutputFactory();
        final StringWriter buffer = new StringWriter();
        final XMLStreamWriter writer = factory.createXMLStreamWriter(buffer, "UTF-8");

        writer.writeStartDocument();
        writer.writeEmptyElement("root");
        try {
            writer.writeAttribute("foo", "Null is invalid: \0");
            fail("Expected an exception when trying to write attribute value with null character");
        } catch (XMLStreamException expected) {
            // this is the expected outcome
        }
        writer.writeEndDocument();
    }
}
woodstox-4.1.3/src/test/wstxtest/wstream/TestStructuralValidation.java 0000644 0001750 0001750 00000015112 11745427076 026653 0 ustar giovanni giovanni package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import org.codehaus.stax2.*;
/**
* This unit test suite verifies that output-side content validation
* works as expected, when enabled.
*/
public class TestStructuralValidation
extends BaseWriterTest
{
/*
////////////////////////////////////////////////////
// Main test methods
////////////////////////////////////////////////////
*/
/**
 * Unit test suite for testing violations of structural checks, when
 * trying to output things in prolog/epilog. Each of the write
 * operations tried is expected to result in an
 * {@link XMLStreamException}; anything else (including no exception
 * at all) is a failure.
 */
public void testPrologChecks()
    throws Exception
{
    for (int i = 0; i <= 2; ++i) { // non-ns, simple-ns, repairing-ns
        for (int j = 0; j < 2; ++j) { // prolog / epilog
            boolean epilog = (j > 0);
            final String prologMsg = epilog ? " in epilog" : " in prolog";

            for (int op = 0; op <= 4; ++op) {
                XMLOutputFactory2 f = getFactory(i, true);
                StringWriter strw = new StringWriter();
                XMLStreamWriter2 sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
                String failMsg = null;
                sw.writeStartDocument();
                if (epilog) {
                    // to get to epilog, need to output the root first
                    sw.writeStartElement("root");
                    sw.writeEndElement();
                }
                try {
                    /* Note: every case needs its 'break'; without it,
                     * an operation that fails to throw would fall
                     * through to following cases (and finally to the
                     * default one), hiding which operation it was
                     * that did not get caught.
                     */
                    switch (op) {
                    case 0: // No non-white space text in prolog/epilog
                        failMsg = "when calling writeCharacters() for non-white space text";
                        sw.writeCharacters("test!");
                        break;
                    case 1: // - "" -
                        failMsg = "when calling writeCharacters() for non-white space text";
                        sw.writeCharacters(new char[] { 't', 'e', 's', 't' }, 0, 4);
                        break;
                    case 2: // No CDATA in prolog/epilog
                        failMsg = "when calling writeCData()";
                        sw.writeCData("cdata");
                        break;
                    case 3: // - "" -
                        failMsg = "when calling writeCData()";
                        sw.writeCData(new char[] { 't', 'e', 's', 't' }, 0, 4);
                        break;
                    case 4: // no entity refs in prolog/epilog:
                        failMsg = "when calling writeEntityRef()";
                        sw.writeEntityRef("entity");
                        break;
                    default:
                        throw new Error("Internal error: illegal test index "+op);
                    }
                } catch (XMLStreamException sex) {
                    // good
                    continue;
                } catch (Throwable t) {
                    fail("Expected an XMLStreamException for "+failMsg+prologMsg+"; got "+t);
                } finally {
                    if (epilog) {
                        sw.close();
                    }
                }
                fail("Expected an XMLStreamException for "+failMsg+prologMsg+"; no exception thrown");
            }
        }
    }
}
/**
 * Unit test that verifies that root element structural problems (no root,
 * that is, an empty doc; more than one root element) are caught: both
 * are expected to result in an {@link XMLStreamException}.
 */
public void testRootElementChecks()
    throws XMLStreamException
{
    for (int i = 0; i <= 2; ++i) { // non-ns, simple-ns, repairing-ns
        for (int op = 0; op < 2; ++op) {
            XMLOutputFactory2 f = getFactory(i, true);
            StringWriter strw = new StringWriter();
            XMLStreamWriter2 sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
            String failMsg = null;
            sw.writeStartDocument();
            try {
                switch (op) {
                case 0: // No root element?
                    failMsg = "missing root element";
                    sw.writeEndDocument();
                    break;
                case 1: // Two root elements...
                    failMsg = "second root element";
                    sw.writeStartElement("root1");
                    sw.writeEndElement();
                    sw.writeStartElement("root1");
                    sw.writeEndElement();
                    break;
                default:
                    throw new Error("Internal error: illegal test index "+op);
                }
            } catch (XMLStreamException sex) {
                // good
                continue;
            } catch (Throwable t) {
                // include the culprit, to help with troubleshooting
                fail("Expected an XMLStreamException for "+failMsg+"; got "+t);
            }
            /* Getting here means nothing was thrown: the problem was
             * not caught, which has to count as a failure too (same
             * as with the prolog/epilog checks).
             */
            fail("Expected an XMLStreamException for "+failMsg+"; no exception thrown");
        }
    }
}
/**
 * Placeholder test: the whole body is currently commented out, so
 * this method does nothing and always passes. The disabled code
 * sketches a check for writeCharacters() with non-white space text
 * in the prolog.
 */
public void testWriteElementChecks()
throws XMLStreamException
{
/*
for (int i = 0; i <= 2; ++i) {
// First, checks for prolog:
for (int op = 0; op < 2; ++op) {
XMLOutputFactory2 f = getFactory(i, true);
StringWriter strw = new StringWriter();
XMLStreamWriter2 sw = (XMLStreamWriter2) f.createXMLStreamWriter(strw);
String failMsg = null;
sw.writeStartDocument();
// No non-white space text in prolog
try {
switch (op) {
case 0:
failMsg = "when calling writeCharacters() for non-white space text in prolog";
sw.writeCharacters("test!");
break;
default:
}
} catch (XMLStreamException sex) {
// good
} catch (Throwable t) {
fail("Expected an XMLStreamException for illegal comment content (contains '--') in checking + non-fixing mode; got: "+t);
}
sw.close();
}
}
*/
}
/*
////////////////////////////////////////////////////
// Helper methods
////////////////////////////////////////////////////
*/
/**
 * Configures and returns the shared output factory.
 *
 * @param type 0 for non-namespace-aware, 1 for namespace-aware and
 *   non-repairing, 2 for namespace-aware and repairing
 * @param checkStruct Value passed to setValidateStructure()
 */
private XMLOutputFactory2 getFactory(int type, boolean checkStruct)
    throws XMLStreamException
{
    final boolean nsAware = (type > 0);
    final boolean repairing = (type > 1);
    final XMLOutputFactory2 factory = getOutputFactory();
    setNamespaceAware(factory, nsAware);
    setRepairing(factory, repairing);
    setValidateStructure(factory, checkStruct);
    return factory;
}
}
woodstox-4.1.3/src/test/wstxtest/wstream/TestAutoEndElems.java 0000644 0001750 0001750 00000003231 11745427076 025014 0 ustar giovanni giovanni package wstxtest.wstream;
import java.io.*;
import javax.xml.stream.*;
import com.ctc.wstx.api.WstxOutputProperties;
/**
* Unit tests for verifying that [WSTX-165] works ok.
*/
public class TestAutoEndElems
extends BaseWriterTest
{
public void testAutomaticEndElemsEnabled()
throws XMLStreamException
{
StringWriter strw = new StringWriter();
XMLStreamWriter sw = getFactory(true).createXMLStreamWriter(strw);
sw.writeStartElement("root");
sw.writeStartElement("leaf");
sw.writeCharacters(""); // to prevent empty elem, simplify testing
sw.close();
assertEquals("
*
*/
public class TestEventReader
extends wstxtest.BaseWstxTest
{
public void testEventReaderNonLaziness()
throws XMLStreamException
{
/* We can test this by forcing coalescing to happen, and injecting
* an intentional error after first two segments. In lazy mode,
* coalescing is done not when event type is fetched, but only
* when getText() is called. In non-lazy mode, it's thrown right
* from next() method. Although the exact mechanism is hidden by
* the Event API, what we do see is the type of exception we get --
* that should be XMLStreamException, NOT a runtime wrapper instead
* of it.
*/
final String XML =
"