http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback
Bug-Todo

Download
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

DOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1

00003  *
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights

00005  * reserved.

00006  *
00007  * Redistribution and use in source and binary forms, with or without

00008  * modification, are permitted provided that the following conditions

00009  * are met:

00010  *
00011  * 1. Redistributions of source code must retain the above copyright

00012  *    notice, this list of conditions and the following disclaimer.

00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright

00015  *    notice, this list of conditions and the following disclaimer in

00016  *    the documentation and/or other materials provided with the

00017  *    distribution.

00018  *
00019  * 3. The end-user documentation included with the redistribution,

00020  *    if any, must include the following acknowledgment:

00021  *       "This product includes software developed by the

00022  *        Apache Software Foundation (http://www.apache.org/)."

00023  *    Alternately, this acknowledgment may appear in the software itself,

00024  *    if and wherever such third-party acknowledgments normally appear.

00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must

00027  *    not be used to endorse or promote products derived from this

00028  *    software without prior written permission. For written

00029  *    permission, please contact apache\@apache.org.

00030  *
00031  * 5. Products derived from this software may not be called "Apache",

00032  *    nor may "Apache" appear in their name, without prior written

00033  *    permission of the Apache Software Foundation.

00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED

00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR

00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF

00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT

00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

00046  * SUCH DAMAGE.

00047  * ====================================================================

00048  *
00049  * This software consists of voluntary contributions made by many

00050  * individuals on behalf of the Apache Software Foundation, and was

00051  * originally based on software copyright (c) 1999, International

00052  * Business Machines, Inc., http://www.ibm.com .  For more information

00053  * on the Apache Software Foundation, please see

00054  * <http://www.apache.org/>.

00055  */
00056 
00057 /*
00058  * $Id: DOMParser.hpp,v 1.15 2001/01/25 23:59:15 lehors Exp $

00059  *
00060  */
00061 
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064 
00065 
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071 
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 #include <validators/DTD/DTDValidator.hpp>
00076 
00077 class EntityResolver;
00078 class ErrorHandler;
00079 class XMLPScanToken;
00080 class XMLScanner;
00081 class XMLValidator;
00082 
00083 
// DOMParser implements the XMLDocumentHandler, XMLErrorReporter,
// XMLEntityHandler and DocTypeHandler callback interfaces and holds the
// DOM_Document that is filled with the document contents (see fDocument and
// getDocument()).
00094 class  DOMParser :
00095 
00096     public XMLDocumentHandler
00097     , public XMLErrorReporter
00098     , public XMLEntityHandler
00099     , public DocTypeHandler
00100 {
00101 public :
00102     // -----------------------------------------------------------------------
00103     //  Class types
00104     // -----------------------------------------------------------------------
          // Validation scheme values taken by setValidationScheme() and
          // returned by getValidationScheme().
00105     enum ValSchemes
00106     {
00107         Val_Never
00108         , Val_Always
00109         , Val_Auto
00110     };
00111 
00112 
00113     // -----------------------------------------------------------------------
00114     //  Constructors and Destructor
00115     // -----------------------------------------------------------------------
00116 
00119 
00128     DOMParser(XMLValidator* const valToAdopt = 0);
00129 
00133     ~DOMParser();
00134 
00136 
00142     void reset();
00143 
00144 
00145     // -----------------------------------------------------------------------
00146     //  Getter methods
00147     // -----------------------------------------------------------------------
00148 
00151 
00161     DOM_Document getDocument();
00162 
00170     ErrorHandler* getErrorHandler();
00171 
00179     const ErrorHandler* getErrorHandler() const;
00180 
00188     EntityResolver* getEntityResolver();
00189 
00197     const EntityResolver* getEntityResolver() const;
00198 
00206     const XMLScanner& getScanner() const;
00207 
00215     const XMLValidator& getValidator() const;
00216 
00224     ValSchemes getValidationScheme() const;
00225 
00236     bool getDoNamespaces() const;
00237 
00250     bool getExitOnFirstFatalError() const;
00251 
00263     bool getExpandEntityReferences() const;
00264 
00282     bool  getCreateEntityReferenceNodes()const;
00283 
00294     bool getIncludeIgnorableWhitespace() const;
00295 
00305     bool getToCreateXMLDeclTypeNode() const;
00306 
00307 
00309 
00310 
00311     // -----------------------------------------------------------------------
00312     //  Setter methods
00313     // -----------------------------------------------------------------------
00314 
00317 
00331     void setErrorHandler(ErrorHandler* const handler);
00332 
00348     void setEntityResolver(EntityResolver* const handler);
00349 
00368     void setDoNamespaces(const bool newState);
00369 
00386     void setExitOnFirstFatalError(const bool newState);
00387 
00402     void setExpandEntityReferences(const bool expand);
00403 
00420     void setCreateEntityReferenceNodes(const bool create);
00421 
00440     void setIncludeIgnorableWhitespace(const bool include);
00441 
00458     void setValidationScheme(const ValSchemes newScheme);
00459 
00470     void setToCreateXMLDeclTypeNode(const bool create);
00471 
00473 
00474 
00475     // -----------------------------------------------------------------------
00476     //  Parsing methods
00477     // -----------------------------------------------------------------------
00478 
00481 
00495     void parse(const InputSource& source, const bool reuseValidator = false);
00496 
00511     void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00512 
00524     void parse(const char* const systemId, const bool reuseValidator = false);
00525 
00555     bool parseFirst
00556     (
00557         const   XMLCh* const    systemId
00558         ,       XMLPScanToken&  toFill
00559         , const bool            reuseValidator = false
00560     );
00561 
00592     bool parseFirst
00593     (
00594         const   char* const     systemId
00595         ,       XMLPScanToken&  toFill
00596         , const bool            reuseValidator = false
00597     );
00598 
00629     bool parseFirst
00630     (
00631         const   InputSource&    source
00632         ,       XMLPScanToken&  toFill
00633         , const bool            reuseValidator = false
00634     );
00635 
00658     bool parseNext(XMLPScanToken& token);
00659 
00685     void parseReset(XMLPScanToken& token);
00686 
00688 
00689 
00690 
00691     // -----------------------------------------------------------------------
00692     //  Implementation of the XMLErrorReporter interface.
00693     // -----------------------------------------------------------------------
00694 
00697 
00722     virtual void error
00723     (
00724         const   unsigned int                errCode
00725         , const XMLCh* const                msgDomain
00726         , const XMLErrorReporter::ErrTypes  errType
00727         , const XMLCh* const                errorText
00728         , const XMLCh* const                systemId
00729         , const XMLCh* const                publicId
00730         , const unsigned int                lineNum
00731         , const unsigned int                colNum
00732     );
00733 
00742     virtual void resetErrors();
00744 
00745 
00746     // -----------------------------------------------------------------------
00747     //  Implementation of the XMLEntityHandler interface.
00748     // -----------------------------------------------------------------------
00749 
00752 
00765     virtual void endInputSource(const InputSource& inputSource);
00766 
00782     virtual bool expandSystemId
00783     (
00784         const   XMLCh* const    systemId
00785         ,       XMLBuffer&      toFill
00786     );
00787 
00796     virtual void resetEntities();
00797 
00813     virtual InputSource* resolveEntity
00814     (
00815         const   XMLCh* const    publicId
00816         , const XMLCh* const    systemId
00817     );
00818 
00831     virtual void startInputSource(const InputSource& inputSource);
00832 
00834 
00835 
00836 
00837     // -----------------------------------------------------------------------
00838     //  Implementation of the XMLDocumentHandler interface.
00839     // -----------------------------------------------------------------------
00840 
00843 
00856     virtual void docCharacters
00857     (
00858         const   XMLCh* const    chars
00859         , const unsigned int    length
00860         , const bool            cdataSection
00861     );
00862 
00871     virtual void docComment
00872     (
00873         const   XMLCh* const    comment
00874     );
00875 
00888     virtual void docPI
00889     (
00890         const   XMLCh* const    target
00891         , const XMLCh* const    data
00892     );
00893 
00898     virtual void endDocument();
00899 
00913     virtual void endElement
00914     (
00915         const   XMLElementDecl& elemDecl
00916         , const unsigned int    urlId
00917         , const bool            isRoot
00918     );
00919 
00928     virtual void endEntityReference
00929     (
00930         const   XMLEntityDecl&  entDecl
00931     );
00932 
00951     virtual void ignorableWhitespace
00952     (
00953         const   XMLCh* const    chars
00954         , const unsigned int    length
00955         , const bool            cdataSection
00956     );
00957 
00964     virtual void resetDocument();
00965 
00970     virtual void startDocument();
00971 
00999     virtual void startElement
01000     (
01001         const   XMLElementDecl&         elemDecl
01002         , const unsigned int            urlId
01003         , const XMLCh* const            elemPrefix
01004         , const RefVectorOf<XMLAttr>&   attrList
01005         , const unsigned int            attrCount
01006         , const bool                    isEmpty
01007         , const bool                    isRoot
01008     );
01009 
01019     virtual void startEntityReference
01020     (
01021         const   XMLEntityDecl&  entDecl
01022     );
01023 
01042     virtual void XMLDecl
01043     (
01044         const   XMLCh* const    versionStr
01045         , const XMLCh* const    encodingStr
01046         , const XMLCh* const    standaloneStr
01047         , const XMLCh* const    actualEncStr
01048     );
01050 
01051 
01054 
01064     bool getDoValidation() const;
01065 
01079     void setDoValidation(const bool newState);
01080     // Implementation of the DocTypeHandler interface.
01081     virtual void attDef
01082     (
01083         const   DTDElementDecl&     elemDecl
01084         , const DTDAttDef&          attDef
01085         , const bool                ignoring
01086     );
01087 
01088     virtual void doctypeComment
01089     (
01090         const   XMLCh* const    comment
01091     );
01092 
01093     virtual void doctypeDecl
01094     (
01095         const   DTDElementDecl& elemDecl
01096         , const XMLCh* const    publicId
01097         , const XMLCh* const    systemId
01098         , const bool            hasIntSubset
01099     );
01100 
01101     virtual void doctypePI
01102     (
01103         const   XMLCh* const    target
01104         , const XMLCh* const    data
01105     );
01106 
01107     virtual void doctypeWhitespace
01108     (
01109         const   XMLCh* const    chars
01110         , const unsigned int    length
01111     );
01112 
01113     virtual void elementDecl
01114     (
01115         const   DTDElementDecl& decl
01116         , const bool            isIgnored
01117     );
01118 
01119     virtual void endAttList
01120     (
01121         const   DTDElementDecl& elemDecl
01122     );
01123 
01124     virtual void endIntSubset();
01125 
01126     virtual void endExtSubset();
01127 
01128     virtual void entityDecl
01129     (
01130         const   DTDEntityDecl&  entityDecl
01131         , const bool            isPEDecl
01132         , const bool            isIgnored
01133     );
01134 
01135     virtual void resetDocType();
01136 
01137     virtual void notationDecl
01138     (
01139         const   XMLNotationDecl&    notDecl
01140         , const bool                isIgnored
01141     );
01142 
01143     virtual void startAttList
01144     (
01145         const   DTDElementDecl& elemDecl
01146     );
01147 
01148     virtual void startIntSubset();
01149 
01150     virtual void startExtSubset();
01151 
01152     virtual void TextDecl
01153     (
01154         const   XMLCh* const    versionStr
01155         , const XMLCh* const    encodingStr
01156     );
01157 
01158 
01160 
01161 
01162 protected :
01163     // -----------------------------------------------------------------------
01164     //  Protected getter methods
01165     // -----------------------------------------------------------------------
01166 
01169 
01174     DOM_Node getCurrentNode();
01175 
01177 
01178 
01179     // -----------------------------------------------------------------------
01180     //  Protected setter methods
01181     // -----------------------------------------------------------------------
01182 
01185 
01193     void setCurrentNode(DOM_Node toSet);
01194 
01201     void setDocument(DOM_Document toSet);
01203 
01204 
01205 private :
01206     // Local private function to populate the doctype data
01207     virtual void populateDocumentType();
01208 
01209     // -----------------------------------------------------------------------
01210     //  Private data members
01211     //
01212     //  fCurrentNode
01213     //  fCurrentParent
01214     //      Used to track the current node during nested element events. Since
01215     //      the tree must be built from a set of disjoint callbacks, we need
01216     //      these to keep up with where we currently are.
01217     //
01218     //  fDocument
01219     //      The root document object, filled with the document contents.
01220     //
01221     //  fEntityResolver
01222     //      The installed SAX entity resolver, if any. Null if none.
01223     //
01224     //  fErrorHandler
01225     //      The installed SAX error handler, if any. Null if none.
01226     //
01227     //  fCreateEntityReferenceNodes
01228     //      Indicates whether entity reference nodes should be created.
01229     //
01230     //  fIncludeIgnorableWhitespace
01231     //      Indicates whether ignorable whitespace should be added to
01232     //      the DOM tree for validating parsers.
01233     //
01234     //  fNodeStack
01235     //      Used to track previous parent nodes during nested element events.
01236     //
01237     //  fParseInProgress
01238     //      Used to prevent multiple entrance to the parser while it's doing
01239     //      a parse.
01240     //
01241     //  fScanner
01242     //      The scanner used for this parser. This is created during the
01243     //      constructor.
01244     //
01245     //  fValidator
01246     //      The validator that is installed. If none is provided, we will
01247     //      create and install a DTD validator. We install this on the
01248     //      scanner we create, which it will use to do validation. We set
01249     //      ourself on it as the error reporter for validity errors.
01250     //
01251     //  fWithinElement
01252     //      A flag to indicate that the parser is within at least one level
01253     //      of element processing.
01254     //
01255     //  fDocumentType
01256     //      Used to store and update the documentType variable information
01257     //      in fDocument
01258     //
01259     //  fOldDocTypeHandler
01260     //      Used to chain the old documentType node if the user has set it
01261     //      from outside
01262     //
01263     //  fToCreateXMLDeclTypeNode
01264     //      A flag to create a DOM_XMLDecl node in the DOM tree if it exists
01265     //      This is an extension to xerces implementation
01266     //
01267     // -----------------------------------------------------------------------
01268     DOM_Node                fCurrentParent;
01269     DOM_Node                fCurrentNode;
01270     DOM_Document            fDocument;
01271     EntityResolver*         fEntityResolver;
01272     ErrorHandler*           fErrorHandler;
01273     bool                    fCreateEntityReferenceNodes;
01274     bool                    fIncludeIgnorableWhitespace;
01275     ValueStackOf<DOM_Node>* fNodeStack;
01276     bool                    fParseInProgress;
01277     XMLScanner*             fScanner;
01278     XMLValidator*           fValidator;
01279     bool                    fWithinElement;
01280     DocumentTypeImpl*       fDocumentType;
01281     DocTypeHandler*         fOldDocTypeHandler;
01282     bool                    fToCreateXMLDeclTypeNode;
01283 };
01284 
01285 
01286 
01287 // ---------------------------------------------------------------------------
01288 //  DOMParser: Handlers for the XMLEntityHandler interface
01289 // ---------------------------------------------------------------------------
// XMLEntityHandler callback implemented as a no-op: the InputSource argument
// is unnamed and never used.
01290 inline void DOMParser::endInputSource(const InputSource&)

01291 {
01292     // The DOM entity resolver doesn't handle this
01293 }
01294 
// XMLEntityHandler callback: always returns false and touches neither the
// system id nor the XMLBuffer argument (both are unnamed and unused).
01295 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)

01296 {
01297     // The DOM entity resolver doesn't handle this
01298     return false;
01299 }
01300 
// XMLEntityHandler callback implemented as a no-op; no parser state is
// changed here.
01301 inline void DOMParser::resetEntities()

01302 {
01303     // Nothing to do on this one
01304 }
01305 
// XMLEntityHandler callback implemented as a no-op: the InputSource argument
// is unnamed and never used.
01306 inline void DOMParser::startInputSource(const InputSource&)

01307 {
01308     // The DOM entity resolver doesn't handle this
01309 }
01310 
01311 
01312 // ---------------------------------------------------------------------------
01313 //  DOMParser: Getter methods
01314 // ---------------------------------------------------------------------------
// Returns the fDocument member by value (as a DOM_Document).
01315 inline DOM_Document DOMParser::getDocument()

01316 {
01317     return fDocument;
01318 }
01319 
// Returns the stored fErrorHandler pointer (non-const overload). The member
// comment in the class says this is null when no handler is installed.
01320 inline ErrorHandler* DOMParser::getErrorHandler()

01321 {
01322     return fErrorHandler;
01323 }
01324 
// Const overload: returns the stored fErrorHandler pointer as a pointer to
// const. The member comment in the class says this is null when no handler
// is installed.
01325 inline const ErrorHandler* DOMParser::getErrorHandler() const

01326 {
01327     return fErrorHandler;
01328 }
01329 
// Returns the stored fEntityResolver pointer (non-const overload). The member
// comment in the class says this is null when no resolver is installed.
01330 inline EntityResolver* DOMParser::getEntityResolver()

01331 {
01332     return fEntityResolver;
01333 }
01334 
// Const overload: returns the stored fEntityResolver pointer as a pointer to
// const. The member comment in the class says this is null when no resolver
// is installed.
01335 inline const EntityResolver* DOMParser::getEntityResolver() const

01336 {
01337     return fEntityResolver;
01338 }
01339 
// Returns the fCreateEntityReferenceNodes flag unchanged, i.e. exactly the
// same value as getCreateEntityReferenceNodes().
// NOTE(review): the names "expand entity references" and "create entity
// reference nodes" read like opposites, yet no negation is applied here --
// confirm the intended polarity against the API documentation before
// relying on this getter.
01340 inline bool DOMParser::getExpandEntityReferences() const

01341 {
01342     return fCreateEntityReferenceNodes;
01343 }
// Returns the fCreateEntityReferenceNodes flag, which the class comment
// describes as indicating whether entity reference nodes should be created.
01344 inline bool DOMParser::getCreateEntityReferenceNodes() const

01345 {
01346     return fCreateEntityReferenceNodes;
01347 }
01348 
// Returns the fIncludeIgnorableWhitespace flag, which the class comment
// describes as controlling whether ignorable whitespace is added to the DOM
// tree for validating parsers.
01349 inline bool DOMParser::getIncludeIgnorableWhitespace() const

01350 {
01351     return fIncludeIgnorableWhitespace;
01352 }
01353 
// Returns a const reference to the scanner by dereferencing fScanner. No null
// check is made here; the class comment states the scanner is created during
// the constructor.
01354 inline const XMLScanner& DOMParser::getScanner() const

01355 {
01356     return *fScanner;
01357 }
01358 
// Returns the fToCreateXMLDeclTypeNode flag, which the class comment
// describes as the switch for creating a DOM_XMLDecl node in the tree.
01359 inline bool DOMParser::getToCreateXMLDeclTypeNode() const

01360 {
01361     return fToCreateXMLDeclTypeNode;
01362 }
01363 
01364 
01365 // ---------------------------------------------------------------------------
01366 //  DOMParser: Setter methods
01367 // ---------------------------------------------------------------------------
// Stores `expand` directly in fCreateEntityReferenceNodes, the same flag that
// setCreateEntityReferenceNodes() writes.
// NOTE(review): the names "expand entity references" and "create entity
// reference nodes" read like opposites, yet the value is stored without
// negation -- confirm the intended polarity against the API documentation
// before relying on this setter.
01368 inline void DOMParser::setExpandEntityReferences(const bool expand)

01369 {
01370     fCreateEntityReferenceNodes = expand;
01371 }
01372 
// Stores `create` in fCreateEntityReferenceNodes, which the class comment
// describes as indicating whether entity reference nodes should be created.
01373 inline void DOMParser::setCreateEntityReferenceNodes(const bool create)

01374 {
01375     fCreateEntityReferenceNodes = create;
01376 }
01377 
// Stores `include` in fIncludeIgnorableWhitespace, which the class comment
// describes as controlling whether ignorable whitespace is added to the DOM
// tree for validating parsers.
01378 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)

01379 {
01380     fIncludeIgnorableWhitespace = include;
01381 }
01382 
// Stores `create` in fToCreateXMLDeclTypeNode, which the class comment
// describes as the switch for creating a DOM_XMLDecl node in the tree.
01383 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)

01384 {
01385     fToCreateXMLDeclTypeNode = create;
01386 }
01387 
01388 
01389 // ---------------------------------------------------------------------------
01390 //  DOMParser: Protected getter methods
01391 // ---------------------------------------------------------------------------
// Protected accessor: returns the fCurrentNode member by value (as a
// DOM_Node).
01392 inline DOM_Node DOMParser::getCurrentNode()

01393 {
01394     return fCurrentNode;
01395 }
01396 
01397 
01398 // ---------------------------------------------------------------------------
01399 //  DOMParser: Protected setter methods
01400 // ---------------------------------------------------------------------------
// Protected mutator: assigns `toSet` to fCurrentNode. Nothing else (for
// example fCurrentParent or fNodeStack) is updated here.
01401 inline void DOMParser::setCurrentNode(DOM_Node toSet)

01402 {
01403     fCurrentNode = toSet;
01404 }
01405 
// Protected mutator: assigns `toSet` to fDocument, replacing the document
// object that getDocument() returns.
01406 inline void DOMParser::setDocument(DOM_Document toSet)

01407 {
01408     fDocument = toSet;
01409 }
01410 
01411 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.