http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback
Bug-Todo

Download
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

SAXParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1

00003  * 

00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights

00005  * reserved.

00006  * 

00007  * Redistribution and use in source and binary forms, with or without

00008  * modification, are permitted provided that the following conditions

00009  * are met:

00010  * 

00011  * 1. Redistributions of source code must retain the above copyright

00012  *    notice, this list of conditions and the following disclaimer. 

00013  * 

00014  * 2. Redistributions in binary form must reproduce the above copyright

00015  *    notice, this list of conditions and the following disclaimer in

00016  *    the documentation and/or other materials provided with the

00017  *    distribution.

00018  * 

00019  * 3. The end-user documentation included with the redistribution,

00020  *    if any, must include the following acknowledgment:  

00021  *       "This product includes software developed by the

00022  *        Apache Software Foundation (http://www.apache.org/)."

00023  *    Alternately, this acknowledgment may appear in the software itself,

00024  *    if and wherever such third-party acknowledgments normally appear.

00025  * 

00026  * 4. The names "Xerces" and "Apache Software Foundation" must

00027  *    not be used to endorse or promote products derived from this

00028  *    software without prior written permission. For written 

00029  *    permission, please contact apache\@apache.org.

00030  * 

00031  * 5. Products derived from this software may not be called "Apache",

00032  *    nor may "Apache" appear in their name, without prior written

00033  *    permission of the Apache Software Foundation.

00034  * 

00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED

00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR

00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF

00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT

00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

00046  * SUCH DAMAGE.

00047  * ====================================================================

00048  * 

00049  * This software consists of voluntary contributions made by many

00050  * individuals on behalf of the Apache Software Foundation, and was

00051  * originally based on software copyright (c) 1999, International

00052  * Business Machines, Inc., http://www.ibm.com .  For more information

00053  * on the Apache Software Foundation, please see

00054  * <http://www.apache.org/>.

00055  */
00056 
00057 /*
00058  * $Log: SAXParser.hpp,v $

00059  * Revision 1.10  2001/01/12 21:23:41  tng

00060  * Documentation Enhancement: explain values of Val_Scheme

00061  *
00062  * Revision 1.9  2000/08/02 18:05:15  jpolast

00063  * changes required for sax2

00064  * (changed private members to protected)

00065  *
00066  * Revision 1.8  2000/04/12 22:58:30  roddey

00067  * Added support for 'auto validate' mode.

00068  *
00069  * Revision 1.7  2000/03/03 01:29:34  roddey

00070  * Added a scanReset()/parseReset() method to the scanner and

00071  * parsers, to allow for reset after early exit from a progressive parse.

00072  * Added calls to new Terminate() call to all of the samples. Improved

00073  * documentation in SAX and DOM parsers.

00074  *
00075  * Revision 1.6  2000/02/17 03:54:27  rahulj

00076  * Added some new getters to query the parser state and

00077  * clarified the documentation.

00078  *
00079  * Revision 1.5  2000/02/16 03:42:58  rahulj

00080  * Finished documenting the SAX Driver implementation.

00081  *
00082  * Revision 1.4  2000/02/15 04:47:37  rahulj

00083  * Documenting the SAXParser framework. Not done yet.

00084  *
00085  * Revision 1.3  2000/02/06 07:47:56  rahulj

00086  * Year 2K copyright swat.

00087  *
00088  * Revision 1.2  1999/12/15 19:57:48  roddey

00089  * Got rid of redundant 'const' on boolean return value. Some compilers choke

00090  * on this and it's useless.

00091  *
00092  * Revision 1.1.1.1  1999/11/09 01:07:51  twl

00093  * Initial checkin

00094  *
00095  * Revision 1.6  1999/11/08 20:44:54  rahul

00096  * Swat for adding in Product name and CVS comment log variable.

00097  *
00098  */
00099 
00100 #if !defined(SAXPARSER_HPP)
00101 #define SAXPARSER_HPP
00102 
00103 #include <sax/Parser.hpp>
00104 #include <internal/VecAttrListImpl.hpp>
00105 #include <framework/XMLDocumentHandler.hpp>
00106 #include <framework/XMLElementDecl.hpp>
00107 #include <framework/XMLEntityHandler.hpp>
00108 #include <framework/XMLErrorReporter.hpp>
00109 #include <validators/DTD/DocTypeHandler.hpp>
00110 
00111 class DocumentHandler;
00112 class EntityResolver;
00113 class XMLPScanToken;
00114 class XMLScanner;
00115 class XMLValidator;
00116 
00117 
00127 
00128 class  SAXParser :
00129 
00130     public Parser
00131     , public XMLDocumentHandler
00132     , public XMLErrorReporter
00133     , public XMLEntityHandler
00134     , public DocTypeHandler
00135 {
00136 public :
00137     // -----------------------------------------------------------------------
00138     //  Class types
00139     // -----------------------------------------------------------------------
00140     enum ValSchemes
00141     {
00142         Val_Never
00143         , Val_Always
00144         , Val_Auto
00145     };
00146 
00147 
00148     // -----------------------------------------------------------------------
00149     //  Constructors and Destructor
00150     // -----------------------------------------------------------------------
00151 
00154 
00159     SAXParser(XMLValidator* const valToAdopt = 0);
00160 
00164     ~SAXParser();
00166 
00167 
00170 
00176     DocumentHandler* getDocumentHandler();
00177 
00184     const DocumentHandler* getDocumentHandler() const;
00185 
00192     EntityResolver* getEntityResolver();
00193 
00200     const EntityResolver* getEntityResolver() const;
00201 
00208     ErrorHandler* getErrorHandler();
00209 
00216     const ErrorHandler* getErrorHandler() const;
00217 
00224     const XMLScanner& getScanner() const;
00225 
00232     const XMLValidator& getValidator() const;
00233 
00241     ValSchemes getValidationScheme() const;
00242 
00252     bool getDoNamespaces() const;
00253 
00263     bool getExitOnFirstFatalError() const;
00265 
00266 
00267     // -----------------------------------------------------------------------
00268     //  Setter methods
00269     // -----------------------------------------------------------------------
00270 
00273 
00290     void setDoNamespaces(const bool newState);
00291 
00308     void setValidationScheme(const ValSchemes newScheme);
00309 
00325     void setExitOnFirstFatalError(const bool newState);
00327 
00328 
00329     // -----------------------------------------------------------------------
00330     //  Advanced document handler list maintenance methods
00331     // -----------------------------------------------------------------------
00332 
00335 
00348     void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00349 
00359     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00361 
00362 
00363     // -----------------------------------------------------------------------
00364     //  Implementation of the SAX Parser interface
00365     // -----------------------------------------------------------------------
00366 
00369 
00381     virtual void parse(const InputSource& source, const bool reuseValidator = false);
00382 
00395     virtual void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00396 
00407     virtual void parse(const char* const systemId, const bool reuseValidator = false);
00408 
00419     virtual void setDocumentHandler(DocumentHandler* const handler);
00420 
00430     virtual void setDTDHandler(DTDHandler* const handler);
00431 
00442     virtual void setErrorHandler(ErrorHandler* const handler);
00443 
00455     virtual void setEntityResolver(EntityResolver* const resolver);
00457 
00458 
00459     // -----------------------------------------------------------------------
00460     //  Progressive scan methods
00461     // -----------------------------------------------------------------------
00462 
00465 
00497     bool parseFirst
00498     (
00499         const   XMLCh* const    systemId
00500         ,       XMLPScanToken&  toFill
00501         , const bool            reuseValidator = false
00502     );
00503 
00534     bool parseFirst
00535     (
00536         const   char* const     systemId
00537         ,       XMLPScanToken&  toFill
00538         , const bool            reuseValidator = false
00539     );
00540 
00571     bool parseFirst
00572     (
00573         const   InputSource&    source
00574         ,       XMLPScanToken&  toFill
00575         , const bool            reuseValidator = false
00576     );
00577 
00602     bool parseNext(XMLPScanToken& token);
00603 
00625     void parseReset(XMLPScanToken& token);
00626 
00628 
00629 
00630 
00631     // -----------------------------------------------------------------------
00632     //  Implementation of the DocTypeHandler interface
00633     // -----------------------------------------------------------------------
00634 
00637 
00651     virtual void attDef
00652     (
00653         const   DTDElementDecl& elemDecl
00654         , const DTDAttDef&      attDef
00655         , const bool            ignoring
00656     );
00657 
00667     virtual void doctypeComment
00668     (
00669         const   XMLCh* const    comment
00670     );
00671 
00688     virtual void doctypeDecl
00689     (
00690         const   DTDElementDecl& elemDecl
00691         , const XMLCh* const    publicId
00692         , const XMLCh* const    systemId
00693         , const bool            hasIntSubset
00694     );
00695 
00709     virtual void doctypePI
00710     (
00711         const   XMLCh* const    target
00712         , const XMLCh* const    data
00713     );
00714 
00726     virtual void doctypeWhitespace
00727     (
00728         const   XMLCh* const    chars
00729         , const unsigned int    length
00730     );
00731 
00744     virtual void elementDecl
00745     (
00746         const   DTDElementDecl& decl
00747         , const bool            isIgnored
00748     );
00749 
00760     virtual void endAttList
00761     (
00762         const   DTDElementDecl& elemDecl
00763     );
00764 
00771     virtual void endIntSubset();
00772 
00779     virtual void endExtSubset();
00780 
00795     virtual void entityDecl
00796     (
00797         const   DTDEntityDecl&  entityDecl
00798         , const bool            isPEDecl
00799         , const bool            isIgnored
00800     );
00801 
00806     virtual void resetDocType();
00807 
00820     virtual void notationDecl
00821     (
00822         const   XMLNotationDecl&    notDecl
00823         , const bool                isIgnored
00824     );
00825 
00836     virtual void startAttList
00837     (
00838         const   DTDElementDecl& elemDecl
00839     );
00840 
00847     virtual void startIntSubset();
00848 
00855     virtual void startExtSubset();
00856 
00869     virtual void TextDecl
00870     (
00871         const   XMLCh* const    versionStr
00872         , const XMLCh* const    encodingStr
00873     );
00875 
00876 
00877     // -----------------------------------------------------------------------
00878     //  Implementation of the XMLDocumentHandler interface
00879     // -----------------------------------------------------------------------
00880 
00883 
00898     virtual void docCharacters
00899     (
00900         const   XMLCh* const    chars
00901         , const unsigned int    length
00902         , const bool            cdataSection
00903     );
00904 
00914     virtual void docComment
00915     (
00916         const   XMLCh* const    comment
00917     );
00918 
00938     virtual void docPI
00939     (
00940         const   XMLCh* const    target
00941         , const XMLCh* const    data
00942     );
00943 
00955     virtual void endDocument();
00956 
00973     virtual void endElement
00974     (
00975         const   XMLElementDecl& elemDecl
00976         , const unsigned int    urlId
00977         , const bool            isRoot
00978     );
00979 
00990     virtual void endEntityReference
00991     (
00992         const   XMLEntityDecl&  entDecl
00993     );
00994 
01014     virtual void ignorableWhitespace
01015     (
01016         const   XMLCh* const    chars
01017         , const unsigned int    length
01018         , const bool            cdataSection
01019     );
01020 
01025     virtual void resetDocument();
01026 
01037     virtual void startDocument();
01038 
01065     virtual void startElement
01066     (
01067         const   XMLElementDecl&         elemDecl
01068         , const unsigned int            urlId
01069         , const XMLCh* const            elemPrefix
01070         , const RefVectorOf<XMLAttr>&   attrList
01071         , const unsigned int            attrCount
01072         , const bool                    isEmpty
01073         , const bool                    isRoot
01074     );
01075 
01085     virtual void startEntityReference
01086     (
01087         const   XMLEntityDecl&  entDecl
01088     );
01089 
01107     virtual void XMLDecl
01108     (
01109         const   XMLCh* const    versionStr
01110         , const XMLCh* const    encodingStr
01111         , const XMLCh* const    standaloneStr
01112         , const XMLCh* const    actualEncodingStr
01113     );
01115 
01116 
01117     // -----------------------------------------------------------------------
01118     //  Implementation of the XMLErrorReporter interface
01119     // -----------------------------------------------------------------------
01120 
01123 
01146     virtual void error
01147     (
01148         const   unsigned int                errCode
01149         , const XMLCh* const                msgDomain
01150         , const XMLErrorReporter::ErrTypes  errType
01151         , const XMLCh* const                errorText
01152         , const XMLCh* const                systemId
01153         , const XMLCh* const                publicId
01154         , const unsigned int                lineNum
01155         , const unsigned int                colNum
01156     );
01157 
01166     virtual void resetErrors();
01168 
01169 
01170     // -----------------------------------------------------------------------
01171     //  Implementation of the XMLEntityHandler interface
01172     // -----------------------------------------------------------------------
01173 
01176 
01187     virtual void endInputSource(const InputSource& inputSource);
01188 
01203     virtual bool expandSystemId
01204     (
01205         const   XMLCh* const    systemId
01206         ,       XMLBuffer&      toFill
01207     );
01208 
01216     virtual void resetEntities();
01217 
01232     virtual InputSource* resolveEntity
01233     (
01234         const   XMLCh* const    publicId
01235         , const XMLCh* const    systemId
01236     );
01237 
01249     virtual void startInputSource(const InputSource& inputSource);
01251 
01252 
01255 
01265     bool getDoValidation() const;
01266 
01280     void setDoValidation(const bool newState);
01282 
01283 
01284 protected :
01285     // -----------------------------------------------------------------------
01286     //  Unimplemented constructors and operators
01287     // -----------------------------------------------------------------------
01288     SAXParser(const SAXParser&);
01289     void operator=(const SAXParser&);
01290 
01291 
01292     // -----------------------------------------------------------------------
01293     //  Protected data members
01294     //
01295     //  fAttrList
01296     //      A temporary implementation of the basic SAX attribute list
01297     //      interface. We use this one over and over on each startElement
01298     //      event to allow SAX-like access to the element attributes.
01299     //
01300     //  fDocHandler
01301     //      The installed SAX doc handler, if any. Null if none.
01302     //
01303     //  fDTDHandler
01304     //      The installed SAX DTD handler, if any. Null if none.
01305     //
01306     //  fElemDepth
01307     //      This is used to track the element nesting depth, so that we can
01308     //      know when we are inside content. This is so we can ignore char
01309     //      data outside of content.
01310     //
01311     //  fEntityResolver
01312     //      The installed SAX entity resolver, if any. Null if none.
01313     //
01314     //  fErrorHandler
01315     //      The installed SAX error handler, if any. Null if none.
01316     //
01317     //  fAdvDHCount
01318     //  fAdvDHList
01319     //  fAdvDHListSize
01320     //      This is an array of pointers to XMLDocumentHandlers, which is
01321     //      how we see installed advanced document handlers. There will
01322     //      usually not be very many at all, so a simple array is used
01323     //      instead of a collection, for performance. It will grow if needed,
01324     //      but that is unlikely.
01325     //
01326     //      The count is how many handlers are currently installed. The size
01327     //      is how big the array itself is (for expansion purposes.) When
01328     //      count == size, it is time to expand.
01329     //
01330     //  fParseInProgress
01331     //      This flag is set once a parse starts. It is used to prevent
01332     //      multiple entrance or reentrance of the parser.
01333     //
01334     //  fScanner
01335     //      The scanner being used by this parser. It is created internally
01336     //      during construction.
01337     //
01338     //  fValidator
01339     //      The validator that is installed. If none is provided, we will
01340     //      create and install a DTD validator. We install this on the
01341     //      scanner we create, which it will use to do validation. We set
01342     //      ourself on it as the error reporter for validity errors.
01343     // -----------------------------------------------------------------------
01344     VecAttrListImpl         fAttrList;
01345     DocumentHandler*        fDocHandler;
01346     DTDHandler*             fDTDHandler;
01347     unsigned int            fElemDepth;
01348     EntityResolver*         fEntityResolver;
01349     ErrorHandler*           fErrorHandler;
01350     unsigned int            fAdvDHCount;
01351     XMLDocumentHandler**    fAdvDHList;
01352     unsigned int            fAdvDHListSize;
01353     bool                    fParseInProgress;
01354     XMLScanner*             fScanner;
01355     XMLValidator*           fValidator;
01356 };
01357 
01358 
01359 // ---------------------------------------------------------------------------
01360 //  SAXParser: Getter methods
01361 // ---------------------------------------------------------------------------
01362 inline DocumentHandler* SAXParser::getDocumentHandler()

01363 {
01364     return fDocHandler;
01365 }
01366 
01367 inline const DocumentHandler* SAXParser::getDocumentHandler() const

01368 {
01369     return fDocHandler;
01370 }
01371 
01372 inline EntityResolver* SAXParser::getEntityResolver()

01373 {
01374     return fEntityResolver;
01375 }
01376 
01377 inline const EntityResolver* SAXParser::getEntityResolver() const

01378 {
01379     return fEntityResolver;
01380 }
01381 
01382 inline ErrorHandler* SAXParser::getErrorHandler()

01383 {
01384     return fErrorHandler;
01385 }
01386 
01387 inline const ErrorHandler* SAXParser::getErrorHandler() const

01388 {
01389     return fErrorHandler;
01390 }
01391 
01392 inline const XMLScanner& SAXParser::getScanner() const

01393 {
01394     return *fScanner;
01395 }
01396 
01397 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.