// Copyright 1999, 2002 Robert Buff

// Contact: http://robertbuff.com/uvm

//

// This file is part of Mtg-Book.

//

// Mtg-Book is free software; you can redistribute it and/or modify

// it under the terms of the GNU General Public License as published

// by the Free Software Foundation; either version 2 of the License,

// or (at your option) any later version.

//

// Mtg-Book is distributed in the hope that it will be useful,

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

// GNU General Public License for more details.

//

// You should have received a copy of the GNU General Public License

// along with Mtg-Book; if not, write to the 

//

// Free Software Foundation, Inc.

// 59 Temple Place, Suite 330

// Boston, MA 02111-1307

// USA



#if ! defined(_MTG_HTML_DOC_)

#define _MTG_HTML_DOC_



MTG_BEGIN_NAMESPACE





//

//   t H t m l D o c

//



class tHtmlDoc {
public:
    // Construction: empty, as a copy of another document, or
    // from a source string.
    tHtmlDoc();
    tHtmlDoc( const tHtmlDoc &HtmlDoc );
    tHtmlDoc( const char *sSource );

    ~tHtmlDoc();

    // Assignment from another document or from a source string.
    tHtmlDoc &operator=( const tHtmlDoc &HtmlDoc );
    tHtmlDoc &operator=( const char *sSource );

    void copyFrom( const tHtmlDoc &HtmlDoc );

    // Read-only access to the stored HTML source text.
    const char *source() const { return m_sSource; }

private:
    // tHtmlReader uses the private parsing interface declared below.
    friend class tHtmlReader;

    // The HTML source text.
    char *m_sSource;

    // State that is used while the document is being parsed.
    char *m_sCurrent;
    char *m_sLiteral;
    int m_nLitSize;
    bool m_bInsideTag;

    // Internal helpers; their definitions are not part of this header.
    void init();
    void cleanup();
    void reset();

    bool eos() const;
    bool isLiteral() const;

    void skipSpace();
    void skipComment();
    void skipData();
    void skipLiteral();
    void skipBadTag();
    void skipTag();

    void scanLiteral( const char *&sStart, int &nSize, bool &bIgnCase );
    void getLiteral();

    // Locate the next tag. On return, bStartTag is true for a start
    // tag such as <a> and false for an end tag such as </a>. When
    // the function returns true, the source pointer refers to the
    // first character after the tag name, and we are officially
    // inside the tag.
    bool getTag( const char *&sName, bool &bStartTag );

    // While inside a tag, look up the attribute called sName. This
    // function is somewhat restricted: it can find exactly one
    // attribute per tag, after which the rest of the tag is skipped.
    bool getAttribute( const char *sName, const char *&sValue );

    // Read the data that precedes the next tag. Returns false if
    // there is no data before the next tag. Data is returned even
    // if it consists only of white space. The caller is responsible
    // for deallocating sData!
    bool getData( char *&sData );
};



MTG_END_NAMESPACE



#endif

