// Copyright 1999, 2002 Robert Buff

// Contact: http://robertbuff.com/uvm

//

// This file is part of Mtg-Book.

//

// Mtg-Book is free software; you can redistribute it and/or modify

// it under the terms of the GNU General Public License as published

// by the Free Software Foundation; either version 2 of the License,

// or (at your option) any later version.

//

// Mtg-Book is distributed in the hope that it will be useful,

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

// GNU General Public License for more details.

//

// You should have received a copy of the GNU General Public License

// along with Mtg-Book; if not, write to the 

//

// Free Software Foundation, Inc.

// 59 Temple Place, Suite 330

// Boston, MA 02111-1307

// USA



#if ! defined(_MTG_HTML_READER_)

#define _MTG_HTML_READER_



#include "MtgHeap.h"

#include "MtgHeap2.h"

#include "MtgHtmlDoc.h"



MTG_BEGIN_NAMESPACE





//

//   t H t m l R e a d e r

//



class tHtmlReader {

        // Parsed representation: a table holds rows, a row holds
        // fields, and a field holds either text or further tables.

    struct tTable;

        // One cell of a row.

    struct tField {
        tHeap<tTable*> m_Table;     // tables stored for this field
        char* m_sData;              // only used if
                                    // m_Table.numOfElems() == 0
        tField();
        ~tField();
    };

        // One row of a table: the fields it contains.

    struct tRow {
        tHeap<tField*> m_Field;
        ~tRow();
    };

        // One table: the rows it contains.

    struct tTable {
        tHeap<tRow*> m_Row;
        ~tTable();
    };

        // A position inside the parsed tables: a table plus a row
        // index and a field index into it.

    struct tSelection {
        tTable* m_pTable;
        int m_nRow;
        int m_nField;
    };

        // The document this reader works on. It is held by
        // reference, so it has to outlive the reader.

    tHtmlDoc& m_Doc;

        // Tables stored directly in the reader (as opposed to the
        // ones stored inside a field).
        // NOTE(review): no copy constructor is declared here; if the
        // destructor releases these tables, copying a reader would
        // not be safe - confirm against the implementation.

    tHeap<tTable*> m_Table;

        // Selections saved by push(), and the selection currently
        // in effect.

    tHeap<tSelection> m_SelStack;
    tSelection m_CurSel;

    void cleanupTables();

    void readTable( tTable& Table );

        // Private counterpart of the public findSubstring() that
        // takes the table to look at as an explicit argument.

    bool findSubstring( const tTable& Table, const char* sString,
        tHeap<int>& Selection ) const;

        // Row addressed by the current selection. Nothing is checked
        // here: the selected table pointer and the row index are
        // used as they are.

    tRow& curRow() {
        tTable& Table = *m_CurSel.m_pTable;
        return *Table.m_Row[m_CurSel.m_nRow];
    }

        // Field addressed by the current selection, looked up in
        // curRow(). The field index is not checked either.

    tField& curField() {
        tRow& Row = curRow();
        return *Row.m_Field[m_CurSel.m_nField];
    }

public:

        // An HTML reader exists for the parsing of exactly one HTML
        // document, very much like an iterator.

    tHtmlReader( tHtmlDoc& Doc );
    ~tHtmlReader();

        // Reads the tables. There is no return code: the function
        // is always successful, since malformed HTML is treated with
        // some heuristics and turned into good HTML.

    void readTables();

        // Data is read by selecting tables, rows and fields; where
        // necessary, this is done recursively.

    void top();                     // undo all selections

        // NOTE(review): bRemove presumably decides whether the
        // popped selection is also taken off the stack - confirm
        // against the implementation.

    void push();                    // push selection
    void pop( bool bRemove = true );// pop selection

    bool selectTable( int nPos );   // top level or within selected field

    bool selectRow( int nPos );     // within the selected table
    bool selectRow( int nTable, int nRow );

    bool selectField( int nPos );   // within the selected row
    bool selectField( int nRow, int nField );
    bool selectField( int nTable, int nRow, int nField );

    bool selectNextInRow();
    bool selectNextInColumn();

    bool validSelection();

        // Retrieve the data. 0 is returned if there is no data,
        // i.e. if validSelection() returns false.

    const char* readField();
    const char* readField( int nField );
    const char* readField( int nRow, int nField );
    const char* readField( int nTable, int nRow, int nField );

        // An entire row, column or matrix can be read out as well.
        // In this case the selection remains unmodified, and the
        // current position is substituted for the parameters that
        // are missing.

    bool readRow( tHeap<const char*>& Data );
    bool readRow( tHeap<const char*>& Data, int nToField );
    bool readRow( tHeap<const char*>& Data, int nFromField, int nToField );

    bool readColumn( tHeap<const char*>& Data );
    bool readColumn( tHeap<const char*>& Data, int nToRow );
    bool readColumn( tHeap<const char*>& Data, int nFromRow, int nToRow );

    bool readMatrix( tHeap2<const char*>& Data, bool bTranspose = false );
    bool readMatrix( tHeap2<const char*>& Data,
        int nToRow, int nToField, bool bTranspose = false );
    bool readMatrix( tHeap2<const char*>& Data,
        int nFromRow, int nToRow, int nFromField, int nToField,
        bool bTranspose = false );

        // The following function can be used to locate data:

    bool findSubstring( const char* sString, tHeap<int>& Selection ) const;
};



MTG_END_NAMESPACE



#endif

