// Copyright 1999, 2002 Robert Buff

// Contact: http://robertbuff.com/uvm

//

// This file is part of Mtg-Book.

//

// Mtg-Book is free software; you can redistribute it and/or modify

// it under the terms of the GNU General Public License as published

// by the Free Software Foundation; either version 2 of the License,

// or (at your option) any later version.

//

// Mtg-Book is distributed in the hope that it will be useful,

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

// GNU General Public License for more details.

//

// You should have received a copy of the GNU General Public License

// along with Mtg-Book; if not, write to the 

//

// Free Software Foundation, Inc.

// 59 Temple Place, Suite 330

// Boston, MA 02111-1307

// USA



#include "MtgIncl.h"

#include "MtgHtmlReader.h"



MTG_BEGIN_NAMESPACE





//

//   t F i e l d

//



// Creates a field that carries no text yet: the data pointer starts
// out as null. The list of nested tables is left to its own default
// construction.
tHtmlReader::tField::tField()
    : m_sData( 0 )
{
}





//

//   ~ t F i e l d

//



// Destroys the field: deletes every nested table stored in m_Table
// and frees the text buffer, if there is one.
tHtmlReader::tField::~tField()
{
    for( int i = 0; i < m_Table.numOfElems(); ++i )
        delete m_Table[i];

        // The text buffer is a character array (readTable() builds it
        // with new char[]), so it has to be released with the array
        // form of delete. Deleting a null pointer is a no-op, so no
        // explicit guard is required.
    delete[] m_sData;
}





//

//   ~ t R o w

//



// Destroys the row by deleting each field stored in m_Field,
// in index order.
tHtmlReader::tRow::~tRow()
{
    const int nFields = m_Field.numOfElems();

    for( int k = 0; k < nFields; ++k ) {
        tField* pField = m_Field[k];
        delete pField;
    }
}





//

//   ~ t T a b l e

//



// Destroys the table by deleting each row stored in m_Row,
// in index order.
tHtmlReader::tTable::~tTable()
{
    const int nRows = m_Row.numOfElems();

    for( int k = 0; k < nRows; ++k ) {
        tRow* pRow = m_Row[k];
        delete pRow;
    }
}





//

//   c l e a n u p T a b l e s

//



void tHtmlReader::cleanupTables()



{

    for( int i = 0; i < m_Table.numOfElems(); ++i )

        delete m_Table[i];

    m_Table.reset();

}





//

//   r e a d T a b l e

//



// Reads tags from m_Doc and fills Table with rows and fields until
// a closing "table" tag is seen or getTag() reports no further tag.
//
//   "td"    start: opens a new field (creating a row first if none
//           is open); end: closes the current field.
//   "tr"    start: opens a new row; end: closes the current row.
//           Either way the current field is closed.
//   "table" start: a nested table is created inside the current
//           field (row and field are created on demand) and read
//           recursively; end: this table is complete.
//   Any other tag is skipped.
//
// After each tag, if a field is open, the text returned by
// m_Doc.getData() is attached to that field; successive pieces of
// text for the same field are concatenated.
//
// Table receives ownership of every row/field/table allocated here
// (they are deleted by the tTable/tRow/tField destructors).
void tHtmlReader::readTable( tTable& Table )
{
    tRow* pRow = 0;         // row currently open, 0 if none
    tField* pField = 0;     // field currently open, 0 if none

    const char* sName;
    bool bStartTag;

    while( m_Doc.getTag( sName, bStartTag ) ) {
        if( strcmp( sName, "td" ) == 0 ) {
            m_Doc.skipTag();
            if( bStartTag ) {
                    // A field without an enclosing row: open
                    // the row implicitly.
                if( pRow == 0 ) {
                    pRow = new tRow;
                    Table.m_Row.append( pRow );
                }
                pField = new tField;
                pRow->m_Field.append( pField );
            }
            else {
                pField = 0;
            }
        }
        else
        if( strcmp( sName, "tr" ) == 0 ) {
            m_Doc.skipTag();
            if( bStartTag ) {
                pRow = new tRow;
                Table.m_Row.append( pRow );
            }
            else {
                pRow = 0;
            }
            pField = 0;
        }
        else
        if( strcmp( sName, "table" ) == 0 ) {
            m_Doc.skipTag();
            if( bStartTag ) {
                    // A nested table needs a field to live in;
                    // create row and field if the markup did not.
                if( pRow == 0 ) {
                    pRow = new tRow;
                    Table.m_Row.append( pRow );
                }
                if( pField == 0 ) {
                    pField = new tField;
                    pRow->m_Field.append( pField );
                }

                    // Text already collected for the field is kept;
                    // the nested table is stored alongside it.
                tTable* p = new tTable;
                pField->m_Table.append( p );
                readTable( *p );
            }
            else {
                    // end of table found
                    // NOTE(review): skipTag() has already been called
                    // once for this tag above; the second call is kept
                    // from the original code. Confirm against
                    // tHtmlDoc::skipTag() that it is harmless here.
                m_Doc.skipTag();
                break;
            }
        }
        else {
                // ignore everything else
            m_Doc.skipTag();
        }

        if( pField != 0 ) {
                // A field is open: collect the text that follows
                // the tag just processed.
            char* sData;

            if( m_Doc.getData( sData ) && sData != 0 ) {
                    // All buffers handled here are character arrays
                    // (the concatenation below allocates with
                    // new char[]), hence delete[] throughout.
                    // NOTE(review): assumes getData() also allocates
                    // with new char[] - confirm in tHtmlDoc.
                if( sData[0] == 0 ) {
                        // empty text: nothing to keep
                    delete[] sData;
                }
                else
                if( pField->m_sData == 0 ) {
                        // first piece of text: take over the buffer
                    pField->m_sData = sData;
                }
                else {
                        // further text: append to what is there
                    const size_t nOldLen = strlen( pField->m_sData );
                    const size_t nAddLen = strlen( sData );
                    char* s = new char[nOldLen + nAddLen + 1];

                    strcpy( s, pField->m_sData );
                    strcpy( &s[nOldLen], sData );

                    delete[] pField->m_sData;
                    delete[] sData;

                    pField->m_sData = s;
                }
            }
        }
    }
}





//

//   f i n d S u b s t r i n g

//



bool tHtmlReader::findSubstring( const tTable& Table, const char* sString,

    tHeap<int>& Selection ) const



{

    Selection.append( 0 );

    for( int i = 0; i < Table.m_Row.numOfElems(); ++i ) {

        tRow& R = *Table.m_Row[i];



        Selection.append( 0 );

        for( int j = 0; j < R.m_Field.numOfElems(); ++j ) {

            tField& F = *R.m_Field[j];



            Selection.append( 0 );

            for( int k = 0; k < F.m_Table.numOfElems(); ++k ) {

                if( findSubstring( *F.m_Table[k], sString, Selection ) )

                    return true;

                ++Selection.last();

            }

            --Selection;



            if( F.m_sData != 0 && strstr( F.m_sData, sString ) != 0 )

                return true;



            ++Selection.last();

        }

        --Selection;

        ++Selection.last();

    }

    --Selection;



    return false;

}





//

//   t H t m l R e a d e r

//



// Creates a reader for the document Doc. No tables are read yet;
// call readTables() for that.
//
// The current selection is put into the "nothing selected" state
// right away, so that the select/read functions see defined values
// even if they are called before readTables() or top().
tHtmlReader::tHtmlReader( tHtmlDoc& Doc )
    : m_Doc( Doc )
{
    top();
}





//

//   ~ t H t m l R e a d e r

//



// Destroys the reader; all tables collected by readTables() are
// deleted via cleanupTables().
tHtmlReader::~tHtmlReader()
{
    cleanupTables();
}





//

//   r e a d T a b l e s

//



void tHtmlReader::readTables()



{

    cleanupTables();



    m_Doc.reset();

    m_SelStack.reset();

    top();



    const char* sName;

    bool bStartTag;



    while( m_Doc.getTag( sName, bStartTag ) ) {

        if( strcmp( sName, "table" ) == 0 && bStartTag ) {

            m_Doc.skipTag();

            tTable* p = new tTable;

            m_Table.append( p );

            readTable( *p );

        }

        else {

            m_Doc.skipTag();

        }

    }

}





//

//   t o p

//



// Clears the current selection: no table, no row and no field
// are selected afterwards.
void tHtmlReader::top()
{
    m_CurSel.m_nField = -1;
    m_CurSel.m_nRow = -1;
    m_CurSel.m_pTable = 0;
}





//

//   p u s h

//



// Saves a copy of the current selection at the end of the
// selection stack. The current selection itself is unchanged.
void tHtmlReader::push()
{
    m_SelStack.append( m_CurSel );
}





//

//   p o p

//



// Restores the current selection from the last entry of the
// selection stack. With bRemove set, the stack is additionally
// decremented (presumably dropping that entry); otherwise the
// stack is left as it is.
//
// NOTE(review): no check is made that the stack is non-empty;
// callers apparently must pair every pop() with an earlier push().
void tHtmlReader::pop( bool bRemove )
{
    m_CurSel = m_SelStack.last();

    if( ! bRemove )
        return;

    --m_SelStack;
}





//

//   s e l e c t T a b l e

//



// Selects table number nPos (counting from 0).
//
// If no field is currently selected, nPos refers to the top-level
// tables of the document; otherwise it refers to the tables nested
// inside the currently selected field.
//
// Returns false, leaving the selection untouched, if nPos is
// negative or there is no such table. On success the table becomes
// the current table and row and field selection are cleared.
bool tHtmlReader::selectTable( int nPos )
{
        // A negative position would pass the upper-bound tests
        // below and index the table list out of range.
    if( nPos < 0 )
        return false;

    if( m_CurSel.m_nField < 0 ) {
        if( m_Table.numOfElems() <= nPos )
            return false;
        m_CurSel.m_pTable = m_Table[nPos];
    }
    else {
        if( curField().m_Table.numOfElems() <= nPos )
            return false;
        m_CurSel.m_pTable = curField().m_Table[nPos];
    }

    m_CurSel.m_nRow = -1;
    m_CurSel.m_nField = -1;
    return true;
}





//

//   s e l e c t R o w

//



// Selects row number nPos of the current table and clears the
// field selection. If no table is selected yet, table 0 is
// selected first (that selection remains in effect even when the
// row lookup fails afterwards).
//
// Returns false if no table could be selected or the table has
// no row nPos.
//
// NOTE(review): only the upper bound of nPos is checked; a
// negative nPos is stored as is and reported as success.
bool tHtmlReader::selectRow( int nPos )
{
    if( m_CurSel.m_pTable == 0 && ! selectTable( 0 ) )
        return false;

    const int nRows = m_CurSel.m_pTable->m_Row.numOfElems();

    if( nPos >= nRows )
        return false;

    m_CurSel.m_nField = -1;
    m_CurSel.m_nRow = nPos;

    return true;
}





//

//   s e l e c t R o w

//



// Selects table nTable first and then row nRow within it.
// Returns false as soon as one of the two steps fails; the row
// step is not attempted if the table step failed.
bool tHtmlReader::selectRow( int nTable, int nRow )
{
    return selectTable( nTable ) && selectRow( nRow );
}





//

//   s e l e c t F i e l d

//



// Selects field number nPos of the current row. If no row is
// selected yet, row 0 is selected first (that selection remains
// in effect even when the field lookup fails afterwards).
//
// Returns false if no row could be selected or the row has no
// field nPos.
//
// NOTE(review): only the upper bound of nPos is checked; a
// negative nPos is stored as is and reported as success.
bool tHtmlReader::selectField( int nPos )
{
    if( m_CurSel.m_nRow < 0 && ! selectRow( 0 ) )
        return false;

    const int nFields = curRow().m_Field.numOfElems();

    if( nPos >= nFields )
        return false;

    m_CurSel.m_nField = nPos;
    return true;
}





//

//   s e l e c t F i e l d

//



// Selects row nRow and then field nField within that row.
// Returns false as soon as one of the two steps fails; the field
// step is not attempted if the row step failed.
bool tHtmlReader::selectField( int nRow, int nField )
{
    return selectRow( nRow ) && selectField( nField );
}





//

//   s e l e c t F i e l d

//



// Selects table nTable, row nRow within it and finally field
// nField within that row. Returns false as soon as one step
// fails; later steps are not attempted.
bool tHtmlReader::selectField( int nTable, int nRow, int nField )
{
    return selectRow( nTable, nRow ) && selectField( nField );
}





//

//   s e l e c t N e x t I n R o w

//



// Moves the selection to the next field of the current row.
// Returns false, without changing the selection, if there is no
// valid selection or the current field is the last one in its row.
bool tHtmlReader::selectNextInRow()
{
    if( ! validSelection() )
        return false;

    const int nNext = m_CurSel.m_nField + 1;

    if( nNext >= curRow().m_Field.numOfElems() )
        return false;

    m_CurSel.m_nField = nNext;
    return true;
}





//

//   s e l e c t N e x t I n C o l u m n

//



bool tHtmlReader::selectNextInColumn()



{

    if( ! validSelection() )

        return false;



    if( m_CurSel.m_pTable->m_Row.numOfElems() <= m_CurSel.m_nRow + 1 )

        return false;



    if( m_CurSel.m_pTable->m_Row[m_CurSel.m_nRow + 1]->m_Field.numOfElems()

            <= m_CurSel.m_nField ) {

        return false;

    }



    ++m_CurSel.m_nRow;

    return true;

}





//

//   v a l i d S e l e c t i o n

//



// Reports whether a field is currently selected, i.e. whether the
// selected field index is non-negative. Fields that contain
// nested tables count as valid as well (an earlier restriction
// to table-free fields is no longer active).
bool tHtmlReader::validSelection()
{
    return m_CurSel.m_nField >= 0;
}





//

//   r e a d F i e l d

//



// Returns the text of the currently selected field. The result is
// 0 if there is no valid selection; it is also 0 for a field to
// which no text has been attached. The string remains owned by
// the field.
const char* tHtmlReader::readField()
{
    return validSelection() ? curField().m_sData : 0;
}





//

//   r e a d F i e l d

//



// Selects field nField (see selectField( int )) and returns its
// text, or 0 if the selection fails.
const char* tHtmlReader::readField( int nField )
{
    return selectField( nField ) ? readField() : 0;
}





//

//   r e a d F i e l d

//



// Selects row nRow and field nField (see selectField( int, int ))
// and returns the field's text, or 0 if the selection fails.
const char* tHtmlReader::readField( int nRow, int nField )
{
    return selectField( nRow, nField ) ? readField() : 0;
}





//

//   r e a d F i e l d

//



// Selects table nTable, row nRow and field nField (see
// selectField( int, int, int )) and returns the field's text,
// or 0 if the selection fails.
const char* tHtmlReader::readField( int nTable, int nRow, int nField )
{
    return selectField( nTable, nRow, nField ) ? readField() : 0;
}





//

//   r e a d R o w

//



// Reads the current row with both bounds left at their defaults
// (negative values); see the three-argument readRow() for how
// the defaults are resolved.
bool tHtmlReader::readRow( tHeap<const char*>& Data )
{
    const int nDefault = -1;

    return readRow( Data, nDefault, nDefault );
}





//

//   r e a d R o w

//



// Reads the current row up to and including field nToField, with
// the start bound left at its default (negative value); see the
// three-argument readRow() for how the default is resolved.
bool tHtmlReader::readRow( tHeap<const char*>& Data, int nToField )
{
    const int nDefaultFrom = -1;

    return readRow( Data, nDefaultFrom, nToField );
}





//

//   r e a d R o w

//



// Copies the text pointers of fields nFromField..nToField
// (inclusive) of the currently selected row into Data.
//
//   nFromField < 0   start at the currently selected field, or at
//                    field 0 if no field is selected
//   nToField < 0     stop at the last field of the row
//
// Returns false, leaving Data untouched, if no row is selected,
// the range is empty or exceeds the row, or any field in the
// range contains a nested table. On success Data is resized to
// the number of fields in the range; entries point at the fields'
// own strings and may be 0.
bool tHtmlReader::readRow( tHeap<const char*>& Data,
    int nFromField, int nToField )
{
    if( m_CurSel.m_nRow < 0 )
        return false;

    tRow& Row = curRow();

    if( nFromField < 0 )
        nFromField = ( m_CurSel.m_nField < 0 ) ? 0 : m_CurSel.m_nField;

    if( nToField < 0 )
        nToField = Row.m_Field.numOfElems() - 1;

    if( nToField < nFromField || nToField >= Row.m_Field.numOfElems() )
        return false;

        // Validate the whole range before touching Data.
    int nPos;

    for( nPos = nFromField; nPos <= nToField; ++nPos ) {
        if( Row.m_Field[nPos]->m_Table.numOfElems() > 0 )
            return false;   // recursive table found
    }

    const int nCount = nToField - nFromField + 1;

    Data.numOfElems( nCount );
    for( nPos = 0; nPos < nCount; ++nPos )
        Data[nPos] = Row.m_Field[nFromField + nPos]->m_sData;

    return true;
}





//

//   r e a d C o l u m n

//



// Reads the current column with both bounds left at their
// defaults (negative values); see the three-argument readColumn()
// for how the defaults are resolved.
bool tHtmlReader::readColumn( tHeap<const char*>& Data )
{
    const int nDefault = -1;

    return readColumn( Data, nDefault, nDefault );
}





//

//   r e a d C o l u m n

//



// Reads the current column up to and including row nToRow, with
// the start bound left at its default (negative value); see the
// three-argument readColumn() for how the default is resolved.
bool tHtmlReader::readColumn( tHeap<const char*>& Data, int nToRow )
{
    const int nDefaultFrom = -1;

    return readColumn( Data, nDefaultFrom, nToRow );
}





//

//   r e a d C o l u m n

//



// Copies the text pointers of one column of the currently selected
// table, for rows nFromRow..nToRow (inclusive), into Data. The
// column is the currently selected field index, or column 0 if no
// field is selected.
//
//   nFromRow < 0   start at the currently selected row, or at
//                  row 0 if no row is selected
//   nToRow < 0     stop at the last row of the table
//
// Returns false, leaving Data untouched, if no table is selected,
// the range is empty or exceeds the table, any row in the range
// is too short to have the column, or any affected field contains
// a nested table. On success Data is resized to the number of
// rows in the range; entries point at the fields' own strings and
// may be 0.
bool tHtmlReader::readColumn( tHeap<const char*>& Data,
    int nFromRow, int nToRow )
{
    if( m_CurSel.m_pTable == 0 )
        return false;

    tTable& Tab = *m_CurSel.m_pTable;

    if( nFromRow < 0 )
        nFromRow = ( m_CurSel.m_nRow < 0 ) ? 0 : m_CurSel.m_nRow;

    if( nToRow < 0 )
        nToRow = Tab.m_Row.numOfElems() - 1;

    if( nToRow < nFromRow || nToRow >= Tab.m_Row.numOfElems() )
        return false;

    int nColumn = 0;

    if( m_CurSel.m_nField >= 0 )
        nColumn = m_CurSel.m_nField;

        // Validate the whole range before touching Data.
    int nPos;

    for( nPos = nFromRow; nPos <= nToRow; ++nPos ) {
        tRow* pRow = Tab.m_Row[nPos];

        if( pRow->m_Field.numOfElems() <= nColumn )
            return false;   // not enough fields in this row
        if( pRow->m_Field[nColumn]->m_Table.numOfElems() > 0 )
            return false;   // recursive table found
    }

    const int nCount = nToRow - nFromRow + 1;

    Data.numOfElems( nCount );
    for( nPos = 0; nPos < nCount; ++nPos )
        Data[nPos] = Tab.m_Row[nFromRow + nPos]->m_Field[nColumn]->m_sData;

    return true;
}





//

//   r e a d M a t r i x

//



// Reads a block of the current table with all four bounds left at
// their defaults (negative values); see the six-argument
// readMatrix() for how the defaults are resolved.
bool tHtmlReader::readMatrix( tHeap2<const char*>& Data, bool bTranspose )
{
    const int nDefault = -1;

    return readMatrix( Data, nDefault, nDefault, nDefault, nDefault,
        bTranspose );
}





//

//   r e a d M a t r i x

//



// Reads a block of the current table up to and including row
// nToRow and field nToField, with both start bounds left at their
// defaults (negative values); see the six-argument readMatrix()
// for how the defaults are resolved.
bool tHtmlReader::readMatrix( tHeap2<const char*>& Data,
    int nToRow, int nToField, bool bTranspose )
{
    const int nDefaultFrom = -1;

    return readMatrix( Data, nDefaultFrom, nToRow, nDefaultFrom, nToField,
        bTranspose );
}





//

//   r e a d M a t r i x

//



// Copies the text pointers of a rectangular block of the currently
// selected table - rows nFromRow..nToRow, fields
// nFromField..nToField, all inclusive - into Data.
//
//   nFromRow < 0     start at the currently selected row, or at
//                    row 0 if no row is selected
//   nToRow < 0       stop at the last row of the table
//   nFromField < 0   start at the currently selected field, or at
//                    field 0 if no field is selected
//   nToField < 0     stop at the last field that every row in the
//                    row range has (shortest row decides)
//
// Without bTranspose the element for table row r and field f is
// stored at Data[r - nFromRow][f - nFromField]; with bTranspose
// the two indices are exchanged.
//
// Returns false, leaving Data untouched, if no table is selected,
// a range is empty or exceeds the table, an explicitly requested
// nToField is missing in some row, or any field in the block
// contains a nested table. Entries point at the fields' own
// strings and may be 0.
bool tHtmlReader::readMatrix( tHeap2<const char*>& Data,
    int nFromRow, int nToRow, int nFromField, int nToField,
    bool bTranspose )
{
    if( m_CurSel.m_pTable == 0 )
        return false;

    tTable& Tab = *m_CurSel.m_pTable;

    if( nFromRow < 0 )
        nFromRow = ( m_CurSel.m_nRow < 0 ) ? 0 : m_CurSel.m_nRow;

    if( nToRow < 0 )
        nToRow = Tab.m_Row.numOfElems() - 1;

    if( nFromRow > nToRow || nToRow >= Tab.m_Row.numOfElems() )
        return false;

    if( nFromField < 0 )
        nFromField = ( m_CurSel.m_nField < 0 ) ? 0 : m_CurSel.m_nField;

    int r, f;

    if( nToField < 0 ) {
            // Default: the narrowest row in the range determines
            // the last usable field.
        int nMinFields = Tab.m_Row[nFromRow]->m_Field.numOfElems();

        for( r = nFromRow + 1; r <= nToRow; ++r ) {
            if( Tab.m_Row[r]->m_Field.numOfElems() < nMinFields )
                nMinFields = Tab.m_Row[r]->m_Field.numOfElems();
        }
        nToField = nMinFields - 1;
    }
    else {
            // Explicit bound: every row must reach it.
        for( r = nFromRow; r <= nToRow; ++r ) {
            if( Tab.m_Row[r]->m_Field.numOfElems() <= nToField )
                return false;   // not enough fields in this row
        }
    }

    if( nFromField > nToField )
        return false;

        // Validate the whole block before touching Data.
    for( r = nFromRow; r <= nToRow; ++r ) {
        tRow& Row = *Tab.m_Row[r];

        for( f = nFromField; f <= nToField; ++f ) {
            if( Row.m_Field[f]->m_Table.numOfElems() > 0 )
                return false;   // recursive table found
        }
    }

    const int nRowCount = nToRow - nFromRow + 1;
    const int nFieldCount = nToField - nFromField + 1;

    if( bTranspose ) {
        Data.reset( nRowCount );
        Data.numOfRows( nFieldCount );
    }
    else {
        Data.reset( nFieldCount );
        Data.numOfRows( nRowCount );
    }

    for( r = 0; r < nRowCount; ++r ) {
        tRow& Row = *Tab.m_Row[nFromRow + r];

        for( f = 0; f < nFieldCount; ++f ) {
            const char* sText = Row.m_Field[nFromField + f]->m_sData;

            if( bTranspose )
                Data[f][r] = sText;
            else
                Data[r][f] = sText;
        }
    }

    return true;
}





//

//   f i n d S u b s t r i n g

//



// Searches all top-level tables, in order, for a field whose text
// contains sString (nested tables are searched too, by the
// recursive overload).
//
// Returns true on the first hit; Selection then starts with the
// index of the top-level table, followed by the index path filled
// in by the recursive overload. Returns false if nothing is found;
// Selection then holds only the index of the last table examined,
// or nothing if there are no tables.
bool tHtmlReader::findSubstring( const char* sString,
    tHeap<int>& Selection ) const
{
    Selection.reset();

    const int nTables = m_Table.numOfElems();
    int nTable = 0;

    while( nTable < nTables ) {
            // Start a fresh path for each top-level table.
        Selection.reset();
        Selection.append( nTable );

        if( findSubstring( *m_Table[nTable], sString, Selection ) )
            return true;

        ++nTable;
    }

    return false;
}



MTG_END_NAMESPACE



//#define _TEST

#if defined(_TEST)



#if defined(_WIN32)

    #include <conio.h>

#else

    #define getche getchar

#endif



MTG_USING_NAMESPACE



//

//   m a i n

//



void main( int argc, char *argv[] )



{

    InitMemory();

    atexit( ExitMemory );



    int cCmd, nTable, nRow, nField, nTo, nFrom;

    tHeap<const char*> Data;

    tHeap<int> Selection;

    char sBuf[1024];

    bool bOk;

    FILE* fp;



    printf( "\nTest tHtmlDoc\n\n" );



    bool bGo = true;



    tHtmlDoc Doc;

    tHtmlReader* R = 0;



    while( bGo ) { 

        printf( "<F>ile <T>op T<a>ble <R>ow F<i>eld <O>ut Ro<w> <C>ol Fin<d> E<x>it: " );

        cCmd = getche();

        printf( "\n" );



        switch( cCmd ) {

            case 'f' :

            case 'F' :

                printf( "File: " );

                gets( sBuf );



                if( ( fp = fopen( sBuf, "r" ) ) != 0 ) {

                    fseek( fp, 0, SEEK_END );

                    size_t nSize = ftell( fp );

                    fseek( fp, 0, SEEK_SET );



                    char* p = new char[nSize + 1];

                    fread( p, nSize, 1, fp );

                    p[nSize] = 0;

                    fclose( fp );



                    tHtmlDoc Q = p;

                    delete p;

                    Doc = Q;

                    if( R != 0 )

                        delete R;

                    R = new tHtmlReader( Doc );

                    R->readTables();

                }

                break;



            case 't' :

            case 'T' :

                if( R != 0 )

                    R->top();

                break;



            case 'a' :

            case 'A' :

                if( R == 0 )

                    break;



                printf( "Table: " );

                gets( sBuf );

                if( sscanf( sBuf, "%d", &nTable ) == 1 ) {

                    if( R->selectTable( nTable ) )

                        printf( "Ok\n" );

                }

                break;



            case 'r' :

            case 'R' :

                if( R == 0 )

                    break;



                printf( "Row: " );

                gets( sBuf );

                if( sscanf( sBuf, "%d %d", &nTable, &nRow ) == 2 ) {

                    if( R->selectRow( nTable, nRow ) )

                        printf( "Ok\n" );

                }

                else

                if( sscanf( sBuf, "%d", &nRow ) == 1 ) {

                    if( R->selectRow( nRow ) )

                        printf( "Ok\n" );

                }

                break;



            case 'i' :

            case 'I' :

                if( R == 0 )

                    break;



                printf( "Field: " );

                gets( sBuf );

                if( sscanf( sBuf, "%d %d %d", &nTable, &nRow, &nField ) == 3 ) {

                    if( R->selectField( nTable, nRow, nField ) )

                        printf( "Ok\n" );

                }

                else

                if( sscanf( sBuf, "%d %d", &nRow, &nField ) == 2 ) {

                    if( R->selectField( nRow, nField ) )

                        printf( "Ok\n" );

                }

                else

                if( sscanf( sBuf, "%d", &nField ) == 1 ) {

                    if( R->selectField( nField ) )

                        printf( "Ok\n" );

                }

                break;



            case 'o' :

            case 'O' :

                if( R != 0 ) {

                    const char* s = R->readField();

                    if( s == 0 )

                        printf( "No data\n" );

                    else

                        printf( "Data: [%s]\n", s );

                }

                break;



            case 'w' :

            case 'W' :

                if( R == 0 )

                    break;



                printf( "Read out row: " );

                gets( sBuf );



                bOk = false;

                if( sscanf( sBuf, "%d %d", &nFrom, &nTo ) == 2 )

                    bOk = R->readRow( Data, nFrom, nTo );

                else

                if( sscanf( sBuf, "%d", &nTo ) == 1 )

                    bOk = R->readRow( Data, nTo );

                else

                    bOk = R->readRow( Data );



                if( bOk ) {

                    for( int i = 0; i < Data.numOfElems(); ++i )

                        printf( "[%s]\n", Data[i] );

                }

                else {

                    printf( "No data\n" );

                }

                break;



            case 'c' :

            case 'C' :

                if( R == 0 )

                    break;



                printf( "Read out column: " );

                gets( sBuf );



                bOk = false;

                if( sscanf( sBuf, "%d %d", &nFrom, &nTo ) == 2 )

                    bOk = R->readColumn( Data, nFrom, nTo );

                else

                if( sscanf( sBuf, "%d", &nTo ) == 1 )

                    bOk = R->readColumn( Data, nTo );

                else

                    bOk = R->readColumn( Data );



                if( bOk ) {

                    for( int i = 0; i < Data.numOfElems(); ++i )

                        printf( "[%s]\n", Data[i] );

                }

                else {

                    printf( "No data\n" );

                }

                break;



            case 'd' :

            case 'D' :

                if( R == 0 )

                    break;



                printf( "Find: " );

                gets( sBuf );

                if( R->findSubstring( sBuf, Selection ) ) {

                    printf( "Selection: " );

                    for( int i = 0; i < Selection.numOfElems(); ++i )

                        printf( " %d", Selection[i] );

                    printf( "\n" );

                }

                else {

                    printf( "No data\n" );

                }

                break;



            case 'x' :

            case 'X' :

                bGo = false;

                break;

        }



        if( R != 0 && R->validSelection() )

            printf( "Valid selection\n" );

    }



    if( R != 0 )

        delete R;



#if ! defined(_WIN32)

    printf( "\n" );

#endif

}



#endif

