
#include <iostream.h>
#include <assert.h>
#include <stdlib.h>  // exit(), getenv()

#include "workspace.h"
#include "mybuiltin.h"
#include "auxid.h"


// Byte-count helpers: 2^20 and 2^10 as ulong.
#define MEGA_BYTE (ulong(1<<20))
#define KILO_BYTE (ulong(1<<10))

// Upper bound for the workspace size in bytes.
// Starts at 0; workspace::get_max_size() assigns the real value.
ulong workspace::maxsize = 0;

// Disabled alternative definition, kept for reference:
//ulong workspace::maxsize = ((ulong)32*KILO_BYTE);
// folded mult with ldn==11

ulong workspace::numsubws = 5;    // max num of sub-workspaces

ulong workspace::wsbytes = 0;     // size in bytes
ulong workspace::wsdoubles = 0;   // size in doubles

double *workspace::ws0 = 0;       // pointer to whole workspace

double **workspace::subws = 0;    // pointers to sub-workspaces
ulong  *workspace::subwslen = 0;  // lengths of sub-workspaces (in doubles)
int    *workspace::subwsinuse = 0;// whether sub-workspaces are in use


// the 'global' workspace:
workspace gws;

ulong
workspace::max_prec()
//
// Return half of the workspace size counted in doubles.
//
{
    const ulong half = wsdoubles/2;
    return half;
}
//-----------------------------


void
workspace::get_max_size()
{
    maxsize = ((ulong)4*MEGA_BYTE); // default
    // Increasing the maximal workspace size:
    // With the default maximum of 4MB you can work
    // with precisions of (radix 10,000 assumed)
    // up to 1 million decimal digits.
    // To use bigger sizes than the default
    // set the environment variable HFLOAT_MAX_WORKSPACE.
    // No swap must occur when using the workspace,
    // i.e. it has to be unused RAM.
    // Set HFLOAT_MAX_WORKSPACE to the amount of physical RAM 
    // minus the bytes used by operating system
    // and other running programs.
    // Typically set to RAMsize minus 6MB for nongrafix terminal
    // or to RAMsize minus 20MB for fancy windowing system.
    // Currently if you don't give a power of 2
    // then the size is truncated to the next smaller power of 2.
    // You may append 'k' for kilobyte or 'M' for megabyte.
    // You might have to say something like
    // export HFLOAT_MAX_WORKSPACE='16M'
    // to set it to 16megabytes.
    char *p = getenv("HFLOAT_MAX_WORKSPACE");
//    cout << " workspace::get_max_size(): p=" << (void *)p << endl;

//    cout << " workspace::get_max_size(): HFLOAT_MAX_WORKSPACE=" << p << endl;
    if ( p==NULL )  return;

    long m = atol(p);
//    cout << " workspace::get_max_size(): m=" << m << endl;

    for (int i=strlen(p); i>=0; --i)
    {
        if ( (p[i]=='k') || (p[i]=='K' ) )
        {
            m *= 1024;
            break;
        }

        if ( (p[i]=='m') || (p[i]=='M' ) )
        {
            m *= 1024*1024;
            break;
        }
    }

//    cout << " workspace::get_max_size(): m=" << m << endl;

    m = (1<<ld(m));  // truncate to power of 2
//    cout << " workspace::get_max_size(): m=" << m << endl;

    if ( m>=32*(1<<10) )  maxsize = (ulong)m; // minimal size
}
//----------------------------------------------------


void
workspace::setup(ulong n)
//
// (Re-)allocate the workspace for operands of length n.
// n is rounded up to a power of 2; the byte size is 4*n doubles,
// at least 1024 bytes and at most maxsize (see get_max_size()).
// All sub-workspace slots are reset to 'unused'.
// Any pointer handed out before a repeated call becomes invalid.
//
{
    static ulong s = 0;  // byte size chosen by the previous call (0: none)
    if ( s!=0 )
    {
        // release everything the previous call allocated,
        // not just the data area:
        delete [] ws0;          ws0 = 0;
        delete [] subwsinuse;   subwsinuse = 0;
        delete [] subws;        subws = 0;
        delete [] subwslen;     subwslen = 0;
    }

    // shift in ulong, not int, so large n cannot overflow the shift:
    ulong n2 = ((ulong)1<<ld(n));
    if ( n!=n2 )  n = 2*n2;         // force (next) power of 2

    s = n*4*sizeof(double);         // amount for fft mult
    s = MAX(s,1024);                // avoid trouble with tiny sizes
    //    s=MAX(s,1<<20);  // for pfafft


    get_max_size();
    s = MIN(maxsize,s);  // care about reality


    wsbytes = s;
    wsdoubles = s/sizeof(double);

    ws0 = new double[ wsdoubles ];
    assert( ws0 );

    // presumably zero-fills the workspace -- d_null() is defined elsewhere
    d_null((double *)ws0,wsdoubles);

    subwsinuse = new int[numsubws];
    assert( subwsinuse );

    subws = new double*[numsubws];
    assert( subws );

    subwslen = new ulong[numsubws];
    assert( subwslen );


    // mark every slot as free and empty:
    for (ulong i=0; i<numsubws; ++i)
    {
        subwsinuse[i] = 0;
        subws[i] = 0;
        subwslen[i] = 0;
    }
}
//----------------------------------------------------


ulong
workspace::wsdoublesinuse() // const
//
// Return the offset (in doubles, relative to ws0) just past the
// highest-reaching sub-workspace that is currently in use;
// zero if none is in use.
//
{
    double *top = ws0;

    for (ulong k=0; k!=numsubws; ++k)
    {
        if ( !subwsinuse[k] )  continue;

        double *end = subws[k]+subwslen[k];
        if ( end>top )  top = end;
    }

    return (ulong)(top-ws0);
}
//----------------------------------------------------


void
workspace::let_ws(void *p)
{
    ulong i;

    for (i=0; i<numsubws; ++i)
    {
        if ( subwsinuse[i] )
        {
            if ( p==subws[i] )
            {
                subwsinuse[i]=0;
                subwslen[i]=0;

                return;
            }
        }
    }

    cerr << "\n workspace::let_ws():"
         << " FATAL ERROR: nonexisting subworkspace "
         << endl;

    exit(-4);
}
//----------------------------------------------------


double *
workspace::get_ws_doubles(ulong n)
//
// Hand out a sub-workspace of n doubles, placed directly behind
// the highest sub-workspace currently in use.
// Terminates the program with exit code -3 if the remaining
// space is too small or if all numsubws slots are taken.
//
{
    // Sample the first free offset once, before any slot is modified.
    // A released slot still holds its old start pointer; marking it
    // 'in use' first would let that stale pointer push the new block
    // beyond the offset the capacity check below was made with.
    const ulong used = wsdoublesinuse();

    if ( n>(wsdoubles-used) )  // not enough mem ?
    {
        cerr<< "\n workspace::get_ws_doubles():"
            << " FATAL ERROR: no more workspace memory "
            << endl;

        exit(-3);
    }

    ulong i;
    for (i=0; i<numsubws; ++i)  // find unused subws
    {
        if ( !subwsinuse[i] )  break;
    }

    if ( i>=numsubws )  // no subws found
    {
        cerr<<"\n workspace::get_ws_doubles():"
            << " FATAL ERROR: no more subworkspaces "
            << endl;
        exit(-3);
    }

    // fill in the slot completely before flagging it as used:
    subws[i] = (double *)ws0+used;
    subwslen[i] = n;
    subwsinuse[i] = 1;

    assert( wsdoublesinuse()<=wsdoubles );

    return subws[i];
}
//----------------------------------------------------


ulong
bockwurst(ulong n, ulong ss)
//
// user asks for n times ss bytes,
// but:
// subworkspaces are only given
// in sizes that are multiples of chunk bytes ...
//
// Return the number of chunks (chunk == sizeof(double))
// needed to hold n*ss bytes, rounded up.
// As before, a request with n==0 and ss!=0 still yields one chunk:
// a zero-length sub-workspace would start at the same address as
// the next one handed out, and let_ws() identifies by address.
// (n*ss is not checked for overflow.)
//
{
    const ulong chunk = sizeof(double);
    const ulong bytes = n*ss;

    ulong ret = bytes/chunk;

    // round up against the full request, not against one element:
    if ( ret*chunk<bytes )  ret++;

    if ( (ret==0) && (ss!=0) )  ret = 1;

    return ret;
}
//----------------------------------------------------


LIMB *
workspace::get_ws_limbs(ulong n)
//
// Hand out a sub-workspace with room for n LIMBs.
//
{
    // size of the request in units of doubles:
    const ulong ndoubles = bockwurst(n,sizeof(LIMB));
    double *mem = get_ws_doubles( ndoubles );

    return (LIMB *)mem;
}
//----------------------------------------------------


char *
workspace::get_ws_bytes(ulong n)
//
// Hand out a sub-workspace with room for n bytes.
//
{
    // size of the request in units of doubles:
    const ulong ndoubles = bockwurst(n,sizeof(char));
    double *mem = get_ws_doubles( ndoubles );

    return (char *)mem;
}
//----------------------------------------------------


void
workspace::info(ulong n) const
//
// Print the workspace size and its maximum to cout and,
// if howto_mul(n) is nonzero, which multiplication method
// length-n hfloats will use.
// Asserts if that method is the unimplemented mass storage multiply.
//
{
    cout<<"\n size of workspace is "
        << wsbytes <<" bytes ";

    cout << " (maxsize= "
         << maxsize << " bytes) ";


    /*
    cout << "(="
        << wsbytes/sizeof(LIMB) <<" LIMBs  ="
        << wsbytes/sizeof(double) <<" doubles) ";
        */

    const int how = howto_mul(n);  // evaluate once, used twice below

    if ( how )
    {
        cout << "\n length-" << n
             << " hfloats will use ";

        if ( 1==how )
        {
            cout<<" folded multiply ";
        }
        else
        {
            cout<<" mass storage multiply ";
            cout << " ... which is not yet implemented ! " <<endl;
            // always fails; the string only labels the assertion message
            // (no pointer-to-int cast, which 64-bit compilers reject)
            assert( 0 && " mass storage multiply not implemented " );
        }
    }

    cout<<endl;
}
//----------------------------------------------------


int
workspace::howto_mul(ulong p) const
//
// Tell which multiplication method fits the current workspace
// for operands of length p (p is rounded up to a power of 2, n):
//   0: 4*n doubles fit into the workspace (ordinary FFT mul)
//   1: 2*n equals the workspace size in doubles (mul by sqr)
//   2: anything else (mass storage mul)
//
{
    // set n to pow of 2
    // (shift done in ulong, not int, so large p cannot overflow it):
    ulong n = ((ulong)1<<ld(p));

    if ( n!=p )  n*=2;

    if ( n*4 <= wsdoubles )  return 0;  // ordinary FFT mul

    if ( n*2 == wsdoubles )  return 1;  // mul by sqr

    return 2;  // mass storage mul
}
//----------------------------------------------------


void
workspace::dump() const
//
// Print the number of doubles in use and, for every sub-workspace
// in use, its offset from ws0 and its length; then run check().
//
{
    cout<<"\n workspace::dump():";
    cout<< " wsdoublesinuse=" << wsdoublesinuse();

    for (ulong k=0; k<numsubws; ++k)
    {
        if ( !subwsinuse[k] )  continue;

        cout << "\n subws[" << k << "]: ";
        cout << " =ws0+" << subws[k]-ws0;
        cout << "  len=" << subwslen[k] << endl;
    }

    check();
}
//----------------------------------------------------


void
workspace::check() const
{
    for (ulong i=0; i<numsubws; ++i)
    {
        if ( subwsinuse[i] )
        {
            assert( subws[i]-ws0>=0 );
            assert( (ulong)(subws[i]-ws0)<=wsdoubles );
            assert( subwslen[i]<=wsdoubles );
            assert( subws[i]-ws0+subwslen[i]<=wsdoubles );
        }
    }
}
//----------------------------------------------------
