#include <ctype.h>
#include <fstream.h>
#include <strstream.h>
#include <string.h>
#include "ap.h"


// Automatic init stuff

// Guard flag for the init/deinit pair: apinit () sets it to true on its
// first call and returns early on later calls; apdeinit () resets it to
// false and returns early when it is already false.
bool apfloatinitialized = false;

// Ties apinit () / apdeinit () to the lifetime of an object: constructing
// an instance initializes the library, destroying it shuts the library down.
class apfloatinit
{
public:
    // Runs apinit () and asserts that it reported success (a false return).
    apfloatinit ()
    {
        // The call is kept outside the assert so that it still happens
        // when assertions are compiled out.
        const bool initfailed = apinit ();

        assert (initfailed == false);
    }

    // Undoes the constructor's work by calling apdeinit ().
    ~apfloatinit ()
    {
        apdeinit ();
    }
};

// File-scope instance: its constructor (which calls apinit ()) runs at
// program init, and its destructor (which calls apdeinit ()) at program exit.
apfloatinit autoinit;


// Code size in megabytes (approx.); apinit () subtracts this many megabytes
// from Ramsize before sizing the workspace.
const size_t CODESIZE = 2;

// Size of the line buffer and the key buffer apinit () uses while reading
// apfloat.ini.
const size_t BUFSIZE = 256;

// NOTE(review): not referenced in the visible part of this file; presumably
// used by floating-point rounding code elsewhere -- confirm.
float chopper64 = 9223372036854775808.0;        // 2^63
// Floating-point copy of the current modulus; assigned in setmodulus ().
double dmodulus;

// Rounds x down to a power of two: returns the largest 2^n that does not
// exceed x, or 0 when x is 0.
size_t rnd2down (size_t x)
{
    if (x == 0) return 0;

    size_t power = 1;

    // One doubling for every bit position below the highest set bit of x
    for (size_t rest = x >> 1; rest != 0; rest >>= 1)
        power <<= 1;

    return power;
}

// Rounds x down to the nearest value of the form 2^n or 3*2^n
// (1, 2, 3, 4, 6, 8, 12, 16, 24, ...). Returns 0 when x is 0.
//
// The candidates are walked in increasing order. Stepping past the largest
// representable candidate (3 * 2^(bits-2)) would wrap around to 0, so a
// wrapped successor is treated as "too big" and ends the search instead of
// looping forever.
size_t rnd23down (size_t x)
{
    if (!x) return 0;

    size_t best = 1, next;

    for (;;)
    {
        if (best == 1)
            next = 2;
        else if ((best & (best - 1)) == 0)      // best is 2^n: go to 3*2^(n-1)
            next = best / 2 * 3;
        else                                    // best is 3*2^n: go to 2^(n+2)
            next = best / 3 * 4;

        // next <= best can only happen when the multiplication wrapped
        if (next <= best || next > x)
            break;

        best = next;
    }

    return best;
}

// Returns the largest power of two whose square does not exceed x
// (i.e. 2^floor(log4(x))), or 0 when x is 0.
size_t sqrt4down (size_t x)
{
    if (x == 0) return 0;

    size_t root = 1;

    // Every two bits dropped from x contribute one doubling of the result
    x >>= 2;
    while (x != 0)
    {
        root <<= 1;
        x >>= 2;
    }

    return root;
}

// Inits global variables
bool apinit (void)
{
    if (apfloatinitialized) return false;
    apfloatinitialized = true;

    size_t v;
    char buf[BUFSIZE], str[BUFSIZE], *p;
    fstream fs ("apfloat.ini", ios::in);

    // RAM size
    // Set this to the size of actual RAM memory you have on your computer.
    // Ramsize = 8 * (1 << 20);
    Ramsize = 16 * (1 << 20);

    // L1 cache size
    // 486's and Pentiums have 8KB (data) L1 cache
    CacheL1size = 8 * (1 << 10);

    // L2 cache size
    // set to amount of L2 cache
    CacheL2size = 256 * (1 << 10);

    // cache burst width
    // 16 bytes for 486, 32 for Pentium
    // Cacheburst = 16;
    Cacheburst = 32;

    // Longer numbers than this will be stored by default to disk
    Memorytreshold = 16384;

    // Efficient read/write block size
    Blocksize = 16384;

    if (!fs.fail ())
    {
        while (!fs.eof ())
        {
            fs.getline (buf, BUFSIZE);
            if ((p = strchr (buf, '=')) != 0)
            {
                *p = '\0';
                p++;
                istrstream (buf) >> str;
                for (v = strlen (str); v--;)
                    str[v] = tolower (str[v]);
                istrstream (p) >> v;
                if (!strcmp (str, "ramsize")) Ramsize = v;
                else if (!strcmp (str, "cachel1size")) CacheL1size = v;
                else if (!strcmp (str, "cachel2size")) CacheL2size = v;
                else if (!strcmp (str, "cacheburst")) Cacheburst = v;
                else if (!strcmp (str, "memorytreshold")) Memorytreshold = v;
                else if (!strcmp (str, "blocksize")) Blocksize = v;
            }
        }
    }

    // Set to the maximum 2^n or 3*2^n size block of modints that fits in the memory
    Maxblocksize = rnd23down ((Ramsize - CODESIZE * (1 << 20)) / sizeof (modint));

    // Size of matrix that fits in L2 cache
    Cachetreshold = rnd2down (CacheL2size / sizeof (modint));

    // Cache burst in modints
    Cacheburstblocksize = rnd2down (Cacheburst / sizeof (modint));

    // Block size that fits in L1 cache
    Cachemaxblocksize = rnd2down (CacheL1size / sizeof (modint));

    // Transpose block size, fits in processor L1 cache
    Cacheblocksize = sqrt4down (CacheL1size / sizeof (modint));

    if (Blocksize > Memorytreshold)
        Blocksize = Memorytreshold;

    if ((workspace = new rawtype[Maxblocksize]) == 0)
        return true;

    asm ("
    subl $4, %esp
    fnstcw (%esp)
    movb $15, 1(%esp)
    fldcw (%esp)
    addl $4, %esp
    ");

    return false;
}

// Releases what apinit () set up. Does nothing unless the library is
// currently marked as initialized, so repeated calls are harmless.
void apdeinit (void)
{
    if (apfloatinitialized)
    {
        apfloatinitialized = false;

        // Free the workspace and clear the pointer
        delete[] workspace;
        workspace = 0;

        // Put the FPU back into its initial state
        asm ("finit");
    }
}

// Set the fpu for doing fpu multiplication.
//
// Stores m into modint::modulus, then modulus, then dmodulus (the chained
// assignment is evaluated right to left), and afterwards leaves the value
// 1/modulus on the x87 register stack.
void setmodulus (rawtype m)
{
    dmodulus = modulus = modint::modulus = m;

    // ffree %st   tags the current top-of-stack register as empty
    // fld1        pushes the constant 1.0
    // fidivl      divides st(0) by the 32-bit integer stored at _modulus
    // NOTE(review): _modulus is presumably the assembler-level name of the
    // global `modulus` on a target that prefixes C symbols with '_', and the
    // instruction assumes that variable is a 32-bit integer -- confirm.
    asm ("
    ffree %st
    fld1
    fidivl _modulus
    ");
}

// Clear what setmodulus () did.
// Call at the end of the program.
void clearmodulus (void)
{
    // Tags the top x87 register as empty; this is the register setmodulus ()
    // loaded, assuming the FPU stack has been kept balanced since then.
    asm ("ffree %st");
}

// Sets the base
//
// Publishes three groups of globals derived from basedigit:
//   Base, Basedigits  - the highest power of basedigit accepted by the loop
//                       below, and its exponent
//   Basedigit         - basedigit itself
//   Basefactors, NBasefactors - the values produced by factor () with
//                       adjacent duplicates removed, and their count
void apbase (rawtype basedigit)
{
    rawtype power = 1, next, factors[64];
    int digits = 0, total, unique, k;

    // Keep multiplying by basedigit while bigmul () returns 0 and the
    // product stays below moduli[2].
    // NOTE(review): a zero return from bigmul () presumably means the
    // product fit in one word -- confirm against its definition.
    for (;;)
    {
        if (bigmul (&next, &power, basedigit, 1) != 0)
            break;
        if (!(next < moduli[2]))
            break;

        power = next;
        digits++;
    }

    Base = power;
    Basedigit = basedigit;
    Basedigits = digits;

    total = factor (factors, basedigit);

    // Compact the factor list in place: an entry is kept only when it
    // differs from the most recently kept one.
    unique = 0;
    for (k = 0; k < total; k++)
    {
        if (factors[unique] != factors[k])
        {
            unique++;
            factors[unique] = factors[k];
        }
    }

    total = unique + 1;

    NBasefactors = total;

    for (k = 0; k < total; k++)
        Basefactors[k] = factors[k];
}
