#if !defined __REVBINPERMUTE_H
#define      __REVBINPERMUTE_H


#include "inline.h"  // swap()
#include "fxttypes.h"
#include "auxbit.h" //  BITS_PER_LONG


inline ulong
revbin(ulong m, ulong ldn)
// Return the low ldn bits of m in reversed order:
// bit 0 of m ends up at bit ldn-1 of the result, bit 1 at bit ldn-2, etc.
// ldn := number of bits to reverse (ldn==0 gives 0)
{
    ulong rev = 0;
    for (ulong cnt = 0; cnt != ldn; ++cnt)
    {
        // make room at the low end of rev and move the
        // current lowest bit of m into it
        rev = (rev << 1) | (m & 1);
        m >>= 1;
    }

    return  rev;
}
//============== end =============

inline ulong
revbin_update(ulong r, ulong n)
// let r = revbin(x, ld(n)) at entry
// then return  revbin(x+1, ld(n))
//
// Works like a counter increment done from the highest bit downwards:
// starting at bit n/2 each bit of r is toggled; the scan stops at the
// first bit that was zero (and is now one), every bit that was one is
// cleared and the "carry" moves on to the next lower bit.
//
// If all bits below n are set in r (x+1 wraps around), the carry runs
// off the low end and 0 (== revbin(0, ld(n))) is returned.
// For n<=1 there is no bit to toggle and r is returned unchanged.
{
    n >>= 1;
    while ( n != 0 )  // n==0: carry left the word, stop (avoids endless loop)
    {
        r ^= n;
        if ( r & n )  break;  // bit was 0 before the toggle: done
        n >>= 1;
    }
    return  r;
}
//============== end =============


#define  SYMM  4  // 1, 2, 4 (default is 4)
// swap f[k] and f[r]; the index expressions are evaluated exactly once
#define  idx_swap(f, k, r)  { ulong kx=(k), rx=(r);  swap(f[kx], f[rx]); }
template <typename Type>
void
revbin_permute(Type *f, ulong n)
// Reorder f[0..n-1] in place so that every element at index k
// changes place with the element at the bit-reversed index r
// (reversal over ld(n) bits).
// n is expected to be a power of two; for n<=8 only n==4 and n==8
// cause any swaps, all other small n leave f[] untouched.
//
// For n>=16 the index k runs over the first n/SYMM indices only while
// r (the bit-reversed k) is updated incrementally via an xor-table.
// The remaining swaps are obtained by symmetry: if (k, r) is a pair of
// mutually reversed indices then so is (c^k, c^r) for every constant c
// whose bit pattern is a palindrome (n1, nx1, nx2 below).
//
// The xor-table is a local (automatic) array, so the function keeps
// no state between calls and is reentrant.
{
    if ( n<=8 )
    {
        if ( n==8 )
        {
            swap(f[1], f[4]);
            swap(f[3], f[6]);
            return;
        }

        if ( n==4 )
        {
            swap(f[1], f[2]);
            return;
        }

        return;
    }

    const ulong nh = (n>>1);
    // x[i] has the i+1 highest (of ld(n)) bits set: xor-ing it into r
    // mirrors the flip of bits 0..i caused by incrementing k.
    // Not static: the table is rebuilt on every call anyway, and a shared
    // static table would be a data race for concurrent callers.
    ulong x[BITS_PER_LONG];
    x[0] = nh;
    {  // initialize xor-table:
        ulong i, m = nh;
        for (i=1; m!=0; ++i)
        {
            m >>= 1;
            x[i] = x[i-1] ^ m;
        }
    }

#if  ( SYMM >= 2 )
    const ulong n1  = n - 1;    // = 11111111
#if  ( SYMM >= 4 )
    const ulong nx1 = nh - 2;   // = 01111110
    const ulong nx2 = n1 - nx1; // = 10111101
#endif //  ( SYMM >= 4 )
#endif //  ( SYMM >= 2 )
    ulong k=0, r=0;
    // each pass handles four consecutive k (k%4 == 0, 1, 2, 3);
    // swapping only when r>k makes sure every pair is swapped once
    while ( k<n/SYMM  )  // n>=16, n/2>=8, n/4>=4
    {
        // ----- k%4 == 0:
        if ( r>k )
        {
            swap(f[k], f[r]);  // <nh, <nh 11
#if  ( SYMM >= 2 )
            idx_swap(f, n1^k, n1^r);  // >nh, >nh 00
#if  ( SYMM >= 4 )
            idx_swap(f, nx1^k, nx1^r);  // <nh, <nh 11
            idx_swap(f, nx2^k, nx2^r);  // >nh, >nh 00
#endif //  ( SYMM >= 4 )
#endif //  ( SYMM >= 2 )
        }

        // k even: ++k only sets bit 0, so r only gets its highest bit toggled
        r ^= nh;
        ++k;

        // ----- k%4 == 1:
        if ( r>k )
        {
            swap(f[k], f[r]);  // <nh, >nh 10
#if  ( SYMM >= 4 )
            idx_swap(f, n1^k, n1^r);  // >nh, <nh 01
#endif //  ( SYMM >= 4 )
        }

        { // scan for lowest unset bit of k:
            // bit 0 of k is known to be set here, so start at bit 1
            ulong m = 2,  i = 1;
            while ( m & k )  { m <<= 1;  ++i; }
            r ^= x[i];
        }
        ++k;

        // ----- k%4 == 2:
        if ( r>k )
        {
            swap(f[k], f[r]);  // <nh, <nh 11
#if  ( SYMM >= 2 )
            idx_swap(f, n1^k, n1^r); // >nh, >nh 00
#endif //  ( SYMM >= 2 )
        }

        r ^= nh;
        ++k;

        // ----- k%4 == 3:
        if ( r>k )
        {
            swap(f[k], f[r]);    // <nh, >nh 10
#if  ( SYMM >= 4 )
            idx_swap(f, nx1^k, nx1^r);   // <nh, >nh 10
#endif //  ( SYMM >= 4 )
        }

        { // scan for lowest unset bit of k:
            // bits 0 and 1 of k are known to be set here, so start at bit 2
            ulong m = 4,  i = 2;
            while ( m & k )  { m <<= 1;  ++i; }
            r ^= x[i];
        }
        ++k;
    }
}
// =========================
// NOTE(review): SYMM is not #undef'd and so stays visible to every file
// including this header -- confirm nothing relies on it before removing.
#undef  idx_swap


#endif // !defined __REVBINPERMUTE_H
