#if !defined __REVBINPERMUTE0_H
#define      __REVBINPERMUTE0_H


#include "inline.h"  // swap(), swap0()
#include "fxttypes.h"
#include "auxbit.h" //  BITS_PER_LONG


#define  SYMM0  2  // 2, 4 (default is 2)
#define  idx_swap(f, k, r)  { ulong kx=(k), rx=(r);  swap(f[kx], f[rx]); }
template <typename Type>
void
revbin_permute0(Type *f, ulong n)
{
    if ( n<=8 )
    {
        if ( n==8 )
        {
            swap0(f[1], f[4]);
            swap0(f[3], f[6]);
            return;
        }

        if ( n==4 )
        {
            swap0(f[1], f[2]);
            return;
        }

        return;
    }

    const ulong nh = (n>>1);
    static ulong x[BITS_PER_LONG];
    x[0] = nh;
    {  // initialize xor-table:
        ulong i, m = nh;
        for (i=1; m!=0; ++i)
        {
            m >>= 1;
            x[i] = x[i-1] ^ m;
        }
    }

#if  ( SYMM0 >= 2 )
#if  ( SYMM0 >= 4 )
    const ulong n1  = n - 1;    // = 11111111
    const ulong nx1 = nh - 2;   // = 01111110
//    const ulong nx2 = n1 - nx1; // = 10111101
#endif //  ( SYMM0 >= 4 )
#endif //  ( SYMM0 >= 2 )
    ulong k=0, r=0;
    while ( k<n/SYMM0  )  // n>=16, n/2>=8, n/4>=4
    {
        // ----- k%4 == 0:
        if ( r>k )
        {
            swap(f[k], f[r]);  // <nh, <nh 11
#if  ( SYMM0 >= 2 )
//            idx_swap(f, n1^k, n1^r);  // >nh, >nh 00
#if  ( SYMM0 >= 4 )
            idx_swap(f, nx1^k, nx1^r);  // <nh, <nh 11
//            idx_swap(f, nx2^k, nx2^r);  // >nh, >nh 00
#endif //  ( SYMM0 >= 4 )
#endif //  ( SYMM0 >= 2 )
        }

        r ^= nh;
        ++k;

        // ----- k%4 == 1:
        if ( r>k )
        {
            swap0(f[k], f[r]);  // <nh, >nh 10
#if  ( SYMM0 >= 4 )
            swap0(f[r^n1], f[k^n1]);
//            idx_swap(f, n1^k, n1^r);  // >nh, <nh 01
#endif //  ( SYMM0 >= 4 )
        }

        { // scan for lowest unset bit of k:
            ulong m = 2,  i = 1;
            while ( m & k )  { m <<= 1;  ++i; }
            r ^= x[i];
        }
        ++k;

        // ----- k%4 == 2:
        if ( r>k )
        {
            swap(f[k], f[r]);  // <nh, <nh 11
#if  ( SYMM0 >= 2 )
//            idx_swap(f, n1^k, n1^r); // >nh, >nh 00
#endif //  ( SYMM0 >= 2 )
        }

        r ^= nh;
        ++k;

        // ----- k%4 == 3:
        if ( r>k )
        {
            swap0(f[k], f[r]);    // <nh, >nh 10
#if  ( SYMM0 >= 4 )
            swap0(f[k^nx1], f[r^nx1]);    // <nh, >nh 10
//            idx_swap(f, nx1^k, nx1^r);   // <nh, >nh 10
#endif //  ( SYMM0 >= 4 )
        }

        { // scan for lowest unset bit of k:
            ulong m = 4,  i = 2;
            while ( m & k )  { m <<= 1;  ++i; }
            r ^= x[i];
        }
        ++k;
    }
}
// =========================
#undef  idx_swap

#endif // !defined __REVBINPERMUTE0_H
