// -*- C -*-
/*
Date: Sat, 3 Apr 93 14:53:25 PST
From: bryant@sioux.stanford.edu (Bryant Marks)
Subject: Re:  SVD


> Hi!   Long ago you sent me an svd routine in C based on code
> from Nash in Pascal.  Has this changed any over the years?  (Your
> email is dated July 1992).  Is your code available by anon ftp?

Hi Ajay,

I don't think I have changed the code -- but here's my most recent
version of the code, you can check to see if it's any different.
Currently it's not available via anonymous ftp but feel free to
redistribute the code -- it seems to work well in the application
I'm using it in.


Bryant
*/

/* This SVD routine is based on pgs 30-48 of "Compact Numerical Methods
   for Computers" by J.C. Nash (1990), used to compute the pseudoinverse.
   Modifications include:
        Translation from Pascal to ANSI C.
        Array indexing from 0 rather than 1.
        Float replaced by double everywhere.
        Support for the Matrix structure.
        I changed the array indexing so that the matricies (float [][])
           could be replaced be a single list (double *) for more
           efficient communication with Mathematica.
*/

#define TOLERANCE 1.0e-22

void SVD(double *W, double *Z, int nRow, int nCol)
{
    int i, j, k, EstColRank, RotCount, SweepCount, slimit;
    double eps, e2, tol, vt, p, h2, x0, y0, q, r, c0, s0, c2, d1, d2;
    eps = TOLERANCE;
    slimit = nCol/4;
    if (slimit < 6.0)
        slimit = 6;
    SweepCount = 0;
    e2 = 10.0*nRow*eps*eps;
    tol = eps*.1;
    EstColRank = nCol;
    for (i=0; i<nCol; i++)
        for (j=0; j<nCol; j++)
        {
            W[nCol*(nRow+i)+j] = 0.0;
            W[nCol*(nRow+i)+i] = 1.0;
        }
    RotCount = EstColRank*(EstColRank-1)/2;
    while (RotCount != 0 && SweepCount <= slimit)
    {
        RotCount = EstColRank*(EstColRank-1)/2;
        SweepCount++;
        for (j=0; j<EstColRank-1; j++)
        {
            for (k=j+1; k<EstColRank; k++)
            {
                p = q = r = 0.0;
                for (i=0; i<nRow; i++)
                {
                    x0 = W[nCol*i+j]; y0 = W[nCol*i+k];
                    p += x0*y0; q += x0*x0; r += y0*y0;
                }
                Z[nCol*j+j] = q; Z[nCol*k+k] = r;
                if (q >= r)
                {
                    if (q<=e2*Z[0] || fabs(p)<=tol*q) RotCount--;
                    else
                    {
                        p /= q; r = 1 - r/q; vt = sqrt(4*p*p+r*r);
                        c0 = sqrt(fabs(.5*(1+r/vt))); s0 = p/(vt*c0);
                        for (i=0; i<nRow+nCol; i++)
                        {
                            d1 = W[nCol*i+j]; d2 = W[nCol*i+k];
                            W[nCol*i+j] = d1*c0+d2*s0; W[nCol*i+k] = -d1*s0+d2*c0;
                        }
                    }
                }
                else
                {
                    p /= r; q = q/r-1; vt = sqrt(4*p*p+q*q);
                    s0 = sqrt(fabs(.5*(1-q/vt)));
                    if (p<0) s0 = -s0;
                    c0 = p/(vt*s0);
                    for (i=0; i<nRow+nCol; i++)
                    {
                        d1 = W[nCol*i+j]; d2 = W[nCol*i+k];
                        W[nCol*i+j] = d1*c0+d2*s0; W[nCol*i+k] = -d1*s0+d2*c0;
                    }
                }
            }
        }
        while (EstColRank>=3 && Z[nCol*(EstColRank-1)+(EstColRank-1)]<=Z[0]*tol+tol*tol)
            EstColRank--;
    }
#if DEBUG
    if (SweepCount > slimit)
        fprintf(stderr, "Sweeps = %d\n", SweepCount);
#endif
}



/*
---------------------------------------------------------------------------

The older version (using matrix addressing) is:

Date: Tue, 29 Oct 91 09:04:40 PST
From: bryant@sioux.stanford.edu (Bryant Marks)
Subject: SVD

Thanx for the help, the SVD route to the pseudoinverse works like a charm!!

I did find another SVD algorithm in JC Nash's "Compact Numerical Methods for
Computers" (I think it was published in 1990).  I was not able to get the
routine in NR to work so I couldn't get a timing comparison between the two
of them.  I've checked Nash's routine against Mathematica and it's been doing
fine so far with the matricies I've tested.  I've translated Nash's code from
Pascal to C, and I've made a few minor modifications; here's the function along
with a driver program.  Nash's program is well commented but I've left these
out because I can't type fast.

A couple of things to note: A needs to have twice as much room allocated for
it (2*n + 2*m) since the W in the svd function requires this (part of a
rotation algorithm).  After the routine has run W contains two maticies
of the decomposition  A = USV'.  The first nRow rows contain the product US
and the next nCol rows contain V (not V').  Z is equal to the vector of the
sqares of the diagonal elements of S.

compile file with : gcc -o svd svd.c -lm

Let me know if you have any difficulties,
Thanx again,
Bryant
*/


#include <stdio.h>
#include <math.h>

void svd(double [][], double [], int, int);

main()
{
    int i, j, nRow = 2, nCol = 2;
    double a[4][4], z[2];
    /*
      a[0][0]=0; a[0][1]=0; a[0][2]=0;
      a[1][0]=0; a[1][1]=0; a[1][2]=0;
      a[2][0]=0; a[2][1]=0; a[2][2]=0;
      */
    /*
      a[0][0]=5; a[0][1]=.000001; a[0][2]=1;
      a[1][0]=6; a[1][1]=.999999; a[1][2]=1;
      a[2][0]=7; a[2][1]=2.00001; a[2][2]=1;
      a[3][0]=8; a[3][1]=2.9999;  a[3][2]=1;
      */

    a[0][0]=1; a[0][1]=3;
    a[1][0]=-4; a[1][1]=3;

    svd(a, z, nRow, nCol);
    for (i=0; i<3; i++)
        printf("%f ", sqrt(z[i]));
    printf("\n");
    for (i=0; i<2*nRow; i++)
    {
        for (j=0; j<nCol; j++)
            printf("%f ", a[i][j]);
        printf("\n");
    }
}

void svd(double W[][4], double Z[], int nRow, int nCol)
{
    int i, j, k, EstColRank, RotCount, SweepCount, slimit;
    double eps, e2, tol, vt, p, h2, x0, y0, q, r, c0, s0, c2, d1, d2;
    eps = .0000001;  /* some small tolerance value */
    slimit = nCol/4;
    if (slimit < 6.0)
        slimit = 6;
    SweepCount = 0;
    e2 = 10.0*nRow*eps*eps;
    tol = eps*.1;
    EstColRank = nCol;
    for (i=0; i<nCol; i++)
        for (j=0; j<nCol; j++)
        {
            W[nRow+i][j] = 0.0;
            W[nRow+i][i] = 1.0;
        }
    RotCount = EstColRank*(EstColRank-1)/2;
    while (RotCount != 0 && SweepCount <= slimit)
    {
        RotCount = EstColRank*(EstColRank-1)/2;
        SweepCount++;
        for (j=0; j<EstColRank-1; j++)
	{
            for (k=j+1; k<EstColRank; k++)
	    {
                p = q = r = 0.0;
                for (i=0; i<nRow; i++)
		{
                    x0 = W[i][j]; y0 = W[i][k];
                    p += x0*y0; q += x0*x0; r += y0*y0;
		}
                Z[j] = q; Z[k] = r;
                if (q >= r)
		{
                    if (q<=e2*Z[0] || fabs(p)<=tol*q) RotCount--;
                    else
		    {
                        p /= q; r = 1 - r/q; vt = sqrt(4*p*p+r*r);
                        c0 = sqrt(.5*(1+r/vt)); s0 = p/(vt*c0);
                        for (i=0; i<nRow+nCol; i++)
			{
                            d1 = W[i][j]; d2 = W[i][k];
                            W[i][j] = d1*c0+d2*s0; W[i][k] = -d1*s0+d2*c0;
			}
		    }
		}
                else
		{
                    p /= r; q /= (r-1); vt = sqrt(4*p*p+q*q);
                    s0 = sqrt(.5*(1-q/vt));
                    if (p<0) s0 = -s0;
                    c0 = p/(vt*s0);
                    for (i=0; i<nRow+nCol; i++)
		    {
                        d1 = W[i][j]; d2 = W[i][k];
                        W[i][j] = d1*c0+d2*s0; W[i][k] = -d1*s0+d2*c0;
		    }
		}
	    }
	}
        fprintf(stderr, "Sweep = %d  # of rotations performed = %d\n", SweepCount, RotCount);
        while (EstColRank>=3 && Z[EstColRank]<=Z[0]*tol+tol*tol)
            EstColRank--;
    }
    if (SweepCount > slimit)
        fprintf(stderr, "Sweep Limit exceeded\n");
}


/*
---------------------------------------------------------------------------

/*
Newsgroups: sci.math.num-analysis
From: bcollett@hamilton.edu (Brian Collett)
Subject: Re: SVD algorithm of Numerical Recipes
Date: Wed, 14 Apr 1993 13:51:28 GMT
*/


/*
The calls to parray were just debugging calls to a routine that could print
the contents of an array in a form that a human could read. I have had no
problems with this code.
Hope that helps.
 Brian.
 */

/*
*       This is a translation of the Singular Value Decomposition algorithm of
*       Golub and Reinsch (Numerische Mathematik 14(1970) pp403-470) from
*       Algol 60 to C. The routine takes a single matrix (A) and computes two
*       additional matrices (U and V) and a vector W such that
*               A = UWV'
*       The calling sequence is
*       svd(a,m,n,u,w,v,eps,flags)
*       where
*       a is the original, m by n matrix
*       u is the upper resultant
*       w is the vector of singular values
*       v is the lower resultant
*       eps is a convergence test constant
*       flags tells what to do, values:-
*                                       0 produce only w
*                                       1 produce w and u
*                                       2 produce w and v
*                                       3 produce u, w, and v.
*       NOTE m must be greater than n or an error is signaled
*
*       BC 5/11/87
*       Moved to 0->n-1 indices for vectors BC 5/12/87
*/
#define WITHV   1
#define WITHU   2

int svd(double *a, int m, int n,
        double *u, double *w, double *v,
        double eps,
        int flags, double *temp)
// double *a,                      /* The original matrix [m,n] */
//        *u,                      /* The new upper matrix [m,n] */
//        *v,                      /* The new lower matrix [m,n] */
//        *w,                      /* The vector of singular values [n] */
//        *temp,                   /* A temporary vector */
//         eps;                    /* Convergence factor */
// int m,                          /* Number of rows */
//     n,                          /* Number of columns (n <= m) */
//     flags;                      /* Flags controlling what gets computed. */
{
    extern double TINY;     /* The smallest representable value */
    int i,j,k,l,l1;         /* Mostly loop variables */
    double tol = TINY / eps;/* tells about machine tolerance */
    double c,f,g,h,s,x,y,z; /* Temporaries */

    if (m < n)  return(-1);

    /*
     *       First copy a into u.
     */
    for (i = 0; i <= m * n; ++i)  u[i] = a[i];

    /*parray("initial array",u,n,m);
      /*
       *       Reduce the u matrix to bidiagonal form with Householder transforms.
       */

    g = (x = 0.0);
    for(i = 0; i < n; ++i)
    {
        temp[i] = g;
        s = 0.0;
        l = i + 1;

        for (j = i; j < m; j++)  s += u[j * n + i] * u[j * n + i];

        if (s < tol)  g = 0.0;
        else
        {
            f = u[i * n + i];
            g = (f < 0.0) ? sqrt(s) : -sqrt(s);
            h = f * g - s;
            u[i * n + i] = f - g;

            for(j = l; j < n; ++j)
            {
                s = 0.0;
                for(k = i; k < m; ++k)  s += u[k * n + i] * u[k * n + j];

                f = s / h;

                for(k = i; k < m; ++k)  u[k * n + j] += f * u[k * n + i];
            }
            /*parray("First loop u =",u,n,m);*/
        }

        w[i] = g;
        s = 0.0;

        for (j = l; j < n; ++j)  s += u[i * n + j] * u[i * n + j];

        if (s < tol)  g = 0.0;
        else
        {
            f = u[i * n + i + 1];
            g = (f < 0.0) ? sqrt(s) : -sqrt(s);
            h = f * g - s;
            u[i * n + i + 1] = f - g;

            for (j = l; j < n; ++j)  temp[j] = u[i * n + j] / h;

            for (j = l; j < m; ++j)
            {
                s = 0.0;

                for (k = l; k < n; ++k)  s += u[j * n + k] * u[i*n+k];

                for (k = l; k < n; ++k)  u[j*n+k] += s * temp[k];
            }
            /*parray("Second loop u = ",u,n,m);*/
        }
        y = fabs(w[i]) + fabs(temp[i]);
        if (y > x)  x = y;
    }
    /*parray("after bidiag u =",u,n,m);
      /*
       *       Now accumulate right-hand transforms if we are building v too.
       */
    if (flags & WITHV)
    {
        for (i = n - 1; i >= 0; --i)
        {
            if (g != 0.0)
            {
                h = u[i * n + i + 1] * g;

                for (j = l; j < n; ++j)  v[j * n + i] = u[i * n + j] / h;

                for (j = l; j < n; ++j)
                {
                    s = 0.0;

                    for (k = l; k < n; ++k)  s += u[i * n + k] * v[k * n + j];

                    for (k = l; k < n; ++k)  v[k * n + j] += s * v[k * n + i];
                }
            }

            for (j = l; j < n; ++j)  v[i * n + j] = (v[j * n + i] = 0.0);

            v[i * n + i] = 1.0;
            g = temp[i];
            l = i;
        }
    }
    /*parray("Computed v =",v,n,m);
      /*
       *       Now accumulate the left-hand transforms.
       */
    if (flags & WITHU)
    {
        for (i = n - 1; i >= 0; --i)
        {
            l = i + 1;
            g = w[i];

            for (j = l; j < n; ++j)  u[i * n + j] = 0.0;

            if (g != 0.0)
            {
                h = u[i * n + i] * g;

                for (j = l; j < n; ++j)
                {
                    s = 0.0;

                    for (k = l; k < m; ++k)  s += u[k * n + i] * u[k * n + j];

                    f = s / h;

                    for (k = i; k < m; ++k)  u[k * n + j] += f * u[k * n + i];
                }

                for (j = i; j < m; ++j)  u[j * n + i] /= g;
            }
            else
            {
                for (j = i; j < m; ++j)  u[j * n + i] = 0.0;
            }

            u[i * n + i] += 1.0;
        }
    }
    /*parray("Computed u =",u,n,m);
      /*
       *       Now diagonalise the bidiagonal form. BEWARE GOTO's IN THE LOOP!!
       */
    eps = eps * x;
    for (k = n - 1; k >= 0; --k)
    {

    testsplitting:
        for (l = k; l >= 0; --l)
        {
            if (fabs(temp[l]) <= eps)  goto testconvergence;
            if (fabs(w[l - 1]) <= eps)  goto cancellation;
        }
        /*
         *               Cancellation of temp[l] if l > 0;
         */

    cancellation:
        c = 0.0;
        s = 1.0;
        l1 = l - 1;

        for (i = l; i <= k; ++i)
        {
            f = s * temp[i];
            temp[i] *= c;
            if (fabs(f) <= eps)  goto testconvergence;
            g = w[i];
            h = (w[i] = sqrt(f * f + g * g));
            c = g/h;
            s = -f/h;
            if (flags & WITHU)
            {
                for (j = 0; j < m; ++j)
                {
                    y = u[j * n + l1];
                    z = u[j * n + i];
                    u[j * n + l1] = y * c + z * s;
                    u[j * n + i] = -y * s + z * c;
                }
            }
        }

    testconvergence:
        /*
          parray("at test conv u =",u,n,m);
          parray("w = ",w,1,m);
          parray("v =",v,n,m);
          */
        z = w[k];
        if (l == k) goto convergence;
        /*
         *               Shift from bottom 2x2 minor.
         */
        x = w[l];
        y = w[k - 1];
        g = temp[k - 1];
        h = temp[k];
        f = ((y - z)*(y + z) + (g - h)*(g + h)) / (2 * h * y);
        g = sqrt(f * f + 1);
        f = ((x - z)*(x + z) + h*(y/((f < 0.0)?f-g:f+g) - h)) / x;
        /*
         *               Next QR transformation.
         */
        c = (s = 1);

        for (i = l + 1; i <= k; ++i)
        {
            g = temp[i];
            y = w[i];
            h = s * g;
            g *= c;
            temp[i - 1] = (z = sqrt(f * f + h * h));
            c = f / z;
            s = h/z;
            f = x * c + g * s;
            g = -x * s + g * c;
            h = y * s;
            y *= c;

            if (flags & WITHV)
            {
                for (j = 0; j < n; ++j)
                {
                    x = v[j * n + i - 1];
                    z = v[j * n + i];
                    v[j * n + i - 1] = x * c + z * s;
                    v[j * n + i] = -x * s + z * c;
                }
            }

            w[i - 1] = (z = sqrt(f * f + h * h));
            c = f / z;
            s = h / z;
            f = c * g + s * y;
            x = -s * g + c * y;

            if (flags & WITHU)
            {
                for (j = 0 ; j < m; ++j)
                {
                    y = u[j * n + i - 1];
                    z = u[j * n + i];
                    u[j * n + i - 1] = y * c + z * s;
                    u[j * n + i] = -y * s + z * c;
                }
            }
        }

        temp[l] = 0.0;
        temp[k] = f;
        w[k] = x;
        goto testsplitting;

    convergence:
        if (z < 0.0)
        {
            /*
             *                       w[k] is made non-negative.
             */
            w[k] = -z;
            if (flags & WITHV)
            {
                for (j = 0; j < n; ++j)  v[j * n + k] = -v[j * n + k];
            }
        }
    }
}
