function [results,a] =  training(a,d)  

% [results,algorithm] = training(algorithm,data)
%
% Fits a linear map W = O*S*Q' such that Y is approximated by W*X, where
% X = get_x(d)' and Y = get_y(d)'. The three factors are updated in turn
% (O, then Q, then S); each update takes a direction computed from the
% current factors and a step length chosen by lsqnonlin on the residual
% Y - O*S*Q'*X. The loop stops once the Frobenius norm of that residual
% changes by no more than a.conv_eps between two consecutive sweeps.
%
% Fields of the algorithm object read here:
%   a.k                   requested rank (reduced if it exceeds the input
%                         or output dimension)
%   a.ostart, a.sstart,   optional starting values for O, S and Q; an empty
%   a.qstart              value selects a random orthonormal O/Q, zero S
%   a.gamma               weight of the S term in the S update direction
%   a.conv_eps            convergence threshold
%   a.algorithm.verbosity progress is printed when this equals 1
%
% Fields written: a.o, a.s, a.q (final factors) and a.w = O*S*Q'.
%
% results is whatever test(a,d) returns for the trained object.
%
% NOTE(review): the loop has no iteration cap; it only ends through the
% convergence test.

disp(['training ' get_name(a) '.... '])  

% Transposed so that the first dimension of X and Y is the one the factors
% act on (presumably features x examples - confirm against get_x/get_y).
X=get_x(d)';
Y=get_y(d)';

idim=size(X,1);
odim=size(Y,1);

verbose=a.algorithm.verbosity;

% The rank cannot exceed either dimension.
r=min([a.k, idim, odim]);

if r ~=a.k
    disp('WARNING : Changing rank, because input or output has less dimensions than actual rank!')
end

% Starting values. The order O, S, Q is kept so that the random draws for
% O and Q happen in the same sequence as before.
if(isempty(a.ostart))
    O0=orth(randn(odim,r)); 
else
    O0=a.ostart;
end
if(isempty(a.sstart))
    S0=zeros(r,r); 
else
    S0=a.sstart;
end

if(isempty(a.qstart))
    Q0=orth(randn(idim,r)); 
else
    Q0=a.qstart;
end

% Make sure the outputs are defined even when the loop body never runs
% (conv_eps equal to Inf or NaN makes the very first test fail).
O1=O0;
S1=S0;
Q1=Q0;

difference = Inf; 

lambda =a.gamma;

options=optimset;
options.Display='off';

epsilon=a.conv_eps;

while difference>epsilon 

    % Map at the start of this sweep, used for the convergence test below.
    Z= O0*S0*Q0';

    % --- update O: direction, removal of the O0*G'*O0 component, line search
    GradientP =-(- Y*X'* Q0*S0);
    GradientP = GradientP - O0*GradientP'*O0;
    t0=lsqnonlin(@calcT1,0,[],[],options,GradientP,O0,S0,Q0,X,Y);
    % Only the moved point is needed; asking for a single output lets
    % faddgradient skip computing its second return value.
    O1=faddgradient(O0,GradientP,real(t0));        
    O0=O1;

    % --- update Q, using the O that was just updated
    GradientQ =-( -  X*Y'*O0*S0  +  X*X'*Q0*S0*S0); 
    GradientQ = GradientQ - Q0*GradientQ'*Q0;
    t0=lsqnonlin(@calcT2,0,[],[],options,GradientQ,O0,S0,Q0,X,Y);    
    Q1=faddgradient(Q0,GradientQ,real(t0));  
    Q0=Q1;

    % --- update S: the data terms contribute only their diagonals, plus
    % lambda*S0; the step length is bounded below by 0.
    GradientS =-(-  diag(diag( Q0' * X* Y'* O0))  +  diag(diag(S0*Q0'*X*X'*Q0)) + lambda *S0);
    t0=lsqnonlin(@calcT3,0,0,[],options,GradientS,O0,S0,Q0,X,Y); 
    S1=S0+real(t0)*GradientS ;

    Z1=O1*S1*Q1';

    % Change of the residual norm over this sweep.
    difference=abs( norm(Y-Z*X,'fro') -norm(Y-Z1*X,'fro')) ;

    % O0 and Q0 already hold O1 and Q1 at this point.
    S0=S1;

     if(verbose==1)
         fprintf('resiudal %f   ||delta||=%f  ||S||=%f \n',norm(Y-Z1*X,'fro'),difference,    norm(S1,'fro'))
     end

end

a.o=O1;
a.s=S1;
a.q=Q1;
a.w=O1*S1*Q1';

results=test(a,d);


function F=calcT1(t,GradientP,O0,S1,Q1,X,Y)
% Residual function for the step-length search on O.
%
%   t          candidate step length
%   GradientP  direction in which O0 is moved
%   O0         current O factor
%   S1, Q1     the S and Q factors, held fixed
%   X, Y       data matrices; the residual is Y - O*S*Q'*X
%
% Returns F, the residual stacked into a single column, evaluated at the
% point that faddgradient produces from O0, GradientP and t.

% Only the moved point is needed. Requesting a single output lets
% faddgradient skip its second return value on every evaluation.
O1=faddgradient(O0,GradientP,t);    
F=reshape(Y-O1*S1*Q1'*X,size(Y,1)*size(Y,2),1);


function F=calcT2(t,GradientQ,O1,S1,Q0,X,Y)
% Residual function for the step-length search on Q.
%
%   t          candidate step length
%   GradientQ  direction in which Q0 is moved
%   O1, S1     the O and S factors, held fixed
%   Q0         current Q factor
%   X, Y       data matrices; the residual is Y - O*S*Q'*X
%
% Returns F, the residual stacked into a single column, evaluated at the
% point that faddgradient produces from Q0, GradientQ and t.

% Only the moved point is needed. Requesting a single output lets
% faddgradient skip its second return value on every evaluation.
Q1=faddgradient(Q0,GradientQ,t);    
F=reshape(Y-O1*S1*Q1'*X,size(Y,1)*size(Y,2),1);

% if(val<0)
%     val=100;
% end


function F=calcT3(t,GradientS,O1,S0,Q1,X,Y)
% Residual function for the step-length search on S.
%
%   t          candidate step length
%   GradientS  direction added to S0
%   O1, Q1     the O and Q factors, held fixed
%   S0         current S factor
%   X, Y       data matrices; the residual is Y - O*S*Q'*X
%
% Returns F, the residual at S0 + t*GradientS stacked into one column.

steppedS=S0+t*GradientS;
residual=Y-O1*steppedS*Q1'*X;
F=reshape(residual,numel(Y),1);


function [Yt,Ht]=faddgradient(Y,H,t)
% Moves the point Y by step t in direction H and optionally carries H along.
%
%   Y   starting matrix
%   H   direction matrix, same size as Y
%   t   step length; defaults to 1 when omitted
%
% Returns
%   Yt  the moved point
%   Ht  the direction after the move; only computed when the caller asks
%       for a second output
%
% The move is built from the matrix exponential of a 2p-by-2p block matrix,
% where p is the number of columns of Y.
% NOTE(review): this matches the usual geodesic formula for matrices with
% orthonormal columns - confirm that callers always pass such a Y.
% A simpler variant, Yt = Y*expm(t*skew(Y'*H)), was left disabled in the
% original source.

if nargin<3
    t=1;
end

p=size(Y,2);

% Skew-symmetric part of Y'*H, and an economy QR of what remains of H.
A=skew(Y'*H);
[Q,R]=qr(H-Y*A,0);

% Exponential of the block matrix; only its first p columns are needed.
expBlock=expm(t*[A,-R';R,zeros(p)]);
upperPart=expBlock(1:p,1:p);
lowerPart=expBlock(p+1:2*p,1:p);

Yt=Y*upperPart+Q*lowerPart;

if nargout>1
    Ht=H*upperPart-Y*(R'*lowerPart);
end

function A=sym(A)
% Returns the symmetric (Hermitian for complex input) part of A: (A+A')/2.
At=A';
A=(A+At)/2;

function A=skew(A)
% Returns the skew-symmetric (skew-Hermitian for complex input) part of A:
% (A-A')/2.
At=A';
A=(A-At)/2;