function [d,a] =  training(a,d)
% TRAINING  Train the child model when required, then compute a sparser
% coefficient matrix that approximates the child's expansion.
%
%   [d,a] = training(a,d)
%
%   Inputs
%     a : model object. Fields read here: child, alpha, lambda, tolerance,
%         max_loops, reoptimize, reoptimize_b and
%         algorithm.do_not_evaluate_training_error.
%     d : data object handed to train/test.
%
%   Outputs
%     d : test(a,d), or d with its outputs replaced by its labels when
%         a.algorithm.do_not_evaluate_training_error==1.
%     a : the model with Xsv, alpha, b0 and w2 (and possibly lambda, child)
%         updated.
%
%   The child is (re)trained when the child has no alpha yet, or when this
%   model already has an alpha (i.e. a retrain). When the child has an
%   alpha and this model has none, only the compression step is run.

if isempty(a.child.alpha) | ~isempty(a.alpha) % train algorithm first
   [r a.child]=train(a.child,d);
end

alpha=a.child.alpha;

% Keep only the rows of alpha whose summed absolute value exceeds 1e-5 and
% build the kernel matrix on the corresponding expansion vectors.
svs=find(sum(abs(alpha)',1)>1e-5);
origsvs=svs;                       % needed later to scatter results back
K=calc(a.child.child,get(a.child.Xsv,svs));
alpha=alpha(svs,:);
origalpha=alpha;
newalpha=alpha*0;                  % result if the loop exits before any solve

% Automatic lambda: must be computed here, after K exists and alpha has
% been restricted to the same rows K was built from.
if a.lambda==-1  % set automatically
    a.lambda=mean(diag(0.001*alpha'*K*alpha));
end

loops=1;
disp('compressing..');
c=ones(size(alpha,1),1);           % per-vector scaling factors, start at 1
%c= mean(abs(alpha)) ./ abs(alpha); c= 1./c;

while 1
   % Active set: vectors whose scaling is within a factor 1000 of the largest.
   svs=find(abs(c)>max(abs(c))/1000);
   if isempty(svs)
       break;
   end

   % Kernel blocks rescaled by the current scaling factors.
   Ksv=K(svs,svs);
   Ksv = Ksv .* (c(svs)*c(svs)');
   K2=K(svs,:) .* repmat(c(svs),1,size(alpha,1));

   if a.lambda==0
       % No penalty: solve Ksv*beta = K2*origalpha directly.
       beta=minres(Ksv,K2*origalpha,1e-6,length(svs));
   else
       Ksv=Ksv+eye(length(Ksv))*1e-2;   % ridge added to the diagonal
       % beta is split into two halves that are differenced after the
       % solve; a.lambda scaled by c enters the linear term of both halves.
       [beta,lag,dual] = quadsolve(2*[Ksv -Ksv ; -Ksv Ksv ] ...
           + eye(2*length(Ksv))*0e-5 ,...
           [-2*K2*origalpha+c(svs)*a.lambda ; ...
            +2*K2*origalpha+c(svs)*a.lambda ],[],[],10*max(abs(origalpha)));
       beta= beta(1:length(svs)) - beta(length(svs)+1:end);
   end

   % calc actual alphas (could do this at end really)
   newalpha=[];
   for j=1:size(alpha,2)
       col=alpha(:,1)*0;
       col(svs)= c(svs).*beta(:,j);
       newalpha=[newalpha  col];
   end

   % Stop once the averaged multipliers that are not negligible are close
   % to 1 in magnitude.
   % NOTE(review): max() is taken before the outer abs(), so a single
   % multiplier near 1 can end the loop while others are still well below
   % 1 - confirm this is intended rather than max(abs(abs(b)-1)).
   meanbeta=mean(beta',1);
   f=find(abs(meanbeta)>0.01);
   if abs(max(abs(meanbeta(f))-1))<a.tolerance
       break;
   end

   %% printing stuff
   w2=[];
   for i=1:size(alpha,2)
       diffi=alpha(:,i)-newalpha(:,i);
       w2(i)=diffi'*K*diffi;
   end
   a.w2=w2; % store w2
   txt='iteration %d : max||w_orig-w_new||^2=%1.3f svs=%d';
   disp(sprintf(txt,[loops max(w2) length(svs)]));

   loops=loops+1;
   c(svs)=c(svs).* abs(meanbeta');   % shrink/grow scalings by |mean beta|
   if loops>a.max_loops
       break;
   end
end

if a.reoptimize  % reoptimize alphas
    for i=1:size(newalpha,2)
        % Refit each column using only its surviving vectors.
        svs2=find(abs(newalpha(:,i))>max(abs(newalpha(:,i)))/1000);
        newalpha(:,i)=newalpha(:,i)*0;
        if ~isempty(svs2)
            Ksv=K(svs2,svs2);
            K2=K(svs2,:);
            %minv=inv(Ksv+eye(length(svs2))*1e-6);beta2=minv*K2*alpha(:,i);
            beta2=minres(Ksv,K2*alpha(:,i),1e-6,length(Ksv));
            newalpha(svs2,i)=beta2;
        end
    end
end

% Final per-column value of (alpha-newalpha)'*K*(alpha-newalpha).
w2=[];
for i=1:size(alpha,2)
    diffi=alpha(:,i)-newalpha(:,i);
    w2(i)=diffi'*K*diffi;
end
a.w2=w2; % store w2

a.Xsv=a.child.Xsv;  % these 3 lines if you don't want to compress alphas in model (why?)
a.alpha=a.child.alpha*0;
a.alpha(origsvs,:)=newalpha;
% uncommented above for usage with the rss_cascade
% a.Xsv=get(d,origsvs);a.alpha=newalpha;

% Copy the child's offset when it has one, otherwise use 0.
try
    a.b0=a.child.b0;
catch
    a.b0=0;
end

if a.reoptimize_b % reoptimize b // only for pat. rec. right now
   % Evaluate with zero offset, then choose one offset per output column.
   a.b0=0; r=test(a,d); a.b0=[];
   for i=1:size(newalpha,2)
       [x s2]=sort(r.X(:,i)); y=r.Y(s2,i);
       xs=cumsum(y(end:-1:1)==1); %[cumsum(y==-1)+x(end:-1:1) y]
       %plot(cumsum(y==-1)+x(end:-1:1))
       % Count of y==-1 up to each sorted position plus count of y==1
       % from that position on; the offset is minus the output at the
       % position where this sum is largest.
       [m1 m2]=max(cumsum(y==-1)+xs(end:-1:1));
       a.b0=[a.b0 -x(m2)];
       %if m2+1<=size(x,1) a.b0=[a.b0 -(x(m2)+x(m2+1))/2]; end;
   end
end

if a.algorithm.do_not_evaluate_training_error==1
  d=set_x(d,get_y(d));
else
  d=test(a,d);
end
 
