function [d,a] =  training(a,d)
% TRAINING  Greedy, one-coefficient-at-a-time fit over a dictionary of kernels.
%
%   [d,a] = training(a,d)
%
%   For every width in a.sigmas a kernel matrix on the training data is
%   built (with ridges(i) added on its diagonal) and the matrices are
%   stacked vertically into K, so each row of K is one candidate basis
%   function. For a.max_loops rounds the routine
%     1. scores the candidate rows by the squared residual that would be
%        left after fitting a single least-squares coefficient for that row,
%     2. picks the row with the smallest score,
%     3. refits that row's coefficient alpha(k) against the residual with
%        the row's previous contribution added back, and
%     4. updates the residual r.
%
%   Inputs
%     a : algorithm object. Fields read here: a.sigmas, a.ridges, a.dtst,
%         a.child.ker, a.max_loops, a.basis_cv, a.plotting, a.pause and
%         a.algorithm.do_not_evaluate_training_error.
%           a.basis_cv == 0 : fit and score each candidate on all points.
%           otherwise       : for each candidate draw a fresh random half
%                             split; fit on one half, score on the other.
%           a.plotting      : clf is called up front when > 0; progress is
%                             printed/plotted on rounds where
%                             mod(i,a.plotting)==0.
%     d : training data object; d.X and d.Y are read directly.
%         NOTE(review): the matrix products below only work if d.Y is a
%         column with as many entries as calc(kk,d) has columns, and
%         get_dim(d) is assumed to return that same count - confirm.
%
%   Outputs
%     a : input object with a.alpha (coefficients, get_dim(d)*kdict-by-1),
%         a.Xsv (the training data d as passed in) and a.res (one row per
%         round: [training squared error, test squared error]) set.
%     d : set_x(d,get_y(d)) when
%         a.algorithm.do_not_evaluate_training_error==1, else test(a,d).

dtrn = d;
dtst = a.dtst;
sigmas = a.sigmas;
ridges = a.ridges;
kdict = length(sigmas);          % number of kernels in the dictionary
n = get_dim(dtrn);

% Build the dictionary: K stacks the (ridge-regularised) training kernels
% vertically, Kt places the train/test kernels side by side so that
% Kt*alpha gives the test predictions.
K = []; Kt = [];
for i = 1:kdict
    kk = kernel(a.child.ker, sigmas(i));
    K  = [K; calc(kk,dtrn) + ridges(i)*eye(n)]; %#ok<AGROW>
    Kt = [Kt calc(kk,dtrn,dtst)]; %#ok<AGROW>
end

r = dtrn.Y;                      % residual; all coefficients start at zero

if a.plotting>0, clf; end
res = [];

alpha = zeros(n*kdict,1);

nrows = size(K,1);
pnum  = min(10000, nrows);       % at most 10000 candidate rows per round
rownorm2 = sum(K.^2, 2);         % squared row norms; K never changes

for i = 1:a.max_loops

    % candidate rows examined this round
    p = randperm(nrows); p = p(1:pnum);

    % NOTE(review): the result of this draw is never used. It is kept only
    % so that the random stream, and therefore results obtained with a
    % fixed seed, stay the same as before. Remove once that no longer matters.
    unused_draw = randperm(length(r)); %#ok<NASGU>

    if a.basis_cv==0
        % Fit and score on all points. With c the least-squares coefficient
        % of row g against r, the score is ||r - c*g||^2 expanded as
        % r'r - 2*c*(g*r) + c^2*||g||^2.
        Kr     = K(p,:)*r;
        g2     = rownorm2(p,1);
        coef   = Kr ./ g2;
        scores = dot(r,r) - 2*Kr.*coef + g2.*(coef.^2);
    else
        % Fit each candidate on one random half, score it on the other.
        scores = zeros(1,pnum);
        for j = 1:pnum
            ps  = p(j);
            trn = randperm(length(r));
            e   = floor(length(r)/2);
            tst = trn(1:e); trn = trn(e+1:end);
            coef     = (K(ps,trn)*r(trn)) ./ sum(K(ps,trn).^2,2);
            sc_gtst2 = sum(K(ps,tst).^2,2);
            sc_rgtst = K(ps,tst)*r(tst);
            scores(j) = dot(r(tst),r(tst)) - 2*sc_rgtst.*coef + sc_gtst2.*(coef.^2);
        end
    end

    [best_score,k] = min(scores); %#ok<ASGLU>
    k = p(k);                    % map position within p back to a row of K

    % Add the old contribution of row k back to the residual, refit its
    % coefficient by least squares, then remove the new contribution.
    r = r + alpha(k) * K(k,:)';
    alpha(k) = sum( r .* K(k,:)' ) / sum( K(k,:).^2 );
    r = r - alpha(k) * K(k,:)';

    % store train and test error (computed once, reused for display)
    pred_tst = Kt*alpha;
    trn_err  = sum(r.^2);
    tst_err  = sum((dtst.Y - pred_tst).^2);
    res = [res; trn_err tst_err]; %#ok<AGROW>

    if mod(i,a.plotting)==0 % if plotting on round a.plotting
        disp(sprintf('%d: trn=%g tst=%g  ',i,trn_err,tst_err));

        % top panel: test predictions vs. test targets, training points,
        % and (green circles) training points with a non-zero coefficient
        subplot(2,1,1);
        plot(dtst.X,pred_tst','b-',dtst.X,(dtst.Y)','r:')
        hold on;  plot(dtrn.X,dtrn.Y,'r*'); hold off;
        alpha1 = reshape(alpha,length(r),kdict)';   % one row per kernel
        % Sum explicitly over kernels (dimension 1): without the dimension
        % argument a single-kernel dictionary (1-by-n alpha1) would be
        % collapsed to a scalar instead of a per-point total.
        alp  = sum(abs(alpha1),1);
        used = alp>0;
        hold on;  plot(dtrn.X(used,:),dtrn.Y(used,:),'og'); hold off;
        axis([-16 16 -1 1]);

        % bottom panel: non-zero coefficients, one colour per kernel
        subplot(2,1,2);
        col = 'ygcbk';
        for j = 1:kdict
            x = dtrn.X; y = alpha1(j,:); f = find(abs(y)>0); x = x(f); y = y(f);
            % cycle through the colours so more than length(col) kernels
            % do not index past the end of col
            c = col(mod(j-1,length(col))+1);
            stem(x,y,[c '-']); hold on;
        end
        plot(dtrn.X,dtrn.Y,'c.'); hold on;
        title('alphas (weights)'); hold off;
        axis([-16 16 -1 1]);
        drawnow;
        if a.pause==1, pause; end
    end
end

a.alpha = alpha;
a.Xsv   = d;
a.res   = res;

if a.algorithm.do_not_evaluate_training_error==1
    d = set_x(d,get_y(d));
else
    d = test(a,d);
end
 