function [ret,a] =  training(a,d)
% TRAINING  Greedily fit a sparse kernel expansion to the targets of d.
%
%   [ret,a] = training(a,d)
%
%   Each of the a.max_loops iterations picks one training example k as a new
%   basis function, appends it to the index set I, and then updates the
%   coefficients alpha together with the residual r (targets minus current
%   prediction), either one coefficient at a time or by a ridge-regularised
%   joint solve over all selected examples.
%
%   Fields of a that are read here:
%     child                kernel object handed to calc()
%     ridge                ridge term added to the normal equations
%     max_loops            number of iterations
%     inversion_every      a joint solve is attempted every n-th iteration
%     selection_criterion  'kmp' or 'sparse_greedy'
%     optimizer            'iterative', 'inversion' or 'full_inversion'
%     trn_err_frac         fraction of the examples used to score candidates
%     verbosity            progress is printed every verbosity-th iteration
%     algorithm.do_not_evaluate_training_error
%
%   Outputs:
%     a    the input object with a.alpha (coefficients of the selected
%          examples) and a.Xsv (the selected examples) filled in
%     ret  test(a,d), or a data object built from the labels of d when
%          a.algorithm.do_not_evaluate_training_error==1
%
%   Side effect: the mean squared residual is plotted after every iteration.
%
%   NOTE(review): the indexing below assumes that calc(kernel,d1,d2) returns
%   one row per example of d2 and one column per example of d1, that
%   get_y(d) is a single column, and that the first output of get_dim(d) is
%   the number of examples -- confirm against those helpers.

tiny = 1e-23;      % keeps the divisions below away from a zero denominator
pnum = 60;         % number of candidate examples scored per iteration

r=get_y(d);        % residual; starts out as the raw targets
alpha=zeros(get_dim(d),1);
I=[]; I_old = [];  % selected indices now / at the last joint solve
K2 = []; K = [];   % cached kernel blocks, extended on every joint solve
ridge = a.ridge;
errtrack = [];     % mean squared residual per iteration (for the plot)

for i=1:a.max_loops

    % A joint solve is only attempted every a.inversion_every-th iteration.
    no_inversion = (mod(i,a.inversion_every)~=0);

    % p: random candidate examples, q: random examples used for scoring
    qnum=length(r);
    p=randperm(qnum);
    if no_inversion==0
        % Drop the examples that are already selected.  ismember keeps the
        % random order of p; setdiff would return the remaining indices
        % sorted, so the truncation below would always keep the lowest
        % indices instead of a random subset.
        p=p(~ismember(p,I));
    end
    p=p(1:min(pnum,length(p)));
    q=randperm(length(r));
    q=q(1:min(length(r),round(length(r)*a.trn_err_frac)));

    switch (a.selection_criterion)
        case 'sparse_greedy'
            if length(I) == 0
                % start from one random example
                tmp = randperm(length(r)); I = [tmp(1)];
            end
            Ksearch=calc(a.child,get(d,I),get(d,p)); % differs every iteration, not cached
            % candidate with the smallest summed absolute kernel value
            % against the examples selected so far
            [v k] = min(sum(abs(Ksearch),2));
            k=p(k);
        case 'kmp'
            Ksearch=calc(a.child,get(d,q),get(d,p)); % differs every iteration, not cached
            % Correlation of each candidate with the residual on q,
            % normalised by the candidate's norm on q.  Summing along
            % dimension 2 keeps one norm per candidate even when q holds a
            % single example.
            scores=(Ksearch*r(q))./ (sqrt(sum(Ksearch.^2,2))+tiny);
            [v k]=max(abs(scores)); k=p(k);
        otherwise
            error(['@kmp has no selction criterion named ' a.selection_criterion]);
    end

    % I is append-only; the kernel caching below relies on that.
    if isempty(intersect(I,k))
        I=[I k];
    end

    if strcmp(a.optimizer,'iterative') | no_inversion
        % Optimise alpha(k) alone: take its old contribution out of the
        % residual, refit it by 1-D least squares, put the new one back.
        Kk=calc(a.child,d,get(d,k))';
        r=r + (alpha(k)) * Kk;
        % tiny prevents a NaN coefficient when the kernel column is all zero
        alpha(k) = sum( r .* Kk ) / (sum( Kk.^2 ) + tiny);
        r=r - alpha(k) * Kk;
    end

    if no_inversion==0 & (strcmp(a.optimizer,'inversion') | ...
            strcmp(a.optimizer,'full_inversion') )
        disp(['Inverting with ridge=' num2str(ridge)])

        % Kernel caching: only the columns of the examples added since the
        % last joint solve are computed.  The result should equal
        % K2=calc(a.child,get(d,I),get(d)).
        I_tmp = I(length(I_old)+1:end);
        Ktmp =  calc(a.child,get(d,I_tmp),get(d));
        K2=[K2,Ktmp];

        if  strcmp(a.optimizer,'full_inversion')
            % Ridge-regularised normal equations over all examples.
            % full() is a no-op for a dense K2 and densifies a sparse one;
            % backslash avoids forming the explicit inverse.
            ca = (full(K2'*K2) + ridge*eye(size(K2,2))) \ (K2'*r);
        else
            % Incrementally extended version of K=calc(a.child,get(d,I)).
            if isempty(I_old)
                K=calc(a.child,get(d,I));
            else
                K_leri = calc(a.child,get(d,I_tmp),get(d,I_old));
                K_low = calc(a.child,get(d,I_tmp),get(d,I_tmp));
                K = [K,K_leri;K_leri',K_low];
            end
            % Ridge-regularised least squares for K*ca = r(I).  The
            % right-hand side is K'*r(I); the earlier code multiplied the
            % inverse by r(I) directly, which is not the solution of these
            % normal equations.
            ca = (K'*K + ridge*eye(size(K,2))) \ (K'*r(I));
            %ca=minres(K,r(I),1e-6,20,[],[],alpha(I));  % <- solves the same system
        end
        r=r-K2*ca;
        alpha(I)=alpha(I)+ca;
        I_old = I;
    end

    mse = mean(r.^2);
    if mod(i,a.verbosity)==0
        disp(sprintf('iter: %d  SVs: %d  MSE: %g RMSE: %g VAR: %g',i,length(I),mse, sqrt(mse),var(r)));
    end
    errtrack = [errtrack,mse];
    plot(errtrack,'r'),pause(0.01)
end

% Keep only the selected examples and their coefficients in the model.
a.alpha=alpha(I);
a.Xsv=get(d,I);

if a.algorithm.do_not_evaluate_training_error==1
    %    d=set_x(d,get_y(d));
    ret = data(get_name(d),get_y(d),d.Y);
else
    ret = test(a,d);
end


