function [d, a] =  training(a,d)
% TRAINING  Compress the expansion of a trained child algorithm into a
% smaller set of vectors ("reduced set vectors", rsvs) using learned
% pre-images.
%
%   [d, a] = training(a, d)
%
%   Inputs
%     a  reduced-set object. Fields read here: a.child (algorithm providing
%        alpha, Xsv, b0 and its kernel in a.child.child), a.rsv (number of
%        rsvs; a value below 1 is treated as a fraction of the retained
%        support vectors), a.norm, a.ridge, a.rr_k, a.alpha and
%        a.algorithm.verbosity.
%     d  data object used to train the child (if needed) and passed to
%        test(a,d) at the end.
%
%   Outputs
%     d  result of test(a,d) with the compressed expansion.
%     a  updated object; a.Xsv / a.alpha hold the reduced set and its
%        coefficients, a.b0 is copied from the child, a.dw holds the
%        objective value after every merge, a.w2 its final value and
%        a.delta_w the difference between the first and the final value.
%
%   Steps
%     1. Train the child if it has no alpha yet, or if this object was
%        trained before (a.alpha non-empty).
%     2. Keep only expansion vectors with abs(alpha) > 1e-5 and write the
%        filtered Xsv / alpha back into a.child (fz reads them from there).
%     3. Train a kpca on the retained vectors and a multi_rr ridge
%        regression that maps kpca coordinates back to input space.
%     4. Repeatedly pick the pair returned by find_closest, average their
%        kpca coordinates, map the average back to input space, replace
%        the pair by that point and refit the coefficients with minres.
%
%   All console output is emitted only when a.algorithm.verbosity > 0.

  verbose = a.algorithm.verbosity > 0;

  if verbose
    disp(['calculating reduced set vectors by learning pre-images method for ', get_name(a.child)])
    disp('initializing...')
  end

  % train algorithm first (both operands are scalar, so short-circuit is safe)
  if isempty(a.child.alpha) || ~isempty(a.alpha)
   [r a.child]=train(a.child,d);
  end

  % set RNG
  rand('state',sum(100*clock));

  % get data from old algo
  k = a.child.child;
  alpha = a.child.alpha;
  svs = find(sum(abs(alpha)',1)>1e-5);
  alpha = alpha(svs);
  Xsv = get(a.child.Xsv,svs);
  Kxx = calc(k,Xsv);
  % fz evaluates the objective from a.child, so it must see the filtered set
  a.child.Xsv = Xsv;
  a.child.alpha = alpha;

  % calculate desired # of rsvs
  if a.rsv < 1
      % a.rsv is a fraction of the retained vectors. Clamp to at least one
      % rsv: a target of 0 can never be reached by the merge loop below
      % (a single remaining vector is only ever replaced by another one).
      a.rsv = max(1, ceil(sum(abs(alpha)>0) * a.rsv));
  end

  if verbose
    disp(['compressing for ', num2str(a.rsv) , ' rsvs']);
  end

  % objective value before any compression
  dw(1) = alpha' * Kxx * alpha;

  it = 1;
  % first step: find base in feature space

  % PXsv contains coordinates of projection of Xsv to a kpca base in the RKHS
  % b is the trained kpca algorithm

  [PXsv b] = train(kpca(kernel(k.ker,k.kerparam)),Xsv);

  % construct data object RKHS x X
  D_rkhs = data(PXsv.X, Xsv.X);

  % find rr_G: RKHS -> X by ridge regression
  [t rr_G] = train(multi_rr({kernel(a.rr_k.ker,a.rr_k.kerparam), ['ridge=' num2str(a.ridge)]}),D_rkhs);


  % second step: find pre-images iteratively
  z = Xsv.X;
  Kzz = calc(k,data(z));
  gamma = alpha;
  while size(z,1) > a.rsv
    if verbose
      % every pass removes two vectors and adds one, so the number of
      % passes is (#retained vectors - a.rsv)
      disp(['compression progress ', num2str(it/(size(Xsv.X,1) -a.rsv)*100), '%'])
    end
    Pz = test(b,data(z));
    [i,j] = find_closest(k, Kzz, z);
    % midpoint of the selected pair in kpca coordinates
    Px = .5 * (get_x(Pz, i) + get_x(Pz, j));
    if a.norm == 1
        Px = Px / norm(Px);
    end
    % map the midpoint back to input space
    x = test(rr_G, data(Px));
    z([i,j],:) = [];

    if size(z,1)~=0
      % kernel row between the remaining vectors and the new point
      kzz = calc(k,data(z),x);
      z = [z; get_x(x)];
      % drop the merged pair from Kzz and append the new point as the
      % last row and column
      Kzz([i,j],:)=[];
      Kzz(:,[i,j])=[];
      Kzz = [Kzz; kzz];
      Kzz = [Kzz, [kzz';calc(k,x)]]  ;
      % refit coefficients: solve Kzz * gamma = Kzx * alpha
      gamma = minres(Kzz, calc(k, Xsv, data(z))*alpha);
    else
      % the merged pair were the last two vectors; only the new point is left
      z = get_x(x,1);
      Kzz = calc(k,data(x));
      gamma = calc(k, Xsv, data(z))*alpha;
    end

    % objective value after this merge (the new point is the last row of z)
    dw(it+1) = fz(z(size(z,1),:)',a.child,z,gamma,Kxx);
    it = it+1;
  end

  % set results
  a.w2 = dw(length(dw));
  a.delta_w = dw(1) - a.w2;
  a.dw = dw;
  a.Xsv = data(z);
  a.alpha = gamma;
  a.b0 = a.child.b0;

  if verbose
    disp(' ')
    disp(' ')
    disp(' ')
    disp('------------------------------------------------------')
    disp('stats for reduced set contruction with LP method')
    disp('------------------------------------------------------')
    disp(['original support vectors: ' num2str(sum(abs(alpha)>0))]);
    disp(['reduced set vectors found: ' num2str(a.rsv)]);
    disp(['decrease in ||w-w*||^2: ' num2str(a.delta_w)]);
    disp(['final value of ||w-w*||^2: ' num2str(a.w2)]);
    disp('------------------------------------------------------')
  end

  d = test(a,d);


% subfunction fz
function ret = fz(x0, a, Z, G, Kxx)
% Value of the target function ||w-w*||^2.
%
%   x0   column vector; its transpose overwrites the last row of Z
%   a    algorithm providing a.alpha, a.Xsv and the kernel a.child
%   Z    candidate vectors, one per row
%   G    coefficients belonging to the rows of Z
%   Kxx  kernel matrix used in the a.alpha' * Kxx * a.alpha term
%
%   ret = alpha'*Kxx*alpha + G'*Kzz*G - 2*alpha'*Kxz*G

% place the candidate point in the last row of Z
lastRow = size(Z,1);
Z(lastRow, :) = x0';

% kernel among the rows of Z, and between Z and the stored a.Xsv
candidates = data(Z);
Kzz = calc(a.child, candidates);
Kxz = calc(a.child, candidates, data(a.Xsv.X));

% the three terms, each evaluated left to right as in a single expression
origTerm  = a.alpha' * Kxx * a.alpha;
redTerm   = G' * Kzz * G;
crossTerm = 2 * a.alpha' * Kxz * G;

ret = origTerm + redTerm - crossTerm;


% subfunction find_closest
function [i,j] = find_closest(k, K, z)
% Returns the indices of the two closest elements in feature space.
%
%   k  kernel object; k.ker names the kernel type
%   K  square kernel matrix of the rows of z
%   z  the points themselves, one per row (only read for non-rbf kernels)
%
%   i, j  row and column index of the largest off-diagonal similarity.
%         For K with at least two rows, i ~= j is guaranteed.
%
% For the 'rbf' kernel the raw kernel value is used as similarity. For any
% other kernel each entry is divided by sqrt(k(z_i,z_i) * k(z_j,z_j)).

if ~strcmp(k.ker,'rbf')
    % Self-kernel of every point, computed once per point instead of once
    % per matrix entry; the resulting quotients are unchanged.
    n = size(K,1);
    selfk = zeros(n,1);
    for p = 1:n
        selfk(p) = calc(k,data(z(p,:)));
    end
    K = K ./ sqrt(selfk * selfk');
end

% Exclude self-pairs. Masking with -Inf rather than 0 matters when every
% off-diagonal similarity is <= 0 (e.g. rbf values underflowing to 0, or
% negative values for other kernels): a 0 diagonal could then win the max
% and yield i == j, so the caller would not shrink its set.
K(logical(eye(size(K)))) = -Inf;

% per-column maximum and its row, then the best column
[K1, I] = max(K, [], 1);
[tmp, J] = max(K1);
i = I(J);
j = J;

	

