Skip to content
Snippets Groups Projects
linhuber.m 11.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • tsal's avatar
    tsal committed
    function  [X, info, perf] = linhuber(A,b, c, L, par, init)
    %LINHUBER  Minimizer of the piecewise quadratic
    %     f(x) = sum(phi(ri(x))) + c'*x + 0.5mu*||L*x||2^2 ,
    % where ri(x) is the i'th element in r(x) = b - A*x and phi is the
    % (possibly one-sided) Huber function with threshold gamma.
    %  
    % Call:    [X, info] = linhuber(A,b, c, L, par)
    %          [X, info] = linhuber(A,b, c, L, par, init)
    %          [X, info, perf] = linhuber(......)
    %
    % Input parameters
    % A,b :  m*n matrix and m-vector, respectively.
    % c   :  n-vector or empty, in which case the term c'*x is omitted.
    % L   :  Matrix with n columns or empty.  In the case mu = par(2) > 0
    %        an empty L is treated as L = I, the identity matrix.
    % par :  Vector with one, two or three elements.
    %        par(1) = gamma, Huber threshold.
    %        par(2) = mu.  Default: mu = 0.
    %        par(3) : Choice of Huber function,
    %                 1 : one-sided,  rho(r) = 0 for r > 0,
    %                 2 : one-sided,  all ri <= gamma are active
    %                 Otherwise: standard Huber (default), ie 
    %                            equations with  abs(ri)<=gamma  are active.
    % init:  Choice of starting point,
    %        init is a vector:  given x0,
    %        init is a struct like info below: given active set and
    %                                          factorization,
    %        If nargin < 6, x0 is the least squares solution to Ax "=" b.   
    %
    % Output parameters
    % X    :  If  perf  is present, then array, holding the iterates
    %         columnwise.  Otherwise, computed minimizer of f.
    % info :  Struct with information on the performance and computed solution.
    %         Fields
    %         pf :  Vector  [f(x)  ||g(x)||inf  #iterations #factorizations] ,
    %               where x is the computed minimizer.
    %               pf(1) = -Inf  indicates an unbounded problem.
    %          S :  Struct with the Huber active set at the solution.  
    %               Fields
    %               s :  Huber sign vector,
    %               A :  active set (indices of small residuals),
    %               N :  inactive set (indices of large residuals),
    %               L :  vector  [length(S.A), length(S.N)].
    %          R :  n*n array with Cholesky factor of active set.
    %          p :  Permutation vector used in the factorization.
    % perf :  Iteration history.  Struct with fields
    %          f :  values of  f(x) ,
    %         ng :  values of  || g(x) ||inf
    %         nA :  number of active equations.
    %
    % Method
    % Essentially (without downdating of the factorization) as described 
    % in H.B. Nielsen, "Computing a minimizer of a piecewise quadratic -
    % Implementation", IMM-REP-1998-14, IMM, DTU.
    
    % Version 10.10.05.  hbn(a)imm.dtu.dk
    
    % Initialize
    X = [];  S = [];  R = [];  p = [];
    pf = [-Inf NaN 0 0];  perf = [];
    [aux Htype] = lhaux(A,b,c,L,par);
    [m n] = size(A);   kmax = 2*m;   fact = 0;
    kcomp = 1;
    
    if  nargin > 5  % given info on starting point
      if  isstruct(init)  % given active set and factorization
        kcomp = 2;
        S = init.S;
        [f g] = lhobj(zeros(n,1),b,S,c,L,A,aux);
        if  aux(3) == 0  % no L-contribution.  Reuse factorization
          [x u cf R p fact] = lhfacsolv(S,A,L,g,aux,S,init.R,init.p);  
        else
          [x u cf R p fact] = lhfacsolv(S,A,L,g,aux);  
        end
      else,  x = init(:); end  % given x0
    else  % Start with least sqares solution to Ax "=" b
      x = pinv(A)*b;  fact = 1;
    end
    
    Trace = nargout > 2;
    if  Trace
      X = zeros(n,kmax);  perf = zeros(3,kmax);
    end 
    
    % Iterate
    k = 0;   stop = 0;
    
    while  ~stop & k < kmax   
      Sp = S;
      k = k + 1;
      [r S tau] = lhsign(A,b,x,aux,Htype);
      [f g] = lhobj(x,r,S,c,L,A,aux);  ng = norm(g,inf);
      if  Trace,  X(:,k) = x;  perf(:,k) = [f; ng; S.L(1)]; end
      
      if  ng == 0 | ( k > kcomp & isequal(Sp,S) )
        stop = 1;  t = 0;
      else
        [h u lc R p rf] = lhfacsolv(S,A,L,g,aux,Sp,R,p);  fact = fact + rf;
        t = lhlines(S,r,u,lc,aux,Htype);
        if  t > 0,  x = x + t*h; 
        else,       stop = 1;    end
      end
    end
    
    if  t*ng == 0,  pf(1) = f; end
    pf(2:4) = [ng k-1 fact];
    info = struct('pf',pf, 'S',S, 'R',R, 'p',p);
    if  Trace
      kk = 1 : k;  X = X(:,kk);  
      perf = struct('f',perf(1,kk), 'ng',perf(2,kk), 'nA',perf(3,kk)); 
    else,       X = x; end
    
    %%%%%%%%%%%%%%%%%%%%  Auxiliary functions  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  [aux, Htype] = lhaux(A,b,c,L,par)
    % Simple checks and set thresholds.  04.08.24
    aux = [];
    e = [A(:); b(:); c(:); L(:); par(:)];
    v = isreal(e) & all(~isinf(e)) & all(~isnan(e));
    if  ~v
      error('Indata contains non-real elements')
    end
    [m n] = size(A);  sb = size(b);
    v = n > 0 & (min(sb) == 1 & max(sb) == m);
    if  ~v
      disp('A must be a matrix and ')
      error(sprintf('b must be a vector of length %g',m))
    end
    sc = size(c);  sL = size(L);  tn = sprintf(' %g',n);
    v = max(sc) == 0 | (min(sc) == 1 & max(sc) == n);
    if  ~v
      error(['c must be empty or a vector of length' tn])
    end
    v = max(sL) == 0 | sL(2) == n;
    if  ~v
      error(['L must be empty or a matrix with' tn ' columns'])
    end
    sp = size(par);
    if  min(sp) ~= 1
      error('par must be a scalar or a vector')
    end
    if  max(sp) < 2,  par = [par 0]; end
    if  ~all(par(1:2) >= 0)
      error('par(1:2) = [gamma  mu]  cannot be negative')
    end
    
    gamma = par(1);   mu = par(2);   gm = gamma*mu;
    % Rounding error threshold
    tau = 100*eps*norm(b,inf);   
    if  par(1) < tau
      error(sprintf('gamma must be at least %9.2e',tau))
    end
    
    % Diverse norms and thresholds
    nAi = norm(A,inf);  nA1 = norm(A,1);
    sigma0 = 10*sqrt(eps * nA1*nAi);  sigma = sigma0;
    if  gm > 0 
      sgm = sqrt(gm);
      if  isempty(L)  % L = I
        nL1 = 1;  nLi = 1;
        if  sgm >= sigma0, sigma = sgm; end
      else
        nL1 = norm(L,1);   nLi = norm(L,inf);
        if  gm*nL1*nLi >= sigma0^2
          nBB = (nA1 + sgm*nL1) * max(nAi, sgm*nLi);
          sigma = 10*sqrt(eps * nBB);
          sigma0 = sigma;
        end
      end
    else
      nL1 = 0;  nLi = 0;
    end  % mu-contribution
    
    % aux = [gamma  tau  gamma*mu  sigma  sigma0   ...
    %        ||A||inf  gamma*(||A||1 + mu*||L||1*||L||inf)  gamma*||c||inf]
    aux = [gamma  tau  gm  sigma  sigma0 ...
        nAi  gamma*nA1 + gm*nL1*nLi  gamma*norm(c,inf)];
    if  length(par) < 3 | abs(par(3) - 1.5) ~= 0.5,  Htype = 3;
    else,                                            Htype = par(3); end
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  [h,u,lc,R,p,refac] = lhfacsolv(S,A,L,g,aux,Sp,R,p)
    % Factorize Hessian and find Newton step.  04.08.25
    n = length(g);  
    if  nargin < 6 | isempty(Sp),  refac = 1;
    else  % possible update from previos factorization
      w = zeros(size(A,1),1);  w(S.A) = 1;  w(Sp.A) = w(Sp.A) - 1;
      if  any(w == -1),  refac = 1;  % downdates
      else
        refac = 0;   E = find(w);  % augmented active set
        if  ~isempty(E)  % update factorization
          [Q R p1] = qr([R; A(E,p)],0);  p = p(p1);
        end
      end
    end
    
    if  refac  % refactorize  
      d = aux(4)*ones(size(g));
      if  aux(3) > 0 & ~isempty(L)  % mu-contribution with L ~= I
        C = [A(S.A,:); sqrt(aux(3))*L; diag(d)];
      else
        C = [A(S.A,:); diag(d)];
      end
      [Q R p] = qr(C,0);
    end
    
    % Check for almost rank deficient
    rd = any(abs(diag(R)) < 10*aux(5));
    % Get solution to  Hx = -g
    g = -g(:);   x(p) = R \ (g(p)' / R)';  x = x(:);
    if  rd  % Check consistency
      v(p) = R \ (aux(4)^2*x(p)' / R)';   v = v(:);  h = x + v;
      if  S.L(1) > 0,  gR = A(S.A,:)' * (A(S.A,:)*h);
      else,            gR = zeros(n,1);               end
      if  aux(3) > 0  % mu-contribution
        if  isempty(L),  gR = gR + aux(3)*h;
        else,            gR = gR + aux(3)*(L' * (L*h)); end
      end
      if  norm(g - gR) > sqrt(eps)*norm(g)  % inconsistent (?)
        h = v / norm(v);
      end
    else  % full rank.  iterative refinement step
      if  S.L(1) > 0,  Hx = A(S.A,:)' * (A(S.A,:)*x);
      else,            Hx = zeros(n,1);               end
      if  aux(3) > 0  % mu-contribution
        if  isempty(L),  Hx = Hx + aux(3)*x;
        else,            Hx = Hx + aux(3)*(L' * (L*x)); end
      end
      v(p) = R \ ((g(p) - Hx(p))' / R)';   h = x + v(:);
    end
    
    % u = A*h, modified for 'zero' elements
    u = A*h;
    z = find(abs(u) <= eps * aux(4) * norm(h,inf) );
    u(z) = 0;
    
    % Offset values for line search coefficients
    if  aux(3) > 0  % mu-contribution
      if  isempty(L),  c2 = aux(3)*norm(h)^2;
      else,            c2 = aux(3)*norm(L*h)^2; end
    else
      c2 = 0;
    end
    lc = [dot(g,h)  c2];
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  [kv, lc] = lhkinks(S,r,u,lc,aux,Htype)
    % Get kink values and finish initialization of line search coeffs.  04.08.23
    gam = aux(1);  tau = aux(2);  
    if  nargin < 6 | abs(Htype - 1.5) ~= 0.5,  Htype = 3; end
    thr = [-1 0; -inf 1; -1 1];  thr = gam*thr(Htype,:);  % Huber thresholds
    kv = zeros(2*length(r),2);  k = 0;
    if  ~isempty(S.A)
      % Remove leaving 'maybes'
      act = S.A;   w = ones(size(act));
      o = find((u(act) > 0 & abs(r(act) - thr(1)) <= tau) | ...
        (u(act) < 0 & abs(r(act) - thr(2)) <= tau));
      w(o) = 0;   act = act(find(w));
      if  ~isempty(act)  % remaining actives
        lc(2) = lc(2) + norm(u(act))^2;  % update initial line search coeffs.
        n = find(u(act) < 0);   p = find(u(act) > 0);  
        ii = act([p; n])';   li = length(ii);
        if  li  % get leave values
          d = [repmat(thr(1),length(p),1); repmat(thr(2),length(n),1)];
          kv(k+[1:li],:) = [(r(ii) - d)./u(ii) -ii];   k = k + li;
        end
      end
    end  % actives
    
    % Enter-leave kinks
    i = find(u(S.N) .* r(S.N) > 0);   E = S.N(i);
    if  Htype == 1  % add neglected components
      E = [E(:); find(r > tau & u > 0)]';
    end
    if  ~isempty(E)
      i = E(find(u(E) > 0));  j = E(find(u(E) < 0));  
      ii = [i  j]';   li = length(ii);
      d = [repmat(thr([2 1]),length(i),1); repmat(thr,length(j),1)];
      kv(k+[1:li],:) = [(r(ii) - d(:,1))./u(ii) ii];   k = k + li;
      kv(k+[1:li],:) = [(r(ii) - d(:,2))./u(ii) -ii];   k = k + li;
    end
    
    % sort by kink values
    [sk s] = sort(kv(1:k,1));
    % if  k > 2 & kv(s(k),2) < 0,  s = s(1:k-1); end % remove last leave
    kv = kv(s,:);
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  t = lhlines(S,r,u,lc,aux,Htype)
    % Piecewise linear line search.  04.08.23
    [kv lc] = lhkinks(S,r,u,lc,aux,Htype); 
    c1 = lc(1);  c2 = lc(2);
    tj = 0;  j = 0;  nk = size(kv,1);  fd = 0;
    while  ~fd & j < nk
      j = j+1;  t1 = tj;  tj = kv(j,1);
      dt = tj - t1;  cj = c2*dt - c1;
      if  c2 & (cj >= 0)  % passed minimum
        t = t1 + c1/c2;   fd = 1;
      else  % update coefficients
        c1 = -cj;  ii = kv(j,2);
        c2 = c2 + sign(ii)*u(abs(ii))^2;
      end
    end
    if  fd,           return
    elseif  nk > 1,   t = mean(kv(nk-1:nk,1));
    elseif  nk == 1,  t = kv(1,1) + aux(2)/abs(u(abs(kv(1,2))));
    elseif  c2,       t = c1/c2;
    else,             t = -1;  end  % error return
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  [f,g] = lhobj(x,r,S,c,L,A,aux)
    % Value and gradient of objective function.  04.08.24
    
    % Compute function
    fA = r(S.A);  fN = r(S.N);  gamma = aux(1);
    f = norm(fA)^2/(2*gamma) + norm(fN,1) - .5*gamma*S.L(2);
    if  ~isempty(c),  f = f + dot(c,x); end
    if  aux(3) > 0
      if  isempty(L),  Lx = x; else,  Lx = L*x; end
      f = f + aux(3)/(2*gamma)*norm(Lx)^2;
    end
    
    if  nargout > 1  % gamma*gradient
      fv = -gamma*S.s';  fv(S.A) = -fA;  g = A'*fv;
      if  ~isempty(c),  g = g + gamma*c; end
      if  aux(3) > 0
        if  isempty(L),  g = g + aux(3)*x;
        else,            g = g + L'*(L*(aux(3)*x)); end
      end
      % Threshold for rounding errors
      thr = 100*eps*(aux(7)*norm(fv,inf) + aux(7)*norm(x,inf) + aux(8));
      if  norm(g,inf) <= thr,  g = zeros(size(g)); end
    end  % gradient
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    function  [r,S,tau] = lhsign(A,b,x,aux,Htype)
    % Residual and Huber sign vector etc
    
    % Rounding error and effective thresholds
    tau = max(aux(2), 10*eps*aux(6)*norm(x,inf));
    thr = aux(1) + tau;
    
    % Residual and sign vector
    r = b - A*x;   s = sign(r);
    
    if  nargin < 5 | abs(Htype - 1.5) ~= 0.5  % simple Huber function
      w = find(abs(r) <= thr);    s(w) = 0;   
    elseif  Htype == 1  % Neglect positive contributions
      w = find(-thr <= r & r <= tau);    s(w) = 0;
      p = find(s > 0);   s(p) = 0;
    else                % Negative contributions are active
      w = find(r <= thr);   s(w) = 0;
    end
    N = find(s);
    
    % Return result
    S = struct('s',s', 'A',w', 'N',N', 'L',[length(w)  length(N)]);