// This is core/vnl/algo/vnl_sparse_lm.cxx
#ifdef VCL_NEEDS_PRAGMA_INTERFACE
#pragma implementation
#endif
//:
// \file
// \author Matt Leotta (Brown)
// \date   April 14, 2005
//
//-----------------------------------------------------------------------------

#include "vnl_sparse_lm.h"

#include <vcl_cmath.h>      // for vcl_sqrt
#include <vcl_iostream.h>
#include <vcl_iomanip.h>
#include <vcl_algorithm.h>

#include <vnl/vnl_vector.h>
#include <vnl/vnl_matrix.h>
#include <vnl/vnl_fastops.h>
#include <vnl/vnl_vector_ref.h>
#include <vnl/vnl_crs_index.h>
#include <vnl/vnl_sparse_lst_sqr_function.h>

#include <vnl/algo/vnl_cholesky.h>
#include <vnl/algo/vnl_svd.h>


//: Initialize with the function object that is to be minimized.
vnl_sparse_lm::vnl_sparse_lm(vnl_sparse_lst_sqr_function& f)
 : num_a_(f.number_of_a()),
   num_b_(f.number_of_b()),
   num_e_(f.number_of_e()),
   num_nz_(f.residual_indices().num_non_zero()),
   size_a_(f.index_a(num_a_)),
   size_b_(f.index_b(num_b_)),
   size_c_(f.number_of_params_c()),
   size_e_(f.index_e(num_e_)),
   A_(num_nz_),
   B_(num_nz_),
   C_(num_nz_),
   U_(num_a_),
   V_(num_b_),
   T_(size_c_, size_c_),
   W_(num_nz_),
   R_(num_b_),
   Q_(num_a_),
   ea_(size_a_),
   eb_(size_b_),
   ec_(size_c_),
   e_(size_e_),
   weights_(f.has_weights() ? num_e_ : 0, 1.0),
   inv_V_(num_b_),
   Y_(num_nz_),
   Z_(num_a_),
   Ma_(num_a_),
   Mb_(num_b_)
{
  init(&f);
}


// common initialization, called from the constructor
void vnl_sparse_lm::init(vnl_sparse_lst_sqr_function* f)
{
  f_ = f;

  // If changing these defaults, check the help comments in vnl_sparse_lm.h,
  // and MAKE SURE they're consistent.
  xtol = 1e-15;          // Termination tolerance on X (solution vector)
  maxfev = 1000;         // Maximum number of iterations before termination
  ftol = 1e-15;          // Termination tolerance on F (sum of squared residuals)
  gtol = 1e-15;          // Termination tolerance on Grad(F)' * F = 0
  epsfcn = 0.001;        // Step length for FD Jacobian

  tau_ = 0.001;          // Scale factor for the initial damping term mu

  allocate_matrices();
}

vnl_sparse_lm::~vnl_sparse_lm()
{
}


//: Minimize the function supplied in the constructor until convergence or failure.
//  On return, a, b, and c are such that f(a,b,c) is the lowest value achieved.
//  Returns true for convergence, false for failure.
//  If use_gradient is set to false, a finite difference approximation will be used,
//  even if the Jacobian functions have been provided.
//  If use_weights is set to false, weights will not be computed even if a
//  weighting function has been provided.
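//
//  A minimal usage sketch (my_sparse_function stands for a hypothetical
//  user-defined subclass of vnl_sparse_lst_sqr_function):
//  \code
//    my_sparse_function f(/* problem setup */);
//    vnl_vector<double> a(f.index_a(f.number_of_a()));   // initial "a" parameters
//    vnl_vector<double> b(f.index_b(f.number_of_b()));   // initial "b" parameters
//    vnl_vector<double> c(f.number_of_params_c());       // initial "c" parameters
//    vnl_sparse_lm minimizer(f);
//    if (minimizer.minimize(a, b, c))
//      vcl_cout << "RMS error: " << minimizer.get_end_error() << vcl_endl;
//  \endcode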
bool vnl_sparse_lm::minimize(vnl_vector<double>& a,
                             vnl_vector<double>& b,
                             vnl_vector<double>& c,
                             bool use_gradient,
                             bool use_weights)
{
  // verify that the vectors are of the correct size
  if (!check_vector_sizes(a,b,c))
    return false;

  // The system to solve at each step will be Sa*da = sea
  // (when size_c_ > 0, solve_dc builds its own local system Sc*dc = sec)
  vnl_matrix<double> Sa(size_a_, size_a_);
  vnl_vector<double> sea(size_a_);
  // update vectors
  vnl_vector<double> da(size_a_), db(size_b_), dc(size_c_);


  double mu = 0.0; // damping term, initialized on the first iteration
                   // (set to zero here only to avoid compiler warnings)
  double nu = 2.0;
  bool stop = false;
  // compute the initial residual
  f_->f(a,b,c,e_);
  num_evaluations_ = 1;

  // Compute and apply the weights if applicable
  if (use_weights && f_->has_weights())
  {
    f_->compute_weights(a,b,c,e_,weights_);
    f_->apply_weights(weights_, e_);
  }

  double sqr_error = e_.squared_magnitude();
  start_error_ = vcl_sqrt(sqr_error/e_.size()); // RMS error

  for (num_iterations_=0; num_iterations_<(unsigned int)maxfev && !stop; ++num_iterations_)
  {
    if (verbose_)
      vcl_cout << "iteration "<<vcl_setw(4)<<num_iterations_
               << " RMS error = "<< vcl_setprecision(6)<< vcl_setw(12)<<vcl_sqrt(sqr_error/e_.size())
               << " mu = "<<vcl_setprecision(6)<< vcl_setw(12) <<mu<< " nu = "<< nu << vcl_endl;
    if (trace)
      f_->trace(num_iterations_,a,b,c,e_);

    // Compute the Jacobian in block form J = [A|B|C]
    // where A, B, and C are sparse and contain subblocks Aij, Bij, and Cij
    if (use_gradient && f_->has_gradient())
      f_->jac_blocks(a,b,c,A_,B_,C_);
    else
      f_->fd_jac_blocks(a,b,c,A_,B_,C_,epsfcn); // use finite differences

    // Apply the weights if applicable
    if (use_weights && f_->has_weights())
    {
      f_->apply_weights(weights_, A_,B_,C_);
    }

    compute_normal_equations();

    // check for convergence in gradient
    if (vcl_max(vcl_max(ea_.inf_norm(),eb_.inf_norm()),ec_.inf_norm()) <= gtol)
    {
      failure_code_ = CONVERGED_GTOL;
      stop = true;
      break;
    }

    double sqr_params = a.squared_magnitude();
    sqr_params += b.squared_magnitude();
    sqr_params += c.squared_magnitude();

    // Extract the diagonal of J^T*J as a vector
    vnl_vector<double> diag_UVT = extract_diagonal();

    // initialize mu if this is the first iteration
    // proportional to the diagonal entry with the largest magnitude
    if (num_iterations_==0)
      mu = tau_*diag_UVT.inf_norm();

    // Re-solve the system while adapting mu until we decrease error or converge
    while (true)
    {
      // augment the diagonals with damping term mu
      set_diagonal(diag_UVT + mu);

      // compute inv(Vj) and Yij
      compute_invV_Y();

      if ( size_c_ > 0 )
      {
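        // A sketch of the elimination order used below: the b-parameters are
        // folded into the a-system through Y_ij = W_ij*inv(V_j), the reduced
        // a-system is folded into the global c-system through Z, Ma, and Mb,
        // then dc is solved first, da by back-substitution, and db last.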
        // compute Z = R*Y^T - Q and Sa
        compute_Z_Sa(Sa);

        // this large inverse is the bottleneck of this algorithm
        vnl_matrix<double> H;
        vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
        vnl_svd<double> *Sa_svd = 0;
        // use SVD as a backup if Cholesky is deficient
        if ( Sa_cholesky.rank_deficiency() > 0 )
        {
          Sa_svd = new vnl_svd<double>(Sa);
          H = Sa_svd->inverse();
        }
        else
          H = Sa_cholesky.inverse();

        // construct Ma = Z*H
        compute_Ma(H);
        // construct Mb = -(R + Ma*W)*inv(V)
        compute_Mb();

        // use Ma and Mb to solve for dc
        solve_dc(dc);

        // compute sea from ea, Z, dc, Y, and eb
        compute_sea(dc,sea);

        if ( Sa_svd )
          da = Sa_svd->solve(sea);
        else
          da = Sa_cholesky.solve(sea);
        delete Sa_svd;
      }
      else // size_c_ == 0
      {
        // |I -W*inv(V)| * |U  W| * |da| = |I -W*inv(V)| * |ea|
        // |0     I    |   |Wt V|   |db|   |0     I    |   |eb|
        //
        // premultiplying as shown above gives:
        // |Sa 0| * |da| = |sea|
        // |Wt V|   |db|   |eb |
        //
        // so we can first solve  Sa*da = sea  and then substitute to find db

        // compute Sa and sea
        compute_Sa_sea(Sa,sea);

#ifdef DEBUG
        vcl_cout << "singular values = "<< vnl_svd<double>(Sa).W() <<vcl_endl;
#endif
        // We could use a faster solver here, maybe conjugate gradients?
        // Solve the system  Sa*da = sea  for da

        vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
        // use SVD as a backup if Cholesky is deficient
        if ( Sa_cholesky.rank_deficiency() > 0 )
        {
          vnl_svd<double> Sa_svd(Sa);
          da = Sa_svd.solve(sea);
        }
        else
          da = Sa_cholesky.solve(sea);
      }

      // substitute da and dc to compute db
      backsolve_db(da, dc, db);

      // check for convergence in parameters
      // (change in parameters is below a tolerance)
      double sqr_delta = da.squared_magnitude();
      sqr_delta += db.squared_magnitude();
      sqr_delta += dc.squared_magnitude();
      if (sqr_delta < xtol*xtol*sqr_params) {
        failure_code_ = CONVERGED_XTOL;
        stop = true;
        break;
      }

      // compute updated parameters and residuals of the new parameters
      vnl_vector<double> new_a(a-da), new_b(b-db), new_c(c-dc);
      vnl_vector<double> new_e(e_.size()), new_weights(weights_.size());
      f_->f(new_a,new_b,new_c,new_e); // compute the new residual vector
      ++num_evaluations_;

      // Compute and apply the weights if applicable
      if (use_weights && f_->has_weights())
      {
        f_->compute_weights(new_a,new_b,new_c,new_e,new_weights);
        f_->apply_weights(new_weights, new_e);
      }

      double new_sqr_error = new_e.squared_magnitude();

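      // Gain ratio test: dF is the actual reduction in squared error and dL
      // is the reduction predicted by the linearized model.  A step with
      // dF > 0 and dL > 0 is accepted and mu is decreased (Nielsen's damping
      // update below); otherwise mu is increased and the step is recomputed.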
      double dF = sqr_error - new_sqr_error;
      double dL = dot_product(da,(mu*da+ea_))
                 +dot_product(db,(mu*db+eb_))
                 +dot_product(dc,(mu*dc+ec_));
      if (dF>0.0 && dL>0.0) {
        double tmp = 2.0*dF/dL-1.0;
        mu *= vcl_max(1.0/3.0, 1.0 - tmp*tmp*tmp);
        nu = 2.0;
        a.swap(new_a);
        b.swap(new_b);
        c.swap(new_c);
        e_.swap(new_e);
        weights_.swap(new_weights);
        sqr_error = new_sqr_error;
        break;
      }

      mu *= nu;
      nu *= 2.0;

      if (verbose_)
        vcl_cout <<"               RMS error = "<< vcl_setprecision(6)
                 << vcl_setw(12) << vcl_sqrt(sqr_error/e_.size())
                 << " mu = " << vcl_setprecision(6) << vcl_setw(12) << mu
                 << " nu = " << nu << vcl_endl;
    }
  }

  end_error_ = vcl_sqrt(sqr_error/e_.size()); // RMS error

  if ((int)num_iterations_ >= maxfev) {
    failure_code_ = TOO_MANY_ITERATIONS;
  }

  // Translate status code
  switch (failure_code_) {
   case CONVERGED_FTOL:
   case CONVERGED_XTOL:
   case CONVERGED_XFTOL:
   case CONVERGED_GTOL:
    return true;
   default:
    diagnose_outcome();
    return false;
  }
}


//: check vector sizes and verify that they match the problem size
bool vnl_sparse_lm::check_vector_sizes(vnl_vector<double> const& a,
                                       vnl_vector<double> const& b,
                                       vnl_vector<double> const& c)
{
  if (size_a_+size_b_ > size_e_) {
    vcl_cerr << "vnl_sparse_lm: Number of unknowns("<<size_a_+size_b_<<')'
             << " greater than number of data ("<<size_e_<<")\n";
    failure_code_ = ERROR_DODGY_INPUT;
    return false;
  }

  if (int(a.size()) != size_a_) {
    vcl_cerr << "vnl_sparse_lm: Input vector \"a\" length ("<<a.size()<<')'
             << " not equal to num parameters in \"a\" ("<<size_a_<<")\n";
    failure_code_ = ERROR_DODGY_INPUT;
    return false;
  }

  if (int(b.size()) != size_b_) {
    vcl_cerr << "vnl_sparse_lm: Input vector \"b\" length ("<<b.size()<<')'
             << " not equal to num parameters in \"b\" ("<<size_b_<<")\n";
    failure_code_ = ERROR_DODGY_INPUT;
    return false;
  }

  if (int(c.size()) != size_c_) {
    vcl_cerr << "vnl_sparse_lm: Input vector \"c\" length ("<<c.size()<<')'
             << " not equal to num parameters in \"c\" ("<<size_c_<<")\n";
    failure_code_ = ERROR_DODGY_INPUT;
    return false;
  }

  return true;
}


//: allocate matrix memory by setting all the matrix sizes
void vnl_sparse_lm::allocate_matrices()
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  // Iterate through all i and j to set the size of the matrices and vectors defined above
  for (int i=0; i<num_a_; ++i)
  {
    const unsigned int ai_size = f_->number_of_params_a(i);
    U_[i].set_size(ai_size,ai_size);
    Q_[i].set_size(size_c_, ai_size);
    Z_[i].set_size(size_c_, ai_size);
    Ma_[i].set_size(size_c_, ai_size);

    vnl_crs_index::sparse_vector row = crs.sparse_row(i);
    for (sv_itr r_itr=row.begin(); r_itr!=row.end(); ++r_itr)
    {
      const unsigned int j = r_itr->second;
      const unsigned int k = r_itr->first;
      const unsigned int bj_size = f_->number_of_params_b(j);
      const unsigned int eij_size = f_->number_of_residuals(k);
      A_[k].set_size(eij_size, ai_size);
      B_[k].set_size(eij_size, bj_size);
      C_[k].set_size(eij_size, size_c_);
      W_[k].set_size(ai_size, bj_size);
      Y_[k].set_size(ai_size, bj_size);
    }
  }
  for (int j=0; j<num_b_; ++j)
  {
    const unsigned int bj_size = f_->number_of_params_b(j);
    V_[j].set_size(bj_size,bj_size);
    R_[j].set_size(size_c_, bj_size);
    Mb_[j].set_size(size_c_, bj_size);
    inv_V_[j].set_size(bj_size,bj_size);
  }
}


//: compute the blocks making up the normal equations: Jt J d = Jt e
void vnl_sparse_lm::compute_normal_equations()
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  // clear ea, eb, and ec for summation
  ea_.fill(0.0);
  eb_.fill(0.0);
  ec_.fill(0.0);
  // clear V and R for summation
  for (unsigned int j=0; j<f_->number_of_b(); ++j)
  {
    V_[j].fill(0.0);
    R_[j].fill(0.0);
  }
  T_.fill(0.0);
  // compute blocks T, Q, R, U, V, W, ea, eb, and ec
  // JtJ = |T  Q  R|
  //       |Qt U  W|  with U and V block diagonal
  //       |Rt Wt V|  and W with same sparsity as residuals
  for (unsigned int i=0; i<f_->number_of_a(); ++i)
  {
    vnl_matrix<double>& Ui = U_[i];
    Ui.fill(0.0);
    vnl_matrix<double>& Qi = Q_[i];
    Qi.fill(0.0);
    unsigned int ai_size = f_->number_of_params_a(i);
    vnl_vector_ref<double> eai(ai_size, ea_.data_block()+f_->index_a(i));

    vnl_crs_index::sparse_vector row = crs.sparse_row(i);
    for (sv_itr r_itr=row.begin(); r_itr!=row.end(); ++r_itr)
    {
      unsigned int j = r_itr->second;
      unsigned int k = r_itr->first;
      vnl_matrix<double>& Aij = A_[k];
      vnl_matrix<double>& Bij = B_[k];
      vnl_matrix<double>& Cij = C_[k];
      vnl_matrix<double>& Vj = V_[j];
      vnl_matrix<double>& Rj = R_[j];
      vnl_vector_ref<double> ebj(Bij.cols(), eb_.data_block()+f_->index_b(j));

      vnl_fastops::inc_X_by_AtA(T_,Cij);       // T  += C_ij^T * C_ij
      vnl_fastops::inc_X_by_AtA(Ui,Aij);       // Ui += A_ij^T * A_ij
      vnl_fastops::inc_X_by_AtA(Vj,Bij);       // Vj += B_ij^T * B_ij
      vnl_fastops::AtB(W_[k],Aij,Bij);         // Wij = A_ij^T * B_ij
      vnl_fastops::inc_X_by_AtB(Qi,Cij,Aij);   // Qi += C_ij^T * A_ij
      vnl_fastops::inc_X_by_AtB(Rj,Cij,Bij);   // Rj += C_ij^T * B_ij

      vnl_vector_ref<double> eij(f_->number_of_residuals(k), e_.data_block()+f_->index_e(k));
      vnl_fastops::inc_X_by_AtB(eai,Aij,eij);  // e_a_i += A_ij^T * e_ij
      vnl_fastops::inc_X_by_AtB(ebj,Bij,eij);  // e_b_j += B_ij^T * e_ij
      vnl_fastops::inc_X_by_AtB(ec_,Cij,eij);  // e_c   += C_ij^T * e_ij
    }
  }
}


//: extract the vector on the diagonal of Jt J
vnl_vector<double> vnl_sparse_lm::extract_diagonal() const
{
  // Extract the diagonal of J^T*J as a vector
  vnl_vector<double> diag_UVT(size_a_+size_b_+size_c_);
  int z = 0;
  for (int i=0; i<num_a_; ++i) {
    const vnl_matrix<double>& Ui = U_[i];
    for (unsigned int ii=0; ii<Ui.rows(); ++ii)
      diag_UVT[z++] = Ui(ii,ii);
  }
  for (int j=0; j<num_b_; ++j) {
    const vnl_matrix<double>& Vj = V_[j];
    for (unsigned int ii=0; ii<Vj.rows(); ++ii)
      diag_UVT[z++] = Vj(ii,ii);
  }
  for (int ii=0; ii<size_c_; ++ii)
    diag_UVT[z++] = T_(ii,ii);

  return diag_UVT;
}


//: set the vector on the diagonal of Jt J
void vnl_sparse_lm::set_diagonal(const vnl_vector<double>& diag)
{
  int z=0;
  for (int i=0; i<num_a_; ++i) {
    vnl_matrix<double>& Ui = U_[i];
    for (unsigned int ii=0; ii<Ui.rows(); ++ii)
      Ui(ii,ii) = diag[z++];
  }
  for (int j=0; j<num_b_; ++j) {
    vnl_matrix<double>& Vj = V_[j];
    for (unsigned int ii=0; ii<Vj.rows(); ++ii)
      Vj(ii,ii) = diag[z++];
  }
  for (int ii=0; ii<size_c_; ++ii)
    T_(ii,ii) = diag[z++];
}


//: compute all inv(Vj) and Yij
void vnl_sparse_lm::compute_invV_Y()
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  for (int j=0; j<num_b_; ++j) {
    vnl_matrix<double>& inv_Vj = inv_V_[j];
    vnl_cholesky Vj_cholesky(V_[j],vnl_cholesky::quiet);
    // use SVD as a backup if Cholesky is deficient
    if ( Vj_cholesky.rank_deficiency() > 0 )
    {
      vnl_svd<double> Vj_svd(V_[j]);
      inv_Vj = Vj_svd.inverse();
    }
    else
      inv_Vj = Vj_cholesky.inverse();

    vnl_crs_index::sparse_vector col = crs.sparse_col(j);
    for (sv_itr c_itr=col.begin(); c_itr!=col.end(); ++c_itr)
    {
      unsigned int k = c_itr->first;
      Y_[k] = W_[k]*inv_Vj;  // Y_ij = W_ij * inv(V_j)
    }
  }
}


//: compute Z and Sa
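//  Z_i = sum_j R_j*Y_ij^T - Q_i, and Sa is assembled from the diagonal
//  blocks S_ii = U_i - sum_j Y_ij*W_ij^T and the off-diagonal blocks
//  S_ih = -sum_j Y_ij*W_hj^T, summing over the j observed by both i and h.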
void vnl_sparse_lm::compute_Z_Sa(vnl_matrix<double>& Sa)
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  // compute Z = R*Y^T - Q and Sa
  for (int i=0; i<num_a_; ++i)
  {
    vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
    vnl_matrix<double>& Zi = Z_[i];
    Zi.fill(0.0);
    Zi -= Q_[i];

    // handle the diagonal blocks separately
    vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
    for (sv_itr ri = row_i.begin(); ri != row_i.end();  ++ri)
    {
      unsigned int j = ri->second;
      unsigned int k = ri->first;
      vnl_matrix<double>& Yij = Y_[k];
      vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
      vnl_fastops::inc_X_by_ABt(Zi,R_[j],Yij);  // Z_i  += R_j * Y_ij^T
    }
    Sa.update(Sii,f_->index_a(i),f_->index_a(i));

    // handle the (symmetric) off diagonal blocks
    for (int h=i+1; h<num_a_; ++h)
    {
      vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
      vnl_matrix<double> Sih(f_->number_of_params_a(i),
                             f_->number_of_params_a(h), 0.0);

      // iterate through both sparse rows finding matching columns
      bool row_done = false;
      for (sv_itr ri = row_i.begin(), rh = row_h.begin();
           ri != row_i.end() && rh != row_h.end();  ++ri, ++rh)
      {
        while (!row_done && ri->second != rh->second)
        {
          while (!row_done && ri->second < rh->second)
            row_done = (++ri == row_i.end());
          while (!row_done && rh->second < ri->second)
            row_done = (++rh == row_h.end());
        }
        if (row_done)
          break;
        // S_ih -= Y_ij * W_hj^T
        vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
      }
      // Sa is symmetric, so also write the transposed block
      Sa.update(Sih,f_->index_a(i),f_->index_a(h));
      Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
    }
  }
}


//: compute Ma
void vnl_sparse_lm::compute_Ma(const vnl_matrix<double>& H)
{
  // construct Ma = Z*H
  vnl_matrix<double> Hik;
  for (int i=0; i<num_a_; ++i)
  {
    vnl_matrix<double>& Mai = Ma_[i];
    Mai.fill(0.0);

    for (int k=0; k<num_a_; ++k)
    {
      Hik.set_size(f_->number_of_params_a(i), f_->number_of_params_a(k));
      H.extract(Hik,f_->index_a(i), f_->index_a(k));
      vnl_fastops::inc_X_by_AB(Mai, Z_[k], Hik);
    }
  }
}


//: compute Mb
void vnl_sparse_lm::compute_Mb()
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  vnl_matrix<double> temp;
  // construct Mb = -(R + Ma*W)*inv(V)
  for (int j=0; j<num_b_; ++j)
  {
    temp.set_size(size_c_,f_->number_of_params_b(j));
    temp.fill(0.0);
    temp -= R_[j];

    vnl_crs_index::sparse_vector col = crs.sparse_col(j);
    for (sv_itr c_itr=col.begin(); c_itr!=col.end(); ++c_itr)
    {
      unsigned int k = c_itr->first;
      unsigned int i = c_itr->second;
      vnl_fastops::dec_X_by_AB(temp,Ma_[i],W_[k]);
    }
    vnl_fastops::AB(Mb_[j],temp,inv_V_[j]);
  }
}


//: solve for dc
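//  Builds Sc = T + sum_i Ma_i*Q_i^T + sum_j Mb_j*R_j^T and
//  sec = ec + sum_i Ma_i*ea_i + sum_j Mb_j*eb_j, then solves Sc*dc = sec.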
void vnl_sparse_lm::solve_dc(vnl_vector<double>& dc)
{
  vnl_matrix<double> Sc(T_);   // start with a copy of T
  vnl_vector<double> sec(ec_); // start with a copy of ec

  for (int i=0; i<num_a_; ++i)
  {
    const vnl_vector_ref<double> eai(f_->number_of_params_a(i),
                                     const_cast<double*>(ea_.data_block()+f_->index_a(i)));
    vnl_fastops::inc_X_by_ABt(Sc,Ma_[i],Q_[i]);
    sec += Ma_[i] * eai;
  }
  for (int j=0; j<num_b_; ++j)
  {
    const vnl_vector_ref<double> ebj(f_->number_of_params_b(j),
                                     const_cast<double*>(eb_.data_block()+f_->index_b(j)));
    vnl_fastops::inc_X_by_ABt(Sc,Mb_[j],R_[j]);
    sec += Mb_[j] * ebj;
  }

  if (size_c_ == 1)
  {
    dc[0] = sec[0] / Sc(0,0);
  }
  else
  {
    // Solve Sc*dc = sec for dc
    vnl_cholesky Sc_cholesky(Sc,vnl_cholesky::quiet);
    // use SVD as a backup if Cholesky is deficient
    if ( Sc_cholesky.rank_deficiency() > 0 )
    {
      vnl_svd<double> Sc_svd(Sc);
      dc = Sc_svd.solve(sec);
    }
    else
      dc = Sc_cholesky.solve(sec);
  }
}


//: compute sea using ea, Z, dc, Y, and eb
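//  That is, sea_i = ea_i + Z_i^T*dc - sum_j Y_ij*eb_j.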
void vnl_sparse_lm::compute_sea(vnl_vector<double> const& dc,
                                vnl_vector<double>& sea)
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  sea = ea_; // initialize sea to ea_
  for (int i=0; i<num_a_; ++i)
  {
    vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
    vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);

    vnl_fastops::inc_X_by_AtB(sei,Z_[i],dc); // se_i += Z_i^T * dc

    for (sv_itr ri = row_i.begin(); ri != row_i.end();  ++ri)
    {
      unsigned int k = ri->first;
      vnl_matrix<double>& Yij = Y_[k];
      vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri->second));
      sei -= Yij*ebj;  // se_i -= Y_ij * e_b_j
    }
  }
}


//: compute Sa and sea
// only used when size_c_ == 0
void vnl_sparse_lm::compute_Sa_sea(vnl_matrix<double>& Sa,
                                   vnl_vector<double>& sea)
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  sea = ea_; // initialize sea to ea_
  for (int i=0; i<num_a_; ++i)
  {
    vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
    vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);

    // handle the diagonal blocks and the computation of sea separately
    vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
    for (sv_itr ri = row_i.begin(); ri != row_i.end();  ++ri)
    {
      unsigned int k = ri->first;
      vnl_matrix<double>& Yij = Y_[k];
      vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
      vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri->second));
      sei -= Yij*ebj;  // se_i -= Y_ij * e_b_j
    }
    Sa.update(Sii,f_->index_a(i),f_->index_a(i));

    // handle the (symmetric) off diagonal blocks
    for (int h=i+1; h<num_a_; ++h)
    {
      vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
      vnl_matrix<double> Sih(f_->number_of_params_a(i),f_->number_of_params_a(h),0.0);

      // iterate through both sparse rows finding matching columns
      bool row_done = false;
      for (sv_itr ri = row_i.begin(), rh = row_h.begin();
           ri != row_i.end() && rh != row_h.end();  ++ri, ++rh)
      {
        while (!row_done && ri->second != rh->second)
        {
          while (!row_done && ri->second < rh->second)
            row_done = (++ri == row_i.end());
          while (!row_done && rh->second < ri->second)
            row_done = (++rh == row_h.end());
        }
        if (row_done)
          break;
        // S_ih -= Y_ij * W_hj^T
        vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
      }
      // Sa is symmetric, so also write the transposed block
      Sa.update(Sih,f_->index_a(i),f_->index_a(h));
      Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
    }
  }
}


//: back solve to find db using da and dc
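//  That is, db_j = inv(V_j) * (eb_j - R_j^T*dc - sum_i W_ij^T*da_i).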
void vnl_sparse_lm::backsolve_db(vnl_vector<double> const& da,
                                 vnl_vector<double> const& dc,
                                 vnl_vector<double>& db)
{
  // CRS matrix of indices into e, A, B, C, W, Y
  const vnl_crs_index& crs = f_->residual_indices();
  // sparse vector iterator
  typedef vnl_crs_index::sparse_vector::iterator sv_itr;

  for (int j=0; j<num_b_; ++j)
  {
    vnl_vector<double> seb(eb_.data_block()+f_->index_b(j),f_->number_of_params_b(j));
    vnl_crs_index::sparse_vector col = crs.sparse_col(j);
    if ( size_c_ > 0 )
    {
      vnl_fastops::dec_X_by_AtB(seb,R_[j],dc);
    }
    for (sv_itr c_itr=col.begin(); c_itr!=col.end(); ++c_itr)
    {
      unsigned int k = c_itr->first;
      unsigned int i = c_itr->second;
      const vnl_vector_ref<double> dai(f_->number_of_params_a(i),
                                       const_cast<double*>(da.data_block()+f_->index_a(i)));
      vnl_fastops::dec_X_by_AtB(seb,W_[k],dai);
    }
    vnl_vector_ref<double> dbj(f_->number_of_params_b(j),db.data_block()+f_->index_b(j));
    vnl_fastops::Ab(dbj,inv_V_[j],seb);
  }
}

//------------------------------------------------------------------------------

void vnl_sparse_lm::diagnose_outcome() const
{
  diagnose_outcome(vcl_cerr);
}

// fsm: should this function be a method on vnl_nonlinear_minimizer?
// if not, the return codes should be moved into LM.
void vnl_sparse_lm::diagnose_outcome(vcl_ostream& s) const
{
#define whoami "vnl_sparse_lm"
  //if (!verbose_) return;
  switch (failure_code_)
  {
   case ERROR_FAILURE:
    s << (whoami ": OIOIOI -- failure in least-squares function\n");
    break;
   case ERROR_DODGY_INPUT:
    s << (whoami ": OIOIOI -- lmdif dodgy input\n");
    break;
   case CONVERGED_FTOL:
    s << (whoami ": converged to ftol\n");
    break;
   case CONVERGED_XTOL:
    s << (whoami ": converged to xtol\n");
    break;
   case CONVERGED_XFTOL:
    s << (whoami ": converged nicely\n");
    break;
   case CONVERGED_GTOL:
    s << (whoami ": converged via gtol\n");
    break;
   case TOO_MANY_ITERATIONS:
    s << (whoami ": too many iterations\n");
    break;
   case FAILED_FTOL_TOO_SMALL:
    s << (whoami ": ftol is too small. no further reduction in the sum of squares is possible.\n");
    break;
   case FAILED_XTOL_TOO_SMALL:
    s << (whoami ": xtol is too small. no further improvement in the approximate solution x is possible.\n");
    break;
   case FAILED_GTOL_TOO_SMALL:
    s << (whoami ": gtol is too small. f(a,b,c) is orthogonal to the columns of the jacobian to machine precision.\n");
    break;
   default:
    s << (whoami ": OIOIOI: unknown info code from lmder.\n");
    break;
  }
  unsigned int num_e = f_->number_of_e();
  s << whoami ": " << num_iterations_ << " iterations, "
    << num_evaluations_ << " evaluations, "<< num_e <<" residuals.  RMS error start/end "
    << get_start_error() << '/' << get_end_error() << vcl_endl;
#undef whoami
}


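//: Return the stored JtJ matrix
//  (named inv_covar_ because JtJ approximates the inverse covariance of the solution)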
vnl_matrix<double> const& vnl_sparse_lm::get_JtJ()
{
  return inv_covar_;
}