vigra/regression.hxx
/************************************************************************/
/*                                                                      */
/*                  Copyright 2008 by Ullrich Koethe                    */
/*                                                                      */
/*    This file is part of the VIGRA computer vision library.           */
/*    The VIGRA Website is                                              */
/*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
/*    Please direct questions, bug reports, and contributions to        */
/*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
/*        vigra@informatik.uni-hamburg.de                               */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,   */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/

#ifndef VIGRA_REGRESSION_HXX
#define VIGRA_REGRESSION_HXX

#include "matrix.hxx"
#include "linear_solve.hxx"
#include "singular_value_decomposition.hxx"
#include "numerictraits.hxx"
#include "functorexpression.hxx"

namespace vigra
{

namespace linalg
{

/** \addtogroup Optimization Optimization and Regression
*/
//@{
   /** Ordinary Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       and a column vector \a b of length <tt>m</tt>, this function computes
       the column vector \a x of length <tt>n</tt> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \f]

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       This function is just another name for \ref linearSolve(), perhaps
       leading to more readable code when \a A is a rectangular matrix. It returns
       <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
       See \ref linearSolve() for more documentation.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3>
inline bool
leastSquares(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x,
             std::string method = "QR")
{
    return linearSolve(A, b, x, method);
}
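// Usage sketch (illustrative, not part of the original header): a minimal
// ordinary least squares fit. The data values and the function name are made
// up; any overdetermined system with full column rank works the same way.
/*
    #include <iostream>
    #include <vigra/matrix.hxx>
    #include <vigra/regression.hxx>

    void leastSquaresExample()
    {
        using namespace vigra::linalg;

        Matrix<double> A(3, 2), b(3, 1), x(2, 1);  // 3 equations, 2 unknowns

        // fit a line y = x0 + x1*t through the points (1,1), (2,2), (3,2)
        A(0,0) = 1.0;  A(0,1) = 1.0;  b(0,0) = 1.0;
        A(1,0) = 1.0;  A(1,1) = 2.0;  b(1,0) = 2.0;
        A(2,0) = 1.0;  A(2,1) = 3.0;  b(2,0) = 2.0;

        if(leastSquares(A, b, x))                  // QR decomposition by default
            std::cout << "intercept " << x(0,0) << ", slope " << x(1,0) << "\n";
    }
*/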
   /** Weighted Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
       with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
        \f]

       where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
       The algorithm calls \ref leastSquares() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
                        \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
        \f]

       where the square root of \a weights is just taken element-wise.

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns
       <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
          MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> const & weights,
          MultiArrayView<2, T, C4> & x, std::string method = "QR")
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedLeastSquares(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedLeastSquares(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedLeastSquares(): Result matrix x has wrong shape.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedLeastSquares(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return leastSquares(wa, wb, x, method);
}
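// Usage sketch (illustrative, with made-up data): the same line fit as above,
// but the third measurement is considered unreliable and down-weighted.
/*
    void weightedLeastSquaresExample()
    {
        using namespace vigra::linalg;

        Matrix<double> A(3, 2), b(3, 1), weights(3, 1), x(2, 1);
        A(0,0) = 1.0;  A(0,1) = 1.0;  b(0,0) = 1.0;  weights(0,0) = 1.0;
        A(1,0) = 1.0;  A(1,1) = 2.0;  b(1,0) = 2.0;  weights(1,0) = 1.0;
        A(2,0) = 1.0;  A(2,1) = 3.0;  b(2,0) = 9.0;  weights(2,0) = 0.01; // outlier

        // equivalent to leastSquares() on sqrt(weights)*A and sqrt(weights)*b
        weightedLeastSquares(A, b, weights, x);
    }
*/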
   /** Ridge Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
       this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       This is implemented by means of \ref singularValueDecomposition().

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
       and <tt>lambda == 0.0</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3>
bool
ridgeRegression(MultiArrayView<2, T, C1> const & A,
                MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "ridgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "ridgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "ridgeRegression(): lambda >= 0.0 required.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);
    if(rank < n && lambda == 0.0)
        return false;

    Matrix<T> t = transpose(u)*b;
    for(unsigned int k=0; k<cols; ++k)
        for(unsigned int l=0; l<rhsCount; ++l)
            t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
    x = v*t;
    return true;
}
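// Usage sketch (illustrative): with the SVD A = U S V^T computed above, the
// ridge solution is x = V diag(s_i / (s_i^2 + lambda)) U^T b, so a larger
// lambda shrinks the solution more strongly. The lambda value is arbitrary.
/*
    void ridgeRegressionExample()
    {
        using namespace vigra::linalg;

        Matrix<double> A(3, 2), b(3, 1), x(2, 1);
        A(0,0) = 1.0;  A(0,1) = 1.0;  b(0,0) = 1.0;
        A(1,0) = 1.0;  A(1,1) = 2.0;  b(1,0) = 2.0;
        A(2,0) = 1.0;  A(2,1) = 3.0;  b(2,0) = 2.0;

        ridgeRegression(A, b, x, 0.5);   // lambda = 0.5 penalizes ||x||^2
    }
*/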
   /** Weighted Ridge Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
       with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
       this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
       The algorithm calls \ref ridgeRegression() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
                        \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       where the square root of \a weights is just taken element-wise. This solution is
       computed by means of \ref singularValueDecomposition().

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
       and <tt>lambda == 0.0</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
          MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> const & weights,
          MultiArrayView<2, T, C4> & x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedRidgeRegression(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedRidgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "weightedRidgeRegression(): lambda >= 0.0 required.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedRidgeRegression(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return ridgeRegression(wa, wb, x, lambda);
}

   /** Ridge Regression with many lambdas.

       This executes \ref ridgeRegression() for a sequence of regularization parameters. It
       is implemented so that the \ref singularValueDecomposition() has to be executed only once.
       \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
       support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
       will contain the solutions for the corresponding lambda, so the number of columns of
       the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
       i.e. cannot contain several right hand sides at once.

       The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
       happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3, class Array>
bool
ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
          MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x, Array const & lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int lambdaCount = lambda.size();
    vigra_precondition(rows >= cols,
       "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
       "ridgeRegressionSeries(): Result matrix x has wrong shape.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);

    Matrix<T> xl = transpose(u)*b;
    Matrix<T> xt(cols,1);
    for(unsigned int i=0; i<lambdaCount; ++i)
    {
        vigra_precondition(lambda[i] >= 0.0,
           "ridgeRegressionSeries(): lambda >= 0.0 required.");
        if(lambda[i] == 0.0 && rank < cols) // skip lambda == 0 when A is rank deficient
            continue;
        for(unsigned int k=0; k<cols; ++k)
            xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
        columnVector(x, i) = v*xt;
    }
    return (rank == n);
}
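// Usage sketch (illustrative): solutions for several lambdas at once; the SVD
// of A is computed only a single time, and x holds one solution column per lambda.
/*
    #include <vector>

    void ridgeRegressionSeriesExample()
    {
        using namespace vigra::linalg;

        Matrix<double> A(3, 2), b(3, 1);
        A(0,0) = 1.0;  A(0,1) = 1.0;  b(0,0) = 1.0;
        A(1,0) = 1.0;  A(1,1) = 2.0;  b(1,0) = 2.0;
        A(2,0) = 1.0;  A(2,1) = 3.0;  b(2,0) = 2.0;

        std::vector<double> lambda;
        lambda.push_back(0.01);
        lambda.push_back(0.1);
        lambda.push_back(1.0);

        Matrix<double> x(2, 3);                  // one column per lambda
        ridgeRegressionSeries(A, b, x, lambda);
    }
*/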
   /** \brief Pass options to leastAngleRegression().

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
class LeastAngleRegressionOptions
{
  public:
    enum Mode { LARS, LASSO, NNLASSO };

        /** Initialize all options with default values.
        */
    LeastAngleRegressionOptions()
    : max_solution_count(0),
      unconstrained_dimension_count(0),
      mode(LASSO),
      least_squares_solutions(true)
    {}

        /** Maximum number of solutions to be computed.

            If \a n is 0 (the default), the number of solutions is determined by the length
            of the solution array. Otherwise, the minimum of maxSolutionCount() and that
            length is taken.<br>
            Default: 0 (use length of solution array)
        */
    LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
    {
        max_solution_count = (int)n;
        return *this;
    }

        /** Set the mode of the algorithm.

            Mode must be one of "lars", "lasso", "nnlasso". The function just calls
            the member function of the corresponding name to set the mode.

            Default: "lasso"
        */
    LeastAngleRegressionOptions & setMode(std::string mode)
    {
        mode = tolower(mode);
        if(mode == "lars")
            this->lars();
        else if(mode == "lasso")
            this->lasso();
        else if(mode == "nnlasso")
            this->nnlasso();
        else
            vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
        return *this;
    }

        /** Use the plain LARS algorithm.

            Default: inactive
        */
    LeastAngleRegressionOptions & lars()
    {
        mode = LARS;
        return *this;
    }

        /** Use the LASSO modification of the LARS algorithm.

            This allows features to be removed from the active set under certain conditions.<br>
            Default: active
        */
    LeastAngleRegressionOptions & lasso()
    {
        mode = LASSO;
        return *this;
    }

        /** Use the non-negative LASSO modification of the LARS algorithm.

            This enforces all non-zero entries in the solution to be positive.<br>
            Default: inactive
        */
    LeastAngleRegressionOptions & nnlasso()
    {
        mode = NNLASSO;
        return *this;
    }

        /** Compute least squares solutions.

            Use least angle regression to determine active sets, but
            return least squares solutions for the features in each active set,
            instead of constrained solutions.<br>
            Default: <tt>true</tt>
        */
    LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
    {
        least_squares_solutions = select;
        return *this;
    }

    int max_solution_count, unconstrained_dimension_count;
    Mode mode;
    bool least_squares_solutions;
};
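// Usage sketch (illustrative): the options class follows the named-parameter
// idiom, so the setters can be chained in any order.
/*
    LeastAngleRegressionOptions options = LeastAngleRegressionOptions()
            .setMode("nnlasso")             // same effect as .nnlasso()
            .leastSquaresSolutions(false)   // return the constrained solutions
            .maxSolutionCount(10);          // stop after 10 knots of the path
*/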
namespace detail {

template <class T, class C1, class C2>
struct LarsData
{
    typedef typename MultiArrayShape<2>::type Shape;

    int activeSetSize;
    MultiArrayView<2, T, C1> A;
    MultiArrayView<2, T, C2> b;
    Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
    ArrayVector<MultiArrayIndex> columnPermutation;

    // init data for a new run
    LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
    : activeSetSize(1),
      A(Ai), b(bi), R(A), qtb(b),
      lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
      next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }

    // copy data for the recursive call in nnlassolsq
    LarsData(LarsData const & d, int asetSize)
    : activeSetSize(asetSize),
      A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
      lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
      next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
      next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }
};

template <class T, class C1, class C2, class Array1, class Array2, class Array3>
unsigned int
leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
                             Array1 & activeSets,
                             Array2 * lars_solutions, Array3 * lsq_solutions,
                             LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    typedef typename MultiArrayShape<2>::type Shape;
    typedef typename Matrix<T>::view_type Subarray;
    typedef ArrayVector<MultiArrayIndex> Permutation;
    typedef typename Permutation::view_type ColumnSet;

    vigra_precondition(d.activeSetSize > 0,
       "leastAngleRegressionMainLoop() must not be called with empty active set.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
    bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);

    const MultiArrayIndex rows = rowCount(d.R);
    const MultiArrayIndex cols = columnCount(d.R);
    const MultiArrayIndex maxRank = std::min(rows, cols);

    MultiArrayIndex maxSolutionCount = options.max_solution_count;
    if(maxSolutionCount == 0)
        maxSolutionCount = lasso_modification
                                ? 10*maxRank
                                : maxRank;

    bool needToRemoveColumn = false;
    MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
    MultiArrayIndex currentSolutionCount = 0;
    while(currentSolutionCount < maxSolutionCount)
    {
        //ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
        ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);

        // find next dimension to be activated
        Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction),     // correlation with LARS residual
                  cLSQ  = transpose(d.A) * (d.b - d.next_lsq_prediction); // correlation with LSQ residual

        // In theory, all vectors in the active set should have the same correlation C, and
        // the correlation of all others should not exceed this. In practice, we may find the
        // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
        // determine C from the entire set of variables.
        MultiArrayIndex cmaxIndex = enforce_positive
                                         ? argMax(cLARS)
                                         : argMax(abs(cLARS));
        T C = abs(cLARS(cmaxIndex, 0));

        Matrix<T> ac(cols - d.activeSetSize, 1);
        for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
        {
            T rho = cLSQ(inactiveSet[k], 0),
              cc  = C - sign(rho)*cLARS(inactiveSet[k], 0);

            if(rho == 0.0)                 // make sure that 0/0 cannot happen in the other cases
                ac(k,0) = 1.0;             // variable k is linearly dependent on the active set
            else if(rho > 0.0)
                ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
            else if(enforce_positive)
                ac(k,0) = 1.0;             // variable k cannot enter the active set because it would be negative
            else
                ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
        }

        // in the non-negative case: make sure that a column just removed cannot re-enter right away
        // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
        if(enforce_positive && needToRemoveColumn)
            ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;

        // find the best candidate
        // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
        // join the active set simultaneously, so that gamma = 0 cannot occur.
        columnToBeAdded = argMin(ac);

        // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
        T gamma = (d.activeSetSize == maxRank)
                       ? 1.0
                       : ac(columnToBeAdded, 0);

        // adjust columnToBeAdded: we skipped the active set
        if(columnToBeAdded >= 0)
            columnToBeAdded += d.activeSetSize;

        // check whether we have to remove a column from the active set
        needToRemoveColumn = false;
        if(lasso_modification)
        {
            // find dimensions whose weight changes sign below gamma*searchDirection
            Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
            for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            {
                if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
                   (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
                    s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
            }

            columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
            if(columnToBeRemoved >= 0)
            {
                needToRemoveColumn = true; // remove takes precedence over add
                gamma = s(columnToBeRemoved, 0);
            }
        }

        // compute the current solutions
        d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
        d.lars_solution   = gamma * d.next_lsq_solution   + (1.0 - gamma) * d.lars_solution;
        if(needToRemoveColumn)
            d.lars_solution(columnToBeRemoved, 0) = 0.0; // turn a possible epsilon into an exact zero

        // write the current solution
        ++currentSolutionCount;
        activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));

        if(lsq_solutions != 0)
        {
            if(enforce_positive)
            {
                ArrayVector<Matrix<T> > nnresults;
                ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
                LarsData<T, C1, C2> nnd(d, d.activeSetSize);

                leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array3*)0,
                                             LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
                typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
                for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
                {
                    nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
                }
                lsq_solutions->push_back(typename Array3::value_type());
                lsq_solutions->back() = nnlsq_solution;
            }
            else
            {
                lsq_solutions->push_back(typename Array3::value_type());
                lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
            }
        }
        if(lars_solutions != 0)
        {
            lars_solutions->push_back(typename Array2::value_type());
            lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        }

        // no further solutions possible
        if(gamma == 1.0)
            break;

        if(needToRemoveColumn)
        {
            --d.activeSetSize;
            if(columnToBeRemoved != d.activeSetSize)
            {
                // remove column 'columnToBeRemoved' and restore triangular form of R
                // note: columnPermutation is automatically swapped here
                detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);

                // swap solution entries
                std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
                std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
                columnToBeRemoved = d.activeSetSize; // keep track of removed column
            }
            d.lars_solution(d.activeSetSize,0) = 0.0;
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
        }
        else
        {
            vigra_invariant(columnToBeAdded >= 0,
                "leastAngleRegression(): internal error (columnToBeAdded < 0)");
            // add column 'columnToBeAdded'
            if(d.activeSetSize != columnToBeAdded)
            {
                std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
                columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
                columnToBeAdded = d.activeSetSize; // keep track of added column
            }

            // zero the corresponding entries of the solutions
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
            d.lars_solution(d.activeSetSize,0) = 0.0;

            // reduce R (i.e. its newly added column) to triangular form
            detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
            ++d.activeSetSize;
        }

        // compute the LSQ solution of the new active set
        Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
        Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);

        // compute the LSQ prediction of the new active set
        d.next_lsq_prediction.init(0.0);
        for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
    }

    return (unsigned int)currentSolutionCount;
}

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int
leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const & b,
                         Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
                         LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    const MultiArrayIndex rows = rowCount(A);

    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "leastAngleRegression(): Shape mismatch between matrices A and b.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);

    detail::LarsData<T, C1, C2> d(A, b);

    // find the dimension with the largest correlation
    Matrix<T> c = transpose(A)*b;
    MultiArrayIndex initialColumn = enforce_positive
                                         ? argMaxIf(c, Arg1() > Param(0.0))
                                         : argMax(abs(c));
    if(initialColumn == -1)
        return 0; // no solution found

    // prepare initial active set and search direction etc.
    std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
    columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
    detail::qrColumnHouseholderStep(0, d.R, d.qtb);
    d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
    d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
    d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);

    return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
}

} // namespace detail

   /** Least Angle Regression.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg

    <b> Declarations:</b>

    \code
    namespace vigra {
      namespace linalg {
        // compute either LASSO or least squares solutions
        template <class T, class C1, class C2, class Array1, class Array2>
        unsigned int
        leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const & b,
                             Array1 & activeSets, Array2 & solutions,
                             LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());

        // compute LASSO and least squares solutions
        template <class T, class C1, class C2, class Array1, class Array2>
        unsigned int
        leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const & b,
                             Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                             LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
      }
      using linalg::leastAngleRegression;
    }
    \endcode

    This function implements Least Angle Regression (LARS) as described in

    B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <i>"Least Angle Regression"</i>,
    Annals of Statistics 32(2):407-499, 2004.

    It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
            \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
        \f]

    and the L1-regularized non-negative least squares (NN-LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
            \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
        \f]

    where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
    \a b is a vector of length <tt>m</tt>, and <tt>s >= 0.0</tt> is a regularization parameter.
    L1-regularization has the desirable effect that it causes the solution \a x to be sparse, i.e. only
    the most important variables (called the <em>active set</em>) have non-zero values. The
    key insight of the LARS algorithm is the following: When the solution vector is considered
    as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
    linear function, i.e. a polyline in n-dimensional space. The knots of the polyline
    occur precisely at those values of s where one variable enters or leaves the active set,
    and can be efficiently computed.
    Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
    at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
    \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
    the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
    solution with one variable in the active set. The function leastAngleRegression() returns the number
    of solutions (i.e. knot points) computed.

    The sequences of active sets and corresponding variable weights are returned in \a activeSets and
    \a solutions, respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
    containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
    \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
    the example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.

    The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
    "LeastAngleRegressionOptions":
    <DL>
    <DT><b>options.lasso()</b> (active by default)
    <DD> Compute the LASSO solution as described above.
    <DT><b>options.nnlasso()</b> (inactive by default)
    <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
         <b>x</b> >= 0 in all solutions.
    <DT><b>options.lars()</b> (inactive by default)
    <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
         a variable from the active set once it entered.
    <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
    <DD> Use the algorithm mode selected above
         to determine the sequence of active sets, but then compute and return an
         ordinary (unconstrained) least squares solution for every active set.<br>
         <b>Note:</b> The second form of leastAngleRegression() ignores this option and
         always computes both the constrained and the unconstrained solutions (returned in
         \a lasso_solutions and \a lsq_solutions, respectively).
    <DT><b>options.maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
    <DD> Compute at most <tt>n</tt> solutions.
    </DL>

    <b>Usage:</b>

    \code
    int m = ..., n = ...;
    Matrix<double> A(m, n), b(m, 1);
    ...                                    // fill A and b

    // normalize the input
    Matrix<double> offset(1,n), scaling(1,n);
    prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
    prepareColumns(b, b, DataPreparationGoals(ZeroMean));

    // arrays to hold the output
    ArrayVector<ArrayVector<int> > activeSets;
    ArrayVector<Matrix<double> > solutions;

    // run leastAngleRegression() in non-negative LASSO mode
    int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
                                            LeastAngleRegressionOptions().nnlasso());

    // print results
    Matrix<double> denseSolution(1, n);
    for (MultiArrayIndex k = 0; k < numSolutions; ++k)
    {
        // transform the sparse solution into a dense vector
        denseSolution.init(0.0); // ensure that inactive variables are zero
        for (unsigned int i = 0; i < activeSets[k].size(); ++i)
        {
            // set the values of the active variables;
            // activeSets[k][i] is the true index of the i-th variable in the active set
            denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
        }

        // invert the input normalization
        denseSolution = denseSolution * pointWise(scaling);

        // output the solution
        std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
    }
    \endcode

    <b>Required Interface:</b>

    <ul>
    <li> <tt>T</tt> must be a numeric type (compatible to double)
    <li> <tt>Array1 a1;</tt><br>
         <tt>a1.push_back(ArrayVector<int>());</tt>
    <li> <tt>Array2 a2;</tt><br>
         <tt>a2.push_back(Matrix<T>());</tt>
    </ul>
   */
doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const & b,
                     Array1 & activeSets, Array2 & solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    if(options.least_squares_solutions)
        return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
    else
        return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
}

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const & b,
                     Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
}
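// Usage sketch (illustrative; A and b are assumed to be filled as in the
// usage example above): the second overload returns the constrained (LASSO)
// and the unconstrained least squares solution of every active set at once.
/*
    ArrayVector<ArrayVector<int> > activeSets;
    ArrayVector<Matrix<double> > lassoSolutions, lsqSolutions;

    unsigned int numSolutions =
        leastAngleRegression(A, b, activeSets, lassoSolutions, lsqSolutions);
*/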
   /** Non-negative Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       and a column vector \a b of length <tt>m</tt>, this function computes
       a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
             \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
        \f]

       Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
       Note that all matrices must already have the correct shape. The solution is computed by means
       of \ref leastAngleRegression() with a non-negativity constraint.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
   */
template <class T, class C1, class C2, class C3>
inline void
nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const & b, MultiArrayView<2, T, C3> & x)
{
    vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
       "nonnegativeLeastSquares(): Matrix shape mismatch.");
    vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
       "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");

    ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
    ArrayVector<Matrix<T> > results;

    leastAngleRegression(A, b, activeSets, results,
                         LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
    x.init(NumericTraits<T>::zero());
    if(activeSets.size() > 0)
        for(unsigned int k=0; k<activeSets.back().size(); ++k)
            x(activeSets.back()[k],0) = results.back()[k];
}
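// Usage sketch (illustrative, with made-up data): all entries of the returned
// solution are guaranteed to be non-negative.
/*
    void nonnegativeLeastSquaresExample()
    {
        using namespace vigra::linalg;

        Matrix<double> A(3, 2), b(3, 1), x(2, 1);
        A(0,0) = 1.0;  A(0,1) = 1.0;  b(0,0) =  1.0;
        A(1,0) = 1.0;  A(1,1) = 2.0;  b(1,0) = -2.0;   // pulls the fit towards negative weights
        A(2,0) = 1.0;  A(2,1) = 3.0;  b(2,0) =  2.0;

        nonnegativeLeastSquares(A, b, x);              // x >= 0 element-wise
    }
*/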
//@}

} // namespace linalg

using linalg::leastSquares;
using linalg::weightedLeastSquares;
using linalg::ridgeRegression;
using linalg::weightedRidgeRegression;
using linalg::ridgeRegressionSeries;
using linalg::nonnegativeLeastSquares;
using linalg::leastAngleRegression;
using linalg::LeastAngleRegressionOptions;

} // namespace vigra

#endif // VIGRA_REGRESSION_HXX