1 /* 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.util; 20 21 import java.util.Arrays; 22 import java.util.Deque; 23 import java.util.LinkedList; 24 25 import org.apache.hadoop.classification.InterfaceAudience; 26 27 /** 28 * Computes the optimal (minimal cost) assignment of jobs to workers (or other 29 * analogous) concepts given a cost matrix of each pair of job and worker, using 30 * the algorithm by James Munkres in "Algorithms for the Assignment and 31 * Transportation Problems", with additional optimizations as described by Jin 32 * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment 33 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in 34 * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or 35 * workers, whichever is greater. 36 */ 37 @InterfaceAudience.Private 38 public class MunkresAssignment { 39 40 // The original algorithm by Munkres uses the terms STAR and PRIME to denote 41 // different states of zero values in the cost matrix. These values are 42 // represented as byte constants instead of enums to save space in the mask 43 // matrix by a factor of 4n^2 where n is the size of the problem. 44 private static final byte NONE = 0; 45 private static final byte STAR = 1; 46 private static final byte PRIME = 2; 47 48 // The algorithm requires that the number of column is at least as great as 49 // the number of rows. If that is not the case, then the cost matrix should 50 // be transposed before computation, and the solution matrix transposed before 51 // returning to the caller. 52 private final boolean transposed; 53 54 // The number of rows of internal matrices. 55 private final int rows; 56 57 // The number of columns of internal matrices. 58 private final int cols; 59 60 // The cost matrix, the cost of assigning each row index to column index. 61 private float[][] cost; 62 63 // Mask of zero cost assignment states. 64 private byte[][] mask; 65 66 // Covering some rows of the cost matrix. 67 private boolean[] rowsCovered; 68 69 // Covering some columns of the cost matrix. 70 private boolean[] colsCovered; 71 72 // The alternating path between starred zeroes and primed zeroes 73 private Deque<Pair<Integer, Integer>> path; 74 75 // The solution, marking which rows should be assigned to which columns. The 76 // positions of elements in this array correspond to the rows of the cost 77 // matrix, and the value of each element correspond to the columns of the cost 78 // matrix, i.e. assignments[i] = j indicates that row i should be assigned to 79 // column j. 80 private int[] assignments; 81 82 // Improvements described by Jin Kue Wong cache the least value in each row, 83 // as well as the column index of the least value in each row, and the pending 84 // adjustments to each row and each column. 85 private float[] leastInRow; 86 private int[] leastInRowIndex; 87 private float[] rowAdjust; 88 private float[] colAdjust; 89 90 /** 91 * Construct a new problem instance with the specified cost matrix. The cost 92 * matrix must be rectangular, though not necessarily square. If one dimension 93 * is greater than the other, some elements in the greater dimension will not 94 * be assigned. The input cost matrix will not be modified. 95 * @param costMatrix 96 */ 97 public MunkresAssignment(float[][] costMatrix) { 98 // The algorithm assumes that the number of columns is at least as great as 99 // the number of rows. If this is not the case of the input matrix, then 100 // all internal structures must be transposed relative to the input. 101 this.transposed = costMatrix.length > costMatrix[0].length; 102 if (this.transposed) { 103 this.rows = costMatrix[0].length; 104 this.cols = costMatrix.length; 105 } else { 106 this.rows = costMatrix.length; 107 this.cols = costMatrix[0].length; 108 } 109 110 cost = new float[rows][cols]; 111 mask = new byte[rows][cols]; 112 rowsCovered = new boolean[rows]; 113 colsCovered = new boolean[cols]; 114 path = new LinkedList<Pair<Integer, Integer>>(); 115 116 leastInRow = new float[rows]; 117 leastInRowIndex = new int[rows]; 118 rowAdjust = new float[rows]; 119 colAdjust = new float[cols]; 120 121 assignments = null; 122 123 // Copy cost matrix. 124 if (transposed) { 125 for (int r = 0; r < rows; r++) { 126 for (int c = 0; c < cols; c++) { 127 cost[r][c] = costMatrix[c][r]; 128 } 129 } 130 } else { 131 for (int r = 0; r < rows; r++) { 132 for (int c = 0; c < cols; c++) { 133 cost[r][c] = costMatrix[r][c]; 134 } 135 } 136 } 137 138 // Costs must be finite otherwise the matrix can get into a bad state where 139 // no progress can be made. If your use case depends on a distinction 140 // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong. 141 for (int r = 0; r < rows; r++) { 142 for (int c = 0; c < cols; c++) { 143 if (cost[r][c] == Float.POSITIVE_INFINITY) { 144 cost[r][c] = Float.MAX_VALUE; 145 } 146 } 147 } 148 } 149 150 /** 151 * Get the optimal assignments. The returned array will have the same number 152 * of elements as the number of elements as the number of rows in the input 153 * cost matrix. Each element will indicate which column should be assigned to 154 * that row or -1 if no column should be assigned, i.e. if result[i] = j then 155 * row i should be assigned to column j. Subsequent invocations of this method 156 * will simply return the same object without additional computation. 157 * @return an array with the optimal assignments 158 */ 159 public int[] solve() { 160 // If this assignment problem has already been solved, return the known 161 // solution 162 if (assignments != null) { 163 return assignments; 164 } 165 166 preliminaries(); 167 168 // Find the optimal assignments. 169 while (!testIsDone()) { 170 while (!stepOne()) { 171 stepThree(); 172 } 173 stepTwo(); 174 } 175 176 // Extract the assignments from the mask matrix. 177 if (transposed) { 178 assignments = new int[cols]; 179 outer: 180 for (int c = 0; c < cols; c++) { 181 for (int r = 0; r < rows; r++) { 182 if (mask[r][c] == STAR) { 183 assignments[c] = r; 184 continue outer; 185 } 186 } 187 // There is no assignment for this row of the input/output. 188 assignments[c] = -1; 189 } 190 } else { 191 assignments = new int[rows]; 192 outer: 193 for (int r = 0; r < rows; r++) { 194 for (int c = 0; c < cols; c++) { 195 if (mask[r][c] == STAR) { 196 assignments[r] = c; 197 continue outer; 198 } 199 } 200 } 201 } 202 203 // Once the solution has been computed, there is no need to keep any of the 204 // other internal structures. Clear all unnecessary internal references so 205 // the garbage collector may reclaim that memory. 206 cost = null; 207 mask = null; 208 rowsCovered = null; 209 colsCovered = null; 210 path = null; 211 leastInRow = null; 212 leastInRowIndex = null; 213 rowAdjust = null; 214 colAdjust = null; 215 216 return assignments; 217 } 218 219 /** 220 * Corresponds to the "preliminaries" step of the original algorithm. 221 * Guarantees that the matrix is an equivalent non-negative matrix with at 222 * least one zero in each row. 223 */ 224 private void preliminaries() { 225 for (int r = 0; r < rows; r++) { 226 // Find the minimum cost of each row. 227 float min = Float.POSITIVE_INFINITY; 228 for (int c = 0; c < cols; c++) { 229 min = Math.min(min, cost[r][c]); 230 } 231 232 // Subtract that minimum cost from each element in the row. 233 for (int c = 0; c < cols; c++) { 234 cost[r][c] -= min; 235 236 // If the element is now zero and there are no zeroes in the same row 237 // or column which are already starred, then star this one. There 238 // must be at least one zero because of subtracting the min cost. 239 if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) { 240 mask[r][c] = STAR; 241 // Cover this row and column so that no other zeroes in them can be 242 // starred. 243 rowsCovered[r] = true; 244 colsCovered[c] = true; 245 } 246 } 247 } 248 249 // Clear the covered rows and columns. 250 Arrays.fill(rowsCovered, false); 251 Arrays.fill(colsCovered, false); 252 } 253 254 /** 255 * Test whether the algorithm is done, i.e. we have the optimal assignment. 256 * This occurs when there is exactly one starred zero in each row. 257 * @return true if the algorithm is done 258 */ 259 private boolean testIsDone() { 260 // Cover all columns containing a starred zero. There can be at most one 261 // starred zero per column. Therefore, a covered column has an optimal 262 // assignment. 263 for (int r = 0; r < rows; r++) { 264 for (int c = 0; c < cols; c++) { 265 if (mask[r][c] == STAR) { 266 colsCovered[c] = true; 267 } 268 } 269 } 270 271 // Count the total number of covered columns. 272 int coveredCols = 0; 273 for (int c = 0; c < cols; c++) { 274 coveredCols += colsCovered[c] ? 1 : 0; 275 } 276 277 // Apply an row and column adjustments that are pending. 278 for (int r = 0; r < rows; r++) { 279 for (int c = 0; c < cols; c++) { 280 cost[r][c] += rowAdjust[r]; 281 cost[r][c] += colAdjust[c]; 282 } 283 } 284 285 // Clear the pending row and column adjustments. 286 Arrays.fill(rowAdjust, 0); 287 Arrays.fill(colAdjust, 0); 288 289 // The covers on columns and rows may have been reset, recompute the least 290 // value for each row. 291 for (int r = 0; r < rows; r++) { 292 leastInRow[r] = Float.POSITIVE_INFINITY; 293 for (int c = 0; c < cols; c++) { 294 if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) { 295 leastInRow[r] = cost[r][c]; 296 leastInRowIndex[r] = c; 297 } 298 } 299 } 300 301 // If all columns are covered, then we are done. Since there may be more 302 // columns than rows, we are also done if the number of covered columns is 303 // at least as great as the number of rows. 304 return (coveredCols == cols || coveredCols >= rows); 305 } 306 307 /** 308 * Corresponds to step 1 of the original algorithm. 309 * @return false if all zeroes are covered 310 */ 311 private boolean stepOne() { 312 while (true) { 313 Pair<Integer, Integer> zero = findUncoveredZero(); 314 if (zero == null) { 315 // No uncovered zeroes, need to manipulate the cost matrix in step 316 // three. 317 return false; 318 } else { 319 // Prime the uncovered zero and find a starred zero in the same row. 320 mask[zero.getFirst()][zero.getSecond()] = PRIME; 321 Pair<Integer, Integer> star = starInRow(zero.getFirst()); 322 if (star != null) { 323 // Cover the row with both the newly primed zero and the starred zero. 324 // Since this is the only place where zeroes are primed, and we cover 325 // it here, and rows are only uncovered when primes are erased, then 326 // there can be at most one primed uncovered zero. 327 rowsCovered[star.getFirst()] = true; 328 colsCovered[star.getSecond()] = false; 329 updateMin(star.getFirst(), star.getSecond()); 330 } else { 331 // Will go to step two after, where a path will be constructed, 332 // starting from the uncovered primed zero (there is only one). Since 333 // we have already found it, save it as the first node in the path. 334 path.clear(); 335 path.offerLast(new Pair<Integer, Integer>(zero.getFirst(), 336 zero.getSecond())); 337 return true; 338 } 339 } 340 } 341 } 342 343 /** 344 * Corresponds to step 2 of the original algorithm. 345 */ 346 private void stepTwo() { 347 // Construct a path of alternating starred zeroes and primed zeroes, where 348 // each starred zero is in the same column as the previous primed zero, and 349 // each primed zero is in the same row as the previous starred zero. The 350 // path will always end in a primed zero. 351 while (true) { 352 Pair<Integer, Integer> star = starInCol(path.getLast().getSecond()); 353 if (star != null) { 354 path.offerLast(star); 355 } else { 356 break; 357 } 358 Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst()); 359 path.offerLast(prime); 360 } 361 362 // Augment path - unmask all starred zeroes and star all primed zeroes. All 363 // nodes in the path will be either starred or primed zeroes. The set of 364 // starred zeroes is independent and now one larger than before. 365 for (Pair<Integer, Integer> p : path) { 366 if (mask[p.getFirst()][p.getSecond()] == STAR) { 367 mask[p.getFirst()][p.getSecond()] = NONE; 368 } else { 369 mask[p.getFirst()][p.getSecond()] = STAR; 370 } 371 } 372 373 // Clear all covers from rows and columns. 374 Arrays.fill(rowsCovered, false); 375 Arrays.fill(colsCovered, false); 376 377 // Remove the prime mask from all primed zeroes. 378 for (int r = 0; r < rows; r++) { 379 for (int c = 0; c < cols; c++) { 380 if (mask[r][c] == PRIME) { 381 mask[r][c] = NONE; 382 } 383 } 384 } 385 } 386 387 /** 388 * Corresponds to step 3 of the original algorithm. 389 */ 390 private void stepThree() { 391 // Find the minimum uncovered cost. 392 float min = leastInRow[0]; 393 for (int r = 1; r < rows; r++) { 394 if (leastInRow[r] < min) { 395 min = leastInRow[r]; 396 } 397 } 398 399 // Add the minimum cost to each of the costs in a covered row, or subtract 400 // the minimum cost from each of the costs in an uncovered column. As an 401 // optimization, do not actually modify the cost matrix yet, but track the 402 // adjustments that need to be made to each row and column. 403 for (int r = 0; r < rows; r++) { 404 if (rowsCovered[r]) { 405 rowAdjust[r] += min; 406 } 407 } 408 for (int c = 0; c < cols; c++) { 409 if (!colsCovered[c]) { 410 colAdjust[c] -= min; 411 } 412 } 413 414 // Since the cost matrix is not being updated yet, the minimum uncovered 415 // cost per row must be updated. 416 for (int r = 0; r < rows; r++) { 417 if (!colsCovered[leastInRowIndex[r]]) { 418 // The least value in this row was in an uncovered column, meaning that 419 // it would have had the minimum value subtracted from it, and therefore 420 // will still be the minimum value in that row. 421 leastInRow[r] -= min; 422 } else { 423 // The least value in this row was in a covered column and would not 424 // have had the minimum value subtracted from it, so the minimum value 425 // could be some in another column. 426 for (int c = 0; c < cols; c++) { 427 if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) { 428 leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r]; 429 leastInRowIndex[r] = c; 430 } 431 } 432 } 433 } 434 } 435 436 /** 437 * Find a zero cost assignment which is not covered. If there are no zero cost 438 * assignments which are uncovered, then null will be returned. 439 * @return pair of row and column indices of an uncovered zero or null 440 */ 441 private Pair<Integer, Integer> findUncoveredZero() { 442 for (int r = 0; r < rows; r++) { 443 if (leastInRow[r] == 0) { 444 return new Pair<Integer, Integer>(r, leastInRowIndex[r]); 445 } 446 } 447 return null; 448 } 449 450 /** 451 * A specified row has become covered, and a specified column has become 452 * uncovered. The least value per row may need to be updated. 453 * @param row the index of the row which was just covered 454 * @param col the index of the column which was just uncovered 455 */ 456 private void updateMin(int row, int col) { 457 // If the row is covered we want to ignore it as far as least values go. 458 leastInRow[row] = Float.POSITIVE_INFINITY; 459 460 for (int r = 0; r < rows; r++) { 461 // Since the column has only just been uncovered, it could not have any 462 // pending adjustments. Only covered rows can have pending adjustments 463 // and covered costs do not count toward row minimums. Therefore, we do 464 // not need to consider rowAdjust[r] or colAdjust[col]. 465 if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) { 466 leastInRow[r] = cost[r][col]; 467 leastInRowIndex[r] = col; 468 } 469 } 470 } 471 472 /** 473 * Find a starred zero in a specified row. If there are no starred zeroes in 474 * the specified row, then null will be returned. 475 * @param r the index of the row to be searched 476 * @return pair of row and column indices of starred zero or null 477 */ 478 private Pair<Integer, Integer> starInRow(int r) { 479 for (int c = 0; c < cols; c++) { 480 if (mask[r][c] == STAR) { 481 return new Pair<Integer, Integer>(r, c); 482 } 483 } 484 return null; 485 } 486 487 /** 488 * Find a starred zero in the specified column. If there are no starred zeroes 489 * in the specified row, then null will be returned. 490 * @param c the index of the column to be searched 491 * @return pair of row and column indices of starred zero or null 492 */ 493 private Pair<Integer, Integer> starInCol(int c) { 494 for (int r = 0; r < rows; r++) { 495 if (mask[r][c] == STAR) { 496 return new Pair<Integer, Integer>(r, c); 497 } 498 } 499 return null; 500 } 501 502 /** 503 * Find a primed zero in the specified row. If there are no primed zeroes in 504 * the specified row, then null will be returned. 505 * @param r the index of the row to be searched 506 * @return pair of row and column indices of primed zero or null 507 */ 508 private Pair<Integer, Integer> primeInRow(int r) { 509 for (int c = 0; c < cols; c++) { 510 if (mask[r][c] == PRIME) { 511 return new Pair<Integer, Integer>(r, c); 512 } 513 } 514 return null; 515 } 516 }