View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.util.Arrays;
22  import java.util.Deque;
23  import java.util.LinkedList;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  
27  /**
28   * Computes the optimal (minimal cost) assignment of jobs to workers (or other
29   * analogous) concepts given a cost matrix of each pair of job and worker, using
30   * the algorithm by James Munkres in "Algorithms for the Assignment and
31   * Transportation Problems", with additional optimizations as described by Jin
32   * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
33   * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in
34   * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or
35   * workers, whichever is greater.
36   */
37  @InterfaceAudience.Private
38  public class MunkresAssignment {
39  
40    // The original algorithm by Munkres uses the terms STAR and PRIME to denote
41    // different states of zero values in the cost matrix. These values are
42    // represented as byte constants instead of enums to save space in the mask
43    // matrix by a factor of 4n^2 where n is the size of the problem.
44    private static final byte NONE = 0;
45    private static final byte STAR = 1;
46    private static final byte PRIME = 2;
47  
48    // The algorithm requires that the number of column is at least as great as
49    // the number of rows. If that is not the case, then the cost matrix should
50    // be transposed before computation, and the solution matrix transposed before
51    // returning to the caller.
52    private final boolean transposed;
53  
54    // The number of rows of internal matrices.
55    private final int rows;
56  
57    // The number of columns of internal matrices.
58    private final int cols;
59  
60    // The cost matrix, the cost of assigning each row index to column index.
61    private float[][] cost;
62  
63    // Mask of zero cost assignment states.
64    private byte[][] mask;
65  
66    // Covering some rows of the cost matrix.
67    private boolean[] rowsCovered;
68  
69    // Covering some columns of the cost matrix.
70    private boolean[] colsCovered;
71  
72    // The alternating path between starred zeroes and primed zeroes
73    private Deque<Pair<Integer, Integer>> path;
74  
75    // The solution, marking which rows should be assigned to which columns. The
76    // positions of elements in this array correspond to the rows of the cost
77    // matrix, and the value of each element correspond to the columns of the cost
78    // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
79    // column j.
80    private int[] assignments;
81  
82    // Improvements described by Jin Kue Wong cache the least value in each row,
83    // as well as the column index of the least value in each row, and the pending
84    // adjustments to each row and each column.
85    private float[] leastInRow;
86    private int[] leastInRowIndex;
87    private float[] rowAdjust;
88    private float[] colAdjust;
89  
90    /**
91     * Construct a new problem instance with the specified cost matrix. The cost
92     * matrix must be rectangular, though not necessarily square. If one dimension
93     * is greater than the other, some elements in the greater dimension will not
94     * be assigned. The input cost matrix will not be modified.
95     * @param costMatrix
96     */
97    public MunkresAssignment(float[][] costMatrix) {
98      // The algorithm assumes that the number of columns is at least as great as
99      // the number of rows. If this is not the case of the input matrix, then
100     // all internal structures must be transposed relative to the input.
101     this.transposed = costMatrix.length > costMatrix[0].length;
102     if (this.transposed) {
103       this.rows = costMatrix[0].length;
104       this.cols = costMatrix.length;
105     } else {
106       this.rows = costMatrix.length;
107       this.cols = costMatrix[0].length;
108     }
109 
110     cost = new float[rows][cols];
111     mask = new byte[rows][cols];
112     rowsCovered = new boolean[rows];
113     colsCovered = new boolean[cols];
114     path = new LinkedList<Pair<Integer, Integer>>();
115 
116     leastInRow = new float[rows];
117     leastInRowIndex = new int[rows];
118     rowAdjust = new float[rows];
119     colAdjust = new float[cols];
120 
121     assignments = null;
122 
123     // Copy cost matrix.
124     if (transposed) {
125       for (int r = 0; r < rows; r++) {
126         for (int c = 0; c < cols; c++) {
127           cost[r][c] = costMatrix[c][r];
128         }
129       }
130     } else {
131       for (int r = 0; r < rows; r++) {
132         for (int c = 0; c < cols; c++) {
133           cost[r][c] = costMatrix[r][c];
134         }
135       }
136     }
137 
138     // Costs must be finite otherwise the matrix can get into a bad state where
139     // no progress can be made. If your use case depends on a distinction
140     // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
141     for (int r = 0; r < rows; r++) {
142       for (int c = 0; c < cols; c++) {
143         if (cost[r][c] == Float.POSITIVE_INFINITY) {
144           cost[r][c] = Float.MAX_VALUE;
145         }
146       }
147     }
148   }
149 
150   /**
151    * Get the optimal assignments. The returned array will have the same number
152    * of elements as the number of elements as the number of rows in the input
153    * cost matrix. Each element will indicate which column should be assigned to
154    * that row or -1 if no column should be assigned, i.e. if result[i] = j then
155    * row i should be assigned to column j. Subsequent invocations of this method
156    * will simply return the same object without additional computation.
157    * @return an array with the optimal assignments
158    */
159   public int[] solve() {
160     // If this assignment problem has already been solved, return the known
161     // solution
162     if (assignments != null) {
163       return assignments;
164     }
165 
166     preliminaries();
167 
168     // Find the optimal assignments.
169     while (!testIsDone()) {
170       while (!stepOne()) {
171         stepThree();
172       }
173       stepTwo();
174     }
175 
176     // Extract the assignments from the mask matrix.
177     if (transposed) {
178       assignments = new int[cols];
179       outer:
180       for (int c = 0; c < cols; c++) {
181         for (int r = 0; r < rows; r++) {
182           if (mask[r][c] == STAR) {
183             assignments[c] = r;
184             continue outer;
185           }
186         }
187         // There is no assignment for this row of the input/output.
188         assignments[c] = -1;
189       }
190     } else {
191       assignments = new int[rows];
192       outer:
193       for (int r = 0; r < rows; r++) {
194         for (int c = 0; c < cols; c++) {
195           if (mask[r][c] == STAR) {
196             assignments[r] = c;
197             continue outer;
198           }
199         }
200       }
201     }
202 
203     // Once the solution has been computed, there is no need to keep any of the
204     // other internal structures. Clear all unnecessary internal references so
205     // the garbage collector may reclaim that memory.
206     cost = null;
207     mask = null;
208     rowsCovered = null;
209     colsCovered = null;
210     path = null;
211     leastInRow = null;
212     leastInRowIndex = null;
213     rowAdjust = null;
214     colAdjust = null;
215 
216     return assignments;
217   }
218 
219   /**
220    * Corresponds to the "preliminaries" step of the original algorithm.
221    * Guarantees that the matrix is an equivalent non-negative matrix with at
222    * least one zero in each row.
223    */
224   private void preliminaries() {
225     for (int r = 0; r < rows; r++) {
226       // Find the minimum cost of each row.
227       float min = Float.POSITIVE_INFINITY;
228       for (int c = 0; c < cols; c++) {
229         min = Math.min(min, cost[r][c]);
230       }
231 
232       // Subtract that minimum cost from each element in the row.
233       for (int c = 0; c < cols; c++) {
234         cost[r][c] -= min;
235 
236         // If the element is now zero and there are no zeroes in the same row
237         // or column which are already starred, then star this one. There
238         // must be at least one zero because of subtracting the min cost.
239         if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) {
240           mask[r][c] = STAR;
241           // Cover this row and column so that no other zeroes in them can be
242           // starred.
243           rowsCovered[r] = true;
244           colsCovered[c] = true;
245         }
246       }
247     }
248 
249     // Clear the covered rows and columns.
250     Arrays.fill(rowsCovered, false);
251     Arrays.fill(colsCovered, false);
252   }
253 
254   /**
255    * Test whether the algorithm is done, i.e. we have the optimal assignment.
256    * This occurs when there is exactly one starred zero in each row.
257    * @return true if the algorithm is done
258    */
259   private boolean testIsDone() {
260     // Cover all columns containing a starred zero. There can be at most one
261     // starred zero per column. Therefore, a covered column has an optimal
262     // assignment.
263     for (int r = 0; r < rows; r++) {
264       for (int c = 0; c < cols; c++) {
265         if (mask[r][c] == STAR) {
266           colsCovered[c] = true;
267         }
268       }
269     }
270 
271     // Count the total number of covered columns.
272     int coveredCols = 0;
273     for (int c = 0; c < cols; c++) {
274       coveredCols += colsCovered[c] ? 1 : 0;
275     }
276 
277     // Apply an row and column adjustments that are pending.
278     for (int r = 0; r < rows; r++) {
279       for (int c = 0; c < cols; c++) {
280         cost[r][c] += rowAdjust[r];
281         cost[r][c] += colAdjust[c];
282       }
283     }
284 
285     // Clear the pending row and column adjustments.
286     Arrays.fill(rowAdjust, 0);
287     Arrays.fill(colAdjust, 0);
288 
289     // The covers on columns and rows may have been reset, recompute the least
290     // value for each row.
291     for (int r = 0; r < rows; r++) {
292       leastInRow[r] = Float.POSITIVE_INFINITY;
293       for (int c = 0; c < cols; c++) {
294         if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) {
295           leastInRow[r] = cost[r][c];
296           leastInRowIndex[r] = c;
297         }
298       }
299     }
300 
301     // If all columns are covered, then we are done. Since there may be more
302     // columns than rows, we are also done if the number of covered columns is
303     // at least as great as the number of rows.
304     return (coveredCols == cols || coveredCols >= rows);
305   }
306 
307   /**
308    * Corresponds to step 1 of the original algorithm.
309    * @return false if all zeroes are covered
310    */
311   private boolean stepOne() {
312     while (true) {
313       Pair<Integer, Integer> zero = findUncoveredZero();
314       if (zero == null) {
315         // No uncovered zeroes, need to manipulate the cost matrix in step
316         // three.
317         return false;
318       } else {
319         // Prime the uncovered zero and find a starred zero in the same row.
320         mask[zero.getFirst()][zero.getSecond()] = PRIME;
321         Pair<Integer, Integer> star = starInRow(zero.getFirst());
322         if (star != null) {
323           // Cover the row with both the newly primed zero and the starred zero.
324           // Since this is the only place where zeroes are primed, and we cover
325           // it here, and rows are only uncovered when primes are erased, then
326           // there can be at most one primed uncovered zero.
327           rowsCovered[star.getFirst()] = true;
328           colsCovered[star.getSecond()] = false;
329           updateMin(star.getFirst(), star.getSecond());
330         } else {
331           // Will go to step two after, where a path will be constructed,
332           // starting from the uncovered primed zero (there is only one). Since
333           // we have already found it, save it as the first node in the path.
334           path.clear();
335           path.offerLast(new Pair<Integer, Integer>(zero.getFirst(),
336               zero.getSecond()));
337           return true;
338         }
339       }
340     }
341   }
342 
343   /**
344    * Corresponds to step 2 of the original algorithm.
345    */
346   private void stepTwo() {
347     // Construct a path of alternating starred zeroes and primed zeroes, where
348     // each starred zero is in the same column as the previous primed zero, and
349     // each primed zero is in the same row as the previous starred zero. The
350     // path will always end in a primed zero.
351     while (true) {
352       Pair<Integer, Integer> star = starInCol(path.getLast().getSecond());
353       if (star != null) {
354         path.offerLast(star);
355       } else {
356         break;
357       }
358       Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst());
359       path.offerLast(prime);
360     }
361 
362     // Augment path - unmask all starred zeroes and star all primed zeroes. All
363     // nodes in the path will be either starred or primed zeroes. The set of
364     // starred zeroes is independent and now one larger than before.
365     for (Pair<Integer, Integer> p : path) {
366       if (mask[p.getFirst()][p.getSecond()] == STAR) {
367         mask[p.getFirst()][p.getSecond()] = NONE;
368       } else {
369         mask[p.getFirst()][p.getSecond()] = STAR;
370       }
371     }
372 
373     // Clear all covers from rows and columns.
374     Arrays.fill(rowsCovered, false);
375     Arrays.fill(colsCovered, false);
376 
377     // Remove the prime mask from all primed zeroes.
378     for (int r = 0; r < rows; r++) {
379       for (int c = 0; c < cols; c++) {
380         if (mask[r][c] == PRIME) {
381           mask[r][c] = NONE;
382         }
383       }
384     }
385   }
386 
387   /**
388    * Corresponds to step 3 of the original algorithm.
389    */
390   private void stepThree() {
391     // Find the minimum uncovered cost.
392     float min = leastInRow[0];
393     for (int r = 1; r < rows; r++) {
394       if (leastInRow[r] < min) {
395         min = leastInRow[r];
396       }
397     }
398 
399     // Add the minimum cost to each of the costs in a covered row, or subtract
400     // the minimum cost from each of the costs in an uncovered column. As an
401     // optimization, do not actually modify the cost matrix yet, but track the
402     // adjustments that need to be made to each row and column.
403     for (int r = 0; r < rows; r++) {
404       if (rowsCovered[r]) {
405         rowAdjust[r] += min;
406       }
407     }
408     for (int c = 0; c < cols; c++) {
409       if (!colsCovered[c]) {
410         colAdjust[c] -= min;
411       }
412     }
413 
414     // Since the cost matrix is not being updated yet, the minimum uncovered
415     // cost per row must be updated.
416     for (int r = 0; r < rows; r++) {
417       if (!colsCovered[leastInRowIndex[r]]) {
418         // The least value in this row was in an uncovered column, meaning that
419         // it would have had the minimum value subtracted from it, and therefore
420         // will still be the minimum value in that row.
421         leastInRow[r] -= min;
422       } else {
423         // The least value in this row was in a covered column and would not
424         // have had the minimum value subtracted from it, so the minimum value
425         // could be some in another column.
426         for (int c = 0; c < cols; c++) {
427           if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) {
428             leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r];
429             leastInRowIndex[r] = c;
430           }
431         }
432       }
433     }
434   }
435 
436   /**
437    * Find a zero cost assignment which is not covered. If there are no zero cost
438    * assignments which are uncovered, then null will be returned.
439    * @return pair of row and column indices of an uncovered zero or null
440    */
441   private Pair<Integer, Integer> findUncoveredZero() {
442     for (int r = 0; r < rows; r++) {
443       if (leastInRow[r] == 0) {
444         return new Pair<Integer, Integer>(r, leastInRowIndex[r]);
445       }
446     }
447     return null;
448   }
449 
450   /**
451    * A specified row has become covered, and a specified column has become
452    * uncovered. The least value per row may need to be updated.
453    * @param row the index of the row which was just covered
454    * @param col the index of the column which was just uncovered
455    */
456   private void updateMin(int row, int col) {
457     // If the row is covered we want to ignore it as far as least values go.
458     leastInRow[row] = Float.POSITIVE_INFINITY;
459 
460     for (int r = 0; r < rows; r++) {
461       // Since the column has only just been uncovered, it could not have any
462       // pending adjustments. Only covered rows can have pending adjustments
463       // and covered costs do not count toward row minimums. Therefore, we do
464       // not need to consider rowAdjust[r] or colAdjust[col].
465       if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) {
466         leastInRow[r] = cost[r][col];
467         leastInRowIndex[r] = col;
468       }
469     }
470   }
471 
472   /**
473    * Find a starred zero in a specified row. If there are no starred zeroes in
474    * the specified row, then null will be returned.
475    * @param r the index of the row to be searched
476    * @return pair of row and column indices of starred zero or null
477    */
478   private Pair<Integer, Integer> starInRow(int r) {
479     for (int c = 0; c < cols; c++) {
480       if (mask[r][c] == STAR) {
481         return new Pair<Integer, Integer>(r, c);
482       }
483     }
484     return null;
485   }
486 
487   /**
488    * Find a starred zero in the specified column. If there are no starred zeroes
489    * in the specified row, then null will be returned.
490    * @param c the index of the column to be searched
491    * @return pair of row and column indices of starred zero or null
492    */
493   private Pair<Integer, Integer> starInCol(int c) {
494     for (int r = 0; r < rows; r++) {
495       if (mask[r][c] == STAR) {
496         return new Pair<Integer, Integer>(r, c);
497       }
498     }
499     return null;
500   }
501 
502   /**
503    * Find a primed zero in the specified row. If there are no primed zeroes in
504    * the specified row, then null will be returned.
505    * @param r the index of the row to be searched
506    * @return pair of row and column indices of primed zero or null
507    */
508   private Pair<Integer, Integer> primeInRow(int r) {
509     for (int c = 0; c < cols; c++) {
510       if (mask[r][c] == PRIME) {
511         return new Pair<Integer, Integer>(r, c);
512       }
513     }
514     return null;
515   }
516 }