View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotEquals;
27  import static org.junit.Assert.assertNotNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.IOException;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.LinkedList;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.concurrent.CountDownLatch;
40  import java.util.concurrent.ExecutorService;
41  import java.util.concurrent.ScheduledThreadPoolExecutor;
42  import java.util.concurrent.SynchronousQueue;
43  import java.util.concurrent.ThreadPoolExecutor;
44  import java.util.concurrent.TimeUnit;
45  
46  import org.apache.commons.io.IOUtils;
47  import org.apache.commons.logging.Log;
48  import org.apache.commons.logging.LogFactory;
49  import org.apache.hadoop.conf.Configuration;
50  import org.apache.hadoop.fs.FileStatus;
51  import org.apache.hadoop.fs.FileSystem;
52  import org.apache.hadoop.fs.Path;
53  import org.apache.hadoop.hbase.ClusterStatus;
54  import org.apache.hadoop.hbase.HBaseTestingUtility;
55  import org.apache.hadoop.hbase.HColumnDescriptor;
56  import org.apache.hadoop.hbase.HConstants;
57  import org.apache.hadoop.hbase.HRegionInfo;
58  import org.apache.hadoop.hbase.HRegionLocation;
59  import org.apache.hadoop.hbase.HTableDescriptor;
60  import org.apache.hadoop.hbase.LargeTests;
61  import org.apache.hadoop.hbase.MiniHBaseCluster;
62  import org.apache.hadoop.hbase.ServerName;
63  import org.apache.hadoop.hbase.TableName;
64  import org.apache.hadoop.hbase.catalog.MetaEditor;
65  import org.apache.hadoop.hbase.client.Delete;
66  import org.apache.hadoop.hbase.client.Durability;
67  import org.apache.hadoop.hbase.client.Get;
68  import org.apache.hadoop.hbase.client.HBaseAdmin;
69  import org.apache.hadoop.hbase.client.HConnection;
70  import org.apache.hadoop.hbase.client.HConnectionManager;
71  import org.apache.hadoop.hbase.client.HTable;
72  import org.apache.hadoop.hbase.client.MetaScanner;
73  import org.apache.hadoop.hbase.client.Put;
74  import org.apache.hadoop.hbase.client.Result;
75  import org.apache.hadoop.hbase.client.ResultScanner;
76  import org.apache.hadoop.hbase.client.Scan;
77  import org.apache.hadoop.hbase.io.hfile.TestHFile;
78  import org.apache.hadoop.hbase.master.AssignmentManager;
79  import org.apache.hadoop.hbase.master.HMaster;
80  import org.apache.hadoop.hbase.master.RegionStates;
81  import org.apache.hadoop.hbase.master.TableLockManager;
82  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
83  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
84  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
85  import org.apache.hadoop.hbase.regionserver.HRegion;
86  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
87  import org.apache.hadoop.hbase.regionserver.HRegionServer;
88  import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
89  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
90  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
91  import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
92  import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
93  import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
94  import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
95  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
96  import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
97  import org.apache.zookeeper.KeeperException;
98  import org.junit.AfterClass;
99  import org.junit.BeforeClass;
100 import org.junit.Ignore;
101 import org.junit.Test;
102 import org.junit.experimental.categories.Category;
103 import org.junit.rules.TestName;
104 
105 import com.google.common.collect.Multimap;
106 
107 /**
108  * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
109  */
110 @Category(LargeTests.class)
111 public class TestHBaseFsck {
112   final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
113   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
114   private final static Configuration conf = TEST_UTIL.getConfiguration();
115   private final static String FAM_STR = "fam";
116   private final static byte[] FAM = Bytes.toBytes(FAM_STR);
117   private final static int REGION_ONLINE_TIMEOUT = 800;
118   private static RegionStates regionStates;
119   private static ExecutorService executorService;
120 
121   // for the instance, reset every test run
122   private HTable tbl;
123   private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
124     Bytes.toBytes("B"), Bytes.toBytes("C") };
125   // one row per region.
126   private final static byte[][] ROWKEYS= new byte[][] {
127     Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
128     Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
129 
130   @BeforeClass
131   public static void setUpBeforeClass() throws Exception {
132     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
133     TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
134     TEST_UTIL.startMiniCluster(3);
135 
136     executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
137         new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
138 
139     AssignmentManager assignmentManager =
140       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
141     regionStates = assignmentManager.getRegionStates();
142     TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
143   }
144 
145   @AfterClass
146   public static void tearDownAfterClass() throws Exception {
147     TEST_UTIL.shutdownMiniCluster();
148   }
149 
150   @Test
151   public void testHBaseFsck() throws Exception {
152     assertNoErrors(doFsck(conf, false));
153     String table = "tableBadMetaAssign";
154     TEST_UTIL.createTable(Bytes.toBytes(table), FAM);
155 
156     // We created 1 table, should be fine
157     assertNoErrors(doFsck(conf, false));
158 
159     // Now let's mess it up and change the assignment in hbase:meta to
160     // point to a different region server
161     HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
162         executorService);
163     Scan scan = new Scan();
164     scan.setStartRow(Bytes.toBytes(table+",,"));
165     ResultScanner scanner = meta.getScanner(scan);
166     HRegionInfo hri = null;
167 
168     Result res = scanner.next();
169     ServerName currServer =
170       ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
171           HConstants.SERVER_QUALIFIER));
172     long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
173         HConstants.STARTCODE_QUALIFIER));
174 
175     for (JVMClusterUtil.RegionServerThread rs :
176         TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
177 
178       ServerName sn = rs.getRegionServer().getServerName();
179 
180       // When we find a diff RS, change the assignment and break
181       if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
182           startCode != sn.getStartcode()) {
183         Put put = new Put(res.getRow());
184         put.setDurability(Durability.SKIP_WAL);
185         put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
186           Bytes.toBytes(sn.getHostAndPort()));
187         put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
188           Bytes.toBytes(sn.getStartcode()));
189         meta.put(put);
190         hri = HRegionInfo.getHRegionInfo(res);
191         break;
192       }
193     }
194 
195     // Try to fix the data
196     assertErrors(doFsck(conf, true), new ERROR_CODE[]{
197         ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
198 
199     TEST_UTIL.getHBaseCluster().getMaster()
200       .getAssignmentManager().waitForAssignment(hri);
201 
202     // Should be fixed now
203     assertNoErrors(doFsck(conf, false));
204 
205     // comment needed - what is the purpose of this line
206     HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
207     ResultScanner s = t.getScanner(new Scan());
208     s.close();
209     t.close();
210 
211     scanner.close();
212     meta.close();
213   }
214 
215   @Test(timeout=180000)
216   public void testFixAssignmentsWhenMETAinTransition() throws Exception {
217     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
218     HBaseAdmin admin = null;
219     try {
220       admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
221       admin.closeRegion(cluster.getServerHoldingMeta(),
222           HRegionInfo.FIRST_META_REGIONINFO);
223     } finally {
224       if (admin != null) {
225         admin.close();
226       }
227     }
228     regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
229     MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
230     assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
231     HBaseFsck hbck = doFsck(conf, true);
232     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
233         ERROR_CODE.NULL_META_REGION });
234     assertNoErrors(doFsck(conf, false));
235   }
236 
237   /**
238    * Create a new region in META.
239    */
240   private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
241       htd, byte[] startKey, byte[] endKey)
242       throws IOException {
243     HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
244     HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
245     MetaEditor.addRegionToMeta(meta, hri);
246     meta.close();
247     return hri;
248   }
249 
250   /**
251    * Debugging method to dump the contents of meta.
252    */
253   private void dumpMeta(TableName tableName) throws IOException {
254     List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
255     for (byte[] row : metaRows) {
256       LOG.info(Bytes.toString(row));
257     }
258   }
259 
260   /**
261    * This method is used to undeploy a region -- close it and attempt to
262    * remove its state from the Master.
263    */
264   private void undeployRegion(HBaseAdmin admin, ServerName sn,
265       HRegionInfo hri) throws IOException, InterruptedException {
266     try {
267       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
268       if (!hri.isMetaTable()) {
269         admin.offline(hri.getRegionName());
270       }
271     } catch (IOException ioe) {
272       LOG.warn("Got exception when attempting to offline region "
273           + Bytes.toString(hri.getRegionName()), ioe);
274     }
275   }
276   /**
277    * Delete a region from assignments, meta, or completely from hdfs.
278    * @param unassign if true unassign region if assigned
279    * @param metaRow  if true remove region's row from META
280    * @param hdfs if true remove region's dir in HDFS
281    */
282   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
283       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
284       boolean hdfs) throws IOException, InterruptedException {
285     deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
286   }
287 
288   /**
289    * Delete a region from assignments, meta, or completely from hdfs.
290    * @param unassign if true unassign region if assigned
291    * @param metaRow  if true remove region's row from META
292    * @param hdfs if true remove region's dir in HDFS
293    * @param regionInfoOnly if true remove a region dir's .regioninfo file
294    */
295   private void deleteRegion(Configuration conf, final HTableDescriptor htd,
296       byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
297       boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
298     LOG.info("** Before delete:");
299     dumpMeta(htd.getTableName());
300 
301     Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
302     for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
303       HRegionInfo hri = e.getKey();
304       ServerName hsa = e.getValue();
305       if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
306           && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
307 
308         LOG.info("RegionName: " +hri.getRegionNameAsString());
309         byte[] deleteRow = hri.getRegionName();
310 
311         if (unassign) {
312           LOG.info("Undeploying region " + hri + " from server " + hsa);
313           undeployRegion(new HBaseAdmin(conf), hsa, hri);
314         }
315 
316         if (regionInfoOnly) {
317           LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
318           Path rootDir = FSUtils.getRootDir(conf);
319           FileSystem fs = rootDir.getFileSystem(conf);
320           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
321               hri.getEncodedName());
322           Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
323           fs.delete(hriPath, true);
324         }
325 
326         if (hdfs) {
327           LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
328           Path rootDir = FSUtils.getRootDir(conf);
329           FileSystem fs = rootDir.getFileSystem(conf);
330           Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
331               hri.getEncodedName());
332           HBaseFsck.debugLsr(conf, p);
333           boolean success = fs.delete(p, true);
334           LOG.info("Deleted " + p + " sucessfully? " + success);
335           HBaseFsck.debugLsr(conf, p);
336         }
337 
338         if (metaRow) {
339           HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
340           Delete delete = new Delete(deleteRow);
341           meta.delete(delete);
342         }
343       }
344       LOG.info(hri.toString() + hsa.toString());
345     }
346 
347     TEST_UTIL.getMetaTableRows(htd.getTableName());
348     LOG.info("*** After delete:");
349     dumpMeta(htd.getTableName());
350   }
351 
352   /**
353    * Setup a clean table before we start mucking with it.
354    *
355    * @throws IOException
356    * @throws InterruptedException
357    * @throws KeeperException
358    */
359   HTable setupTable(TableName tablename) throws Exception {
360     HTableDescriptor desc = new HTableDescriptor(tablename);
361     HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
362     desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
363     TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
364     tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);
365 
366     List<Put> puts = new ArrayList<Put>();
367     for (byte[] row : ROWKEYS) {
368       Put p = new Put(row);
369       p.add(FAM, Bytes.toBytes("val"), row);
370       puts.add(p);
371     }
372     tbl.put(puts);
373     tbl.flushCommits();
374     return tbl;
375   }
376 
377   /**
378    * Counts the number of row to verify data loss or non-dataloss.
379    */
380   int countRows() throws IOException {
381      Scan s = new Scan();
382      ResultScanner rs = tbl.getScanner(s);
383      int i = 0;
384      while(rs.next() !=null) {
385        i++;
386      }
387      return i;
388   }
389 
390   /**
391    * delete table in preparation for next test
392    *
393    * @param tablename
394    * @throws IOException
395    */
396   void deleteTable(TableName tablename) throws IOException {
397     HBaseAdmin admin = new HBaseAdmin(conf);
398     admin.getConnection().clearRegionCache();
399     if (admin.isTableEnabled(tablename)) {
400       admin.disableTableAsync(tablename);
401     }
402     long totalWait = 0;
403     long maxWait = 30*1000;
404     long sleepTime = 250;
405     while (!admin.isTableDisabled(tablename)) {
406       try {
407         Thread.sleep(sleepTime);
408         totalWait += sleepTime;
409         if (totalWait >= maxWait) {
410           fail("Waited too long for table to be disabled + " + tablename);
411         }
412       } catch (InterruptedException e) {
413         e.printStackTrace();
414         fail("Interrupted when trying to disable table " + tablename);
415       }
416     }
417     admin.deleteTable(tablename);
418   }
419 
420   /**
421    * This creates a clean table and confirms that the table is clean.
422    */
423   @Test
424   public void testHBaseFsckClean() throws Exception {
425     assertNoErrors(doFsck(conf, false));
426     TableName table = TableName.valueOf("tableClean");
427     try {
428       HBaseFsck hbck = doFsck(conf, false);
429       assertNoErrors(hbck);
430 
431       setupTable(table);
432       assertEquals(ROWKEYS.length, countRows());
433 
434       // We created 1 table, should be fine
435       hbck = doFsck(conf, false);
436       assertNoErrors(hbck);
437       assertEquals(0, hbck.getOverlapGroups(table).size());
438       assertEquals(ROWKEYS.length, countRows());
439     } finally {
440       deleteTable(table);
441     }
442   }
443 
444   /**
445    * Test thread pooling in the case where there are more regions than threads
446    */
447   @Test
448   public void testHbckThreadpooling() throws Exception {
449     TableName table =
450         TableName.valueOf("tableDupeStartKey");
451     try {
452       // Create table with 4 regions
453       setupTable(table);
454 
455       // limit number of threads to 1.
456       Configuration newconf = new Configuration(conf);
457       newconf.setInt("hbasefsck.numthreads", 1);
458       assertNoErrors(doFsck(newconf, false));
459 
460       // We should pass without triggering a RejectedExecutionException
461     } finally {
462       deleteTable(table);
463     }
464   }
465 
466   @Test
467   public void testHbckFixOrphanTable() throws Exception {
468     TableName table = TableName.valueOf("tableInfo");
469     FileSystem fs = null;
470     Path tableinfo = null;
471     try {
472       setupTable(table);
473       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
474 
475       Path hbaseTableDir = FSUtils.getTableDir(
476           FSUtils.getRootDir(conf), table);
477       fs = hbaseTableDir.getFileSystem(conf);
478       FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
479       tableinfo = status.getPath();
480       fs.rename(tableinfo, new Path("/.tableinfo"));
481 
482       //to report error if .tableinfo is missing.
483       HBaseFsck hbck = doFsck(conf, false);
484       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
485 
486       // fix OrphanTable with default .tableinfo (htd not yet cached on master)
487       hbck = doFsck(conf, true);
488       assertNoErrors(hbck);
489       status = null;
490       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
491       assertNotNull(status);
492 
493       HTableDescriptor htd = admin.getTableDescriptor(table);
494       htd.setValue("NOT_DEFAULT", "true");
495       admin.disableTable(table);
496       admin.modifyTable(table, htd);
497       admin.enableTable(table);
498       fs.delete(status.getPath(), true);
499 
500       // fix OrphanTable with cache
501       htd = admin.getTableDescriptor(table); // warms up cached htd on master
502       hbck = doFsck(conf, true);
503       assertNoErrors(hbck);
504       status = null;
505       status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
506       assertNotNull(status);
507       htd = admin.getTableDescriptor(table);
508       assertEquals(htd.getValue("NOT_DEFAULT"), "true");
509     } finally {
510       fs.rename(new Path("/.tableinfo"), tableinfo);
511       deleteTable(table);
512     }
513   }
514 
515   /**
516    * This create and fixes a bad table with regions that have a duplicate
517    * start key
518    */
519   @Test
520   public void testDupeStartKey() throws Exception {
521     TableName table =
522         TableName.valueOf("tableDupeStartKey");
523     try {
524       setupTable(table);
525       assertNoErrors(doFsck(conf, false));
526       assertEquals(ROWKEYS.length, countRows());
527 
528       // Now let's mess it up, by adding a region with a duplicate startkey
529       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
530           Bytes.toBytes("A"), Bytes.toBytes("A2"));
531       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
532       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
533           .waitForAssignment(hriDupe);
534       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
535       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
536 
537       HBaseFsck hbck = doFsck(conf, false);
538       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
539             ERROR_CODE.DUPE_STARTKEYS});
540       assertEquals(2, hbck.getOverlapGroups(table).size());
541       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
542 
543       // fix the degenerate region.
544       doFsck(conf,true);
545 
546       // check that the degenerate region is gone and no data loss
547       HBaseFsck hbck2 = doFsck(conf,false);
548       assertNoErrors(hbck2);
549       assertEquals(0, hbck2.getOverlapGroups(table).size());
550       assertEquals(ROWKEYS.length, countRows());
551     } finally {
552       deleteTable(table);
553     }
554   }
555 
556   /**
557    * Get region info from local cluster.
558    */
559   Map<ServerName, List<String>> getDeployedHRIs(
560       final HBaseAdmin admin) throws IOException {
561     ClusterStatus status = admin.getClusterStatus();
562     Collection<ServerName> regionServers = status.getServers();
563     Map<ServerName, List<String>> mm =
564         new HashMap<ServerName, List<String>>();
565     HConnection connection = admin.getConnection();
566     for (ServerName hsi : regionServers) {
567       AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
568 
569       // list all online regions from this region server
570       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
571       List<String> regionNames = new ArrayList<String>();
572       for (HRegionInfo hri : regions) {
573         regionNames.add(hri.getRegionNameAsString());
574       }
575       mm.put(hsi, regionNames);
576     }
577     return mm;
578   }
579 
580   /**
581    * Returns the HSI a region info is on.
582    */
583   ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
584     for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
585       if (e.getValue().contains(hri.getRegionNameAsString())) {
586         return e.getKey();
587       }
588     }
589     return null;
590   }
591 
592   /**
593    * This create and fixes a bad table with regions that have a duplicate
594    * start key
595    */
596   @Test
597   public void testDupeRegion() throws Exception {
598     TableName table =
599         TableName.valueOf("tableDupeRegion");
600     try {
601       setupTable(table);
602       assertNoErrors(doFsck(conf, false));
603       assertEquals(ROWKEYS.length, countRows());
604 
605       // Now let's mess it up, by adding a region with a duplicate startkey
606       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
607           Bytes.toBytes("A"), Bytes.toBytes("B"));
608 
609       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
610       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
611           .waitForAssignment(hriDupe);
612       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
613       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
614 
615       // Yikes! The assignment manager can't tell between diff between two
616       // different regions with the same start/endkeys since it doesn't
617       // differentiate on ts/regionId!  We actually need to recheck
618       // deployments!
619       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
620       while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
621         Thread.sleep(250);
622       }
623 
624       LOG.debug("Finished assignment of dupe region");
625 
626       // TODO why is dupe region different from dupe start keys?
627       HBaseFsck hbck = doFsck(conf, false);
628       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
629             ERROR_CODE.DUPE_STARTKEYS});
630       assertEquals(2, hbck.getOverlapGroups(table).size());
631       assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
632 
633       // fix the degenerate region.
634       doFsck(conf,true);
635 
636       // check that the degenerate region is gone and no data loss
637       HBaseFsck hbck2 = doFsck(conf,false);
638       assertNoErrors(hbck2);
639       assertEquals(0, hbck2.getOverlapGroups(table).size());
640       assertEquals(ROWKEYS.length, countRows());
641     } finally {
642       deleteTable(table);
643     }
644   }
645 
646   /**
647    * This creates and fixes a bad table with regions that has startkey == endkey
648    */
649   @Test
650   public void testDegenerateRegions() throws Exception {
651     TableName table =
652         TableName.valueOf("tableDegenerateRegions");
653     try {
654       setupTable(table);
655       assertNoErrors(doFsck(conf,false));
656       assertEquals(ROWKEYS.length, countRows());
657 
658       // Now let's mess it up, by adding a region with a duplicate startkey
659       HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
660           Bytes.toBytes("B"), Bytes.toBytes("B"));
661       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
662       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
663           .waitForAssignment(hriDupe);
664       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
665       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
666 
667       HBaseFsck hbck = doFsck(conf,false);
668       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
669           ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
670       assertEquals(2, hbck.getOverlapGroups(table).size());
671       assertEquals(ROWKEYS.length, countRows());
672 
673       // fix the degenerate region.
674       doFsck(conf,true);
675 
676       // check that the degenerate region is gone and no data loss
677       HBaseFsck hbck2 = doFsck(conf,false);
678       assertNoErrors(hbck2);
679       assertEquals(0, hbck2.getOverlapGroups(table).size());
680       assertEquals(ROWKEYS.length, countRows());
681     } finally {
682       deleteTable(table);
683     }
684   }
685 
686   /**
687    * This creates and fixes a bad table where a region is completely contained
688    * by another region.
689    */
690   @Test
691   public void testContainedRegionOverlap() throws Exception {
692     TableName table =
693         TableName.valueOf("tableContainedRegionOverlap");
694     try {
695       setupTable(table);
696       assertEquals(ROWKEYS.length, countRows());
697 
698       // Mess it up by creating an overlap in the metadata
699       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
700           Bytes.toBytes("A2"), Bytes.toBytes("B"));
701       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
702       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
703           .waitForAssignment(hriOverlap);
704       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
705       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
706 
707       HBaseFsck hbck = doFsck(conf, false);
708       assertErrors(hbck, new ERROR_CODE[] {
709           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
710       assertEquals(2, hbck.getOverlapGroups(table).size());
711       assertEquals(ROWKEYS.length, countRows());
712 
713       // fix the problem.
714       doFsck(conf, true);
715 
716       // verify that overlaps are fixed
717       HBaseFsck hbck2 = doFsck(conf,false);
718       assertNoErrors(hbck2);
719       assertEquals(0, hbck2.getOverlapGroups(table).size());
720       assertEquals(ROWKEYS.length, countRows());
721     } finally {
722        deleteTable(table);
723     }
724   }
725 
726   /**
727    * This creates and fixes a bad table where an overlap group of
728    * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
729    * region. Mess around the meta data so that closeRegion/offlineRegion
730    * throws exceptions.
731    */
732   @Test
733   public void testSidelineOverlapRegion() throws Exception {
734     TableName table =
735         TableName.valueOf("testSidelineOverlapRegion");
736     try {
737       setupTable(table);
738       assertEquals(ROWKEYS.length, countRows());
739 
740       // Mess it up by creating an overlap
741       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
742       HMaster master = cluster.getMaster();
743       HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
744         Bytes.toBytes("A"), Bytes.toBytes("AB"));
745       master.assignRegion(hriOverlap1);
746       master.getAssignmentManager().waitForAssignment(hriOverlap1);
747       HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
748         Bytes.toBytes("AB"), Bytes.toBytes("B"));
749       master.assignRegion(hriOverlap2);
750       master.getAssignmentManager().waitForAssignment(hriOverlap2);
751 
752       HBaseFsck hbck = doFsck(conf, false);
753       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
754         ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
755       assertEquals(3, hbck.getOverlapGroups(table).size());
756       assertEquals(ROWKEYS.length, countRows());
757 
758       // mess around the overlapped regions, to trigger NotServingRegionException
759       Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
760       ServerName serverName = null;
761       byte[] regionName = null;
762       for (HbckInfo hbi: overlapGroups.values()) {
763         if ("A".equals(Bytes.toString(hbi.getStartKey()))
764             && "B".equals(Bytes.toString(hbi.getEndKey()))) {
765           regionName = hbi.getRegionName();
766 
767           // get an RS not serving the region to force bad assignment info in to META.
768           int k = cluster.getServerWith(regionName);
769           for (int i = 0; i < 3; i++) {
770             if (i != k) {
771               HRegionServer rs = cluster.getRegionServer(i);
772               serverName = rs.getServerName();
773               break;
774             }
775           }
776 
777           HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
778           HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
779             cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
780           admin.offline(regionName);
781           break;
782         }
783       }
784 
785       assertNotNull(regionName);
786       assertNotNull(serverName);
787       HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
788       Put put = new Put(regionName);
789       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
790         Bytes.toBytes(serverName.getHostAndPort()));
791       meta.put(put);
792 
793       // fix the problem.
794       HBaseFsck fsck = new HBaseFsck(conf);
795       fsck.connect();
796       fsck.setDisplayFullReport(); // i.e. -details
797       fsck.setTimeLag(0);
798       fsck.setFixAssignments(true);
799       fsck.setFixMeta(true);
800       fsck.setFixHdfsHoles(true);
801       fsck.setFixHdfsOverlaps(true);
802       fsck.setFixHdfsOrphans(true);
803       fsck.setFixVersionFile(true);
804       fsck.setSidelineBigOverlaps(true);
805       fsck.setMaxMerge(2);
806       fsck.onlineHbck();
807 
808       // verify that overlaps are fixed, and there are less rows
809       // since one region is sidelined.
810       HBaseFsck hbck2 = doFsck(conf,false);
811       assertNoErrors(hbck2);
812       assertEquals(0, hbck2.getOverlapGroups(table).size());
813       assertTrue(ROWKEYS.length > countRows());
814     } finally {
815        deleteTable(table);
816     }
817   }
818 
819   /**
820    * This creates and fixes a bad table where a region is completely contained
821    * by another region, and there is a hole (sort of like a bad split)
822    */
823   @Test
824   public void testOverlapAndOrphan() throws Exception {
825     TableName table =
826         TableName.valueOf("tableOverlapAndOrphan");
827     try {
828       setupTable(table);
829       assertEquals(ROWKEYS.length, countRows());
830 
831       // Mess it up by creating an overlap in the metadata
832       TEST_UTIL.getHBaseAdmin().disableTable(table);
833       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
834           Bytes.toBytes("B"), true, true, false, true);
835       TEST_UTIL.getHBaseAdmin().enableTable(table);
836 
837       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
838           Bytes.toBytes("A2"), Bytes.toBytes("B"));
839       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
840       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
841           .waitForAssignment(hriOverlap);
842       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
843       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
844 
845       HBaseFsck hbck = doFsck(conf, false);
846       assertErrors(hbck, new ERROR_CODE[] {
847           ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
848           ERROR_CODE.HOLE_IN_REGION_CHAIN});
849 
850       // fix the problem.
851       doFsck(conf, true);
852 
853       // verify that overlaps are fixed
854       HBaseFsck hbck2 = doFsck(conf,false);
855       assertNoErrors(hbck2);
856       assertEquals(0, hbck2.getOverlapGroups(table).size());
857       assertEquals(ROWKEYS.length, countRows());
858     } finally {
859        deleteTable(table);
860     }
861   }
862 
863   /**
864    * This creates and fixes a bad table where a region overlaps two regions --
865    * a start key contained in another region and its end key is contained in
866    * yet another region.
867    */
868   @Test
869   public void testCoveredStartKey() throws Exception {
870     TableName table =
871         TableName.valueOf("tableCoveredStartKey");
872     try {
873       setupTable(table);
874       assertEquals(ROWKEYS.length, countRows());
875 
876       // Mess it up by creating an overlap in the metadata
877       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
878           Bytes.toBytes("A2"), Bytes.toBytes("B2"));
879       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
880       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
881           .waitForAssignment(hriOverlap);
882       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
883       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
884 
885       HBaseFsck hbck = doFsck(conf, false);
886       assertErrors(hbck, new ERROR_CODE[] {
887           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
888           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
889       assertEquals(3, hbck.getOverlapGroups(table).size());
890       assertEquals(ROWKEYS.length, countRows());
891 
892       // fix the problem.
893       doFsck(conf, true);
894 
895       // verify that overlaps are fixed
896       HBaseFsck hbck2 = doFsck(conf, false);
897       assertErrors(hbck2, new ERROR_CODE[0]);
898       assertEquals(0, hbck2.getOverlapGroups(table).size());
899       assertEquals(ROWKEYS.length, countRows());
900     } finally {
901       deleteTable(table);
902     }
903   }
904 
905   /**
906    * This creates and fixes a bad table with a missing region -- hole in meta
907    * and data missing in the fs.
908    */
909   @Test
910   public void testRegionHole() throws Exception {
911     TableName table =
912         TableName.valueOf("tableRegionHole");
913     try {
914       setupTable(table);
915       assertEquals(ROWKEYS.length, countRows());
916 
917       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
918       TEST_UTIL.getHBaseAdmin().disableTable(table);
919       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
920           Bytes.toBytes("C"), true, true, true);
921       TEST_UTIL.getHBaseAdmin().enableTable(table);
922 
923       HBaseFsck hbck = doFsck(conf, false);
924       assertErrors(hbck, new ERROR_CODE[] {
925           ERROR_CODE.HOLE_IN_REGION_CHAIN});
926       // holes are separate from overlap groups
927       assertEquals(0, hbck.getOverlapGroups(table).size());
928 
929       // fix hole
930       doFsck(conf, true);
931 
932       // check that hole fixed
933       assertNoErrors(doFsck(conf,false));
934       assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region so lost a row
935     } finally {
936       deleteTable(table);
937     }
938   }
939 
940   /**
941    * This creates and fixes a bad table with a missing region -- hole in meta
942    * and data present but .regioinfino missing (an orphan hdfs region)in the fs.
943    */
944   @Test
945   public void testHDFSRegioninfoMissing() throws Exception {
946     TableName table =
947         TableName.valueOf("tableHDFSRegioininfoMissing");
948     try {
949       setupTable(table);
950       assertEquals(ROWKEYS.length, countRows());
951 
952       // Mess it up by leaving a hole in the meta data
953       TEST_UTIL.getHBaseAdmin().disableTable(table);
954       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
955           Bytes.toBytes("C"), true, true, false, true);
956       TEST_UTIL.getHBaseAdmin().enableTable(table);
957 
958       HBaseFsck hbck = doFsck(conf, false);
959       assertErrors(hbck, new ERROR_CODE[] {
960           ERROR_CODE.ORPHAN_HDFS_REGION,
961           ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
962           ERROR_CODE.HOLE_IN_REGION_CHAIN});
963       // holes are separate from overlap groups
964       assertEquals(0, hbck.getOverlapGroups(table).size());
965 
966       // fix hole
967       doFsck(conf, true);
968 
969       // check that hole fixed
970       assertNoErrors(doFsck(conf, false));
971       assertEquals(ROWKEYS.length, countRows());
972     } finally {
973       deleteTable(table);
974     }
975   }
976 
977   /**
978    * This creates and fixes a bad table with a region that is missing meta and
979    * not assigned to a region server.
980    */
981   @Test
982   public void testNotInMetaOrDeployedHole() throws Exception {
983     TableName table =
984         TableName.valueOf("tableNotInMetaOrDeployedHole");
985     try {
986       setupTable(table);
987       assertEquals(ROWKEYS.length, countRows());
988 
989       // Mess it up by leaving a hole in the meta data
990       TEST_UTIL.getHBaseAdmin().disableTable(table);
991       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
992           Bytes.toBytes("C"), true, true, false); // don't rm from fs
993       TEST_UTIL.getHBaseAdmin().enableTable(table);
994 
995       HBaseFsck hbck = doFsck(conf, false);
996       assertErrors(hbck, new ERROR_CODE[] {
997           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
998       // holes are separate from overlap groups
999       assertEquals(0, hbck.getOverlapGroups(table).size());
1000 
1001       // fix hole
1002       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1003           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1004 
1005       // check that hole fixed
1006       assertNoErrors(doFsck(conf,false));
1007       assertEquals(ROWKEYS.length, countRows());
1008     } finally {
1009       deleteTable(table);
1010     }
1011   }
1012 
1013   /**
1014    * This creates fixes a bad table with a hole in meta.
1015    */
1016   @Test
1017   public void testNotInMetaHole() throws Exception {
1018     TableName table =
1019         TableName.valueOf("tableNotInMetaHole");
1020     try {
1021       setupTable(table);
1022       assertEquals(ROWKEYS.length, countRows());
1023 
1024       // Mess it up by leaving a hole in the meta data
1025       TEST_UTIL.getHBaseAdmin().disableTable(table);
1026       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1027           Bytes.toBytes("C"), false, true, false); // don't rm from fs
1028       TEST_UTIL.getHBaseAdmin().enableTable(table);
1029 
1030       HBaseFsck hbck = doFsck(conf, false);
1031       assertErrors(hbck, new ERROR_CODE[] {
1032           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1033       // holes are separate from overlap groups
1034       assertEquals(0, hbck.getOverlapGroups(table).size());
1035 
1036       // fix hole
1037       assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1038           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1039 
1040       // check that hole fixed
1041       assertNoErrors(doFsck(conf,false));
1042       assertEquals(ROWKEYS.length, countRows());
1043     } finally {
1044       deleteTable(table);
1045     }
1046   }
1047 
1048   /**
1049    * This creates and fixes a bad table with a region that is in meta but has
1050    * no deployment or data hdfs
1051    */
1052   @Test
1053   public void testNotInHdfs() throws Exception {
1054     TableName table =
1055         TableName.valueOf("tableNotInHdfs");
1056     try {
1057       setupTable(table);
1058       assertEquals(ROWKEYS.length, countRows());
1059 
1060       // make sure data in regions, if in hlog only there is no data loss
1061       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1062 
1063       // Mess it up by leaving a hole in the hdfs data
1064       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1065           Bytes.toBytes("C"), false, false, true); // don't rm meta
1066 
1067       HBaseFsck hbck = doFsck(conf, false);
1068       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1069       // holes are separate from overlap groups
1070       assertEquals(0, hbck.getOverlapGroups(table).size());
1071 
1072       // fix hole
1073       doFsck(conf, true);
1074 
1075       // check that hole fixed
1076       assertNoErrors(doFsck(conf,false));
1077       assertEquals(ROWKEYS.length - 2, countRows());
1078     } finally {
1079       deleteTable(table);
1080     }
1081   }
1082 
1083   /**
1084    * This creates entries in hbase:meta with no hdfs data.  This should cleanly
1085    * remove the table.
1086    */
1087   @Test
1088   public void testNoHdfsTable() throws Exception {
1089     TableName table = TableName.valueOf("NoHdfsTable");
1090     setupTable(table);
1091     assertEquals(ROWKEYS.length, countRows());
1092 
1093     // make sure data in regions, if in hlog only there is no data loss
1094     TEST_UTIL.getHBaseAdmin().flush(table.getName());
1095 
1096     // Mess it up by deleting hdfs dirs
1097     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1098         Bytes.toBytes("A"), false, false, true); // don't rm meta
1099     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1100         Bytes.toBytes("B"), false, false, true); // don't rm meta
1101     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1102         Bytes.toBytes("C"), false, false, true); // don't rm meta
1103     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1104         Bytes.toBytes(""), false, false, true); // don't rm meta
1105 
1106     // also remove the table directory in hdfs
1107     deleteTableDir(table);
1108 
1109     HBaseFsck hbck = doFsck(conf, false);
1110     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1111         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1112         ERROR_CODE.NOT_IN_HDFS,});
1113     // holes are separate from overlap groups
1114     assertEquals(0, hbck.getOverlapGroups(table).size());
1115 
1116     // fix hole
1117     doFsck(conf, true); // detect dangling regions and remove those
1118 
1119     // check that hole fixed
1120     assertNoErrors(doFsck(conf,false));
1121     assertFalse("Table "+ table + " should have been deleted",
1122         TEST_UTIL.getHBaseAdmin().tableExists(table));
1123   }
1124 
1125   public void deleteTableDir(TableName table) throws IOException {
1126     Path rootDir = FSUtils.getRootDir(conf);
1127     FileSystem fs = rootDir.getFileSystem(conf);
1128     Path p = FSUtils.getTableDir(rootDir, table);
1129     HBaseFsck.debugLsr(conf, p);
1130     boolean success = fs.delete(p, true);
1131     LOG.info("Deleted " + p + " sucessfully? " + success);
1132   }
1133 
1134   /**
1135    * when the hbase.version file missing, It is fix the fault.
1136    */
1137   @Test
1138   public void testNoVersionFile() throws Exception {
1139     // delete the hbase.version file
1140     Path rootDir = FSUtils.getRootDir(conf);
1141     FileSystem fs = rootDir.getFileSystem(conf);
1142     Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1143     fs.delete(versionFile, true);
1144 
1145     // test
1146     HBaseFsck hbck = doFsck(conf, false);
1147     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1148     // fix hbase.version missing
1149     doFsck(conf, true);
1150 
1151     // no version file fixed
1152     assertNoErrors(doFsck(conf, false));
1153   }
1154 
1155   /**
1156    * The region is not deployed when the table is disabled.
1157    */
1158   @Test
1159   public void testRegionShouldNotBeDeployed() throws Exception {
1160     TableName table =
1161         TableName.valueOf("tableRegionShouldNotBeDeployed");
1162     try {
1163       LOG.info("Starting testRegionShouldNotBeDeployed.");
1164       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1165       assertTrue(cluster.waitForActiveAndReadyMaster());
1166 
1167 
1168       byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1169           Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1170       HTableDescriptor htdDisabled = new HTableDescriptor(table);
1171       htdDisabled.addFamily(new HColumnDescriptor(FAM));
1172 
1173       // Write the .tableinfo
1174       FSTableDescriptors fstd = new FSTableDescriptors(conf);
1175       fstd.createTableDescriptor(htdDisabled);
1176       List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
1177           TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
1178 
1179       // Let's just assign everything to first RS
1180       HRegionServer hrs = cluster.getRegionServer(0);
1181 
1182       // Create region files.
1183       TEST_UTIL.getHBaseAdmin().disableTable(table);
1184       TEST_UTIL.getHBaseAdmin().enableTable(table);
1185 
1186       // Disable the table and close its regions
1187       TEST_UTIL.getHBaseAdmin().disableTable(table);
1188       HRegionInfo region = disabledRegions.remove(0);
1189       byte[] regionName = region.getRegionName();
1190 
1191       // The region should not be assigned currently
1192       assertTrue(cluster.getServerWith(regionName) == -1);
1193 
1194       // Directly open a region on a region server.
1195       // If going through AM/ZK, the region won't be open.
1196       // Even it is opened, AM will close it which causes
1197       // flakiness of this test.
1198       HRegion r = HRegion.openHRegion(
1199         region, htdDisabled, hrs.getWAL(region), conf);
1200       hrs.addToOnlineRegions(r);
1201 
1202       HBaseFsck hbck = doFsck(conf, false);
1203       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1204 
1205       // fix this fault
1206       doFsck(conf, true);
1207 
1208       // check result
1209       assertNoErrors(doFsck(conf, false));
1210     } finally {
1211       TEST_UTIL.getHBaseAdmin().enableTable(table);
1212       deleteTable(table);
1213     }
1214   }
1215 
1216   /**
1217    * This creates two tables and mess both of them and fix them one by one
1218    */
1219   @Test
1220   public void testFixByTable() throws Exception {
1221     TableName table1 =
1222         TableName.valueOf("testFixByTable1");
1223     TableName table2 =
1224         TableName.valueOf("testFixByTable2");
1225     try {
1226       setupTable(table1);
1227       // make sure data in regions, if in hlog only there is no data loss
1228       TEST_UTIL.getHBaseAdmin().flush(table1.getName());
1229       // Mess them up by leaving a hole in the hdfs data
1230       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1231         Bytes.toBytes("C"), false, false, true); // don't rm meta
1232 
1233       setupTable(table2);
1234       // make sure data in regions, if in hlog only there is no data loss
1235       TEST_UTIL.getHBaseAdmin().flush(table2.getName());
1236       // Mess them up by leaving a hole in the hdfs data
1237       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1238         Bytes.toBytes("C"), false, false, true); // don't rm meta
1239 
1240       HBaseFsck hbck = doFsck(conf, false);
1241       assertErrors(hbck, new ERROR_CODE[] {
1242         ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1243 
1244       // fix hole in table 1
1245       doFsck(conf, true, table1);
1246       // check that hole in table 1 fixed
1247       assertNoErrors(doFsck(conf, false, table1));
1248       // check that hole in table 2 still there
1249       assertErrors(doFsck(conf, false, table2),
1250         new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1251 
1252       // fix hole in table 2
1253       doFsck(conf, true, table2);
1254       // check that hole in both tables fixed
1255       assertNoErrors(doFsck(conf, false));
1256       assertEquals(ROWKEYS.length - 2, countRows());
1257     } finally {
1258       deleteTable(table1);
1259       deleteTable(table2);
1260     }
1261   }
1262   /**
1263    * A split parent in meta, in hdfs, and not deployed
1264    */
1265   @Test
1266   public void testLingeringSplitParent() throws Exception {
1267     TableName table =
1268         TableName.valueOf("testLingeringSplitParent");
1269     HTable meta = null;
1270     try {
1271       setupTable(table);
1272       assertEquals(ROWKEYS.length, countRows());
1273 
1274       // make sure data in regions, if in hlog only there is no data loss
1275       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1276       HRegionLocation location = tbl.getRegionLocation("B");
1277 
1278       // Delete one region from meta, but not hdfs, unassign it.
1279       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1280         Bytes.toBytes("C"), true, true, false);
1281 
1282       // Create a new meta entry to fake it as a split parent.
1283       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
1284           executorService);
1285       HRegionInfo hri = location.getRegionInfo();
1286 
1287       HRegionInfo a = new HRegionInfo(tbl.getName(),
1288         Bytes.toBytes("B"), Bytes.toBytes("BM"));
1289       HRegionInfo b = new HRegionInfo(tbl.getName(),
1290         Bytes.toBytes("BM"), Bytes.toBytes("C"));
1291 
1292       hri.setOffline(true);
1293       hri.setSplit(true);
1294 
1295       MetaEditor.addRegionToMeta(meta, hri, a, b);
1296       meta.flushCommits();
1297       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1298 
1299       HBaseFsck hbck = doFsck(conf, false);
1300       assertErrors(hbck, new ERROR_CODE[] {
1301         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1302 
1303       // regular repair cannot fix lingering split parent
1304       hbck = doFsck(conf, true);
1305       assertErrors(hbck, new ERROR_CODE[] {
1306         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1307       assertFalse(hbck.shouldRerun());
1308       hbck = doFsck(conf, false);
1309       assertErrors(hbck, new ERROR_CODE[] {
1310         ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1311 
1312       // fix lingering split parent
1313       hbck = new HBaseFsck(conf);
1314       hbck.connect();
1315       hbck.setDisplayFullReport(); // i.e. -details
1316       hbck.setTimeLag(0);
1317       hbck.setFixSplitParents(true);
1318       hbck.onlineHbck();
1319       assertTrue(hbck.shouldRerun());
1320 
1321       Get get = new Get(hri.getRegionName());
1322       Result result = meta.get(get);
1323       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1324         HConstants.SPLITA_QUALIFIER).isEmpty());
1325       assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1326         HConstants.SPLITB_QUALIFIER).isEmpty());
1327       TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName());
1328 
1329       // fix other issues
1330       doFsck(conf, true);
1331 
1332       // check that all are fixed
1333       assertNoErrors(doFsck(conf, false));
1334       assertEquals(ROWKEYS.length, countRows());
1335     } finally {
1336       deleteTable(table);
1337       IOUtils.closeQuietly(meta);
1338     }
1339   }
1340 
1341   /**
1342    * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
1343    * valid cases where the daughters are there.
1344    */
1345   @Test
1346   public void testValidLingeringSplitParent() throws Exception {
1347     TableName table =
1348         TableName.valueOf("testLingeringSplitParent");
1349     HTable meta = null;
1350     try {
1351       setupTable(table);
1352       assertEquals(ROWKEYS.length, countRows());
1353 
1354       // make sure data in regions, if in hlog only there is no data loss
1355       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1356       HRegionLocation location = tbl.getRegionLocation("B");
1357 
1358       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1359       HRegionInfo hri = location.getRegionInfo();
1360 
1361       // do a regular split
1362       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1363       byte[] regionName = location.getRegionInfo().getRegionName();
1364       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1365       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1366           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1367 
1368       // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
1369       // for some time until children references are deleted. HBCK erroneously sees this as
1370       // overlapping regions
1371       HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
1372       assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
1373 
1374       // assert that the split hbase:meta entry is still there.
1375       Get get = new Get(hri.getRegionName());
1376       Result result = meta.get(get);
1377       assertNotNull(result);
1378       assertNotNull(HRegionInfo.getHRegionInfo(result));
1379 
1380       assertEquals(ROWKEYS.length, countRows());
1381 
1382       // assert that we still have the split regions
1383       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1384       assertNoErrors(doFsck(conf, false));
1385     } finally {
1386       deleteTable(table);
1387       IOUtils.closeQuietly(meta);
1388     }
1389   }
1390 
1391   /**
1392    * Split crashed after write to hbase:meta finished for the parent region, but
1393    * failed to write daughters (pre HBASE-7721 codebase)
1394    */
1395   @Test(timeout=75000)
1396   public void testSplitDaughtersNotInMeta() throws Exception {
1397     TableName table =
1398         TableName.valueOf("testSplitdaughtersNotInMeta");
1399     HTable meta = null;
1400     try {
1401       setupTable(table);
1402       assertEquals(ROWKEYS.length, countRows());
1403 
1404       // make sure data in regions, if in hlog only there is no data loss
1405       TEST_UTIL.getHBaseAdmin().flush(table.getName());
1406       HRegionLocation location = tbl.getRegionLocation("B");
1407 
1408       meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1409       HRegionInfo hri = location.getRegionInfo();
1410 
1411       // do a regular split
1412       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1413       byte[] regionName = location.getRegionInfo().getRegionName();
1414       admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1415       TestEndToEndSplitTransaction.blockUntilRegionSplit(
1416           TEST_UTIL.getConfiguration(), 60000, regionName, true);
1417 
1418       PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
1419 
1420       // Delete daughter regions from meta, but not hdfs, unassign it.
1421       Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1422       undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
1423       undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());
1424 
1425       meta.delete(new Delete(daughters.getFirst().getRegionName()));
1426       meta.delete(new Delete(daughters.getSecond().getRegionName()));
1427       meta.flushCommits();
1428 
1429       HBaseFsck hbck = doFsck(conf, false);
1430       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1431           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT
1432 
1433       // now fix it. The fix should not revert the region split, but add daughters to META
1434       hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
1435       assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1436           ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1437 
1438       // assert that the split hbase:meta entry is still there.
1439       Get get = new Get(hri.getRegionName());
1440       Result result = meta.get(get);
1441       assertNotNull(result);
1442       assertNotNull(HRegionInfo.getHRegionInfo(result));
1443 
1444       assertEquals(ROWKEYS.length, countRows());
1445 
1446       // assert that we still have the split regions
1447       assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split.
1448       assertNoErrors(doFsck(conf, false)); //should be fixed by now
1449     } finally {
1450       deleteTable(table);
1451       IOUtils.closeQuietly(meta);
1452     }
1453   }
1454 
1455   /**
1456    * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1457    * meta and data missing in the fs.
1458    */
1459   @Test(timeout=120000)
1460   public void testMissingFirstRegion() throws Exception {
1461     TableName table =
1462         TableName.valueOf("testMissingFirstRegion");
1463     try {
1464       setupTable(table);
1465       assertEquals(ROWKEYS.length, countRows());
1466 
1467       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1468       TEST_UTIL.getHBaseAdmin().disableTable(table);
1469       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1470           true, true);
1471       TEST_UTIL.getHBaseAdmin().enableTable(table);
1472 
1473       HBaseFsck hbck = doFsck(conf, false);
1474       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1475       // fix hole
1476       doFsck(conf, true);
1477       // check that hole fixed
1478       assertNoErrors(doFsck(conf, false));
1479     } finally {
1480       deleteTable(table);
1481     }
1482   }
1483 
1484   /**
1485    * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
1486    * the fs.
1487    */
1488   @Test(timeout=120000)
1489   public void testMissingLastRegion() throws Exception {
1490     TableName table =
1491         TableName.valueOf("testMissingLastRegion");
1492     try {
1493       setupTable(table);
1494       assertEquals(ROWKEYS.length, countRows());
1495 
1496       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1497       TEST_UTIL.getHBaseAdmin().disableTable(table);
1498       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1499           true, true);
1500       TEST_UTIL.getHBaseAdmin().enableTable(table);
1501 
1502       HBaseFsck hbck = doFsck(conf, false);
1503       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1504       // fix hole
1505       doFsck(conf, true);
1506       // check that hole fixed
1507       assertNoErrors(doFsck(conf, false));
1508     } finally {
1509       deleteTable(table);
1510     }
1511   }
1512 
1513   /**
1514    * Test -noHdfsChecking option can detect and fix assignments issue.
1515    */
1516   @Test
1517   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1518     TableName table =
1519         TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1520     try {
1521       setupTable(table);
1522       assertEquals(ROWKEYS.length, countRows());
1523 
1524       // Mess it up by closing a region
1525       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1526         Bytes.toBytes("B"), true, false, false, false);
1527 
1528       // verify there is no other errors
1529       HBaseFsck hbck = doFsck(conf, false);
1530       assertErrors(hbck, new ERROR_CODE[] {
1531         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1532 
1533       // verify that noHdfsChecking report the same errors
1534       HBaseFsck fsck = new HBaseFsck(conf);
1535       fsck.connect();
1536       fsck.setDisplayFullReport(); // i.e. -details
1537       fsck.setTimeLag(0);
1538       fsck.setCheckHdfs(false);
1539       fsck.onlineHbck();
1540       assertErrors(fsck, new ERROR_CODE[] {
1541         ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1542 
1543       // verify that fixAssignments works fine with noHdfsChecking
1544       fsck = new HBaseFsck(conf);
1545       fsck.connect();
1546       fsck.setDisplayFullReport(); // i.e. -details
1547       fsck.setTimeLag(0);
1548       fsck.setCheckHdfs(false);
1549       fsck.setFixAssignments(true);
1550       fsck.onlineHbck();
1551       assertTrue(fsck.shouldRerun());
1552       fsck.onlineHbck();
1553       assertNoErrors(fsck);
1554 
1555       assertEquals(ROWKEYS.length, countRows());
1556     } finally {
1557       deleteTable(table);
1558     }
1559   }
1560 
1561   /**
1562    * Test -noHdfsChecking option can detect region is not in meta but deployed.
1563    * However, it can not fix it without checking Hdfs because we need to get
1564    * the region info from Hdfs in this case, then to patch the meta.
1565    */
1566   @Test
1567   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1568     TableName table =
1569         TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1570     try {
1571       setupTable(table);
1572       assertEquals(ROWKEYS.length, countRows());
1573 
1574       // Mess it up by deleting a region from the metadata
1575       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1576         Bytes.toBytes("B"), false, true, false, false);
1577 
1578       // verify there is no other errors
1579       HBaseFsck hbck = doFsck(conf, false);
1580       assertErrors(hbck, new ERROR_CODE[] {
1581         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1582 
1583       // verify that noHdfsChecking report the same errors
1584       HBaseFsck fsck = new HBaseFsck(conf);
1585       fsck.connect();
1586       fsck.setDisplayFullReport(); // i.e. -details
1587       fsck.setTimeLag(0);
1588       fsck.setCheckHdfs(false);
1589       fsck.onlineHbck();
1590       assertErrors(fsck, new ERROR_CODE[] {
1591         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1592 
1593       // verify that fixMeta doesn't work with noHdfsChecking
1594       fsck = new HBaseFsck(conf);
1595       fsck.connect();
1596       fsck.setDisplayFullReport(); // i.e. -details
1597       fsck.setTimeLag(0);
1598       fsck.setCheckHdfs(false);
1599       fsck.setFixAssignments(true);
1600       fsck.setFixMeta(true);
1601       fsck.onlineHbck();
1602       assertFalse(fsck.shouldRerun());
1603       assertErrors(fsck, new ERROR_CODE[] {
1604         ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1605     } finally {
1606       deleteTable(table);
1607     }
1608   }
1609 
1610   /**
1611    * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1612    * and -noHdfsChecking can't detect orphan Hdfs region.
1613    */
1614   @Test
1615   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1616     TableName table =
1617         TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1618     try {
1619       setupTable(table);
1620       assertEquals(ROWKEYS.length, countRows());
1621 
1622       // Mess it up by creating an overlap in the metadata
1623       TEST_UTIL.getHBaseAdmin().disableTable(table);
1624       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1625         Bytes.toBytes("B"), true, true, false, true);
1626       TEST_UTIL.getHBaseAdmin().enableTable(table);
1627 
1628       HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
1629         Bytes.toBytes("A2"), Bytes.toBytes("B"));
1630       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1631       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1632         .waitForAssignment(hriOverlap);
1633       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1634       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1635 
1636       HBaseFsck hbck = doFsck(conf, false);
1637       assertErrors(hbck, new ERROR_CODE[] {
1638         ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1639         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1640 
1641       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1642       HBaseFsck fsck = new HBaseFsck(conf);
1643       fsck.connect();
1644       fsck.setDisplayFullReport(); // i.e. -details
1645       fsck.setTimeLag(0);
1646       fsck.setCheckHdfs(false);
1647       fsck.onlineHbck();
1648       assertErrors(fsck, new ERROR_CODE[] {
1649         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1650 
1651       // verify that fixHdfsHoles doesn't work with noHdfsChecking
1652       fsck = new HBaseFsck(conf);
1653       fsck.connect();
1654       fsck.setDisplayFullReport(); // i.e. -details
1655       fsck.setTimeLag(0);
1656       fsck.setCheckHdfs(false);
1657       fsck.setFixHdfsHoles(true);
1658       fsck.setFixHdfsOverlaps(true);
1659       fsck.setFixHdfsOrphans(true);
1660       fsck.onlineHbck();
1661       assertFalse(fsck.shouldRerun());
1662       assertErrors(fsck, new ERROR_CODE[] {
1663         ERROR_CODE.HOLE_IN_REGION_CHAIN});
1664     } finally {
1665       if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
1666         TEST_UTIL.getHBaseAdmin().enableTable(table);
1667       }
1668       deleteTable(table);
1669     }
1670   }
1671 
1672   /**
1673    * We don't have an easy way to verify that a flush completed, so we loop until we find a
1674    * legitimate hfile and return it.
1675    * @param fs
1676    * @param table
1677    * @return Path of a flushed hfile.
1678    * @throws IOException
1679    */
1680   Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
1681     Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1682     Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1683     Path famDir = new Path(regionDir, FAM_STR);
1684 
1685     // keep doing this until we get a legit hfile
1686     while (true) {
1687       FileStatus[] hfFss = fs.listStatus(famDir);
1688       if (hfFss.length == 0) {
1689         continue;
1690       }
1691       for (FileStatus hfs : hfFss) {
1692         if (!hfs.isDir()) {
1693           return hfs.getPath();
1694         }
1695       }
1696     }
1697   }
1698 
1699   /**
1700    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
1701    */
1702   @Test(timeout=180000)
1703   public void testQuarantineCorruptHFile() throws Exception {
1704     TableName table = TableName.valueOf(name.getMethodName());
1705     try {
1706       setupTable(table);
1707       assertEquals(ROWKEYS.length, countRows());
1708       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1709 
1710       FileSystem fs = FileSystem.get(conf);
1711       Path hfile = getFlushedHFile(fs, table);
1712 
1713       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1714       TEST_UTIL.getHBaseAdmin().disableTable(table);
1715 
1716       // create new corrupt file called deadbeef (valid hfile name)
1717       Path corrupt = new Path(hfile.getParent(), "deadbeef");
1718       TestHFile.truncateFile(fs, hfile, corrupt);
1719       LOG.info("Created corrupted file " + corrupt);
1720       HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1721 
1722       // we cannot enable here because enable never finished due to the corrupt region.
1723       HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
1724       assertEquals(res.getRetCode(), 0);
1725       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1726       assertEquals(hfcc.getHFilesChecked(), 5);
1727       assertEquals(hfcc.getCorrupted().size(), 1);
1728       assertEquals(hfcc.getFailures().size(), 0);
1729       assertEquals(hfcc.getQuarantined().size(), 1);
1730       assertEquals(hfcc.getMissing().size(), 0);
1731 
1732       // Its been fixed, verify that we can enable.
1733       TEST_UTIL.getHBaseAdmin().enableTable(table);
1734     } finally {
1735       deleteTable(table);
1736     }
1737   }
1738 
1739   /**
1740   * Test that use this should have a timeout, because this method could potentially wait forever.
1741   */
1742   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
1743                                 int corrupt, int fail, int quar, int missing) throws Exception {
1744     try {
1745       setupTable(table);
1746       assertEquals(ROWKEYS.length, countRows());
1747       TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.
1748 
1749       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1750       TEST_UTIL.getHBaseAdmin().disableTable(table);
1751 
1752       String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
1753           table.getNameAsString()};
1754       ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1755       HBaseFsck res = hbck.exec(exec, args);
1756 
1757       HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1758       assertEquals(hfcc.getHFilesChecked(), check);
1759       assertEquals(hfcc.getCorrupted().size(), corrupt);
1760       assertEquals(hfcc.getFailures().size(), fail);
1761       assertEquals(hfcc.getQuarantined().size(), quar);
1762       assertEquals(hfcc.getMissing().size(), missing);
1763 
1764       // its been fixed, verify that we can enable
1765       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
1766       admin.enableTableAsync(table);
1767       while (!admin.isTableEnabled(table)) {
1768         try {
1769           Thread.sleep(250);
1770         } catch (InterruptedException e) {
1771           e.printStackTrace();
1772           fail("Interrupted when trying to enable table " + table);
1773         }
1774       }
1775     } finally {
1776       deleteTable(table);
1777     }
1778   }
1779 
1780   /**
1781    * This creates a table and simulates the race situation where a concurrent compaction or split
1782    * has removed an hfile after the corruption checker learned about it.
1783    */
1784   @Test(timeout=180000)
1785   public void testQuarantineMissingHFile() throws Exception {
1786     TableName table = TableName.valueOf(name.getMethodName());
1787     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1788     // inject a fault in the hfcc created.
1789     final FileSystem fs = FileSystem.get(conf);
1790     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1791       @Override
1792       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1793         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1794           boolean attemptedFirstHFile = false;
1795           @Override
1796           protected void checkHFile(Path p) throws IOException {
1797             if (!attemptedFirstHFile) {
1798               attemptedFirstHFile = true;
1799               assertTrue(fs.delete(p, true)); // make sure delete happened.
1800             }
1801             super.checkHFile(p);
1802           }
1803         };
1804       }
1805     };
1806     doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1807   }
1808 
1809   /**
1810    * This creates a table and simulates the race situation where a concurrent compaction or split
1811    * has removed an colfam dir before the corruption checker got to it.
1812    */
1813   // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
1814   // files in a column family on initial creation -- as suggested by Matteo.
1815   @Ignore @Test(timeout=180000)
1816   public void testQuarantineMissingFamdir() throws Exception {
1817     TableName table = TableName.valueOf(name.getMethodName());
1818     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1819     // inject a fault in the hfcc created.
1820     final FileSystem fs = FileSystem.get(conf);
1821     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1822       @Override
1823       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1824         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1825           boolean attemptedFirstFamDir = false;
1826           @Override
1827           protected void checkColFamDir(Path p) throws IOException {
1828             if (!attemptedFirstFamDir) {
1829               attemptedFirstFamDir = true;
1830               assertTrue(fs.delete(p, true)); // make sure delete happened.
1831             }
1832             super.checkColFamDir(p);
1833           }
1834         };
1835       }
1836     };
1837     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1838   }
1839 
1840   /**
1841    * This creates a table and simulates the race situation where a concurrent compaction or split
1842    * has removed a region dir before the corruption checker got to it.
1843    */
1844   @Test(timeout=180000)
1845   public void testQuarantineMissingRegionDir() throws Exception {
1846     TableName table = TableName.valueOf(name.getMethodName());
1847     ExecutorService exec = new ScheduledThreadPoolExecutor(10);
1848     // inject a fault in the hfcc created.
1849     final FileSystem fs = FileSystem.get(conf);
1850     HBaseFsck hbck = new HBaseFsck(conf, exec) {
1851       @Override
1852       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
1853         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1854           boolean attemptedFirstRegionDir = false;
1855           @Override
1856           protected void checkRegionDir(Path p) throws IOException {
1857             if (!attemptedFirstRegionDir) {
1858               attemptedFirstRegionDir = true;
1859               assertTrue(fs.delete(p, true)); // make sure delete happened.
1860             }
1861             super.checkRegionDir(p);
1862           }
1863         };
1864       }
1865     };
1866     doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
1867   }
1868 
1869   /**
1870    * Test fixing lingering reference file.
1871    */
1872   @Test
1873   public void testLingeringReferenceFile() throws Exception {
1874     TableName table =
1875         TableName.valueOf("testLingeringReferenceFile");
1876     try {
1877       setupTable(table);
1878       assertEquals(ROWKEYS.length, countRows());
1879 
1880       // Mess it up by creating a fake reference file
1881       FileSystem fs = FileSystem.get(conf);
1882       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
1883       Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
1884       Path famDir = new Path(regionDir, FAM_STR);
1885       Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
1886       fs.create(fakeReferenceFile);
1887 
1888       HBaseFsck hbck = doFsck(conf, false);
1889       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
1890       // fix reference file
1891       doFsck(conf, true);
1892       // check that reference file fixed
1893       assertNoErrors(doFsck(conf, false));
1894     } finally {
1895       deleteTable(table);
1896     }
1897   }
1898 
1899   /**
1900    * Test mission REGIONINFO_QUALIFIER in hbase:meta
1901    */
1902   @Test
1903   public void testMissingRegionInfoQualifier() throws Exception {
1904     TableName table =
1905         TableName.valueOf("testMissingRegionInfoQualifier");
1906     try {
1907       setupTable(table);
1908 
1909       // Mess it up by removing the RegionInfo for one region.
1910       final List<Delete> deletes = new LinkedList<Delete>();
1911       HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
1912       MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {
1913 
1914         @Override
1915         public boolean processRow(Result rowResult) throws IOException {
1916           HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
1917           if (hri != null && !hri.getTable().isSystemTable()) {
1918             Delete delete = new Delete(rowResult.getRow());
1919             delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1920             deletes.add(delete);
1921           }
1922           return true;
1923         }
1924 
1925         @Override
1926         public void close() throws IOException {
1927         }
1928       });
1929       meta.delete(deletes);
1930 
1931       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1932       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1933         HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
1934       meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
1935         HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
1936       meta.close();
1937 
1938       HBaseFsck hbck = doFsck(conf, false);
1939       assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1940 
1941       // fix reference file
1942       hbck = doFsck(conf, true);
1943 
1944       // check that reference file fixed
1945       assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
1946     } finally {
1947       deleteTable(table);
1948     }
1949   }
1950 
1951 
1952   /**
1953    * Test pluggable error reporter. It can be plugged in
1954    * from system property or configuration.
1955    */
1956   @Test
1957   public void testErrorReporter() throws Exception {
1958     try {
1959       MockErrorReporter.calledCount = 0;
1960       doFsck(conf, false);
1961       assertEquals(MockErrorReporter.calledCount, 0);
1962 
1963       conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
1964       doFsck(conf, false);
1965       assertTrue(MockErrorReporter.calledCount > 20);
1966     } finally {
1967       conf.set("hbasefsck.errorreporter",
1968         PrintingErrorReporter.class.getName());
1969       MockErrorReporter.calledCount = 0;
1970     }
1971   }
1972 
1973   static class MockErrorReporter implements ErrorReporter {
1974     static int calledCount = 0;
1975 
1976     @Override
1977     public void clear() {
1978       calledCount++;
1979     }
1980 
1981     @Override
1982     public void report(String message) {
1983       calledCount++;
1984     }
1985 
1986     @Override
1987     public void reportError(String message) {
1988       calledCount++;
1989     }
1990 
1991     @Override
1992     public void reportError(ERROR_CODE errorCode, String message) {
1993       calledCount++;
1994     }
1995 
1996     @Override
1997     public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
1998       calledCount++;
1999     }
2000 
2001     @Override
2002     public void reportError(ERROR_CODE errorCode,
2003         String message, TableInfo table, HbckInfo info) {
2004       calledCount++;
2005     }
2006 
2007     @Override
2008     public void reportError(ERROR_CODE errorCode, String message,
2009         TableInfo table, HbckInfo info1, HbckInfo info2) {
2010       calledCount++;
2011     }
2012 
2013     @Override
2014     public int summarize() {
2015       return ++calledCount;
2016     }
2017 
2018     @Override
2019     public void detail(String details) {
2020       calledCount++;
2021     }
2022 
2023     @Override
2024     public ArrayList<ERROR_CODE> getErrorList() {
2025       calledCount++;
2026       return new ArrayList<ERROR_CODE>();
2027     }
2028 
2029     @Override
2030     public void progress() {
2031       calledCount++;
2032     }
2033 
2034     @Override
2035     public void print(String message) {
2036       calledCount++;
2037     }
2038 
2039     @Override
2040     public void resetErrors() {
2041       calledCount++;
2042     }
2043 
2044     @Override
2045     public boolean tableHasErrors(TableInfo table) {
2046       calledCount++;
2047       return false;
2048     }
2049   }
2050 
2051   @Test(timeout=60000)
2052   public void testCheckTableLocks() throws Exception {
2053     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2054     EnvironmentEdgeManager.injectEdge(edge);
2055     // check no errors
2056     HBaseFsck hbck = doFsck(conf, false);
2057     assertNoErrors(hbck);
2058 
2059     ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2060 
2061     // obtain one lock
2062     final TableLockManager tableLockManager = TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2063     TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2064         "testCheckTableLocks");
2065     writeLock.acquire();
2066     hbck = doFsck(conf, false);
2067     assertNoErrors(hbck); // should not have expired, no problems
2068 
2069     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2070         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2071 
2072     hbck = doFsck(conf, false);
2073     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2074 
2075     final CountDownLatch latch = new CountDownLatch(1);
2076     new Thread() {
2077       @Override
2078       public void run() {
2079         TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2080             "testCheckTableLocks");
2081         try {
2082           latch.countDown();
2083           readLock.acquire();
2084         } catch (IOException ex) {
2085           fail();
2086         } catch (IllegalStateException ex) {
2087           return; // expected, since this will be reaped under us.
2088         }
2089         fail("should not have come here");
2090       };
2091     }.start();
2092 
2093     latch.await(); // wait until thread starts
2094     Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
2095 
2096     hbck = doFsck(conf, false);
2097     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
2098 
2099     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2100         TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
2101 
2102     hbck = doFsck(conf, false);
2103     assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
2104 
2105     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); // reaping from ZKInterProcessWriteLock uses znode cTime,
2106                                                                  // which is not injectable through EnvironmentEdge
2107     Threads.sleep(10);
2108     hbck = doFsck(conf, true); // now fix both cases
2109 
2110     hbck = doFsck(conf, false);
2111     assertNoErrors(hbck);
2112 
2113     // ensure that locks are deleted
2114     writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
2115         "should acquire without blocking");
2116     writeLock.acquire(); // this should not block.
2117     writeLock.release(); // release for clean state
2118   }
2119 
2120   @Test
2121   public void testMetaOffline() throws Exception {
2122     // check no errors
2123     HBaseFsck hbck = doFsck(conf, false);
2124     assertNoErrors(hbck);
2125     deleteMetaRegion(conf, true, false, false);
2126     hbck = doFsck(conf, false);
2127     // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
2128     // inconsistency and whether we will be fixing it or not.
2129     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2130     hbck = doFsck(conf, true);
2131     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2132     hbck = doFsck(conf, false);
2133     assertNoErrors(hbck);
2134   }
2135 
2136   private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2137       boolean regionInfoOnly) throws IOException, InterruptedException {
2138     HConnection connection = HConnectionManager.getConnection(conf);
2139     HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
2140         HConstants.EMPTY_START_ROW);
2141     ServerName hsa = metaLocation.getServerName();
2142     HRegionInfo hri = metaLocation.getRegionInfo();
2143     if (unassign) {
2144       LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2145       undeployRegion(new HBaseAdmin(conf), hsa, hri);
2146     }
2147 
2148     if (regionInfoOnly) {
2149       LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2150       Path rootDir = FSUtils.getRootDir(conf);
2151       FileSystem fs = rootDir.getFileSystem(conf);
2152       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2153           hri.getEncodedName());
2154       Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2155       fs.delete(hriPath, true);
2156     }
2157 
2158     if (hdfs) {
2159       LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2160       Path rootDir = FSUtils.getRootDir(conf);
2161       FileSystem fs = rootDir.getFileSystem(conf);
2162       Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
2163           hri.getEncodedName());
2164       HBaseFsck.debugLsr(conf, p);
2165       boolean success = fs.delete(p, true);
2166       LOG.info("Deleted " + p + " sucessfully? " + success);
2167       HBaseFsck.debugLsr(conf, p);
2168     }
2169   }
2170 
2171   @Test
2172   public void testTableWithNoRegions() throws Exception {
2173     // We might end up with empty regions in a table
2174     // see also testNoHdfsTable()
2175     TableName table =
2176         TableName.valueOf(name.getMethodName());
2177     try {
2178       // create table with one region
2179       HTableDescriptor desc = new HTableDescriptor(table);
2180       HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2181       desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
2182       TEST_UTIL.getHBaseAdmin().createTable(desc);
2183       tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
2184 
2185       // Mess it up by leaving a hole in the assignment, meta, and hdfs data
2186       deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
2187           false, true);
2188 
2189       HBaseFsck hbck = doFsck(conf, false);
2190       assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2191 
2192       doFsck(conf, true);
2193 
2194       // fix hole
2195       doFsck(conf, true);
2196 
2197       // check that hole fixed
2198       assertNoErrors(doFsck(conf, false));
2199     } finally {
2200       deleteTable(table);
2201     }
2202 
2203   }
2204 
2205   @Test
2206   public void testHbckAfterRegionMerge() throws Exception {
2207     TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2208     HTable meta = null;
2209     try {
2210       // disable CatalogJanitor
2211       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2212       setupTable(table);
2213       assertEquals(ROWKEYS.length, countRows());
2214 
2215       // make sure data in regions, if in hlog only there is no data loss
2216       TEST_UTIL.getHBaseAdmin().flush(table.getName());
2217       HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
2218       HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();
2219 
2220       int regionCountBeforeMerge = tbl.getRegionLocations().size();
2221 
2222       assertNotEquals(region1, region2);
2223 
2224       // do a region merge
2225       HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
2226       admin.mergeRegions(region1.getEncodedNameAsBytes(),
2227           region2.getEncodedNameAsBytes(), false);
2228 
2229       // wait until region merged
2230       long timeout = System.currentTimeMillis() + 30 * 1000;
2231       while (true) {
2232         if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2233           break;
2234         } else if (System.currentTimeMillis() > timeout) {
2235           fail("Time out waiting on region " + region1.getEncodedName()
2236               + " and " + region2.getEncodedName() + " be merged");
2237         }
2238         Thread.sleep(10);
2239       }
2240 
2241       assertEquals(ROWKEYS.length, countRows());
2242 
2243       HBaseFsck hbck = doFsck(conf, false);
2244       assertNoErrors(hbck); // no errors
2245 
2246     } finally {
2247       TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2248       deleteTable(table);
2249       IOUtils.closeQuietly(meta);
2250     }
2251   }
2252 
2253   @Test
2254   public void testRegionBoundariesCheck() throws Exception {
2255     HBaseFsck hbck = doFsck(conf, false);
2256     assertNoErrors(hbck); // no errors
2257     try {
2258       hbck.checkRegionBoundaries();
2259     } catch (IllegalArgumentException e) {
2260       if (e.getMessage().endsWith("not a valid DFS filename.")) {
2261         fail("Table directory path is not valid." + e.getMessage());
2262       }
2263     }
2264   }
2265 
  /**
   * JUnit rule holding the name of the currently running test method; several tests in this
   * class call {@code name.getMethodName()} to derive a per-test table name.
   */
  @org.junit.Rule
  public TestName name = new TestName();
2268 }