View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.MediumTests;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.ServerLoad;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.UnknownRegionException;
47  import org.apache.hadoop.hbase.Waiter;
48  import org.apache.hadoop.hbase.catalog.MetaEditor;
49  import org.apache.hadoop.hbase.client.HBaseAdmin;
50  import org.apache.hadoop.hbase.client.HTable;
51  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
52  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
53  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
54  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
55  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
56  import org.apache.hadoop.hbase.executor.EventType;
57  import org.apache.hadoop.hbase.master.RegionState.State;
58  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
59  import org.apache.hadoop.hbase.regionserver.HRegionServer;
60  import org.apache.hadoop.hbase.util.Bytes;
61  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
62  import org.apache.hadoop.hbase.util.FSUtils;
63  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
64  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
65  import org.apache.zookeeper.KeeperException;
66  import org.junit.AfterClass;
67  import org.junit.BeforeClass;
68  import org.junit.Test;
69  import org.junit.experimental.categories.Category;
70  
71  /**
72   * This tests AssignmentManager with a testing cluster.
73   */
74  @Category(MediumTests.class)
75  public class TestAssignmentManagerOnCluster {
76    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
77    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
78    private final static Configuration conf = TEST_UTIL.getConfiguration();
79    private static HBaseAdmin admin;
80  
81    @BeforeClass
82    public static void setUpBeforeClass() throws Exception {
83      // Using the our load balancer to control region plans
84      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
85        MyLoadBalancer.class, LoadBalancer.class);
86      conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
87        MyRegionObserver.class, RegionObserver.class);
88      // Reduce the maximum attempts to speed up the test
89      conf.setInt("hbase.assignment.maximum.attempts", 3);
90  
91      TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, null);
92      admin = TEST_UTIL.getHBaseAdmin();
93    }
94  
95    @AfterClass
96    public static void tearDownAfterClass() throws Exception {
97      TEST_UTIL.shutdownMiniCluster();
98    }
99  
100   /**
101    * This tests region assignment
102    */
103   @Test (timeout=60000)
104   public void testAssignRegion() throws Exception {
105     String table = "testAssignRegion";
106     try {
107       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
108       desc.addFamily(new HColumnDescriptor(FAMILY));
109       admin.createTable(desc);
110 
111       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
112       HRegionInfo hri = new HRegionInfo(
113         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
114       MetaEditor.addRegionToMeta(meta, hri);
115 
116       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
117       master.assignRegion(hri);
118       AssignmentManager am = master.getAssignmentManager();
119       am.waitForAssignment(hri);
120 
121       RegionStates regionStates = am.getRegionStates();
122       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
123       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
124 
125       // Region is assigned now. Let's assign it again.
126       // Master should not abort, and region should be assigned.
127       RegionState oldState = regionStates.getRegionState(hri);
128       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
129       master.getAssignmentManager().waitForAssignment(hri);
130       RegionState newState = regionStates.getRegionState(hri);
131       assertTrue(newState.isOpened()
132         && newState.getStamp() != oldState.getStamp());
133     } finally {
134       TEST_UTIL.deleteTable(Bytes.toBytes(table));
135     }
136   }
137 
138   /**
139    * This tests region assignment on a simulated restarted server
140    */
141   @Test (timeout=120000)
142   public void testAssignRegionOnRestartedServer() throws Exception {
143     String table = "testAssignRegionOnRestartedServer";
144     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
145     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
146     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
147 
148     ServerName deadServer = null;
149     HMaster master = null;
150     try {
151       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
152       desc.addFamily(new HColumnDescriptor(FAMILY));
153       admin.createTable(desc);
154 
155       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
156       final HRegionInfo hri = new HRegionInfo(
157         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
158       MetaEditor.addRegionToMeta(meta, hri);
159 
160       master = TEST_UTIL.getHBaseCluster().getMaster();
161       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
162       assertFalse("There should be some servers online", onlineServers.isEmpty());
163 
164       // Use the first server as the destination server
165       ServerName destServer = onlineServers.iterator().next();
166 
167       // Created faked dead server
168       deadServer = ServerName.valueOf(destServer.getHostname(),
169           destServer.getPort(), destServer.getStartcode() - 100L);
170       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
171 
172       final AssignmentManager am = master.getAssignmentManager();
173       RegionPlan plan = new RegionPlan(hri, null, deadServer);
174       am.addPlan(hri.getEncodedName(), plan);
175       master.assignRegion(hri);
176 
177       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
178         destServer, EventType.M_ZK_REGION_OFFLINE,
179         EventType.RS_ZK_REGION_OPENING, 0);
180       assertEquals("TansitionNode should fail", -1, version);
181 
182       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
183         @Override
184         public boolean evaluate() throws Exception {
185           return ! am.getRegionStates().isRegionInTransition(hri);
186         }
187       });
188 
189     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
190     } finally {
191       if (deadServer != null) {
192         master.serverManager.expireServer(deadServer);
193       }
194 
195       TEST_UTIL.deleteTable(Bytes.toBytes(table));
196     }
197   }
198 
199   /**
200    * This tests offlining a region
201    */
202   @Test (timeout=60000)
203   public void testOfflineRegion() throws Exception {
204     TableName table =
205         TableName.valueOf("testOfflineRegion");
206     try {
207       HRegionInfo hri = createTableAndGetOneRegion(table);
208 
209       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
210         getMaster().getAssignmentManager().getRegionStates();
211       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
212       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
213       admin.offline(hri.getRegionName());
214 
215       long timeoutTime = System.currentTimeMillis() + 800;
216       while (true) {
217         List<HRegionInfo> regions =
218           regionStates.getRegionsOfTable(table);
219         if (!regions.contains(hri)) break;
220         long now = System.currentTimeMillis();
221         if (now > timeoutTime) {
222           fail("Failed to offline the region in time");
223           break;
224         }
225         Thread.sleep(10);
226       }
227       RegionState regionState = regionStates.getRegionState(hri);
228       assertTrue(regionState.isOffline());
229     } finally {
230       TEST_UTIL.deleteTable(table);
231     }
232   }
233 
234   /**
235    * This tests moving a region
236    */
237   @Test (timeout=50000)
238   public void testMoveRegion() throws Exception {
239     TableName table =
240         TableName.valueOf("testMoveRegion");
241     try {
242       HRegionInfo hri = createTableAndGetOneRegion(table);
243 
244       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
245         getMaster().getAssignmentManager().getRegionStates();
246       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
247       ServerName destServerName = null;
248       for (int i = 0; i < 3; i++) {
249         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
250         if (!destServer.getServerName().equals(serverName)) {
251           destServerName = destServer.getServerName();
252           break;
253         }
254       }
255       assertTrue(destServerName != null
256         && !destServerName.equals(serverName));
257       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
258         Bytes.toBytes(destServerName.getServerName()));
259 
260       long timeoutTime = System.currentTimeMillis() + 30000;
261       while (true) {
262         ServerName sn = regionStates.getRegionServerOfRegion(hri);
263         if (sn != null && sn.equals(destServerName)) {
264           TEST_UTIL.assertRegionOnServer(hri, sn, 200);
265           break;
266         }
267         long now = System.currentTimeMillis();
268         if (now > timeoutTime) {
269           fail("Failed to move the region in time: "
270             + regionStates.getRegionState(hri));
271         }
272         regionStates.waitForUpdate(50);
273       }
274 
275     } finally {
276       TEST_UTIL.deleteTable(table);
277     }
278   }
279 
280   /**
281    * If a table is deleted, we should not be able to move it anymore.
282    * Otherwise, the region will be brought back.
283    * @throws Exception
284    */
285   @Test (timeout=50000)
286   public void testMoveRegionOfDeletedTable() throws Exception {
287     TableName table =
288         TableName.valueOf("testMoveRegionOfDeletedTable");
289     HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
290     try {
291       HRegionInfo hri = createTableAndGetOneRegion(table);
292 
293       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
294       AssignmentManager am = master.getAssignmentManager();
295       RegionStates regionStates = am.getRegionStates();
296       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
297       ServerName destServerName = null;
298       for (int i = 0; i < 3; i++) {
299         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
300         if (!destServer.getServerName().equals(serverName)) {
301           destServerName = destServer.getServerName();
302           break;
303         }
304       }
305       assertTrue(destServerName != null
306         && !destServerName.equals(serverName));
307 
308       TEST_UTIL.deleteTable(table);
309 
310       try {
311         admin.move(hri.getEncodedNameAsBytes(),
312           Bytes.toBytes(destServerName.getServerName()));
313         fail("We should not find the region");
314       } catch (IOException ioe) {
315         assertTrue(ioe instanceof UnknownRegionException);
316       }
317 
318       am.balance(new RegionPlan(hri, serverName, destServerName));
319       assertFalse("The region should not be in transition",
320         regionStates.isRegionInTransition(hri));
321     } finally {
322       if (admin.tableExists(table)) {
323         TEST_UTIL.deleteTable(table);
324       }
325     }
326   }
327 
328   HRegionInfo createTableAndGetOneRegion(
329       final TableName tableName) throws IOException, InterruptedException {
330     HTableDescriptor desc = new HTableDescriptor(tableName);
331     desc.addFamily(new HColumnDescriptor(FAMILY));
332     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
333 
334     // wait till the table is assigned
335     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
336     long timeoutTime = System.currentTimeMillis() + 1000;
337     while (true) {
338       List<HRegionInfo> regions = master.getAssignmentManager().
339         getRegionStates().getRegionsOfTable(tableName);
340       if (regions.size() > 3) {
341         return regions.get(2);
342       }
343       long now = System.currentTimeMillis();
344       if (now > timeoutTime) {
345         fail("Could not find an online region");
346       }
347       Thread.sleep(10);
348     }
349   }
350 
351   /**
352    * This test should not be flaky. If it is flaky, it means something
353    * wrong with AssignmentManager which should be reported and fixed
354    *
355    * This tests forcefully assign a region while it's closing and re-assigned.
356    */
357   @Test (timeout=60000)
358   public void testForceAssignWhileClosing() throws Exception {
359     String table = "testForceAssignWhileClosing";
360     try {
361       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
362       desc.addFamily(new HColumnDescriptor(FAMILY));
363       admin.createTable(desc);
364 
365       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
366       HRegionInfo hri = new HRegionInfo(
367         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
368       MetaEditor.addRegionToMeta(meta, hri);
369 
370       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
371       master.assignRegion(hri);
372       AssignmentManager am = master.getAssignmentManager();
373       assertTrue(am.waitForAssignment(hri));
374 
375       MyRegionObserver.preCloseEnabled.set(true);
376       am.unassign(hri);
377       RegionState state = am.getRegionStates().getRegionState(hri);
378       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
379 
380       MyRegionObserver.preCloseEnabled.set(false);
381       am.unassign(hri, true);
382 
383       // region is closing now, will be re-assigned automatically.
384       // now, let's forcefully assign it again. it should be
385       // assigned properly and no double-assignment
386       am.assign(hri, true, true);
387 
388       // let's check if it's assigned after it's out of transition
389       am.waitOnRegionToClearRegionsInTransition(hri);
390       assertTrue(am.waitForAssignment(hri));
391 
392       ServerName serverName = master.getAssignmentManager().
393         getRegionStates().getRegionServerOfRegion(hri);
394       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
395     } finally {
396       MyRegionObserver.preCloseEnabled.set(false);
397       TEST_UTIL.deleteTable(Bytes.toBytes(table));
398     }
399   }
400 
401   /**
402    * This tests region close failed
403    */
404   @Test (timeout=60000)
405   public void testCloseFailed() throws Exception {
406     String table = "testCloseFailed";
407     try {
408       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
409       desc.addFamily(new HColumnDescriptor(FAMILY));
410       admin.createTable(desc);
411 
412       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
413       HRegionInfo hri = new HRegionInfo(
414         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
415       MetaEditor.addRegionToMeta(meta, hri);
416 
417       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
418       master.assignRegion(hri);
419       AssignmentManager am = master.getAssignmentManager();
420       assertTrue(am.waitForAssignment(hri));
421 
422       MyRegionObserver.preCloseEnabled.set(true);
423       am.unassign(hri);
424       RegionState state = am.getRegionStates().getRegionState(hri);
425       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
426 
427       MyRegionObserver.preCloseEnabled.set(false);
428       am.unassign(hri, true);
429 
430       // region may still be assigned now since it's closing,
431       // let's check if it's assigned after it's out of transition
432       am.waitOnRegionToClearRegionsInTransition(hri);
433 
434       // region should be closed and re-assigned
435       assertTrue(am.waitForAssignment(hri));
436       ServerName serverName = master.getAssignmentManager().
437         getRegionStates().getRegionServerOfRegion(hri);
438       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
439     } finally {
440       MyRegionObserver.preCloseEnabled.set(false);
441       TEST_UTIL.deleteTable(Bytes.toBytes(table));
442     }
443   }
444 
445   /**
446    * This tests region open failed
447    */
448   @Test (timeout=60000)
449   public void testOpenFailed() throws Exception {
450     String table = "testOpenFailed";
451     try {
452       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
453       desc.addFamily(new HColumnDescriptor(FAMILY));
454       admin.createTable(desc);
455 
456       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
457       HRegionInfo hri = new HRegionInfo(
458         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
459       MetaEditor.addRegionToMeta(meta, hri);
460 
461       MyLoadBalancer.controledRegion = hri.getEncodedName();
462 
463       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
464       master.assignRegion(hri);
465       AssignmentManager am = master.getAssignmentManager();
466       assertFalse(am.waitForAssignment(hri));
467 
468       RegionState state = am.getRegionStates().getRegionState(hri);
469       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
470       // Failed to open since no plan, so it's on no server
471       assertNull(state.getServerName());
472 
473       MyLoadBalancer.controledRegion = null;
474       master.assignRegion(hri);
475       assertTrue(am.waitForAssignment(hri));
476 
477       ServerName serverName = master.getAssignmentManager().
478         getRegionStates().getRegionServerOfRegion(hri);
479       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
480     } finally {
481       MyLoadBalancer.controledRegion = null;
482       TEST_UTIL.deleteTable(Bytes.toBytes(table));
483     }
484   }
485 
486   /**
487    * This tests region open failure which is not recoverable
488    */
489   @Test (timeout=60000)
490   public void testOpenFailedUnrecoverable() throws Exception {
491     TableName table =
492         TableName.valueOf("testOpenFailedUnrecoverable");
493     try {
494       HTableDescriptor desc = new HTableDescriptor(table);
495       desc.addFamily(new HColumnDescriptor(FAMILY));
496       admin.createTable(desc);
497 
498       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
499       HRegionInfo hri = new HRegionInfo(
500         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
501       MetaEditor.addRegionToMeta(meta, hri);
502 
503       FileSystem fs = FileSystem.get(conf);
504       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
505       Path regionDir = new Path(tableDir, hri.getEncodedName());
506       // create a file named the same as the region dir to
507       // mess up with region opening
508       fs.create(regionDir, true);
509 
510       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
511       master.assignRegion(hri);
512       AssignmentManager am = master.getAssignmentManager();
513       assertFalse(am.waitForAssignment(hri));
514 
515       RegionState state = am.getRegionStates().getRegionState(hri);
516       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
517       // Failed to open due to file system issue. Region state should
518       // carry the opening region server so that we can force close it
519       // later on before opening it again. See HBASE-9092.
520       assertNotNull(state.getServerName());
521 
522       // remove the blocking file, so that region can be opened
523       fs.delete(regionDir, true);
524       master.assignRegion(hri);
525       assertTrue(am.waitForAssignment(hri));
526 
527       ServerName serverName = master.getAssignmentManager().
528         getRegionStates().getRegionServerOfRegion(hri);
529       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
530     } finally {
531       TEST_UTIL.deleteTable(table);
532     }
533   }
534 
535   @Test (timeout=60000)
536   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
537     final TableName table =
538         TableName.valueOf
539             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
540     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
541     HRegionInfo hri = null;
542     ServerName serverName = null;
543     try {
544       hri = createTableAndGetOneRegion(table);
545       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
546       ServerName destServerName = null;
547       HRegionServer destServer = null;
548       for (int i = 0; i < 3; i++) {
549         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
550         if (!destServer.getServerName().equals(serverName)) {
551           destServerName = destServer.getServerName();
552           break;
553         }
554       }
555       am.regionOffline(hri);
556       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
557       am.getRegionStates().updateRegionState(hri, State.OFFLINE);
558       ZKAssign.createNodeOffline(zkw, hri, destServerName);
559       ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
560 
561       // Wait till the event is processed and the region is in transition
562       long timeoutTime = System.currentTimeMillis() + 20000;
563       while (!am.getRegionStates().isRegionInTransition(hri)) {
564         assertTrue("Failed to process ZK opening event in time",
565           System.currentTimeMillis() < timeoutTime);
566         Thread.sleep(100);
567       }
568 
569       am.getZKTable().setDisablingTable(table);
570       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
571       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
572       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
573           .getRegionState(hri).isOffline());
574     } finally {
575       if (hri != null && serverName != null) {
576         am.regionOnline(hri, serverName);
577       }
578       am.getZKTable().setDisabledTable(table);
579       TEST_UTIL.deleteTable(table);
580     }
581   }
582 
583   /**
584    * This tests region close hanging
585    */
586   @Test (timeout=60000)
587   public void testCloseHang() throws Exception {
588     String table = "testCloseHang";
589     try {
590       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
591       desc.addFamily(new HColumnDescriptor(FAMILY));
592       admin.createTable(desc);
593 
594       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
595       HRegionInfo hri = new HRegionInfo(
596         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
597       MetaEditor.addRegionToMeta(meta, hri);
598 
599       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
600       master.assignRegion(hri);
601       AssignmentManager am = master.getAssignmentManager();
602       assertTrue(am.waitForAssignment(hri));
603 
604       MyRegionObserver.postCloseEnabled.set(true);
605       am.unassign(hri);
606       // Now region should pending_close or closing
607       // Unassign it again forcefully so that we can trigger already
608       // in transition exception. This test is to make sure this scenario
609       // is handled properly.
610       am.server.getConfiguration().setLong(
611         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
612       am.unassign(hri, true);
613       RegionState state = am.getRegionStates().getRegionState(hri);
614       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
615 
616       // Let region closing move ahead. The region should be closed
617       // properly and re-assigned automatically
618       MyRegionObserver.postCloseEnabled.set(false);
619 
620       // region may still be assigned now since it's closing,
621       // let's check if it's assigned after it's out of transition
622       am.waitOnRegionToClearRegionsInTransition(hri);
623 
624       // region should be closed and re-assigned
625       assertTrue(am.waitForAssignment(hri));
626       ServerName serverName = master.getAssignmentManager().
627         getRegionStates().getRegionServerOfRegion(hri);
628       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
629     } finally {
630       MyRegionObserver.postCloseEnabled.set(false);
631       TEST_UTIL.deleteTable(Bytes.toBytes(table));
632     }
633   }
634 
635   /**
636    * This tests region close racing with open
637    */
638   @Test (timeout=60000)
639   public void testOpenCloseRacing() throws Exception {
640     String table = "testOpenCloseRacing";
641     try {
642       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
643       desc.addFamily(new HColumnDescriptor(FAMILY));
644       admin.createTable(desc);
645 
646       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
647       HRegionInfo hri = new HRegionInfo(
648         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
649       MetaEditor.addRegionToMeta(meta, hri);
650       meta.close();
651 
652       MyRegionObserver.postOpenEnabled.set(true);
653       MyRegionObserver.postOpenCalled = false;
654       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
655       // Region will be opened, but it won't complete
656       master.assignRegion(hri);
657       long end = EnvironmentEdgeManager.currentTimeMillis() + 20000;
658       // Wait till postOpen is called
659       while (!MyRegionObserver.postOpenCalled ) {
660         assertFalse("Timed out waiting for postOpen to be called",
661           EnvironmentEdgeManager.currentTimeMillis() > end);
662         Thread.sleep(300);
663       }
664 
665       AssignmentManager am = master.getAssignmentManager();
666       // Now let's unassign it, it should do nothing
667       am.unassign(hri);
668       RegionState state = am.getRegionStates().getRegionState(hri);
669       ServerName oldServerName = state.getServerName();
670       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
671 
672       // Now the region is stuck in opening
673       // Let's forcefully re-assign it to trigger closing/opening
674       // racing. This test is to make sure this scenario
675       // is handled properly.
676       MyRegionObserver.postOpenEnabled.set(false);
677       ServerName destServerName = null;
678       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
679       for (int i = 0; i < numRS; i++) {
680         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
681         if (!destServer.getServerName().equals(oldServerName)) {
682           destServerName = destServer.getServerName();
683           break;
684         }
685       }
686       assertNotNull(destServerName);
687       assertFalse("Region should be assigned on a new region server",
688         oldServerName.equals(destServerName));
689       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
690       regions.add(hri);
691       am.assign(destServerName, regions);
692 
693       // let's check if it's assigned after it's out of transition
694       am.waitOnRegionToClearRegionsInTransition(hri);
695       assertTrue(am.waitForAssignment(hri));
696 
697       ServerName serverName = master.getAssignmentManager().
698         getRegionStates().getRegionServerOfRegion(hri);
699       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
700       assertFalse("Region should be assigned on a new region server",
701         oldServerName.equals(serverName));
702     } finally {
703       MyRegionObserver.postOpenEnabled.set(false);
704       TEST_UTIL.deleteTable(Bytes.toBytes(table));
705     }
706   }
707 
708   /**
709    * Test force unassign/assign a region hosted on a dead server
710    */
711   @Test (timeout=60000)
712   public void testAssignRacingWithSSH() throws Exception {
713     String table = "testAssignRacingWithSSH";
714     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
715     MyMaster master = null;
716     try {
717       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
718       desc.addFamily(new HColumnDescriptor(FAMILY));
719       admin.createTable(desc);
720 
721       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
722       HRegionInfo hri = new HRegionInfo(
723         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
724       MetaEditor.addRegionToMeta(meta, hri);
725 
726       // Assign the region
727       master = (MyMaster)cluster.getMaster();
728       master.assignRegion(hri);
729 
730       // Hold SSH before killing the hosting server
731       master.enableSSH(false);
732 
733       AssignmentManager am = master.getAssignmentManager();
734       RegionStates regionStates = am.getRegionStates();
735       ServerName metaServer = regionStates.getRegionServerOfRegion(
736         HRegionInfo.FIRST_META_REGIONINFO);
737       while (true) {
738         assertTrue(am.waitForAssignment(hri));
739         RegionState state = regionStates.getRegionState(hri);
740         ServerName oldServerName = state.getServerName();
741         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
742           // Kill the hosting server, which doesn't have meta on it.
743           cluster.killRegionServer(oldServerName);
744           cluster.waitForRegionServerToStop(oldServerName, -1);
745           break;
746         }
747         int i = cluster.getServerWithMeta();
748         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
749         oldServerName = rs.getServerName();
750         master.move(hri.getEncodedNameAsBytes(),
751           Bytes.toBytes(oldServerName.getServerName()));
752       }
753 
754       // You can't assign a dead region before SSH
755       am.assign(hri, true, true);
756       RegionState state = regionStates.getRegionState(hri);
757       assertTrue(state.isFailedClose());
758 
759       // You can't unassign a dead region before SSH either
760       am.unassign(hri, true);
761       assertTrue(state.isFailedClose());
762 
763       // Enable SSH so that log can be split
764       master.enableSSH(true);
765 
766       // let's check if it's assigned after it's out of transition.
767       // no need to assign it manually, SSH should do it
768       am.waitOnRegionToClearRegionsInTransition(hri);
769       assertTrue(am.waitForAssignment(hri));
770 
771       ServerName serverName = master.getAssignmentManager().
772         getRegionStates().getRegionServerOfRegion(hri);
773       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
774     } finally {
775       if (master != null) {
776         master.enableSSH(true);
777       }
778       TEST_UTIL.deleteTable(Bytes.toBytes(table));
779     }
780   }
781 
782   /**
783    * Test force unassign/assign a region of a disabled table
784    */
785   @Test (timeout=60000)
786   public void testAssignDisabledRegion() throws Exception {
787     String table = "testAssignDisabledRegion";
788     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
789     MyMaster master = null;
790     try {
791       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
792       desc.addFamily(new HColumnDescriptor(FAMILY));
793       admin.createTable(desc);
794 
795       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
796       HRegionInfo hri = new HRegionInfo(
797         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
798       MetaEditor.addRegionToMeta(meta, hri);
799 
800       // Assign the region
801       master = (MyMaster)cluster.getMaster();
802       master.assignRegion(hri);
803       AssignmentManager am = master.getAssignmentManager();
804       RegionStates regionStates = am.getRegionStates();
805       assertTrue(am.waitForAssignment(hri));
806 
807       // Disable the table
808       admin.disableTable(table);
809       assertTrue(regionStates.isRegionOffline(hri));
810 
811       // You can't assign a disabled region
812       am.assign(hri, true, true);
813       assertTrue(regionStates.isRegionOffline(hri));
814 
815       // You can't unassign a disabled region either
816       am.unassign(hri, true);
817       assertTrue(regionStates.isRegionOffline(hri));
818     } finally {
819       TEST_UTIL.deleteTable(Bytes.toBytes(table));
820     }
821   }
822 
823   static class MyLoadBalancer extends StochasticLoadBalancer {
824     // For this region, if specified, always assign to nowhere
825     static volatile String controledRegion = null;
826 
827     @Override
828     public ServerName randomAssignment(HRegionInfo regionInfo,
829         List<ServerName> servers) {
830       if (regionInfo.getEncodedName().equals(controledRegion)) {
831         return null;
832       }
833       return super.randomAssignment(regionInfo, servers);
834     }
835   }
836 
837   public static class MyMaster extends HMaster {
838     AtomicBoolean enabled = new AtomicBoolean(true);
839 
840     public MyMaster(Configuration conf) throws IOException, KeeperException,
841         InterruptedException {
842       super(conf);
843     }
844 
845     @Override
846     public boolean isServerShutdownHandlerEnabled() {
847       return enabled.get() && super.isServerShutdownHandlerEnabled();
848     }
849 
850     public void enableSSH(boolean enabled) {
851       this.enabled.set(enabled);
852       if (enabled) {
853         serverManager.processQueuedDeadServers();
854       }
855     }
856   }
857 
858   public static class MyRegionObserver extends BaseRegionObserver {
859     // If enabled, fail all preClose calls
860     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
861 
862     // If enabled, stall postClose calls
863     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
864 
865     // If enabled, stall postOpen calls
866     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
867 
868     // A flag to track if postOpen is called
869     static volatile boolean postOpenCalled = false;
870 
871     @Override
872     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
873         boolean abortRequested) throws IOException {
874       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
875     }
876 
877     @Override
878     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
879         boolean abortRequested) {
880       stallOnFlag(postCloseEnabled);
881     }
882 
883     @Override
884     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
885       postOpenCalled = true;
886       stallOnFlag(postOpenEnabled);
887     }
888 
889     private void stallOnFlag(final AtomicBoolean flag) {
890       try {
891         // If enabled, stall
892         while (flag.get()) {
893           Thread.sleep(1000);
894         }
895       } catch (InterruptedException ie) {
896         Thread.currentThread().interrupt();
897       }
898     }
899   }
900 }