View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  
33  import org.apache.hadoop.hbase.CellScannable;
34  import org.apache.hadoop.hbase.CellUtil;
35  import org.apache.hadoop.hbase.DoNotRetryIOException;
36  import org.apache.hadoop.hbase.HBaseConfiguration;
37  import org.apache.hadoop.hbase.HBaseTestingUtility;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.MediumTests;
41  import org.apache.hadoop.hbase.RegionException;
42  import org.apache.hadoop.hbase.RegionTransition;
43  import org.apache.hadoop.hbase.Server;
44  import org.apache.hadoop.hbase.ServerLoad;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableName;
47  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
48  import org.apache.hadoop.hbase.catalog.CatalogTracker;
49  import org.apache.hadoop.hbase.catalog.MetaMockingUtil;
50  import org.apache.hadoop.hbase.client.HConnection;
51  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.exceptions.DeserializationException;
54  import org.apache.hadoop.hbase.executor.EventType;
55  import org.apache.hadoop.hbase.executor.ExecutorService;
56  import org.apache.hadoop.hbase.executor.ExecutorType;
57  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
58  import org.apache.hadoop.hbase.master.RegionState.State;
59  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
60  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
61  import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
62  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
63  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
64  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
65  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
66  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
67  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
68  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
69  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
70  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
71  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
72  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
75  import org.apache.hadoop.hbase.util.Threads;
76  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
77  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
78  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
79  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
80  import org.apache.zookeeper.KeeperException;
81  import org.apache.zookeeper.KeeperException.NodeExistsException;
82  import org.apache.zookeeper.Watcher;
83  import org.junit.After;
84  import org.junit.AfterClass;
85  import org.junit.Before;
86  import org.junit.BeforeClass;
87  import org.junit.Test;
88  import org.junit.experimental.categories.Category;
89  import org.mockito.Mockito;
90  import org.mockito.internal.util.reflection.Whitebox;
91  import org.mockito.invocation.InvocationOnMock;
92  import org.mockito.stubbing.Answer;
93  
94  import com.google.protobuf.RpcController;
95  import com.google.protobuf.ServiceException;
96  
97  
98  /**
99   * Test {@link AssignmentManager}
100  */
101 @Category(MediumTests.class)
102 public class TestAssignmentManager {
103   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
104   private static final ServerName SERVERNAME_A =
105       ServerName.valueOf("example.org", 1234, 5678);
106   private static final ServerName SERVERNAME_B =
107       ServerName.valueOf("example.org", 0, 5678);
108   private static final HRegionInfo REGIONINFO =
109     new HRegionInfo(TableName.valueOf("t"),
110       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
111   private static int assignmentCount;
112   private static boolean enabling = false;
113 
114   // Mocked objects or; get redone for each test.
115   private Server server;
116   private ServerManager serverManager;
117   private ZooKeeperWatcher watcher;
118   private LoadBalancer balancer;
119   private HMaster master;
120 
121   @BeforeClass
122   public static void beforeClass() throws Exception {
123     HTU.startMiniZKCluster();
124   }
125 
126   @AfterClass
127   public static void afterClass() throws IOException {
128     HTU.shutdownMiniZKCluster();
129   }
130 
131   @Before
132   public void before() throws ZooKeeperConnectionException, IOException {
133     // TODO: Make generic versions of what we do below and put up in a mocking
134     // utility class or move up into HBaseTestingUtility.
135 
136     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
137     // If abort is called, be sure to fail the test (don't just swallow it
138     // silently as is mockito default).
139     this.server = Mockito.mock(Server.class);
140     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
141     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
142     this.watcher =
143       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
144     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
145     Mockito.doThrow(new RuntimeException("Aborted")).
146       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
147 
148     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
149     // make it so if close or open, we return 'success'.
150     this.serverManager = Mockito.mock(ServerManager.class);
151     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
152     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
153     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
154     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
155     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
156     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
157     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
158         new ArrayList<ServerName>(onlineServers.keySet()));
159     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
160 
161     List<ServerName> avServers = new ArrayList<ServerName>();
162     avServers.addAll(onlineServers.keySet());
163     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
164     Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers);
165 
166     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
167       thenReturn(true);
168     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
169       thenReturn(true);
170     // Ditto on open.
171     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
172       thenReturn(RegionOpeningState.OPENED);
173     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
174       thenReturn(RegionOpeningState.OPENED);
175     this.master = Mockito.mock(HMaster.class);
176 
177     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
178   }
179 
180   @After
181     public void after() throws KeeperException {
182     if (this.watcher != null) {
183       // Clean up all znodes
184       ZKAssign.deleteAllNodes(this.watcher);
185       this.watcher.close();
186     }
187   }
188 
189   /**
190    * Test a balance going on at same time as a master failover
191    *
192    * @throws IOException
193    * @throws KeeperException
194    * @throws InterruptedException
195    * @throws DeserializationException
196    */
197   @Test(timeout = 60000)
198   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
199   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
200     AssignmentManagerWithExtrasForTesting am =
201       setUpMockedAssignmentManager(this.server, this.serverManager);
202     try {
203       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
204       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
205       while (!am.processRITInvoked) Thread.sleep(1);
206       // As part of the failover cleanup, the balancing region plan is removed.
207       // So a random server will be used to open the region. For testing purpose,
208       // let's assume it is going to open on server b:
209       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
210 
211       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
212 
213       // Now fake the region closing successfully over on the regionserver; the
214       // regionserver will have set the region in CLOSED state. This will
215       // trigger callback into AM. The below zk close call is from the RS close
216       // region handler duplicated here because its down deep in a private
217       // method hard to expose.
218       int versionid =
219         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
220       assertNotSame(versionid, -1);
221       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
222 
223       // Get current versionid else will fail on transition from OFFLINE to
224       // OPENING below
225       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
226       assertNotSame(-1, versionid);
227       // This uglyness below is what the openregionhandler on RS side does.
228       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
229         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
230         EventType.RS_ZK_REGION_OPENING, versionid);
231       assertNotSame(-1, versionid);
232       // Move znode from OPENING to OPENED as RS does on successful open.
233       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
234         SERVERNAME_B, versionid);
235       assertNotSame(-1, versionid);
236       am.gate.set(false);
237       // Block here until our znode is cleared or until this test times out.
238       ZKAssign.blockUntilNoRIT(watcher);
239     } finally {
240       am.getExecutorService().shutdown();
241       am.shutdown();
242     }
243   }
244 
245   @Test(timeout = 60000)
246   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
247   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
248     AssignmentManagerWithExtrasForTesting am =
249       setUpMockedAssignmentManager(this.server, this.serverManager);
250     try {
251       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
252       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
253       while (!am.processRITInvoked) Thread.sleep(1);
254       // As part of the failover cleanup, the balancing region plan is removed.
255       // So a random server will be used to open the region. For testing purpose,
256       // let's assume it is going to open on server b:
257       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
258 
259       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
260 
261       // Now fake the region closing successfully over on the regionserver; the
262       // regionserver will have set the region in CLOSED state. This will
263       // trigger callback into AM. The below zk close call is from the RS close
264       // region handler duplicated here because its down deep in a private
265       // method hard to expose.
266       int versionid =
267         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
268       assertNotSame(versionid, -1);
269       am.gate.set(false);
270       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
271 
272       // Get current versionid else will fail on transition from OFFLINE to
273       // OPENING below
274       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
275       assertNotSame(-1, versionid);
276       // This uglyness below is what the openregionhandler on RS side does.
277       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
278           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
279           EventType.RS_ZK_REGION_OPENING, versionid);
280       assertNotSame(-1, versionid);
281       // Move znode from OPENING to OPENED as RS does on successful open.
282       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
283           SERVERNAME_B, versionid);
284       assertNotSame(-1, versionid);
285 
286       // Block here until our znode is cleared or until this test timesout.
287       ZKAssign.blockUntilNoRIT(watcher);
288     } finally {
289       am.getExecutorService().shutdown();
290       am.shutdown();
291     }
292   }
293 
294   @Test(timeout = 60000)
295   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
296   throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
297     AssignmentManagerWithExtrasForTesting am =
298       setUpMockedAssignmentManager(this.server, this.serverManager);
299     try {
300       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
301       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
302       while (!am.processRITInvoked) Thread.sleep(1);
303       // As part of the failover cleanup, the balancing region plan is removed.
304       // So a random server will be used to open the region. For testing purpose,
305       // let's assume it is going to open on server b:
306       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
307 
308       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
309 
310       // Now fake the region closing successfully over on the regionserver; the
311       // regionserver will have set the region in CLOSED state. This will
312       // trigger callback into AM. The below zk close call is from the RS close
313       // region handler duplicated here because its down deep in a private
314       // method hard to expose.
315       int versionid =
316         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
317       assertNotSame(versionid, -1);
318       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
319 
320       am.gate.set(false);
321       // Get current versionid else will fail on transition from OFFLINE to
322       // OPENING below
323       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
324       assertNotSame(-1, versionid);
325       // This uglyness below is what the openregionhandler on RS side does.
326       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
327           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
328           EventType.RS_ZK_REGION_OPENING, versionid);
329       assertNotSame(-1, versionid);
330       // Move znode from OPENING to OPENED as RS does on successful open.
331       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
332           SERVERNAME_B, versionid);
333       assertNotSame(-1, versionid);
334       // Block here until our znode is cleared or until this test timesout.
335       ZKAssign.blockUntilNoRIT(watcher);
336     } finally {
337       am.getExecutorService().shutdown();
338       am.shutdown();
339     }
340   }
341 
342   private void createRegionPlanAndBalance(
343       final AssignmentManager am, final ServerName from,
344       final ServerName to, final HRegionInfo hri) throws RegionException {
345     // Call the balance function but fake the region being online first at
346     // servername from.
347     am.regionOnline(hri, from);
348     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
349     // up in zk.  Create a plan and balance
350     am.balance(new RegionPlan(hri, from, to));
351   }
352 
353   /**
354    * Tests AssignmentManager balance function.  Runs a balance moving a region
355    * from one server to another mocking regionserver responding over zk.
356    * @throws IOException
357    * @throws KeeperException
358    * @throws DeserializationException
359    */
360   @Test
361   public void testBalance()
362     throws IOException, KeeperException, DeserializationException, InterruptedException {
363     // Create and startup an executor.  This is used by AssignmentManager
364     // handling zk callbacks.
365     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
366 
367     // We need a mocked catalog tracker.
368     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
369     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
370         .getConfiguration());
371     // Create an AM.
372     AssignmentManager am = new AssignmentManager(this.server,
373       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
374     am.failoverCleanupDone.set(true);
375     try {
376       // Make sure our new AM gets callbacks; once registered, can't unregister.
377       // Thats ok because we make a new zk watcher for each test.
378       this.watcher.registerListenerFirst(am);
379       // Call the balance function but fake the region being online first at
380       // SERVERNAME_A.  Create a balance plan.
381       am.regionOnline(REGIONINFO, SERVERNAME_A);
382       // Balance region from A to B.
383       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
384       am.balance(plan);
385 
386       RegionStates regionStates = am.getRegionStates();
387       // Must be failed to close since the server is fake
388       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
389         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
390       // Move it back to pending_close
391       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
392 
393       // Now fake the region closing successfully over on the regionserver; the
394       // regionserver will have set the region in CLOSED state.  This will
395       // trigger callback into AM. The below zk close call is from the RS close
396       // region handler duplicated here because its down deep in a private
397       // method hard to expose.
398       int versionid =
399         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
400       assertNotSame(versionid, -1);
401       // AM is going to notice above CLOSED and queue up a new assign.  The
402       // assign will go to open the region in the new location set by the
403       // balancer.  The zk node will be OFFLINE waiting for regionserver to
404       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
405       // zk node before we proceed.
406       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
407 
408       // Get current versionid else will fail on transition from OFFLINE to OPENING below
409       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
410       assertNotSame(-1, versionid);
411       // This uglyness below is what the openregionhandler on RS side does.
412       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
413         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
414         EventType.RS_ZK_REGION_OPENING, versionid);
415       assertNotSame(-1, versionid);
416       // Move znode from OPENING to OPENED as RS does on successful open.
417       versionid =
418         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
419       assertNotSame(-1, versionid);
420       // Wait on the handler removing the OPENED znode.
421       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
422     } finally {
423       executor.shutdown();
424       am.shutdown();
425       // Clean up all znodes
426       ZKAssign.deleteAllNodes(this.watcher);
427     }
428   }
429 
430   /**
431    * Run a simple server shutdown handler.
432    * @throws KeeperException
433    * @throws IOException
434    */
435   @Test
436   public void testShutdownHandler()
437       throws KeeperException, IOException, ServiceException {
438     // Create and startup an executor.  This is used by AssignmentManager
439     // handling zk callbacks.
440     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
441 
442     // We need a mocked catalog tracker.
443     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
444     // Create an AM.
445     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
446         this.server, this.serverManager);
447     try {
448       processServerShutdownHandler(ct, am, false);
449     } finally {
450       executor.shutdown();
451       am.shutdown();
452       // Clean up all znodes
453       ZKAssign.deleteAllNodes(this.watcher);
454     }
455   }
456 
457   /**
458    * To test closed region handler to remove rit and delete corresponding znode
459    * if region in pending close or closing while processing shutdown of a region
460    * server.(HBASE-5927).
461    *
462    * @throws KeeperException
463    * @throws IOException
464    * @throws ServiceException
465    */
466   @Test
467   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
468       ServiceException {
469     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
470     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
471   }
472 
473 
474   /**
475    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
476    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
477    *
478    * @throws KeeperException
479    * @throws IOException
480    */
481   @Test
482   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
483     // true indicates the region is split but still in RIT
484     testCaseWithSplitRegionPartial(true);
485     // false indicate the region is not split
486     testCaseWithSplitRegionPartial(false);
487   }
488 
489   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
490       IOException, NodeExistsException, InterruptedException, ServiceException {
491     // Create and startup an executor. This is used by AssignmentManager
492     // handling zk callbacks.
493     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
494     // We need a mocked catalog tracker.
495     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
496     ZKAssign.deleteAllNodes(this.watcher);
497 
498     // Create an AM.
499     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
500       this.server, this.serverManager);
501     // adding region to regions and servers maps.
502     am.regionOnline(REGIONINFO, SERVERNAME_A);
503     // adding region in pending close.
504     am.getRegionStates().updateRegionState(
505       REGIONINFO, State.SPLITTING, SERVERNAME_A);
506     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
507     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
508         REGIONINFO.getRegionName(), SERVERNAME_A);
509     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
510     // create znode in M_ZK_REGION_CLOSING state.
511     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
512 
513     try {
514       processServerShutdownHandler(ct, am, regionSplitDone);
515       // check znode deleted or not.
516       // In both cases the znode should be deleted.
517 
518       if (regionSplitDone) {
519         assertFalse("Region state of region in SPLITTING should be removed from rit.",
520             am.getRegionStates().isRegionsInTransition());
521       } else {
522         while (!am.assignInvoked) {
523           Thread.sleep(1);
524         }
525         assertTrue("Assign should be invoked.", am.assignInvoked);
526       }
527     } finally {
528       REGIONINFO.setOffline(false);
529       REGIONINFO.setSplit(false);
530       executor.shutdown();
531       am.shutdown();
532       // Clean up all znodes
533       ZKAssign.deleteAllNodes(this.watcher);
534     }
535   }
536 
537   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
538       IOException, NodeExistsException, ServiceException {
539     // Create and startup an executor. This is used by AssignmentManager
540     // handling zk callbacks.
541     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
542     // We need a mocked catalog tracker.
543     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
544     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
545     ZKAssign.deleteAllNodes(this.watcher);
546 
547     // Create an AM.
548     AssignmentManager am = new AssignmentManager(this.server,
549       this.serverManager, ct, balancer, executor, null, master.getTableLockManager());
550     // adding region to regions and servers maps.
551     am.regionOnline(REGIONINFO, SERVERNAME_A);
552     // adding region in pending close.
553     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
554     if (state == Table.State.DISABLING) {
555       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
556     } else {
557       am.getZKTable().setDisabledTable(REGIONINFO.getTable());
558     }
559     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
560         REGIONINFO.getRegionName(), SERVERNAME_A);
561     // RegionTransitionData data = new
562     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
563     // REGIONINFO.getRegionName(), SERVERNAME_A);
564     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
565     // create znode in M_ZK_REGION_CLOSING state.
566     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
567 
568     try {
569       processServerShutdownHandler(ct, am, false);
570       // check znode deleted or not.
571       // In both cases the znode should be deleted.
572       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
573       // check whether in rit or not. In the DISABLING case also the below
574       // assert will be true but the piece of code added for HBASE-5927 will not
575       // do that.
576       if (state == Table.State.DISABLED) {
577         assertFalse("Region state of region in pending close should be removed from rit.",
578             am.getRegionStates().isRegionsInTransition());
579       }
580     } finally {
581       am.setEnabledTable(REGIONINFO.getTable());
582       executor.shutdown();
583       am.shutdown();
584       // Clean up all znodes
585       ZKAssign.deleteAllNodes(this.watcher);
586     }
587   }
588 
589   private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
590       throws IOException, ServiceException {
591     // Make sure our new AM gets callbacks; once registered, can't unregister.
592     // Thats ok because we make a new zk watcher for each test.
593     this.watcher.registerListenerFirst(am);
594 
595     // Need to set up a fake scan of meta for the servershutdown handler
596     // Make an RS Interface implementation.  Make it so a scanner can go against it.
597     ClientProtos.ClientService.BlockingInterface implementation =
598       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
599     // Get a meta row result that has region up on SERVERNAME_A
600 
601     Result r;
602     if (splitRegion) {
603       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
604     } else {
605       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
606     }
607 
608     final ScanResponse.Builder builder = ScanResponse.newBuilder();
609     builder.setMoreResults(true);
610     builder.addCellsPerResult(r.size());
611     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
612     cellScannables.add(r);
613     Mockito.when(implementation.scan(
614       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
615       thenAnswer(new Answer<ScanResponse>() {
616           @Override
617           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
618             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
619                 .getArguments()[0];
620             if (controller != null) {
621               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
622             }
623             return builder.build();
624           }
625       });
626 
627     // Get a connection w/ mocked up common methods.
628     HConnection connection =
629       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
630         null, implementation, SERVERNAME_B, REGIONINFO);
631 
632     // Make it so we can get a catalogtracker from servermanager.. .needed
633     // down in guts of server shutdown handler.
634     Mockito.when(ct.getConnection()).thenReturn(connection);
635     Mockito.when(this.server.getCatalogTracker()).thenReturn(ct);
636 
637     // Now make a server shutdown handler instance and invoke process.
638     // Have it that SERVERNAME_A died.
639     DeadServer deadServers = new DeadServer();
640     deadServers.add(SERVERNAME_A);
641     // I need a services instance that will return the AM
642     MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
643     Mockito.doNothing().when(fs).setLogRecoveryMode();
644     Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
645     MasterServices services = Mockito.mock(MasterServices.class);
646     Mockito.when(services.getAssignmentManager()).thenReturn(am);
647     Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
648     Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
649     Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
650     ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
651       services, deadServers, SERVERNAME_A, false);
652     am.failoverCleanupDone.set(true);
653     handler.process();
654     // The region in r will have been assigned.  It'll be up in zk as unassigned.
655   }
656 
657   /**
658    * Create and startup executor pools. Start same set as master does (just
659    * run a few less).
660    * @param name Name to give our executor
661    * @return Created executor (be sure to call shutdown when done).
662    */
663   private ExecutorService startupMasterExecutor(final String name) {
664     // TODO: Move up into HBaseTestingUtility?  Generally useful.
665     ExecutorService executor = new ExecutorService(name);
666     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
667     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
668     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
669     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
670     return executor;
671   }
672 
673   @Test
674   public void testUnassignWithSplitAtSameTime() throws KeeperException, IOException {
675     // Region to use in test.
676     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
677     // First amend the servermanager mock so that when we do send close of the
678     // first meta region on SERVERNAME_A, it will return true rather than
679     // default null.
680     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
681     // Need a mocked catalog tracker.
682     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
683     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
684         .getConfiguration());
685     // Create an AM.
686     AssignmentManager am = new AssignmentManager(this.server,
687       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
688     try {
689       // First make sure my mock up basically works.  Unassign a region.
690       unassign(am, SERVERNAME_A, hri);
691       // This delete will fail if the previous unassign did wrong thing.
692       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
693       // Now put a SPLITTING region in the way.  I don't have to assert it
694       // go put in place.  This method puts it in place then asserts it still
695       // owns it by moving state from SPLITTING to SPLITTING.
696       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
697       // Now, retry the unassign with the SPLTTING in place.  It should just
698       // complete without fail; a sort of 'silent' recognition that the
699       // region to unassign has been split and no longer exists: TOOD: what if
700       // the split fails and the parent region comes back to life?
701       unassign(am, SERVERNAME_A, hri);
702       // This transition should fail if the znode has been messed with.
703       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
704         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
705       assertFalse(am.getRegionStates().isRegionInTransition(hri));
706     } finally {
707       am.shutdown();
708     }
709   }
710 
711   /**
712    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
713    * when it failed to get the children. Let's abort the system in this
714    * situation
715    * @throws ServiceException
716    */
717   @Test(timeout = 60000)
718   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
719       throws IOException, KeeperException, InterruptedException, ServiceException {
720     final RecoverableZooKeeper recoverableZk = Mockito
721         .mock(RecoverableZooKeeper.class);
722     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
723       this.server, this.serverManager);
724     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
725         null) {
726       @Override
727       public RecoverableZooKeeper getRecoverableZooKeeper() {
728         return recoverableZk;
729       }
730     };
731     ((ZooKeeperWatcher) zkw).registerListener(am);
732     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
733         .getChildren("/hbase/region-in-transition", null);
734     am.setWatcher((ZooKeeperWatcher) zkw);
735     try {
736       am.processDeadServersAndRegionsInTransition(null);
737       fail("Expected to abort");
738     } catch (NullPointerException e) {
739       fail("Should not throw NPE");
740     } catch (RuntimeException e) {
741       assertEquals("Aborted", e.getLocalizedMessage());
742     }
743   }
744   /**
745    * TestCase verifies that the regionPlan is updated whenever a region fails to open
746    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
747    */
748   @Test(timeout = 60000)
749   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
750       ServiceException, InterruptedException {
751     this.server.getConfiguration().setClass(
752       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
753       LoadBalancer.class);
754     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
755       this.server, this.serverManager);
756     try {
757       // Boolean variable used for waiting until randomAssignment is called and
758       // new
759       // plan is generated.
760       AtomicBoolean gate = new AtomicBoolean(false);
761       if (balancer instanceof MockedLoadBalancer) {
762         ((MockedLoadBalancer) balancer).setGateVariable(gate);
763       }
764       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
765       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
766       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
767           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
768       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
769           .getEncodedName());
770       am.getRegionStates().updateRegionState(
771         REGIONINFO, State.OPENING, SERVERNAME_A);
772       // a dummy plan inserted into the regionPlans. This plan is cleared and
773       // new one is formed
774       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
775           REGIONINFO, null, SERVERNAME_A));
776       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
777       List<ServerName> serverList = new ArrayList<ServerName>(2);
778       serverList.add(SERVERNAME_B);
779       Mockito.when(
780           this.serverManager.createDestinationServersList(SERVERNAME_A))
781           .thenReturn(serverList);
782       am.nodeDataChanged(path);
783       // here we are waiting until the random assignment in the load balancer is
784       // called.
785       while (!gate.get()) {
786         Thread.sleep(10);
787       }
788       // new region plan may take some time to get updated after random
789       // assignment is called and
790       // gate is set to true.
791       RegionPlan newRegionPlan = am.regionPlans
792           .get(REGIONINFO.getEncodedName());
793       while (newRegionPlan == null) {
794         Thread.sleep(10);
795         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
796       }
797       // the new region plan created may contain the same RS as destination but
798       // it should
799       // be new plan.
800       assertNotSame("Same region plan should not come", regionPlan,
801           newRegionPlan);
802       assertTrue("Destination servers should be different.", !(regionPlan
803           .getDestination().equals(newRegionPlan.getDestination())));
804 
805       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
806     } finally {
807       this.server.getConfiguration().setClass(
808           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
809           LoadBalancer.class);
810       am.getExecutorService().shutdown();
811       am.shutdown();
812     }
813   }
814 
815   /**
816    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
817    * random assignment is called and the gate variable is set to true.
818    */
819   public static class MockedLoadBalancer extends SimpleLoadBalancer {
820     private AtomicBoolean gate;
821 
822     public void setGateVariable(AtomicBoolean gate) {
823       this.gate = gate;
824     }
825 
826     @Override
827     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
828       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
829       this.gate.set(true);
830       return randomServerName;
831     }
832 
833     @Override
834     public Map<ServerName, List<HRegionInfo>> retainAssignment(
835         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
836       this.gate.set(true);
837       return super.retainAssignment(regions, servers);
838     }
839   }
840 
841   /**
842    * Test the scenario when the master is in failover and trying to process a
843    * region which is in Opening state on a dead RS. Master will force offline the
844    * region and put it in transition. AM relies on SSH to reassign it.
845    */
846   @Test(timeout = 60000)
847   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
848       KeeperException, ServiceException, InterruptedException {
849     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
850       this.server, this.serverManager);
851     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
852     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
853     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
854         EventType.RS_ZK_REGION_OPENING, version);
855     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
856         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
857     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
858     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
859     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
860     am.getRegionStates().createRegionState(REGIONINFO);
861     am.gate.set(false);
862     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
863     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, version));
864     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
865     processServerShutdownHandler(ct, am, false);
866     // Waiting for the assignment to get completed.
867     while (!am.gate.get()) {
868       Thread.sleep(10);
869     }
870     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
871         .getEncodedName()));
872   }
873 
874   /**
875    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
876    * clean cluster startup. See HBASE-6281.
877    *
878    * @throws KeeperException
879    * @throws IOException
880    * @throws Exception
881    */
882   @Test(timeout = 60000)
883   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
884       throws KeeperException, IOException, Exception {
885     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
886         MockedLoadBalancer.class, LoadBalancer.class);
887     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
888         new HashMap<ServerName, ServerLoad>(0));
889     List<ServerName> destServers = new ArrayList<ServerName>(1);
890     destServers.add(SERVERNAME_A);
891     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
892     // To avoid cast exception in DisableTableHandler process.
893     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
894     Server server = new HMaster(HTU.getConfiguration());
895     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
896         this.serverManager);
897     AtomicBoolean gate = new AtomicBoolean(false);
898     if (balancer instanceof MockedLoadBalancer) {
899       ((MockedLoadBalancer) balancer).setGateVariable(gate);
900     }
901     try{
902       // set table in disabling state.
903       am.getZKTable().setDisablingTable(REGIONINFO.getTable());
904       am.joinCluster();
905       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
906       assertFalse(
907           "Assign should not be invoked for disabling table regions during clean cluster startup.",
908           gate.get());
909       // need to change table state from disabling to disabled.
910       assertTrue("Table should be disabled.",
911           am.getZKTable().isDisabledTable(REGIONINFO.getTable()));
912     } finally {
913       this.server.getConfiguration().setClass(
914         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, SimpleLoadBalancer.class,
915         LoadBalancer.class);
916       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
917       am.shutdown();
918     }
919   }
920 
921   /**
922    * Test verifies whether all the enabling table regions assigned only once during master startup.
923    *
924    * @throws KeeperException
925    * @throws IOException
926    * @throws Exception
927    */
928   @Test
929   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
930     enabling = true;
931     List<ServerName> destServers = new ArrayList<ServerName>(1);
932     destServers.add(SERVERNAME_A);
933     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
934     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
935     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
936     Server server = new HMaster(HTU.getConfiguration());
937     Whitebox.setInternalState(server, "serverManager", this.serverManager);
938     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
939         this.serverManager);
940     try {
941       // set table in enabling state.
942       am.getZKTable().setEnablingTable(REGIONINFO.getTable());
943       new EnableTableHandler(server, REGIONINFO.getTable(),
944           am.getCatalogTracker(), am, new NullTableLockManager(), true).prepare()
945           .process();
946       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
947       assertTrue("Table should be enabled.",
948           am.getZKTable().isEnabledTable(REGIONINFO.getTable()));
949     } finally {
950       enabling = false;
951       assignmentCount = 0;
952       am.getZKTable().setEnabledTable(REGIONINFO.getTable());
953       am.shutdown();
954       ZKAssign.deleteAllNodes(this.watcher);
955     }
956   }
957 
958   /**
959    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
960    * not.
961    * @throws KeeperException
962    * @throws IOException
963    * @throws Exception
964    */
965   @Test
966   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
967       throws KeeperException, IOException, Exception {
968     List<ServerName> destServers = new ArrayList<ServerName>(1);
969     destServers.add(SERVERNAME_A);
970     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
971     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
972     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
973     Server server = new HMaster(HTU.getConfiguration());
974     Whitebox.setInternalState(server, "serverManager", this.serverManager);
975     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
976         this.serverManager);
977     try {
978       TableName tableName = TableName.valueOf("dummyTable");
979       // set table in enabling state.
980       am.getZKTable().setEnablingTable(tableName);
981       am.joinCluster();
982       assertFalse("Table should not be present in zookeeper.",
983         am.getZKTable().isTablePresent(tableName));
984     } finally {
985     }
986   }
987   /**
988    * When a region is in transition, if the region server opening the region goes down,
989    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
990    * This test is to make sure SSH reassigns it right away.
991    */
992   @Test
993   public void testSSHTimesOutOpeningRegionTransition()
994       throws KeeperException, IOException, ServiceException {
995     // We need a mocked catalog tracker.
996     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
997     // Create an AM.
998     AssignmentManagerWithExtrasForTesting am =
999       setUpMockedAssignmentManager(this.server, this.serverManager);
1000     // adding region in pending open.
1001     RegionState state = new RegionState(REGIONINFO,
1002       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1003     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1004     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1005     // adding region plan
1006     am.regionPlans.put(REGIONINFO.getEncodedName(),
1007       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1008     am.getZKTable().setEnabledTable(REGIONINFO.getTable());
1009 
1010     try {
1011       am.assignInvoked = false;
1012       processServerShutdownHandler(ct, am, false);
1013       assertTrue(am.assignInvoked);
1014     } finally {
1015       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1016       am.regionPlans.remove(REGIONINFO.getEncodedName());
1017     }
1018   }
1019 
1020   /**
1021    * Scenario:<ul>
1022    *  <li> master starts a close, and creates a znode</li>
1023    *  <li> it fails just at this moment, before contacting the RS</li>
1024    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1025    *    we don't know, and we have an exception.</li>
1026    *  <li> the master must handle this nicely and reassign.
1027    *  </ul>
1028    */
1029   @Test
1030   public void testClosingFailureDuringRecovery() throws Exception {
1031 
1032     AssignmentManagerWithExtrasForTesting am =
1033         setUpMockedAssignmentManager(this.server, this.serverManager);
1034     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1035     am.getRegionStates().createRegionState(REGIONINFO);
1036 
1037     assertFalse( am.getRegionStates().isRegionsInTransition() );
1038 
1039     am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1040 
1041     assertTrue( am.getRegionStates().isRegionsInTransition() );
1042   }
1043 
1044   /**
1045    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1046    * Create it ephemeral in case regionserver dies mid-split.
1047    *
1048    * <p>Does not transition nodes from other states.  If a node already exists
1049    * for this region, a {@link NodeExistsException} will be thrown.
1050    *
1051    * @param zkw zk reference
1052    * @param region region to be created as offline
1053    * @param serverName server event originates from
1054    * @return Version of znode created.
1055    * @throws KeeperException
1056    * @throws IOException
1057    */
1058   // Copied from SplitTransaction rather than open the method over there in
1059   // the regionserver package.
1060   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1061       final HRegionInfo region, final ServerName serverName)
1062   throws KeeperException, IOException {
1063     RegionTransition rt =
1064       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1065         region.getRegionName(), serverName);
1066 
1067     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1068     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1069       throw new IOException("Failed create of ephemeral " + node);
1070     }
1071     // Transition node from SPLITTING to SPLITTING and pick up version so we
1072     // can be sure this znode is ours; version is needed deleting.
1073     return transitionNodeSplitting(zkw, region, serverName, -1);
1074   }
1075 
1076   // Copied from SplitTransaction rather than open the method over there in
1077   // the regionserver package.
1078   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1079       final HRegionInfo parent,
1080       final ServerName serverName, final int version)
1081   throws KeeperException, IOException {
1082     return ZKAssign.transitionNode(zkw, parent, serverName,
1083       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1084   }
1085 
1086   private void unassign(final AssignmentManager am, final ServerName sn,
1087       final HRegionInfo hri) throws RegionException {
1088     // Before I can unassign a region, I need to set it online.
1089     am.regionOnline(hri, sn);
1090     // Unassign region.
1091     am.unassign(hri);
1092   }
1093 
1094   /**
1095    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1096    * {@link CatalogTracker} etc.
1097    * @param server
1098    * @param manager
1099    * @return An AssignmentManagerWithExtras with mock connections, etc.
1100    * @throws IOException
1101    * @throws KeeperException
1102    */
1103   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(final Server server,
1104       final ServerManager manager) throws IOException, KeeperException, ServiceException {
1105     // We need a mocked catalog tracker. Its used by our AM instance.
1106     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1107     // Make an RS Interface implementation. Make it so a scanner can go against
1108     // it and a get to return the single region, REGIONINFO, this test is
1109     // messing with. Needed when "new master" joins cluster. AM will try and
1110     // rebuild its list of user regions and it will also get the HRI that goes
1111     // with an encoded name by doing a Get on hbase:meta
1112     ClientProtos.ClientService.BlockingInterface ri =
1113       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1114     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1115     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1116     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1117     builder.setMoreResults(true);
1118     builder.addCellsPerResult(r.size());
1119     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1120     rows.add(r);
1121     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1122       @Override
1123       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1124         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1125             .getArguments()[0];
1126         if (controller != null) {
1127           controller.setCellScanner(CellUtil.createCellScanner(rows));
1128         }
1129         return builder.build();
1130       }
1131     };
1132     if (enabling) {
1133       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1134           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1135           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1136     } else {
1137       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1138           ans);
1139     }
1140     // If a get, return the above result too for REGIONINFO
1141     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1142     getBuilder.setResult(ProtobufUtil.toResult(r));
1143     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1144       thenReturn(getBuilder.build());
1145     // Get a connection w/ mocked up common methods.
1146     HConnection connection = HConnectionTestingUtility.
1147       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1148         ri, SERVERNAME_B, REGIONINFO);
1149     // Make it so we can get the connection from our mocked catalogtracker
1150     Mockito.when(ct.getConnection()).thenReturn(connection);
1151     // Create and startup an executor. Used by AM handling zk callbacks.
1152     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1153     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1154     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1155       server, manager, ct, this.balancer, executor, new NullTableLockManager());
1156     return am;
1157   }
1158 
1159   /**
1160    * An {@link AssignmentManager} with some extra facility used testing
1161    */
1162   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1163     // Keep a reference so can give it out below in {@link #getExecutorService}
1164     private final ExecutorService es;
1165     // Ditto for ct
1166     private final CatalogTracker ct;
1167     boolean processRITInvoked = false;
1168     boolean assignInvoked = false;
1169     AtomicBoolean gate = new AtomicBoolean(true);
1170 
1171     public AssignmentManagerWithExtrasForTesting(
1172         final Server master, final ServerManager serverManager,
1173         final CatalogTracker catalogTracker, final LoadBalancer balancer,
1174         final ExecutorService service, final TableLockManager tableLockManager)
1175             throws KeeperException, IOException {
1176       super(master, serverManager, catalogTracker, balancer, service, null, tableLockManager);
1177       this.es = service;
1178       this.ct = catalogTracker;
1179     }
1180 
1181     @Override
1182     boolean processRegionInTransition(String encodedRegionName,
1183         HRegionInfo regionInfo) throws KeeperException, IOException {
1184       this.processRITInvoked = true;
1185       return super.processRegionInTransition(encodedRegionName, regionInfo);
1186     }
1187 
1188     @Override
1189     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1190       if (enabling) {
1191         assignmentCount++;
1192         this.regionOnline(region, SERVERNAME_A);
1193       } else {
1194         super.assign(region, setOfflineInZK, forceNewPlan);
1195         this.gate.set(true);
1196       }
1197     }
1198 
1199     @Override
1200     boolean assign(ServerName destination, List<HRegionInfo> regions) {
1201       if (enabling) {
1202         for (HRegionInfo region : regions) {
1203           assignmentCount++;
1204           this.regionOnline(region, SERVERNAME_A);
1205         }
1206         return true;
1207       }
1208       return super.assign(destination, regions);
1209     }
1210 
1211     @Override
1212     public void assign(List<HRegionInfo> regions)
1213         throws IOException, InterruptedException {
1214       assignInvoked = (regions != null && regions.size() > 0);
1215       super.assign(regions);
1216       this.gate.set(true);
1217     }
1218 
1219     /** reset the watcher */
1220     void setWatcher(ZooKeeperWatcher watcher) {
1221       this.watcher = watcher;
1222     }
1223 
1224     /**
1225      * @return ExecutorService used by this instance.
1226      */
1227     ExecutorService getExecutorService() {
1228       return this.es;
1229     }
1230 
1231     /**
1232      * @return CatalogTracker used by this AM (Its a mock).
1233      */
1234     CatalogTracker getCatalogTracker() {
1235       return this.ct;
1236     }
1237   }
1238 
1239   /**
1240    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1241    * so it runs independent of what all else is going on.  Try to simulate
1242    * an AM running insided a failed over master by clearing all in-memory
1243    * AM state first.
1244   */
1245   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1246       final ZooKeeperWatcher watcher) {
1247     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1248     // Thats ok because we make a new zk watcher for each test.
1249     watcher.registerListenerFirst(am);
1250     Thread t = new Thread("RunAmJoinCluster") {
1251       @Override
1252       public void run() {
1253         // Call the joinCluster function as though we were doing a master
1254         // failover at this point. It will stall just before we go to add
1255         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1256         // First clear any inmemory state from AM so it acts like a new master
1257         // coming on line.
1258         am.getRegionStates().regionsInTransition.clear();
1259         am.regionPlans.clear();
1260         try {
1261           am.joinCluster();
1262         } catch (IOException e) {
1263           throw new RuntimeException(e);
1264         } catch (KeeperException e) {
1265           throw new RuntimeException(e);
1266         } catch (InterruptedException e) {
1267           throw new RuntimeException(e);
1268         }
1269       }
1270     };
1271     t.start();
1272     while (!t.isAlive()) Threads.sleep(1);
1273   }
1274 
1275   @Test
1276   public void testForceAssignMergingRegion() throws Exception {
1277     // Region to use in test.
1278     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1279     // Need a mocked catalog tracker.
1280     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1281     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1282       server.getConfiguration());
1283     // Create an AM.
1284     AssignmentManager am = new AssignmentManager(this.server,
1285       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1286     RegionStates regionStates = am.getRegionStates();
1287     try {
1288       // First set the state of the region to merging
1289       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1290       // Now, try to assign it with force new plan
1291       am.assign(hri, true, true);
1292       assertEquals("The region should be still in merging state",
1293         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1294     } finally {
1295       am.shutdown();
1296     }
1297   }
1298 
1299   /**
1300    * Test assignment related ZK events are ignored by AM if the region is not known
1301    * by AM to be in transition. During normal operation, all assignments are started
1302    * by AM (not considering split/merge), if an event is received but the region
1303    * is not in transition, the event must be a very late one. So it can be ignored.
1304    * During master failover, since AM watches assignment znodes after failover cleanup
1305    * is completed, when an event comes in, AM should already have the region in transition
1306    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1307    * assignment). So during master failover, we can ignored such events too.
1308    */
1309   @Test
1310   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException {
1311     // Region to use in test.
1312     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1313     // Need a mocked catalog tracker.
1314     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1315     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1316       server.getConfiguration());
1317     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1318     // Create an AM.
1319     AssignmentManager am = new AssignmentManager(this.server,
1320       this.serverManager, ct, balancer, null, null, master.getTableLockManager()) {
1321 
1322       @Override
1323       void handleRegion(final RegionTransition rt, int expectedVersion) {
1324         super.handleRegion(rt, expectedVersion);
1325         if (rt != null && Bytes.equals(hri.getRegionName(),
1326           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1327           zkEventProcessed.set(true);
1328         }
1329       }
1330     };
1331     try {
1332       // First make sure the region is not in transition
1333       am.getRegionStates().regionOffline(hri);
1334       zkEventProcessed.set(false); // Reset it before faking zk transition
1335       this.watcher.registerListenerFirst(am);
1336       assertFalse("The region should not be in transition",
1337         am.getRegionStates().isRegionInTransition(hri));
1338       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1339       // Trigger a transition event
1340       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1341       long startTime = EnvironmentEdgeManager.currentTimeMillis();
1342       while (!zkEventProcessed.get()) {
1343         assertTrue("Timed out in waiting for ZK event to be processed",
1344           EnvironmentEdgeManager.currentTimeMillis() - startTime < 30000);
1345         Threads.sleepWithoutInterrupt(100);
1346       }
1347       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1348     } finally {
1349       am.shutdown();
1350     }
1351   }
1352 
1353   /**
1354    * If a table is deleted, we should not be able to balance it anymore.
1355    * Otherwise, the region will be brought back.
1356    * @throws Exception
1357    */
1358   @Test
1359   public void testBalanceRegionOfDeletedTable() throws Exception {
1360     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1361     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1362       ct, balancer, null, null, master.getTableLockManager());
1363     RegionStates regionStates = am.getRegionStates();
1364     HRegionInfo hri = REGIONINFO;
1365     regionStates.createRegionState(hri);
1366     assertFalse(regionStates.isRegionInTransition(hri));
1367     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1368     // Fake table is deleted
1369     regionStates.tableDeleted(hri.getTable());
1370     am.balance(plan);
1371     assertFalse("The region should not in transition",
1372       regionStates.isRegionInTransition(hri));
1373   }
1374 
1375   /**
1376    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1377    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1378    */
1379   @SuppressWarnings("unchecked")
1380   @Test
1381   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1382     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1383       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1384       .thenThrow(new DoNotRetryIOException());
1385     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1386 
1387     HRegionInfo hri = REGIONINFO;
1388     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
1389     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1390       server.getConfiguration());
1391     // Create an AM.
1392     AssignmentManager am = new AssignmentManager(this.server,
1393       this.serverManager, ct, balancer, null, null, master.getTableLockManager());
1394     RegionStates regionStates = am.getRegionStates();
1395     try {
1396       am.regionPlans.put(REGIONINFO.getEncodedName(),
1397         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1398 
1399       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1400       am.assign(hri, true, false);
1401     } finally {
1402       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1403     }
1404   }
1405 }