View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertNull;
26  import static org.junit.Assert.assertTrue;
27  import static org.junit.Assert.fail;
28  
29  import java.io.IOException;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.concurrent.CountDownLatch;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.Abortable;
40  import org.apache.hadoop.hbase.Coprocessor;
41  import org.apache.hadoop.hbase.HBaseIOException;
42  import org.apache.hadoop.hbase.HBaseTestingUtility;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.LargeTests;
48  import org.apache.hadoop.hbase.MasterNotRunningException;
49  import org.apache.hadoop.hbase.MiniHBaseCluster;
50  import org.apache.hadoop.hbase.RegionTransition;
51  import org.apache.hadoop.hbase.Server;
52  import org.apache.hadoop.hbase.ServerName;
53  import org.apache.hadoop.hbase.TableName;
54  import org.apache.hadoop.hbase.UnknownRegionException;
55  import org.apache.hadoop.hbase.Waiter;
56  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
57  import org.apache.hadoop.hbase.catalog.MetaEditor;
58  import org.apache.hadoop.hbase.catalog.MetaReader;
59  import org.apache.hadoop.hbase.client.Delete;
60  import org.apache.hadoop.hbase.client.HBaseAdmin;
61  import org.apache.hadoop.hbase.client.HTable;
62  import org.apache.hadoop.hbase.client.Mutation;
63  import org.apache.hadoop.hbase.client.Put;
64  import org.apache.hadoop.hbase.client.Result;
65  import org.apache.hadoop.hbase.client.ResultScanner;
66  import org.apache.hadoop.hbase.client.Scan;
67  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
68  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
69  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
70  import org.apache.hadoop.hbase.exceptions.DeserializationException;
71  import org.apache.hadoop.hbase.executor.EventType;
72  import org.apache.hadoop.hbase.master.AssignmentManager;
73  import org.apache.hadoop.hbase.master.HMaster;
74  import org.apache.hadoop.hbase.master.RegionState;
75  import org.apache.hadoop.hbase.master.RegionStates;
76  import org.apache.hadoop.hbase.master.RegionState.State;
77  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
78  import org.apache.hadoop.hbase.util.Bytes;
79  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
80  import org.apache.hadoop.hbase.util.FSUtils;
81  import org.apache.hadoop.hbase.util.HBaseFsck;
82  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
83  import org.apache.hadoop.hbase.util.PairOfSameType;
84  import org.apache.hadoop.hbase.util.Threads;
85  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
86  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
87  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
88  import org.apache.zookeeper.KeeperException;
89  import org.apache.zookeeper.KeeperException.NodeExistsException;
90  import org.apache.zookeeper.data.Stat;
91  import org.junit.After;
92  import org.junit.AfterClass;
93  import org.junit.Assert;
94  import org.junit.Before;
95  import org.junit.BeforeClass;
96  import org.junit.Test;
97  import org.junit.experimental.categories.Category;
98  
99  import com.google.protobuf.ServiceException;
100 
/**
 * Like {@link TestSplitTransaction} in that we are testing {@link SplitTransaction},
 * only the tests below run against a live cluster whereas {@link TestSplitTransaction}
 * tests against a bare {@link HRegion}.
 */
106 @Category(LargeTests.class)
107 public class TestSplitTransactionOnCluster {
108   private static final Log LOG =
109     LogFactory.getLog(TestSplitTransactionOnCluster.class);
110   private HBaseAdmin admin = null;
111   private MiniHBaseCluster cluster = null;
112   private static final int NB_SERVERS = 3;
113   private static CountDownLatch latch = new CountDownLatch(1);
114   private static volatile boolean secondSplit = false;
115   private static volatile boolean callRollBack = false;
116   private static volatile boolean firstSplitCompleted = false;
117 
118   private static final HBaseTestingUtility TESTING_UTIL =
119     new HBaseTestingUtility();
120 
121   @BeforeClass public static void before() throws Exception {
122     TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
123     // Needed because some tests have splits happening on RS that are killed
124     // We don't want to wait 3min for the master to figure it out
125     TESTING_UTIL.getConfiguration().setInt(
126         "hbase.master.assignment.timeoutmonitor.timeout", 4000);
127     TESTING_UTIL.startMiniCluster(NB_SERVERS);
128   }
129 
130   @AfterClass public static void after() throws Exception {
131     TESTING_UTIL.shutdownMiniCluster();
132   }
133 
134   @Before public void setup() throws IOException {
135     TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
136     this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
137     this.cluster = TESTING_UTIL.getMiniHBaseCluster();
138   }
139 
140   @After
141   public void tearDown() throws Exception {
142     this.admin.close();
143   }
144 
145   private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
146     assertEquals(1, regions.size());
147     HRegionInfo hri = regions.get(0).getRegionInfo();
148     return waitOnRIT(hri);
149   }
150 
151   /**
152    * Often region has not yet fully opened.  If we try to use it -- do a move for instance -- it
153    * will fail silently if the region is not yet opened.
154    * @param hri Region to check if in Regions In Transition... wait until out of transition before
155    * returning
156    * @return Passed in <code>hri</code>
157    */
158   private HRegionInfo waitOnRIT(final HRegionInfo hri) {
159     // Close worked but we are going to open the region elsewhere.  Before going on, make sure
160     // this completes.
161     while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
162         getRegionStates().isRegionInTransition(hri)) {
163       LOG.info("Waiting on region in transition: " +
164         TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
165           getRegionTransitionState(hri));
166       Threads.sleep(10);
167     }
168     return hri;
169   }
170 
171   @SuppressWarnings("deprecation")
172   @Test(timeout = 60000)
173   public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
174     final TableName tableName =
175         TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
176     try {
177       // Create table then get the single region for our new table.
178       HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
179       final List<HRegion> regions = cluster.getRegions(tableName);
180       HRegionInfo hri = getAndCheckSingleTableRegion(regions);
181       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
182       final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
183       insertData(tableName.getName(), admin, t);
184       t.close();
185 
186       // Turn off balancer so it doesn't cut in and mess up our placements.
187       this.admin.setBalancerRunning(false, true);
188       // Turn off the meta scanner so it don't remove parent on us.
189       cluster.getMaster().setCatalogJanitorEnabled(false);
190 
191       // find a splittable region
192       final HRegion region = findSplittableRegion(regions);
193       assertTrue("not able to find a splittable region", region != null);
194 
195       new Thread() {
196         @Override
197         public void run() {
198           SplitTransaction st = null;
199           st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
200           try {
201             st.prepare();
202             st.execute(regionServer, regionServer);
203           } catch (IOException e) {
204 
205           }
206         }
207       }.start();
208       for (int i = 0; !callRollBack && i < 100; i++) {
209         Thread.sleep(100);
210       }
211       assertTrue("Waited too long for rollback", callRollBack);
212       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
213       try {
214         secondSplit = true;
215         // make region splittable
216         region.initialize();
217         st.prepare();
218         st.execute(regionServer, regionServer);
219       } catch (IOException e) {
220         LOG.debug("Rollback started :"+ e.getMessage());
221         st.rollback(regionServer, regionServer);
222       }
223       for (int i=0; !firstSplitCompleted && i<100; i++) {
224         Thread.sleep(100);
225       }
226       assertTrue("fist split did not complete", firstSplitCompleted);
227 
228       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
229       Map<String, RegionState> rit = regionStates.getRegionsInTransition();
230 
231       for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
232         Thread.sleep(100);
233       }
234       assertFalse("region still in transition", rit.containsKey(
235           rit.containsKey(hri.getTable())));
236 
237       List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
238       // Region server side split is successful.
239       assertEquals("The parent region should be splitted", 2, onlineRegions.size());
240       //Should be present in RIT
241       List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
242           .getRegionStates().getRegionsOfTable(tableName);
243       // Master side should also reflect the same
244       assertEquals("No of regions in master", 2, regionsOfTable.size());
245     } finally {
246       admin.setBalancerRunning(true, false);
247       secondSplit = false;
248       firstSplitCompleted = false;
249       callRollBack = false;
250       cluster.getMaster().setCatalogJanitorEnabled(true);
251       TESTING_UTIL.deleteTable(tableName);
252     }
253   }
254 
255   @Test(timeout = 60000)
256   public void testRITStateForRollback() throws Exception {
257     final TableName tableName =
258         TableName.valueOf("testRITStateForRollback");
259     try {
260       // Create table then get the single region for our new table.
261       HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
262       final List<HRegion> regions = cluster.getRegions(tableName);
263       final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
264       insertData(tableName.getName(), admin, t);
265       t.close();
266 
267       // Turn off balancer so it doesn't cut in and mess up our placements.
268       this.admin.setBalancerRunning(false, true);
269       // Turn off the meta scanner so it don't remove parent on us.
270       cluster.getMaster().setCatalogJanitorEnabled(false);
271 
272       // find a splittable region
273       final HRegion region = findSplittableRegion(regions);
274       assertTrue("not able to find a splittable region", region != null);
275 
276       // install region co-processor to fail splits
277       region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
278         Coprocessor.PRIORITY_USER, region.getBaseConf());
279 
280       // split async
281       this.admin.split(region.getRegionName(), new byte[] {42});
282 
283       // we have to wait until the SPLITTING state is seen by the master
284       FailingSplitRegionObserver.latch.await();
285 
286       LOG.info("Waiting for region to come out of RIT");
287       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
288         @Override
289         public boolean evaluate() throws Exception {
290           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
291           Map<String, RegionState> rit = regionStates.getRegionsInTransition();
292           return !rit.containsKey(hri.getEncodedName());
293         }
294       });
295     } finally {
296       admin.setBalancerRunning(true, false);
297       cluster.getMaster().setCatalogJanitorEnabled(true);
298       TESTING_UTIL.deleteTable(tableName);
299     }
300   }
301 
302   public static class FailingSplitRegionObserver extends BaseRegionObserver {
303     static volatile CountDownLatch latch = new CountDownLatch(1);
304     @Override
305     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
306         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
307       latch.countDown();
308       throw new IOException("Causing rollback of region split");
309     }
310   }
311 
312  /**
313    * A test that intentionally has master fail the processing of the split message.
314    * Tests that the regionserver split ephemeral node gets cleaned up if it
315    * crashes and that after we process server shutdown, the daughters are up on
316    * line.
317    * @throws IOException
318    * @throws InterruptedException
319    * @throws NodeExistsException
320    * @throws KeeperException
321    * @throws DeserializationException
322    */
323   @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
324   throws IOException, InterruptedException, NodeExistsException, KeeperException,
325       DeserializationException, ServiceException {
326     final byte [] tableName =
327       Bytes.toBytes("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
328 
329     // Create table then get the single region for our new table.
330     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
331     List<HRegion> regions = cluster.getRegions(tableName);
332     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
333 
334     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
335 
336     // Turn off balancer so it doesn't cut in and mess up our placements.
337     this.admin.setBalancerRunning(false, true);
338     // Turn off the meta scanner so it don't remove parent on us.
339     cluster.getMaster().setCatalogJanitorEnabled(false);
340     try {
341       // Add a bit of load up into the table so splittable.
342       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
343       // Get region pre-split.
344       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
345       printOutRegions(server, "Initial regions: ");
346       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
347       // Now, before we split, set special flag in master, a flag that has
348       // it FAIL the processing of split.
349       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
350       // Now try splitting and it should work.
351       split(hri, server, regionCount);
352       // Get daughters
353       List<HRegion> daughters = checkAndGetDaughters(tableName);
354       // Assert the ephemeral node is up in zk.
355       String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
356         hri.getEncodedName());
357       RegionTransition rt = null;
358       Stat stats = null;
359       // Wait till the znode moved to SPLIT
360       for (int i=0; i<100; i++) {
361         stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
362         rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
363           hri.getEncodedName()));
364         if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
365         Thread.sleep(100);
366       }
367       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
368       assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
369       // Now crash the server
370       cluster.abortRegionServer(tableRegionIndex);
371       waitUntilRegionServerDead();
372       awaitDaughters(tableName, daughters.size());
373 
374       // Assert daughters are online.
375       regions = cluster.getRegions(tableName);
376       for (HRegion r: regions) {
377         assertTrue(daughters.contains(r));
378       }
379       // Finally assert that the ephemeral SPLIT znode was cleaned up.
380       for (int i=0; i<100; i++) {
381         // wait a bit (10s max) for the node to disappear
382         stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
383         if (stats == null) break;
384         Thread.sleep(100);
385       }
386       LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
387       assertTrue(stats == null);
388     } finally {
389       // Set this flag back.
390       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
391       admin.setBalancerRunning(true, false);
392       cluster.getMaster().setCatalogJanitorEnabled(true);
393       cluster.startRegionServer();
394       t.close();
395     }
396   }
397 
398   @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
399   throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
400     final byte [] tableName =
401       Bytes.toBytes("testExistingZnodeBlocksSplitAndWeRollback");
402 
403     // Create table then get the single region for our new table.
404     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
405     List<HRegion> regions = cluster.getRegions(tableName);
406     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
407 
408     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
409 
410     // Turn off balancer so it doesn't cut in and mess up our placements.
411     this.admin.setBalancerRunning(false, true);
412     // Turn off the meta scanner so it don't remove parent on us.
413     cluster.getMaster().setCatalogJanitorEnabled(false);
414     try {
415       // Add a bit of load up into the table so splittable.
416       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
417       // Get region pre-split.
418       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
419       printOutRegions(server, "Initial regions: ");
420       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
421       // Insert into zk a blocking znode, a znode of same name as region
422       // so it gets in way of our splitting.
423       ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
424       ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
425         hri, fakedServer);
426       // Now try splitting.... should fail.  And each should successfully
427       // rollback.
428       this.admin.split(hri.getRegionNameAsString());
429       this.admin.split(hri.getRegionNameAsString());
430       this.admin.split(hri.getRegionNameAsString());
431       // Wait around a while and assert count of regions remains constant.
432       for (int i = 0; i < 10; i++) {
433         Thread.sleep(100);
434         assertEquals(regionCount, ProtobufUtil.getOnlineRegions(server).size());
435       }
436       // Now clear the zknode
437       ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
438         hri, fakedServer);
439       // Now try splitting and it should work.
440       split(hri, server, regionCount);
441       // Get daughters
442       checkAndGetDaughters(tableName);
443       // OK, so split happened after we cleared the blocking node.
444     } finally {
445       admin.setBalancerRunning(true, false);
446       cluster.getMaster().setCatalogJanitorEnabled(true);
447       t.close();
448     }
449   }
450 
451   /**
452    * Test that if daughter split on us, we won't do the shutdown handler fixup
453    * just because we can't find the immediate daughter of an offlined parent.
454    * @throws IOException
455    * @throws InterruptedException
456    */
457   @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
458   throws IOException, InterruptedException, ServiceException {
459     final byte [] tableName =
460       Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
461 
462     // Create table then get the single region for our new table.
463     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
464     List<HRegion> regions = cluster.getRegions(tableName);
465     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
466 
467     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
468 
469     // Turn off balancer so it doesn't cut in and mess up our placements.
470     this.admin.setBalancerRunning(false, true);
471     // Turn off the meta scanner so it don't remove parent on us.
472     cluster.getMaster().setCatalogJanitorEnabled(false);
473     try {
474       // Add a bit of load up into the table so splittable.
475       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
476       // Get region pre-split.
477       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
478       printOutRegions(server, "Initial regions: ");
479       int regionCount = ProtobufUtil.getOnlineRegions(server).size();
480       // Now split.
481       split(hri, server, regionCount);
482       // Get daughters
483       List<HRegion> daughters = checkAndGetDaughters(tableName);
484       // Now split one of the daughters.
485       regionCount = ProtobufUtil.getOnlineRegions(server).size();
486       HRegionInfo daughter = daughters.get(0).getRegionInfo();
487       LOG.info("Daughter we are going to split: " + daughter);
488       // Compact first to ensure we have cleaned up references -- else the split
489       // will fail.
490       this.admin.compact(daughter.getRegionName());
491       daughters = cluster.getRegions(tableName);
492       HRegion daughterRegion = null;
493       for (HRegion r: daughters) {
494         if (r.getRegionInfo().equals(daughter)) {
495           daughterRegion = r;
496           LOG.info("Found matching HRI: " + daughterRegion);
497           break;
498         }
499       }
500       assertTrue(daughterRegion != null);
501       for (int i=0; i<100; i++) {
502         if (!daughterRegion.hasReferences()) break;
503         Threads.sleep(100);
504       }
505       assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
506       LOG.info("Daughter hri before split (has been compacted): " + daughter);
507       split(daughter, server, regionCount);
508       // Get list of daughters
509       daughters = cluster.getRegions(tableName);
510       for (HRegion d: daughters) {
511         LOG.info("Regions before crash: " + d);
512       }
513       // Now crash the server
514       cluster.abortRegionServer(tableRegionIndex);
515       waitUntilRegionServerDead();
516       awaitDaughters(tableName, daughters.size());
517       // Assert daughters are online and ONLY the original daughters -- that
518       // fixup didn't insert one during server shutdown recover.
519       regions = cluster.getRegions(tableName);
520       for (HRegion d: daughters) {
521         LOG.info("Regions after crash: " + d);
522       }
523       assertEquals(daughters.size(), regions.size());
524       for (HRegion r: regions) {
525         LOG.info("Regions post crash " + r);
526         assertTrue("Missing region post crash " + r, daughters.contains(r));
527       }
528     } finally {
529       admin.setBalancerRunning(true, false);
530       cluster.getMaster().setCatalogJanitorEnabled(true);
531       t.close();
532     }
533   }
534 
535   @Test(timeout = 180000)
536   public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
537     Configuration conf = TESTING_UTIL.getConfiguration();
538     TableName userTableName =
539         TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
540     HTableDescriptor htd = new HTableDescriptor(userTableName);
541     HColumnDescriptor hcd = new HColumnDescriptor("col");
542     htd.addFamily(hcd);
543     admin.createTable(htd);
544     HTable table = new HTable(conf, userTableName);
545     try {
546       for (int i = 0; i <= 5; i++) {
547         String row = "row" + i;
548         Put p = new Put(row.getBytes());
549         String val = "Val" + i;
550         p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
551         table.put(p);
552         admin.flush(userTableName.getName());
553         Delete d = new Delete(row.getBytes());
554         // Do a normal delete
555         table.delete(d);
556         admin.flush(userTableName.getName());
557       }
558       admin.majorCompact(userTableName.getName());
559       List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
560           .getMaster().getAssignmentManager().getRegionStates()
561           .getRegionsOfTable(userTableName);
562       HRegionInfo hRegionInfo = regionsOfTable.get(0);
563       Put p = new Put("row6".getBytes());
564       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
565       table.put(p);
566       p = new Put("row7".getBytes());
567       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
568       table.put(p);
569       p = new Put("row8".getBytes());
570       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
571       table.put(p);
572       admin.flush(userTableName.getName());
573       admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
574       regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
575           .getAssignmentManager().getRegionStates()
576           .getRegionsOfTable(userTableName);
577 
578       while (regionsOfTable.size() != 2) {
579         Thread.sleep(2000);
580         regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
581             .getAssignmentManager().getRegionStates()
582             .getRegionsOfTable(userTableName);
583       }
584       Assert.assertEquals(2, regionsOfTable.size());
585       Scan s = new Scan();
586       ResultScanner scanner = table.getScanner(s);
587       int mainTableCount = 0;
588       for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
589         mainTableCount++;
590       }
591       Assert.assertEquals(3, mainTableCount);
592     } finally {
593       table.close();
594     }
595   }
596 
597   /**
598    * Noop Abortable implementation used below in tests.
599    */
600   static class UselessTestAbortable implements Abortable {
601     boolean aborted = false;
602     @Override
603     public void abort(String why, Throwable e) {
604       LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
605       aborted = true;
606     }
607 
608     @Override
609     public boolean isAborted() {
610       return this.aborted;
611     }
612   }
613 
614   /**
615    * Verifies HBASE-5806.  When splitting is partially done and the master goes down
616    * when the SPLIT node is in either SPLIT or SPLITTING state.
617    *
618    * @throws IOException
619    * @throws InterruptedException
620    * @throws NodeExistsException
621    * @throws KeeperException
622    * @throws DeserializationException
623    */
624   @Test(timeout = 400000)
625   public void testMasterRestartWhenSplittingIsPartial()
626       throws IOException, InterruptedException, NodeExistsException,
627       KeeperException, DeserializationException, ServiceException {
628     final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
629 
630     // Create table then get the single region for our new table.
631     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
632     List<HRegion> regions = cluster.getRegions(tableName);
633     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
634 
635     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
636 
637     // Turn off balancer so it doesn't cut in and mess up our placements.
638     this.admin.setBalancerRunning(false, true);
639     // Turn off the meta scanner so it don't remove parent on us.
640     cluster.getMaster().setCatalogJanitorEnabled(false);
641     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
642       "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
643     try {
644       // Add a bit of load up into the table so splittable.
645       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
646       // Get region pre-split.
647       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
648       printOutRegions(server, "Initial regions: ");
649       // Now, before we split, set special flag in master, a flag that has
650       // it FAIL the processing of split.
651       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
652       // Now try splitting and it should work.
653 
654       this.admin.split(hri.getRegionNameAsString());
655       checkAndGetDaughters(tableName);
656       // Assert the ephemeral node is up in zk.
657       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
658       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
659       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
660           + stats);
661       byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
662       RegionTransition rtd = RegionTransition.parseFrom(bytes);
663       // State could be SPLIT or SPLITTING.
664       assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
665           || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
666 
667       // abort and wait for new master.
668       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
669 
670       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
671 
672       // Update the region to be offline and split, so that HRegionInfo#equals
673       // returns true in checking rebuilt region states map.
674       hri.setOffline(true);
675       hri.setSplit(true);
676       ServerName regionServerOfRegion = master.getAssignmentManager()
677         .getRegionStates().getRegionServerOfRegion(hri);
678       assertTrue(regionServerOfRegion != null);
679 
680       // Remove the block so that split can move ahead.
681       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
682       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
683       Stat stat = new Stat();
684       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
685       // ZKUtil.create
686       for (int i=0; data != null && i<60; i++) {
687         Thread.sleep(1000);
688         data = ZKUtil.getDataNoWatch(zkw, node, stat);
689       }
690       assertNull("Waited too long for ZK node to be removed: "+node, data);
691       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
692       assertTrue("Split parent should be in SPLIT state",
693         regionStates.isRegionInState(hri, State.SPLIT));
694       regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
695       assertTrue(regionServerOfRegion == null);
696     } finally {
697       // Set this flag back.
698       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
699       admin.setBalancerRunning(true, false);
700       cluster.getMaster().setCatalogJanitorEnabled(true);
701       t.close();
702       zkw.close();
703     }
704   }
705 
706   /**
707    * Verifies HBASE-5806.  Here the case is that splitting is completed but before the
708    * CJ could remove the parent region the master is killed and restarted.
709    * @throws IOException
710    * @throws InterruptedException
711    * @throws NodeExistsException
712    * @throws KeeperException
713    */
714   @Test (timeout = 300000)
715   public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
716       throws IOException, InterruptedException, NodeExistsException,
717       KeeperException, ServiceException {
718     final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
719 
720     // Create table then get the single region for our new table.
721     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
722     List<HRegion> regions = cluster.getRegions(tableName);
723     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
724 
725     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
726 
727     // Turn off balancer so it doesn't cut in and mess up our placements.
728     this.admin.setBalancerRunning(false, true);
729     // Turn off the meta scanner so it don't remove parent on us.
730     cluster.getMaster().setCatalogJanitorEnabled(false);
731     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
732       "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
733     try {
734       // Add a bit of load up into the table so splittable.
735       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
736       // Get region pre-split.
737       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
738       printOutRegions(server, "Initial regions: ");
739 
740       this.admin.split(hri.getRegionNameAsString());
741       checkAndGetDaughters(tableName);
742       // Assert the ephemeral node is up in zk.
743       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
744       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
745       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
746           + stats);
747       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
748       Stat stat = new Stat();
749       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
750       // ZKUtil.create
751       for (int i=0; data != null && i<60; i++) {
752         Thread.sleep(1000);
753         data = ZKUtil.getDataNoWatch(zkw, node, stat);
754       }
755       assertNull("Waited too long for ZK node to be removed: "+node, data);
756 
757       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
758 
759       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
760 
761       // Update the region to be offline and split, so that HRegionInfo#equals
762       // returns true in checking rebuilt region states map.
763       hri.setOffline(true);
764       hri.setSplit(true);
765       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
766       assertTrue("Split parent should be in SPLIT state",
767         regionStates.isRegionInState(hri, State.SPLIT));
768       ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
769       assertTrue(regionServerOfRegion == null);
770     } finally {
771       this.admin.setBalancerRunning(true, false);
772       cluster.getMaster().setCatalogJanitorEnabled(true);
773       t.close();
774       zkw.close();
775     }
776   }
777 
778   /**
779    *
780    * While transitioning node from RS_ZK_REGION_SPLITTING to
781    * RS_ZK_REGION_SPLITTING during region split,if zookeper went down split always
782    * fails for the region. HBASE-6088 fixes this scenario.
783    * This test case is to test the znode is deleted(if created) or not in roll back.
784    *
785    * @throws IOException
786    * @throws InterruptedException
787    * @throws KeeperException
788    */
789   @Test
790   public void testSplitBeforeSettingSplittingInZK() throws Exception,
791       InterruptedException, KeeperException {
792     testSplitBeforeSettingSplittingInZKInternals();
793   }
794 
795   @Test(timeout = 60000)
796   public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
797     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
798     final TableName tableName =
799         TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
800     // Create table then get the single region for our new table.
801     HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
802     List<HRegion> regions = null;
803     try {
804       regions = cluster.getRegions(tableName);
805       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
806       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
807       insertData(tableName.getName(), admin, t);
808       // Turn off balancer so it doesn't cut in and mess up our placements.
809       admin.setBalancerRunning(false, true);
810       // Turn off the meta scanner so it don't remove parent on us.
811       cluster.getMaster().setCatalogJanitorEnabled(false);
812       boolean tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
813           tableName);
814       assertEquals("The specified table should present.", true, tableExists);
815       final HRegion region = findSplittableRegion(regions);
816       assertTrue("not able to find a splittable region", region != null);
817       SplitTransaction st = new SplitTransaction(region, Bytes.toBytes("row2"));
818       try {
819         st.prepare();
820         st.createDaughters(regionServer, regionServer);
821       } catch (IOException e) {
822 
823       }
824       tableExists = MetaReader.tableExists(regionServer.getCatalogTracker(),
825           tableName);
826       assertEquals("The specified table should present.", true, tableExists);
827     } finally {
828       if (regions != null) {
829         String node = ZKAssign.getNodeName(zkw, regions.get(0).getRegionInfo()
830             .getEncodedName());
831         ZKUtil.deleteNodeFailSilent(zkw, node);
832       }
833       admin.setBalancerRunning(true, false);
834       cluster.getMaster().setCatalogJanitorEnabled(true);
835       t.close();
836     }
837   }
838 
839   private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
840       InterruptedException {
841     Put p = new Put(Bytes.toBytes("row1"));
842     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
843     t.put(p);
844     p = new Put(Bytes.toBytes("row2"));
845     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
846     t.put(p);
847     p = new Put(Bytes.toBytes("row3"));
848     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
849     t.put(p);
850     p = new Put(Bytes.toBytes("row4"));
851     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
852     t.put(p);
853     admin.flush(tableName);
854   }
855 
856   /**
857    * If a table has regions that have no store files in a region, they should split successfully
858    * into two regions with no store files.
859    */
860   @Test
861   public void testSplitRegionWithNoStoreFiles()
862       throws Exception {
863     final TableName tableName =
864         TableName.valueOf("testSplitRegionWithNoStoreFiles");
865     // Create table then get the single region for our new table.
866     createTableAndWait(tableName.getName(), HConstants.CATALOG_FAMILY);
867     List<HRegion> regions = cluster.getRegions(tableName);
868     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
869     ensureTableRegionNotOnSameServerAsMeta(admin, hri);
870     int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
871     HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
872     // Turn off balancer so it doesn't cut in and mess up our placements.
873     this.admin.setBalancerRunning(false, true);
874     // Turn off the meta scanner so it don't remove parent on us.
875     cluster.getMaster().setCatalogJanitorEnabled(false);
876     try {
877       // Precondition: we created a table with no data, no store files.
878       printOutRegions(regionServer, "Initial regions: ");
879       Configuration conf = cluster.getConfiguration();
880       HBaseFsck.debugLsr(conf, new Path("/"));
881       Path rootDir = FSUtils.getRootDir(conf);
882       FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
883       Map<String, Path> storefiles =
884           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
885       assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
886 
887       // find a splittable region.  Refresh the regions list
888       regions = cluster.getRegions(tableName);
889       final HRegion region = findSplittableRegion(regions);
890       assertTrue("not able to find a splittable region", region != null);
891 
892       // Now split.
893       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
894       try {
895         st.prepare();
896         st.execute(regionServer, regionServer);
897       } catch (IOException e) {
898         fail("Split execution should have succeeded with no exceptions thrown");
899       }
900 
901       // Postcondition: split the table with no store files into two regions, but still have not
902       // store files
903       List<HRegion> daughters = cluster.getRegions(tableName);
904       assertTrue(daughters.size() == 2);
905 
906       // check dirs
907       HBaseFsck.debugLsr(conf, new Path("/"));
908       Map<String, Path> storefilesAfter =
909           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
910       assertEquals("Expected nothing but found " + storefilesAfter.toString(),
911           storefilesAfter.size(), 0);
912 
913       hri = region.getRegionInfo(); // split parent
914       AssignmentManager am = cluster.getMaster().getAssignmentManager();
915       RegionStates regionStates = am.getRegionStates();
916       long start = EnvironmentEdgeManager.currentTimeMillis();
917       while (!regionStates.isRegionInState(hri, State.SPLIT)) {
918         assertFalse("Timed out in waiting split parent to be in state SPLIT",
919           EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
920         Thread.sleep(500);
921       }
922 
923       // We should not be able to assign it again
924       am.assign(hri, true, true);
925       assertFalse("Split region can't be assigned",
926         regionStates.isRegionInTransition(hri));
927       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
928 
929       // We should not be able to unassign it either
930       am.unassign(hri, true, null);
931       assertFalse("Split region can't be unassigned",
932         regionStates.isRegionInTransition(hri));
933       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
934     } finally {
935       admin.setBalancerRunning(true, false);
936       cluster.getMaster().setCatalogJanitorEnabled(true);
937     }
938   }
939 
940   @Test(timeout = 180000)
941   public void testSplitHooksBeforeAndAfterPONR() throws Exception {
942     String firstTable = "testSplitHooksBeforeAndAfterPONR_1";
943     String secondTable = "testSplitHooksBeforeAndAfterPONR_2";
944     HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(firstTable));
945     desc.addCoprocessor(MockedRegionObserver.class.getName());
946     HColumnDescriptor hcd = new HColumnDescriptor("cf");
947     desc.addFamily(hcd);
948     admin.createTable(desc);
949     desc = new HTableDescriptor(TableName.valueOf(secondTable));
950     hcd = new HColumnDescriptor("cf");
951     desc.addFamily(hcd);
952     admin.createTable(desc);
953     List<HRegion> firstTableregions = cluster.getRegions(TableName.valueOf(firstTable));
954     List<HRegion> secondTableRegions = cluster.getRegions(TableName.valueOf(secondTable));
955     ServerName serverName =
956         cluster.getServerHoldingRegion(firstTableregions.get(0).getRegionName());
957     admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
958       Bytes.toBytes(serverName.getServerName()));
959     HTable table1 = null;
960     HTable table2 = null;
961     try {
962       table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
963       table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
964       insertData(Bytes.toBytes(firstTable), admin, table1);
965       insertData(Bytes.toBytes(secondTable), admin, table2);
966       admin.split(Bytes.toBytes(firstTable), "row2".getBytes());
967       firstTableregions = cluster.getRegions(Bytes.toBytes(firstTable));
968       while (firstTableregions.size() != 2) {
969         Thread.sleep(1000);
970         firstTableregions = cluster.getRegions(Bytes.toBytes(firstTable));
971       }
972       assertEquals("Number of regions after split should be 2.", 2, firstTableregions.size());
973       secondTableRegions = cluster.getRegions(Bytes.toBytes(secondTable));
974       assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
975     } finally {
976       if (table1 != null) {
977         table1.close();
978       }
979       if (table2 != null) {
980         table2.close();
981       }
982       TESTING_UTIL.deleteTable(firstTable);
983       TESTING_UTIL.deleteTable(secondTable);
984     }
985   }
986 
987   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
988     final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
989     try {
990       // Create table then get the single region for our new table.
991       createTableAndWait(tableName, Bytes.toBytes("cf"));
992 
993       List<HRegion> regions = awaitTableRegions(tableName);
994       assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
995 
996       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
997       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
998       final HRegion region = findSplittableRegion(regions);
999       assertTrue("not able to find a splittable region", region != null);
1000       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1001         @Override
1002         public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
1003             final RegionServerServices services, boolean testing) throws IOException {
1004           throw new SplittingNodeCreationFailedException ();
1005         }
1006       };
1007       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1008           region.getRegionInfo().getEncodedName());
1009       regionServer.getZooKeeper().sync(node);
1010       for (int i = 0; i < 100; i++) {
1011         // We expect the znode to be deleted by this time. Here the
1012         // znode could be in OPENED state and the
1013         // master has not yet deleted the znode.
1014         if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1015           Thread.sleep(100);
1016         }
1017       }
1018       try {
1019         st.prepare();
1020         st.execute(regionServer, regionServer);
1021       } catch (IOException e) {
1022         // check for the specific instance in case the Split failed due to the
1023         // existence of the znode in OPENED state.
1024         // This will at least make the test to fail;
1025         assertTrue("Should be instance of CreateSplittingNodeFailedException",
1026             e instanceof SplittingNodeCreationFailedException );
1027         node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1028             region.getRegionInfo().getEncodedName());
1029         {
1030           assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1031         }
1032         assertTrue(st.rollback(regionServer, regionServer));
1033         assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1034       }
1035     } finally {
1036       TESTING_UTIL.deleteTable(tableName);
1037     }
1038   }
1039 
1040   public static class MockedSplitTransaction extends SplitTransaction {
1041 
1042     private HRegion currentRegion;
1043     public MockedSplitTransaction(HRegion r, byte[] splitrow) {
1044       super(r, splitrow);
1045       this.currentRegion = r;
1046     }
1047 
1048     @Override
1049     void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
1050         throws IOException {
1051       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1052           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1053         try {
1054           if (!secondSplit){
1055             callRollBack = true;
1056             latch.await();
1057           }
1058         } catch (InterruptedException e) {
1059         }
1060 
1061       }
1062       super.transitionZKNode(server, services, a, b);
1063       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1064           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1065         firstSplitCompleted = true;
1066       }
1067     }
1068     @Override
1069     public boolean rollback(Server server, RegionServerServices services) throws IOException {
1070       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1071           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1072         if(secondSplit){
1073           super.rollback(server, services);
1074           latch.countDown();
1075           return true;
1076         }
1077       }
1078       return super.rollback(server, services);
1079     }
1080 
1081   }
1082 
1083   private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1084     for (int i = 0; i < 5; ++i) {
1085       for (HRegion r: regions) {
1086         if (r.isSplittable()) {
1087           return(r);
1088         }
1089       }
1090       Thread.sleep(100);
1091     }
1092     return(null);
1093   }
1094 
1095   private List<HRegion> checkAndGetDaughters(byte[] tableName)
1096       throws InterruptedException {
1097     List<HRegion> daughters = null;
1098     // try up to 10s
1099     for (int i=0; i<100; i++) {
1100       daughters = cluster.getRegions(tableName);
1101       if (daughters.size() >= 2) break;
1102       Thread.sleep(100);
1103     }
1104     assertTrue(daughters.size() >= 2);
1105     return daughters;
1106   }
1107 
1108   private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1109   throws IOException, InterruptedException {
1110     cluster.abortMaster(0);
1111     cluster.waitOnMaster(0);
1112     cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1113     		MockMasterWithoutCatalogJanitor.class, HMaster.class);
1114     MockMasterWithoutCatalogJanitor master = null;
1115     master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1116     cluster.waitForActiveAndReadyMaster();
1117     return master;
1118   }
1119 
1120   private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1121       throws IOException, InterruptedException {
1122     this.admin.split(hri.getRegionNameAsString());
1123     for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 300; i++) {
1124       LOG.debug("Waiting on region to split");
1125       Thread.sleep(100);
1126     }
1127 
1128     assertFalse("Waited too long for split",
1129         ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
1130   }
1131 
1132   /**
1133    * Ensure single table region is not on same server as the single hbase:meta table
1134    * region.
1135    * @param admin
1136    * @param hri
1137    * @return Index of the server hosting the single table region
1138    * @throws UnknownRegionException
1139    * @throws MasterNotRunningException
1140    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
1141    * @throws InterruptedException
1142    */
1143   private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
1144       final HRegionInfo hri)
1145   throws HBaseIOException, MasterNotRunningException,
1146   ZooKeeperConnectionException, InterruptedException {
1147     // Now make sure that the table region is not on same server as that hosting
1148     // hbase:meta  We don't want hbase:meta replay polluting our test when we later crash
1149     // the table region serving server.
1150     int metaServerIndex = cluster.getServerWithMeta();
1151     assertTrue(metaServerIndex != -1);
1152     HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1153     int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1154     assertTrue(tableRegionIndex != -1);
1155     HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1156     if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1157       HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1158       assertNotNull(hrs);
1159       assertNotNull(hri);
1160       LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1161         metaRegionServer.getServerName() + " to " +
1162         hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1163       admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1164     }
1165     // Wait till table region is up on the server that is NOT carrying hbase:meta.
1166     for (int i = 0; i < 100; i++) {
1167       tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1168       if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1169       LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1170         tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1171       Thread.sleep(100);
1172     }
1173     assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1174         && tableRegionIndex != metaServerIndex);
1175     // Verify for sure table region is not on same server as hbase:meta
1176     tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1177     assertTrue(tableRegionIndex != -1);
1178     assertNotSame(metaServerIndex, tableRegionIndex);
1179     return tableRegionIndex;
1180   }
1181 
1182   /**
1183    * Find regionserver other than the one passed.
1184    * Can't rely on indexes into list of regionservers since crashed servers
1185    * occupy an index.
1186    * @param cluster
1187    * @param notThisOne
1188    * @return A regionserver that is not <code>notThisOne</code> or null if none
1189    * found
1190    */
1191   private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1192       final HRegionServer notThisOne) {
1193     for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1194       HRegionServer hrs = rst.getRegionServer();
1195       if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1196       if (hrs.isStopping() || hrs.isStopped()) continue;
1197       return hrs;
1198     }
1199     return null;
1200   }
1201 
1202   private void printOutRegions(final HRegionServer hrs, final String prefix)
1203       throws IOException {
1204     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1205     for (HRegionInfo region: regions) {
1206       LOG.info(prefix + region.getRegionNameAsString());
1207     }
1208   }
1209 
1210   private void waitUntilRegionServerDead() throws InterruptedException {
1211     // Wait until the master processes the RS shutdown
1212     for (int i=0; cluster.getMaster().getClusterStatus().
1213         getServers().size() == NB_SERVERS && i<100; i++) {
1214       LOG.info("Waiting on server to go down");
1215       Thread.sleep(100);
1216     }
1217     assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1218         getServers().size() == NB_SERVERS);
1219   }
1220 
1221   private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
1222     // Wait till regions are back on line again.
1223     for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1224       LOG.info("Waiting for repair to happen");
1225       Thread.sleep(1000);
1226     }
1227     if (cluster.getRegions(tableName).size() < numDaughters) {
1228       fail("Waiting too long for daughter regions");
1229     }
1230   }
1231 
1232   private List<HRegion> awaitTableRegions(final byte[] tableName) throws InterruptedException {
1233     List<HRegion> regions = null;
1234     for (int i = 0; i < 100; i++) {
1235       regions = cluster.getRegions(tableName);
1236       if (regions.size() > 0) break;
1237       Thread.sleep(100);
1238     }
1239     return regions;
1240   }
1241 
1242   private HTable createTableAndWait(byte[] tableName, byte[] cf) throws IOException,
1243       InterruptedException {
1244     HTable t = TESTING_UTIL.createTable(tableName, cf);
1245     awaitTableRegions(tableName);
1246     assertTrue("Table not online: " + Bytes.toString(tableName),
1247       cluster.getRegions(tableName).size() != 0);
1248     return t;
1249   }
1250 
1251   public static class MockMasterWithoutCatalogJanitor extends HMaster {
1252 
1253     public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
1254         InterruptedException {
1255       super(conf);
1256     }
1257 
1258     @Override
1259     protected void startCatalogJanitorChore() {
1260       LOG.debug("Customised master executed.");
1261     }
1262   }
1263 
1264   private static class SplittingNodeCreationFailedException  extends IOException {
1265     private static final long serialVersionUID = 1652404976265623004L;
1266 
1267     public SplittingNodeCreationFailedException () {
1268       super();
1269     }
1270   }
1271 
1272   public static class MockedRegionObserver extends BaseRegionObserver {
1273     private SplitTransaction st = null;
1274     private PairOfSameType<HRegion> daughterRegions = null;
1275 
1276     @Override
1277     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1278         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1279       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1280       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1281       List<HRegion> onlineRegions =
1282           rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1283       HRegion region = onlineRegions.get(0);
1284       for (HRegion r : onlineRegions) {
1285         if (r.getRegionInfo().containsRow(splitKey)) {
1286           region = r;
1287           break;
1288         }
1289       }
1290       st = new SplitTransaction(region, splitKey);
1291       if (!st.prepare()) {
1292         LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1293             + " failed. So returning null. ");
1294         ctx.bypass();
1295         return;
1296       }
1297       region.forceSplit(splitKey);
1298       daughterRegions = st.stepsBeforePONR(rs, rs, false);
1299       HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1300       copyOfParent.setOffline(true);
1301       copyOfParent.setSplit(true);
1302       // Put for parent
1303       Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
1304       MetaEditor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1305         daughterRegions.getSecond().getRegionInfo());
1306       metaEntries.add(putParent);
1307       // Puts for daughters
1308       Put putA = MetaEditor.makePutFromRegionInfo(daughterRegions.getFirst().getRegionInfo());
1309       Put putB = MetaEditor.makePutFromRegionInfo(daughterRegions.getSecond().getRegionInfo());
1310       st.addLocation(putA, rs.getServerName(), 1);
1311       st.addLocation(putB, rs.getServerName(), 1);
1312       metaEntries.add(putA);
1313       metaEntries.add(putB);
1314     }
1315 
1316     @Override
1317     public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1318         throws IOException {
1319       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1320       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1321       st.stepsAfterPONR(rs, rs, daughterRegions);
1322     }
1323 
1324   }
1325 }
1326