1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.io.InterruptedIOException;
31 import java.util.Collection;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.concurrent.CountDownLatch;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.Abortable;
42 import org.apache.hadoop.hbase.CoordinatedStateManager;
43 import org.apache.hadoop.hbase.Coprocessor;
44 import org.apache.hadoop.hbase.CoprocessorEnvironment;
45 import org.apache.hadoop.hbase.HBaseTestingUtility;
46 import org.apache.hadoop.hbase.HColumnDescriptor;
47 import org.apache.hadoop.hbase.HConstants;
48 import org.apache.hadoop.hbase.HRegionInfo;
49 import org.apache.hadoop.hbase.HTableDescriptor;
50 import org.apache.hadoop.hbase.MasterNotRunningException;
51 import org.apache.hadoop.hbase.MetaTableAccessor;
52 import org.apache.hadoop.hbase.MiniHBaseCluster;
53 import org.apache.hadoop.hbase.RegionTransition;
54 import org.apache.hadoop.hbase.Server;
55 import org.apache.hadoop.hbase.ServerName;
56 import org.apache.hadoop.hbase.TableName;
57 import org.apache.hadoop.hbase.UnknownRegionException;
58 import org.apache.hadoop.hbase.Waiter;
59 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
60 import org.apache.hadoop.hbase.client.Admin;
61 import org.apache.hadoop.hbase.client.Connection;
62 import org.apache.hadoop.hbase.client.ConnectionFactory;
63 import org.apache.hadoop.hbase.client.Consistency;
64 import org.apache.hadoop.hbase.client.Delete;
65 import org.apache.hadoop.hbase.client.Get;
66 import org.apache.hadoop.hbase.client.HBaseAdmin;
67 import org.apache.hadoop.hbase.client.HTable;
68 import org.apache.hadoop.hbase.client.Mutation;
69 import org.apache.hadoop.hbase.client.Put;
70 import org.apache.hadoop.hbase.client.Result;
71 import org.apache.hadoop.hbase.client.ResultScanner;
72 import org.apache.hadoop.hbase.client.Scan;
73 import org.apache.hadoop.hbase.client.Table;
74 import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
75 import org.apache.hadoop.hbase.coordination.ZKSplitTransactionCoordination;
76 import org.apache.hadoop.hbase.coordination.ZkCloseRegionCoordination;
77 import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
78 import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
79 import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
80 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
81 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
82 import org.apache.hadoop.hbase.exceptions.DeserializationException;
83 import org.apache.hadoop.hbase.executor.EventType;
84 import org.apache.hadoop.hbase.master.AssignmentManager;
85 import org.apache.hadoop.hbase.master.HMaster;
86 import org.apache.hadoop.hbase.master.RegionState;
87 import org.apache.hadoop.hbase.master.RegionState.State;
88 import org.apache.hadoop.hbase.master.RegionStates;
89 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
90 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
91 import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController;
92 import org.apache.hadoop.hbase.security.User;
93 import org.apache.hadoop.hbase.testclassification.LargeTests;
94 import org.apache.hadoop.hbase.util.Bytes;
95 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
96 import org.apache.hadoop.hbase.util.FSUtils;
97 import org.apache.hadoop.hbase.util.HBaseFsck;
98 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
99 import org.apache.hadoop.hbase.util.PairOfSameType;
100 import org.apache.hadoop.hbase.util.Threads;
101 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
102 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
103 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
104 import org.apache.zookeeper.KeeperException;
105 import org.apache.zookeeper.KeeperException.NodeExistsException;
106 import org.apache.zookeeper.data.Stat;
107 import org.junit.After;
108 import org.junit.AfterClass;
109 import org.junit.Assert;
110 import org.junit.Before;
111 import org.junit.BeforeClass;
112 import org.junit.Test;
113 import org.junit.experimental.categories.Category;
114
115 import com.google.protobuf.ServiceException;
116
117
118
119
120
121
122 @Category(LargeTests.class)
123 @SuppressWarnings("deprecation")
124 public class TestSplitTransactionOnCluster {
/** Logger used by the tests and by the nested helper classes in this file. */
private static final Log LOG =
  LogFactory.getLog(TestSplitTransactionOnCluster.class);
// Admin client for the current test; created in setup() and closed in tearDown().
private HBaseAdmin admin = null;
// Handle on the running mini cluster; assigned in setup().
private MiniHBaseCluster cluster = null;
// Value handed to startMiniCluster() and ensureSomeNonStoppedRegionServersAvailable().
private static final int NB_SERVERS = 3;
// NOTE(review): not referenced in the visible part of this file; presumably used by
// mock classes further down -- confirm before touching.
private static CountDownLatch latch = new CountDownLatch(1);
// Cross-thread flags for testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack. That test
// sets secondSplit, polls callRollBack and firstSplitCompleted, and resets all three in its
// finally block. Where the latter two are set to true is outside this view.
private static volatile boolean secondSplit = false;
private static volatile boolean callRollBack = false;
private static volatile boolean firstSplitCompleted = false;
// Snapshot of "hbase.assignment.usezk" taken in setupOnce(); gates the ZK-specific checks.
private static boolean useZKForAssignment;

// Shared testing utility that starts and stops the mini cluster for this class.
static final HBaseTestingUtility TESTING_UTIL =
  new HBaseTestingUtility();
138
139 static void setupOnce() throws Exception {
140 TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
141 useZKForAssignment = TESTING_UTIL.getConfiguration().getBoolean(
142 "hbase.assignment.usezk", true);
143 TESTING_UTIL.startMiniCluster(NB_SERVERS);
144 }
145
146 @BeforeClass public static void before() throws Exception {
147
148 TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
149 setupOnce();
150 }
151
152 @AfterClass public static void after() throws Exception {
153 TESTING_UTIL.shutdownMiniCluster();
154 }
155
156 @Before public void setup() throws IOException {
157 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
158 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
159 this.cluster = TESTING_UTIL.getMiniHBaseCluster();
160 }
161
162 @After
163 public void tearDown() throws Exception {
164 this.admin.close();
165 }
166
167 private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
168 assertEquals(1, regions.size());
169 HRegionInfo hri = regions.get(0).getRegionInfo();
170 return waitOnRIT(hri);
171 }
172
173
174
175
176
177
178
179
180 private HRegionInfo waitOnRIT(final HRegionInfo hri) {
181
182
183 while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
184 getRegionStates().isRegionInTransition(hri)) {
185 LOG.info("Waiting on region in transition: " +
186 TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
187 getRegionTransitionState(hri));
188 Threads.sleep(10);
189 }
190 return hri;
191 }
192
193 @Test(timeout = 60000)
194 public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
195 final TableName tableName =
196 TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
197
198 if (!useZKForAssignment) {
199
200 return;
201 }
202
203 try {
204
205 HTable t = createTableAndWait(tableName, Bytes.toBytes("cf"));
206 final List<HRegion> regions = cluster.getRegions(tableName);
207 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
208 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
209 .getRegionName());
210 final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
211 insertData(tableName, admin, t);
212 t.close();
213
214
215 this.admin.setBalancerRunning(false, true);
216
217 cluster.getMaster().setCatalogJanitorEnabled(false);
218
219
220 final HRegion region = findSplittableRegion(regions);
221 assertTrue("not able to find a splittable region", region != null);
222 MockedCoordinatedStateManager cp = new MockedCoordinatedStateManager();
223 cp.initialize(regionServer, region);
224 cp.start();
225 regionServer.csm = cp;
226
227 new Thread() {
228 @Override
229 public void run() {
230 SplitTransaction st = null;
231 st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
232 try {
233 st.prepare();
234 st.execute(regionServer, regionServer);
235 } catch (IOException e) {
236
237 }
238 }
239 }.start();
240 for (int i = 0; !callRollBack && i < 100; i++) {
241 Thread.sleep(100);
242 }
243 assertTrue("Waited too long for rollback", callRollBack);
244 SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
245 try {
246 secondSplit = true;
247
248 region.initialize();
249 st.prepare();
250 st.execute(regionServer, regionServer);
251 } catch (IOException e) {
252 LOG.debug("Rollback started :"+ e.getMessage());
253 st.rollback(regionServer, regionServer);
254 }
255 for (int i=0; !firstSplitCompleted && i<100; i++) {
256 Thread.sleep(100);
257 }
258 assertTrue("fist split did not complete", firstSplitCompleted);
259
260 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
261 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
262
263 for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
264 Thread.sleep(100);
265 }
266 assertFalse("region still in transition", rit.containsKey(
267 rit.containsKey(hri.getTable())));
268
269 List<Region> onlineRegions = regionServer.getOnlineRegions(tableName);
270
271 assertEquals("The parent region should be splitted", 2, onlineRegions.size());
272
273 List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
274 .getRegionStates().getRegionsOfTable(tableName);
275
276 assertEquals("No of regions in master", 2, regionsOfTable.size());
277 } finally {
278 admin.setBalancerRunning(true, false);
279 secondSplit = false;
280 firstSplitCompleted = false;
281 callRollBack = false;
282 cluster.getMaster().setCatalogJanitorEnabled(true);
283 TESTING_UTIL.deleteTable(tableName);
284 }
285 }
286
287 @Test(timeout = 60000)
288 public void testRITStateForRollback() throws Exception {
289 final TableName tableName =
290 TableName.valueOf("testRITStateForRollback");
291 try {
292
293 Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
294 final List<HRegion> regions = cluster.getRegions(tableName);
295 final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
296 insertData(tableName, admin, t);
297 t.close();
298
299
300 this.admin.setBalancerRunning(false, true);
301
302 cluster.getMaster().setCatalogJanitorEnabled(false);
303
304
305 final HRegion region = findSplittableRegion(regions);
306 assertTrue("not able to find a splittable region", region != null);
307
308
309 region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
310 Coprocessor.PRIORITY_USER, region.getBaseConf());
311
312
313 this.admin.split(region.getRegionInfo().getRegionName(), new byte[] {42});
314
315
316 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
317 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
318 assertNotNull(observer);
319 observer.latch.await();
320
321 LOG.info("Waiting for region to come out of RIT");
322 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
323 @Override
324 public boolean evaluate() throws Exception {
325 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
326 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
327 return (rit.size() == 0);
328 }
329 });
330 } finally {
331 admin.setBalancerRunning(true, false);
332 cluster.getMaster().setCatalogJanitorEnabled(true);
333 TESTING_UTIL.deleteTable(tableName);
334 }
335 }
336 @Test(timeout = 60000)
337 public void testSplitFailedCompactionAndSplit() throws Exception {
338 final TableName tableName = TableName.valueOf("testSplitFailedCompactionAndSplit");
339 Configuration conf = TESTING_UTIL.getConfiguration();
340 try {
341 HBaseAdmin admin = new HBaseAdmin(conf);
342
343 HTableDescriptor htd = new HTableDescriptor(tableName);
344 byte[] cf = Bytes.toBytes("cf");
345 htd.addFamily(new HColumnDescriptor(cf));
346 admin.createTable(htd);
347
348 for (int i = 0; cluster.getRegions(tableName).size() == 0 && i < 100; i++) {
349 Thread.sleep(100);
350 }
351 assertEquals(1, cluster.getRegions(tableName).size());
352
353 HRegion region = cluster.getRegions(tableName).get(0);
354 Store store = region.getStore(cf);
355 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
356 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
357
358 Table t = new HTable(conf, tableName);
359
360 insertData(tableName, admin, t);
361 insertData(tableName, admin, t);
362
363 int fileNum = store.getStorefiles().size();
364
365 store.triggerMajorCompaction();
366 CompactionContext cc = store.requestCompaction();
367 assertNotNull(cc);
368
369
370 assertEquals(2, region.close(false).get(cf).size());
371
372 region.initialize();
373
374
375 assertFalse(region.compact(cc, store, NoLimitCompactionThroughputController.INSTANCE));
376 assertTrue(fileNum > store.getStorefiles().size());
377
378
379 SplitTransaction st = new SplitTransactionImpl(region, Bytes.toBytes("row3"));
380 assertTrue(st.prepare());
381 st.execute(regionServer, regionServer);
382 LOG.info("Waiting for region to come out of RIT");
383 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
384 @Override
385 public boolean evaluate() throws Exception {
386 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
387 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
388 return (rit.size() == 0);
389 }
390 });
391 assertEquals(2, cluster.getRegions(tableName).size());
392 } finally {
393 TESTING_UTIL.deleteTable(tableName);
394 }
395 }
396
397 public static class FailingSplitRegionObserver extends BaseRegionObserver {
398 volatile CountDownLatch latch;
399 volatile CountDownLatch postSplit;
400 @Override
401 public void start(CoprocessorEnvironment e) throws IOException {
402 latch = new CountDownLatch(1);
403 postSplit = new CountDownLatch(1);
404 }
405 @Override
406 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
407 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
408 latch.countDown();
409 LOG.info("Causing rollback of region split");
410 throw new IOException("Causing rollback of region split");
411 }
412 @Override
413 public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
414 throws IOException {
415 postSplit.countDown();
416 LOG.info("postCompleteSplit called");
417 }
418 }
419
420
421
422
423
424
425
426
427
428
429
430
431 @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
432 throws IOException, InterruptedException, NodeExistsException, KeeperException,
433 DeserializationException, ServiceException {
434 final TableName tableName =
435 TableName.valueOf("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
436
437
438 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
439 List<HRegion> regions = cluster.getRegions(tableName);
440 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
441
442 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
443
444
445 this.admin.setBalancerRunning(false, true);
446
447 cluster.getMaster().setCatalogJanitorEnabled(false);
448 try {
449
450 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
451
452 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
453 printOutRegions(server, "Initial regions: ");
454 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
455
456
457 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
458
459 split(hri, server, regionCount);
460
461 String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
462 hri.getEncodedName());
463 RegionTransition rt = null;
464 Stat stats = null;
465 List<HRegion> daughters = null;
466 if (useZKForAssignment) {
467 daughters = checkAndGetDaughters(tableName);
468
469
470 for (int i=0; i<100; i++) {
471 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
472 rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
473 hri.getEncodedName()));
474 if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
475 Thread.sleep(100);
476 }
477 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
478 assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
479
480 cluster.abortRegionServer(tableRegionIndex);
481 }
482 waitUntilRegionServerDead();
483 awaitDaughters(tableName, 2);
484 if (useZKForAssignment) {
485 regions = cluster.getRegions(tableName);
486 for (HRegion r: regions) {
487 assertTrue(daughters.contains(r));
488 }
489
490
491 for (int i=0; i<100; i++) {
492
493 stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
494 if (stats == null) break;
495 Thread.sleep(100);
496 }
497 LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
498 assertTrue(stats == null);
499 }
500 } finally {
501
502 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
503 cluster.getMaster().getAssignmentManager().regionOffline(hri);
504 admin.setBalancerRunning(true, false);
505 cluster.getMaster().setCatalogJanitorEnabled(true);
506 cluster.startRegionServer();
507 t.close();
508 TESTING_UTIL.deleteTable(tableName);
509 }
510 }
511
512 @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
513 throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
514 final TableName tableName =
515 TableName.valueOf("testExistingZnodeBlocksSplitAndWeRollback");
516
517
518 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
519 List<HRegion> regions = cluster.getRegions(tableName);
520 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
521
522 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
523
524 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
525
526
527 this.admin.setBalancerRunning(false, true);
528
529 cluster.getMaster().setCatalogJanitorEnabled(false);
530 try {
531
532 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
533
534 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
535 printOutRegions(server, "Initial regions: ");
536 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
537
538
539 ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
540 if (useZKForAssignment) {
541 ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
542 hri, fakedServer);
543 } else {
544 regionStates.updateRegionState(hri, RegionState.State.CLOSING);
545 }
546
547
548 this.admin.split(hri.getRegionNameAsString());
549 this.admin.split(hri.getRegionNameAsString());
550 this.admin.split(hri.getRegionNameAsString());
551
552 for (int i = 0; i < 10; i++) {
553 Thread.sleep(100);
554 assertEquals(regionCount, ProtobufUtil.getOnlineRegions(
555 server.getRSRpcServices()).size());
556 }
557 if (useZKForAssignment) {
558
559 ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
560 hri, fakedServer);
561 } else {
562 regionStates.regionOnline(hri, server.getServerName());
563 }
564
565 split(hri, server, regionCount);
566
567 checkAndGetDaughters(tableName);
568
569 } finally {
570 admin.setBalancerRunning(true, false);
571 cluster.getMaster().setCatalogJanitorEnabled(true);
572 t.close();
573 }
574 }
575
576
577
578
579
580
581
582 @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
583 throws IOException, InterruptedException, ServiceException {
584 final TableName tableName =
585 TableName.valueOf("testShutdownFixupWhenDaughterHasSplit");
586
587
588 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
589 List<HRegion> regions = cluster.getRegions(tableName);
590 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
591
592 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
593
594
595 this.admin.setBalancerRunning(false, true);
596
597 cluster.getMaster().setCatalogJanitorEnabled(false);
598 try {
599
600 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
601
602 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
603 printOutRegions(server, "Initial regions: ");
604 int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
605
606 split(hri, server, regionCount);
607
608 List<HRegion> daughters = checkAndGetDaughters(tableName);
609
610 regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
611 HRegionInfo daughter = daughters.get(0).getRegionInfo();
612 LOG.info("Daughter we are going to split: " + daughter);
613
614
615 this.admin.compact(daughter.getRegionName());
616 daughters = cluster.getRegions(tableName);
617 HRegion daughterRegion = null;
618 for (HRegion r: daughters) {
619 if (r.getRegionInfo().equals(daughter)) {
620 daughterRegion = r;
621 LOG.info("Found matching HRI: " + daughterRegion);
622 break;
623 }
624 }
625 assertTrue(daughterRegion != null);
626 for (int i=0; i<100; i++) {
627 if (!daughterRegion.hasReferences()) break;
628 Threads.sleep(100);
629 }
630 assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
631 LOG.info("Daughter hri before split (has been compacted): " + daughter);
632 split(daughter, server, regionCount);
633
634 daughters = cluster.getRegions(tableName);
635 for (HRegion d: daughters) {
636 LOG.info("Regions before crash: " + d);
637 }
638
639 cluster.abortRegionServer(tableRegionIndex);
640 waitUntilRegionServerDead();
641 awaitDaughters(tableName, daughters.size());
642
643
644 regions = cluster.getRegions(tableName);
645 for (HRegion d: daughters) {
646 LOG.info("Regions after crash: " + d);
647 }
648 assertEquals(daughters.size(), regions.size());
649 for (HRegion r: regions) {
650 LOG.info("Regions post crash " + r);
651 assertTrue("Missing region post crash " + r, daughters.contains(r));
652 }
653 } finally {
654 admin.setBalancerRunning(true, false);
655 cluster.getMaster().setCatalogJanitorEnabled(true);
656 t.close();
657 }
658 }
659
660 @Test(timeout = 180000)
661 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
662 TableName userTableName =
663 TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
664 HTableDescriptor htd = new HTableDescriptor(userTableName);
665 HColumnDescriptor hcd = new HColumnDescriptor("col");
666 htd.addFamily(hcd);
667 admin.createTable(htd);
668 Table table = new HTable(TESTING_UTIL.getConfiguration(), userTableName);
669 try {
670 for (int i = 0; i <= 5; i++) {
671 String row = "row" + i;
672 Put p = new Put(row.getBytes());
673 String val = "Val" + i;
674 p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
675 table.put(p);
676 admin.flush(userTableName.getName());
677 Delete d = new Delete(row.getBytes());
678
679 table.delete(d);
680 admin.flush(userTableName.getName());
681 }
682 admin.majorCompact(userTableName.getName());
683 List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
684 .getMaster().getAssignmentManager().getRegionStates()
685 .getRegionsOfTable(userTableName);
686 HRegionInfo hRegionInfo = regionsOfTable.get(0);
687 Put p = new Put("row6".getBytes());
688 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
689 table.put(p);
690 p = new Put("row7".getBytes());
691 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
692 table.put(p);
693 p = new Put("row8".getBytes());
694 p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
695 table.put(p);
696 admin.flush(userTableName.getName());
697 admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
698 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
699 .getAssignmentManager().getRegionStates()
700 .getRegionsOfTable(userTableName);
701
702 while (regionsOfTable.size() != 2) {
703 Thread.sleep(2000);
704 regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
705 .getAssignmentManager().getRegionStates()
706 .getRegionsOfTable(userTableName);
707 }
708 Assert.assertEquals(2, regionsOfTable.size());
709 Scan s = new Scan();
710 ResultScanner scanner = table.getScanner(s);
711 int mainTableCount = 0;
712 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
713 mainTableCount++;
714 }
715 Assert.assertEquals(3, mainTableCount);
716 } finally {
717 table.close();
718 }
719 }
720
721
722
723
724 static class UselessTestAbortable implements Abortable {
725 boolean aborted = false;
726 @Override
727 public void abort(String why, Throwable e) {
728 LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
729 aborted = true;
730 }
731
732 @Override
733 public boolean isAborted() {
734 return this.aborted;
735 }
736 }
737
738
739
740
741
742
743
744
745
746
747
748 @Test(timeout = 400000)
749 public void testMasterRestartWhenSplittingIsPartial()
750 throws IOException, InterruptedException, NodeExistsException,
751 KeeperException, DeserializationException, ServiceException {
752 final TableName tableName = TableName.valueOf("testMasterRestartWhenSplittingIsPartial");
753
754 if (!useZKForAssignment) {
755
756 return;
757 }
758
759
760 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
761 List<HRegion> regions = cluster.getRegions(tableName);
762 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
763
764 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
765
766
767 this.admin.setBalancerRunning(false, true);
768
769 cluster.getMaster().setCatalogJanitorEnabled(false);
770 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
771 "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
772 try {
773
774 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
775
776 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
777 printOutRegions(server, "Initial regions: ");
778
779
780 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
781
782
783 this.admin.split(hri.getRegionNameAsString());
784 checkAndGetDaughters(tableName);
785
786 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
787 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
788 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
789 + stats);
790 byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
791 RegionTransition rtd = RegionTransition.parseFrom(bytes);
792
793 assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
794 || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
795
796
797 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
798
799 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
800
801
802
803 hri.setOffline(true);
804 hri.setSplit(true);
805 ServerName regionServerOfRegion = master.getAssignmentManager()
806 .getRegionStates().getRegionServerOfRegion(hri);
807 assertTrue(regionServerOfRegion != null);
808
809
810 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
811 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
812 Stat stat = new Stat();
813 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
814
815 for (int i=0; data != null && i<60; i++) {
816 Thread.sleep(1000);
817 data = ZKUtil.getDataNoWatch(zkw, node, stat);
818 }
819 assertNull("Waited too long for ZK node to be removed: "+node, data);
820 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
821 assertTrue("Split parent should be in SPLIT state",
822 regionStates.isRegionInState(hri, State.SPLIT));
823 regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
824 assertTrue(regionServerOfRegion == null);
825 } finally {
826
827 AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
828 admin.setBalancerRunning(true, false);
829 cluster.getMaster().setCatalogJanitorEnabled(true);
830 t.close();
831 zkw.close();
832 }
833 }
834
835
836
837
838
839
840
841
842
843 @Test (timeout = 300000)
844 public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
845 throws IOException, InterruptedException, NodeExistsException,
846 KeeperException, ServiceException {
847 final TableName tableName = TableName
848 .valueOf("testMasterRestartAtRegionSplitPendingCatalogJanitor");
849
850
851 HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
852 List<HRegion> regions = cluster.getRegions(tableName);
853 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
854
855 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
856
857
858 this.admin.setBalancerRunning(false, true);
859
860 cluster.getMaster().setCatalogJanitorEnabled(false);
861 ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
862 "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
863 try {
864
865 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
866
867 HRegionServer server = cluster.getRegionServer(tableRegionIndex);
868 printOutRegions(server, "Initial regions: ");
869
870 this.admin.split(hri.getRegionNameAsString());
871 checkAndGetDaughters(tableName);
872
873 String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
874 Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
875 LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
876 + stats);
877 String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
878 Stat stat = new Stat();
879 byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
880
881 for (int i=0; data != null && i<60; i++) {
882 Thread.sleep(1000);
883 data = ZKUtil.getDataNoWatch(zkw, node, stat);
884 }
885 assertNull("Waited too long for ZK node to be removed: "+node, data);
886
887 MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
888
889 this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
890
891
892
893 hri.setOffline(true);
894 hri.setSplit(true);
895 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
896 assertTrue("Split parent should be in SPLIT state",
897 regionStates.isRegionInState(hri, State.SPLIT));
898 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
899 assertTrue(regionServerOfRegion == null);
900 } finally {
901 this.admin.setBalancerRunning(true, false);
902 cluster.getMaster().setCatalogJanitorEnabled(true);
903 t.close();
904 zkw.close();
905 }
906 }
907
908
909
910
911
912
913
914
915
916
917
918
919 @Test(timeout = 60000)
920 public void testSplitBeforeSettingSplittingInZK() throws Exception,
921 InterruptedException, KeeperException {
922 testSplitBeforeSettingSplittingInZKInternals();
923 }
924
925 @Test(timeout = 60000)
926 public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
927 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
928 final TableName tableName =
929 TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
930
931 Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
932 List<HRegion> regions = null;
933 try {
934 regions = cluster.getRegions(tableName);
935 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
936 .getRegionName());
937 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
938 insertData(tableName, admin, t);
939
940 admin.setBalancerRunning(false, true);
941
942 cluster.getMaster().setCatalogJanitorEnabled(false);
943 boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
944 tableName);
945 assertEquals("The specified table should present.", true, tableExists);
946 final HRegion region = findSplittableRegion(regions);
947 assertTrue("not able to find a splittable region", region != null);
948 SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
949 try {
950 st.prepare();
951 st.createDaughters(regionServer, regionServer, null);
952 } catch (IOException e) {
953
954 }
955 tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
956 tableName);
957 assertEquals("The specified table should present.", true, tableExists);
958 Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
959 .getRegionsInTransition();
960 assertTrue(rit.size() == 3);
961 cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
962 cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
963 cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
964 rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
965 assertTrue(rit.size() == 0);
966 }
967 finally {
968 admin.setBalancerRunning(true, false);
969 cluster.getMaster().setCatalogJanitorEnabled(true);
970 t.close();
971 TESTING_UTIL.deleteTable(tableName);
972 }
973 }
974
975 @Test
976 public void testSplitWithRegionReplicas() throws Exception {
977 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
978 final TableName tableName =
979 TableName.valueOf("foobar");
980 HTableDescriptor htd = TESTING_UTIL.createTableDescriptor("foobar");
981 htd.setRegionReplication(2);
982 htd.addCoprocessor(SlowMeCopro.class.getName());
983
984 HTable t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")},
985 TESTING_UTIL.getConfiguration());
986 int count;
987 List<HRegion> oldRegions;
988 do {
989 oldRegions = cluster.getRegions(tableName);
990 Thread.sleep(10);
991 } while (oldRegions.size() != 2);
992 for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo());
993 try {
994 int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo()
995 .getRegionName());
996 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
997 insertData(tableName, admin, t);
998
999 admin.setBalancerRunning(false, true);
1000
1001 cluster.getMaster().setCatalogJanitorEnabled(false);
1002 boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1003 tableName);
1004 assertEquals("The specified table should be present.", true, tableExists);
1005 final HRegion region = findSplittableRegion(oldRegions);
1006 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
1007 regionServer = cluster.getRegionServer(regionServerIndex);
1008 assertTrue("not able to find a splittable region", region != null);
1009 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1010 region.getRegionInfo().getEncodedName());
1011 regionServer.getZooKeeper().sync(node);
1012 SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
1013 try {
1014 st.prepare();
1015 st.execute(regionServer, regionServer);
1016 } catch (IOException e) {
1017 e.printStackTrace();
1018 fail("Split execution should have succeeded with no exceptions thrown " + e);
1019 }
1020
1021 List<HRegion> newRegions;
1022 do {
1023 newRegions = cluster.getRegions(tableName);
1024 for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo());
1025 Thread.sleep(1000);
1026 } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
1027 || newRegions.size() != 4);
1028 tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1029 tableName);
1030 assertEquals("The specified table should be present.", true, tableExists);
1031
1032 byte[] b1 = "row1".getBytes();
1033 Get g = new Get(b1);
1034 g.setConsistency(Consistency.STRONG);
1035
1036
1037
1038 Result r = t.get(g);
1039 Assert.assertFalse(r.isStale());
1040 LOG.info("exists stale after flush done");
1041
1042 SlowMeCopro.getCdl().set(new CountDownLatch(1));
1043 g = new Get(b1);
1044 g.setConsistency(Consistency.TIMELINE);
1045
1046 r = t.get(g);
1047 Assert.assertTrue(r.isStale());
1048 SlowMeCopro.getCdl().get().countDown();
1049 } finally {
1050 SlowMeCopro.getCdl().get().countDown();
1051 admin.setBalancerRunning(true, false);
1052 cluster.getMaster().setCatalogJanitorEnabled(true);
1053 t.close();
1054 }
1055 }
1056
1057 private void insertData(final TableName tableName, HBaseAdmin admin, Table t) throws IOException,
1058 InterruptedException {
1059 Put p = new Put(Bytes.toBytes("row1"));
1060 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
1061 t.put(p);
1062 p = new Put(Bytes.toBytes("row2"));
1063 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
1064 t.put(p);
1065 p = new Put(Bytes.toBytes("row3"));
1066 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
1067 t.put(p);
1068 p = new Put(Bytes.toBytes("row4"));
1069 p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
1070 t.put(p);
1071 admin.flush(tableName);
1072 }
1073
1074
1075
1076
1077
1078 @Test(timeout = 60000)
1079 public void testSplitRegionWithNoStoreFiles()
1080 throws Exception {
1081 final TableName tableName =
1082 TableName.valueOf("testSplitRegionWithNoStoreFiles");
1083
1084 createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
1085 List<HRegion> regions = cluster.getRegions(tableName);
1086 HRegionInfo hri = getAndCheckSingleTableRegion(regions);
1087 ensureTableRegionNotOnSameServerAsMeta(admin, hri);
1088 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1089 .getRegionName());
1090 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1091
1092 this.admin.setBalancerRunning(false, true);
1093
1094 cluster.getMaster().setCatalogJanitorEnabled(false);
1095 try {
1096
1097 printOutRegions(regionServer, "Initial regions: ");
1098 Configuration conf = cluster.getConfiguration();
1099 HBaseFsck.debugLsr(conf, new Path("/"));
1100 Path rootDir = FSUtils.getRootDir(conf);
1101 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
1102 Map<String, Path> storefiles =
1103 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1104 assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
1105
1106
1107 regions = cluster.getRegions(tableName);
1108 final HRegion region = findSplittableRegion(regions);
1109 assertTrue("not able to find a splittable region", region != null);
1110
1111
1112 SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
1113 try {
1114 st.prepare();
1115 st.execute(regionServer, regionServer);
1116 } catch (IOException e) {
1117 fail("Split execution should have succeeded with no exceptions thrown");
1118 }
1119
1120
1121
1122 List<HRegion> daughters = cluster.getRegions(tableName);
1123 assertTrue(daughters.size() == 2);
1124
1125
1126 HBaseFsck.debugLsr(conf, new Path("/"));
1127 Map<String, Path> storefilesAfter =
1128 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1129 assertEquals("Expected nothing but found " + storefilesAfter.toString(),
1130 storefilesAfter.size(), 0);
1131
1132 hri = region.getRegionInfo();
1133 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1134 RegionStates regionStates = am.getRegionStates();
1135 long start = EnvironmentEdgeManager.currentTime();
1136 while (!regionStates.isRegionInState(hri, State.SPLIT)) {
1137 assertFalse("Timed out in waiting split parent to be in state SPLIT",
1138 EnvironmentEdgeManager.currentTime() - start > 60000);
1139 Thread.sleep(500);
1140 }
1141
1142
1143 am.assign(hri, true, true);
1144 assertFalse("Split region can't be assigned",
1145 regionStates.isRegionInTransition(hri));
1146 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1147
1148
1149 am.unassign(hri, true, null);
1150 assertFalse("Split region can't be unassigned",
1151 regionStates.isRegionInTransition(hri));
1152 assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1153 } finally {
1154 admin.setBalancerRunning(true, false);
1155 cluster.getMaster().setCatalogJanitorEnabled(true);
1156 }
1157 }
1158
1159 @Test(timeout = 180000)
1160 public void testSplitHooksBeforeAndAfterPONR() throws Exception {
1161 TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1162 TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1163 HColumnDescriptor hcd = new HColumnDescriptor("cf");
1164
1165 HTableDescriptor desc = new HTableDescriptor(firstTable);
1166 desc.addCoprocessor(MockedRegionObserver.class.getName());
1167 desc.addFamily(hcd);
1168 admin.createTable(desc);
1169 TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1170
1171 desc = new HTableDescriptor(secondTable);
1172 desc.addFamily(hcd);
1173 admin.createTable(desc);
1174 TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1175
1176 List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1177 List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1178
1179
1180 if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1181 fail("Each table should have at least one region.");
1182 }
1183 ServerName serverName = cluster.getServerHoldingRegion(firstTable,
1184 firstTableRegions.get(0).getRegionInfo().getRegionName());
1185 admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1186 Bytes.toBytes(serverName.getServerName()));
1187 Table table1 = null;
1188 Table table2 = null;
1189 try {
1190 table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1191 table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1192 insertData(firstTable, admin, table1);
1193 insertData(secondTable, admin, table2);
1194 admin.split(firstTable, "row2".getBytes());
1195 firstTableRegions = cluster.getRegions(firstTable);
1196 while (firstTableRegions.size() != 2) {
1197 Thread.sleep(1000);
1198 firstTableRegions = cluster.getRegions(firstTable);
1199 }
1200 assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1201 secondTableRegions = cluster.getRegions(secondTable);
1202 assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1203 } finally {
1204 if (table1 != null) {
1205 table1.close();
1206 }
1207 if (table2 != null) {
1208 table2.close();
1209 }
1210 TESTING_UTIL.deleteTable(firstTable);
1211 TESTING_UTIL.deleteTable(secondTable);
1212 }
1213 }
1214
1215 private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1216 final TableName tableName = TableName.valueOf("testSplitBeforeSettingSplittingInZK");
1217 try {
1218
1219 createTableAndWait(tableName, Bytes.toBytes("cf"));
1220
1221 List<HRegion> regions = awaitTableRegions(tableName);
1222 assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1223
1224 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1225 .getRegionName());
1226 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1227 final HRegion region = findSplittableRegion(regions);
1228 assertTrue("not able to find a splittable region", region != null);
1229 SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1230 @Override
1231 public PairOfSameType<Region> stepsBeforePONR(final Server server,
1232 final RegionServerServices services, boolean testing) throws IOException {
1233 throw new SplittingNodeCreationFailedException ();
1234 }
1235 };
1236 String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1237 region.getRegionInfo().getEncodedName());
1238 regionServer.getZooKeeper().sync(node);
1239 for (int i = 0; i < 100; i++) {
1240
1241
1242
1243 if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1244 Thread.sleep(100);
1245 }
1246 }
1247 try {
1248 st.prepare();
1249 st.execute(regionServer, regionServer);
1250 } catch (IOException e) {
1251
1252
1253
1254 assertTrue("Should be instance of CreateSplittingNodeFailedException",
1255 e instanceof SplittingNodeCreationFailedException );
1256 node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1257 region.getRegionInfo().getEncodedName());
1258 {
1259 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1260 }
1261 assertTrue(st.rollback(regionServer, regionServer));
1262 assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1263 }
1264 } finally {
1265 TESTING_UTIL.deleteTable(tableName);
1266 }
1267 }
1268
1269 @Test
1270 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1271 throws Exception {
1272 final TableName tableName =
1273 TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1274 try {
1275 HTableDescriptor htd = new HTableDescriptor(tableName);
1276 htd.addFamily(new HColumnDescriptor("f"));
1277 htd.addFamily(new HColumnDescriptor("i_f"));
1278 htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1279 admin.createTable(htd);
1280 List<HRegion> regions = awaitTableRegions(tableName);
1281 HRegion region = regions.get(0);
1282 for(int i = 3;i<9;i++) {
1283 Put p = new Put(Bytes.toBytes("row"+i));
1284 p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1285 p.add(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1286 region.put(p);
1287 }
1288 region.flush(true);
1289 Store store = region.getStore(Bytes.toBytes("f"));
1290 Collection<StoreFile> storefiles = store.getStorefiles();
1291 assertEquals(storefiles.size(), 1);
1292 assertFalse(region.hasReferences());
1293 Path referencePath =
1294 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1295 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1296 assertNull(referencePath);
1297 referencePath =
1298 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
1299 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1300 assertNotNull(referencePath);
1301 } finally {
1302 TESTING_UTIL.deleteTable(tableName);
1303 }
1304 }
1305
1306 @Test(timeout = 120000)
1307 public void testFailedSplit() throws Exception {
1308 TableName tableName = TableName.valueOf("testFailedSplit");
1309 byte[] colFamily = Bytes.toBytes("info");
1310 TESTING_UTIL.createTable(tableName, colFamily);
1311 Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
1312 HTable table = (HTable) connection.getTable(tableName);
1313 try {
1314 TESTING_UTIL.loadTable(table, colFamily);
1315 List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1316 assertTrue(regions.size() == 1);
1317 final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1318 actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1319 Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1320
1321
1322 admin.split(tableName);
1323 FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1324 .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1325 assertNotNull(observer);
1326 observer.latch.await();
1327 observer.postSplit.await();
1328 LOG.info("Waiting for region to come out of RIT: " + actualRegion);
1329 TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1330 @Override
1331 public boolean evaluate() throws Exception {
1332 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1333 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1334 return (rit.size() == 0);
1335 }
1336 });
1337 regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1338 assertTrue(regions.size() == 1);
1339 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1340 Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1341 assertTrue(rit.size() == 0);
1342 } finally {
1343 table.close();
1344 connection.close();
1345 TESTING_UTIL.deleteTable(tableName);
1346 }
1347 }
1348
1349 @Test (timeout=300000)
1350 public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1351 TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1352 try {
1353 HTableDescriptor desc = new HTableDescriptor(table);
1354 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1355 admin.createTable(desc);
1356 HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1357 for(int i = 1; i < 5; i++) {
1358 Put p1 = new Put(("r"+i).getBytes());
1359 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1360 hTable.put(p1);
1361 }
1362 admin.flush(desc.getTableName());
1363 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1364 int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1365 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1366 cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1367 SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
1368 st.prepare();
1369 st.stepsBeforePONR(regionServer, regionServer, false);
1370 Path tableDir =
1371 FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1372 desc.getTableName());
1373 tableDir.getFileSystem(cluster.getConfiguration());
1374 List<Path> regionDirs =
1375 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1376 assertEquals(3,regionDirs.size());
1377 cluster.startRegionServer();
1378 regionServer.kill();
1379 cluster.getRegionServerThreads().get(serverWith).join();
1380
1381 while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1382 Thread.sleep(10);
1383 }
1384 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1385 while(am.getRegionStates().isRegionsInTransition()) {
1386 Thread.sleep(10);
1387 }
1388 assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
1389 .getRegionStates().getRegionsInTransition().size());
1390 regionDirs =
1391 FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1392 assertEquals(1,regionDirs.size());
1393 } finally {
1394 TESTING_UTIL.deleteTable(table);
1395 }
1396 }
1397
1398 public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
1399
1400 public void initialize(Server server, HRegion region) {
1401 this.server = server;
1402 this.watcher = server.getZooKeeper();
1403 splitTransactionCoordination = new MockedSplitTransactionCoordination(this, watcher, region);
1404 closeRegionCoordination = new ZkCloseRegionCoordination(this, watcher);
1405 openRegionCoordination = new ZkOpenRegionCoordination(this, watcher);
1406 }
1407 }
1408
1409 public static class MockedSplitTransaction extends SplitTransactionImpl {
1410
1411 private HRegion currentRegion;
1412 public MockedSplitTransaction(HRegion region, byte[] splitrow) {
1413 super(region, splitrow);
1414 this.currentRegion = region;
1415 }
1416 @Override
1417 public boolean rollback(Server server, RegionServerServices services) throws IOException {
1418 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1419 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1420 if(secondSplit){
1421 super.rollback(server, services);
1422 latch.countDown();
1423 return true;
1424 }
1425 }
1426 return super.rollback(server, services);
1427 }
1428
1429
1430 }
1431
1432 public static class MockedSplitTransactionCoordination extends ZKSplitTransactionCoordination {
1433
1434 private HRegion currentRegion;
1435
1436 public MockedSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
1437 ZooKeeperWatcher watcher, HRegion region) {
1438 super(coordinationProvider, watcher);
1439 currentRegion = region;
1440 }
1441
1442 @Override
1443 public void completeSplitTransaction(RegionServerServices services, Region a, Region b,
1444 SplitTransactionDetails std, Region parent) throws IOException {
1445 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1446 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1447 try {
1448 if (!secondSplit){
1449 callRollBack = true;
1450 latch.await();
1451 }
1452 } catch (InterruptedException e) {
1453 }
1454
1455 }
1456 super.completeSplitTransaction(services, a, b, std, parent);
1457 if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1458 .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1459 firstSplitCompleted = true;
1460 }
1461 }
1462 }
1463
1464 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1465 for (int i = 0; i < 5; ++i) {
1466 for (HRegion r: regions) {
1467 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
1468 return(r);
1469 }
1470 }
1471 Thread.sleep(100);
1472 }
1473 return(null);
1474 }
1475
1476 private List<HRegion> checkAndGetDaughters(TableName tableName)
1477 throws InterruptedException {
1478 List<HRegion> daughters = null;
1479
1480 for (int i=0; i<100; i++) {
1481 daughters = cluster.getRegions(tableName);
1482 if (daughters.size() >= 2) break;
1483 Thread.sleep(100);
1484 }
1485 assertTrue(daughters.size() >= 2);
1486 return daughters;
1487 }
1488
1489 private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1490 throws IOException, InterruptedException {
1491 cluster.abortMaster(0);
1492 cluster.waitOnMaster(0);
1493 cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1494 MockMasterWithoutCatalogJanitor.class, HMaster.class);
1495 MockMasterWithoutCatalogJanitor master = null;
1496 master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1497 cluster.waitForActiveAndReadyMaster();
1498 return master;
1499 }
1500
1501 private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1502 throws IOException, InterruptedException {
1503 this.admin.split(hri.getRegionNameAsString());
1504 try {
1505 for (int i = 0; ProtobufUtil.getOnlineRegions(
1506 server.getRSRpcServices()).size() <= regionCount && i < 300; i++) {
1507 LOG.debug("Waiting on region to split");
1508 Thread.sleep(100);
1509 }
1510
1511 assertFalse("Waited too long for split",
1512 ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size() <= regionCount);
1513 } catch (RegionServerStoppedException e) {
1514 if (useZKForAssignment) {
1515
1516 LOG.error(e);
1517 throw e;
1518 }
1519 }
1520 }
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
1534 final HRegionInfo hri)
1535 throws IOException, MasterNotRunningException,
1536 ZooKeeperConnectionException, InterruptedException {
1537
1538
1539
1540 int metaServerIndex = cluster.getServerWithMeta();
1541 assertTrue(metaServerIndex != -1);
1542 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1543 int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1544 assertTrue(tableRegionIndex != -1);
1545 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1546 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1547 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1548 assertNotNull(hrs);
1549 assertNotNull(hri);
1550 LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1551 metaRegionServer.getServerName() + " to " +
1552 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1553 admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1554 }
1555
1556 for (int i = 0; i < 20; i++) {
1557 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1558 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1559 LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1560 tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1561 Thread.sleep(1000);
1562 }
1563 assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1564 && tableRegionIndex != metaServerIndex);
1565
1566 tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1567 assertTrue(tableRegionIndex != -1);
1568 assertNotSame(metaServerIndex, tableRegionIndex);
1569 return tableRegionIndex;
1570 }
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1582 final HRegionServer notThisOne) {
1583 for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1584 HRegionServer hrs = rst.getRegionServer();
1585 if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1586 if (hrs.isStopping() || hrs.isStopped()) continue;
1587 return hrs;
1588 }
1589 return null;
1590 }
1591
1592 private void printOutRegions(final HRegionServer hrs, final String prefix)
1593 throws IOException {
1594 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1595 for (HRegionInfo region: regions) {
1596 LOG.info(prefix + region.getRegionNameAsString());
1597 }
1598 }
1599
1600 private void waitUntilRegionServerDead() throws InterruptedException, InterruptedIOException {
1601
1602 for (int i=0; cluster.getMaster().getClusterStatus().
1603 getServers().size() > NB_SERVERS && i<100; i++) {
1604 LOG.info("Waiting on server to go down");
1605 Thread.sleep(100);
1606 }
1607 assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1608 getServers().size() > NB_SERVERS);
1609 }
1610
1611 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1612
1613 for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1614 LOG.info("Waiting for repair to happen");
1615 Thread.sleep(1000);
1616 }
1617 if (cluster.getRegions(tableName).size() < numDaughters) {
1618 fail("Waiting too long for daughter regions");
1619 }
1620 }
1621
1622 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1623 List<HRegion> regions = null;
1624 for (int i = 0; i < 100; i++) {
1625 regions = cluster.getRegions(tableName);
1626 if (regions.size() > 0) break;
1627 Thread.sleep(100);
1628 }
1629 return regions;
1630 }
1631
1632 private HTable createTableAndWait(TableName tableName, byte[] cf) throws IOException,
1633 InterruptedException {
1634 HTable t = TESTING_UTIL.createTable(tableName, cf);
1635 awaitTableRegions(tableName);
1636 assertTrue("Table not online: " + tableName,
1637 cluster.getRegions(tableName).size() != 0);
1638 return t;
1639 }
1640
1641 public static class MockMasterWithoutCatalogJanitor extends HMaster {
1642
1643 public MockMasterWithoutCatalogJanitor(Configuration conf, CoordinatedStateManager cp)
1644 throws IOException, KeeperException,
1645 InterruptedException {
1646 super(conf, cp);
1647 }
1648 }
1649
1650 private static class SplittingNodeCreationFailedException extends IOException {
1651 private static final long serialVersionUID = 1652404976265623004L;
1652
1653 public SplittingNodeCreationFailedException () {
1654 super();
1655 }
1656 }
1657
1658 public static class MockedRegionObserver extends BaseRegionObserver {
1659 private SplitTransactionImpl st = null;
1660 private PairOfSameType<Region> daughterRegions = null;
1661
1662 @Override
1663 public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1664 byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1665 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1666 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1667 List<Region> onlineRegions =
1668 rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1669 Region region = onlineRegions.get(0);
1670 for (Region r : onlineRegions) {
1671 if (r.getRegionInfo().containsRow(splitKey)) {
1672 region = r;
1673 break;
1674 }
1675 }
1676 st = new SplitTransactionImpl((HRegion) region, splitKey);
1677 if (!st.prepare()) {
1678 LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1679 + " failed. So returning null. ");
1680 ctx.bypass();
1681 return;
1682 }
1683 ((HRegion)region).forceSplit(splitKey);
1684 daughterRegions = st.stepsBeforePONR(rs, rs, false);
1685 HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1686 copyOfParent.setOffline(true);
1687 copyOfParent.setSplit(true);
1688
1689 Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
1690 MetaTableAccessor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1691 daughterRegions.getSecond().getRegionInfo());
1692 metaEntries.add(putParent);
1693
1694 Put putA = MetaTableAccessor.makePutFromRegionInfo(
1695 daughterRegions.getFirst().getRegionInfo());
1696 Put putB = MetaTableAccessor.makePutFromRegionInfo(
1697 daughterRegions.getSecond().getRegionInfo());
1698 st.addLocation(putA, rs.getServerName(), 1);
1699 st.addLocation(putB, rs.getServerName(), 1);
1700 metaEntries.add(putA);
1701 metaEntries.add(putB);
1702 }
1703
1704 @Override
1705 public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1706 throws IOException {
1707 RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1708 HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1709 st.stepsAfterPONR(rs, rs, daughterRegions, null);
1710 }
1711
1712 }
1713
1714 static class CustomSplitPolicy extends RegionSplitPolicy {
1715
1716 @Override
1717 protected boolean shouldSplit() {
1718 return true;
1719 }
1720
1721 @Override
1722 public boolean skipStoreFileRangeCheck(String familyName) {
1723 if(familyName.startsWith("i_")) {
1724 return true;
1725 } else {
1726 return false;
1727 }
1728 }
1729 }
1730 }
1731