View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HBaseTestingUtility;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.HTableDescriptor;
30  import org.apache.hadoop.hbase.testclassification.MediumTests;
31  import org.apache.hadoop.hbase.NotServingRegionException;
32  import org.apache.hadoop.hbase.ServerName;
33  import org.apache.hadoop.hbase.MetaTableAccessor;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.client.HTable;
36  import org.apache.hadoop.hbase.client.Put;
37  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
38  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
39  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
40  import org.apache.hadoop.hbase.executor.EventType;
41  import org.apache.hadoop.hbase.protobuf.RequestConverter;
42  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
43  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
44  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
45  import org.apache.hadoop.hbase.util.Threads;
46  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
47  import org.apache.hadoop.hbase.master.HMaster;
48  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
49  import org.apache.zookeeper.KeeperException;
50  import org.apache.zookeeper.KeeperException.NodeExistsException;
51  import org.junit.After;
52  import org.junit.AfterClass;
53  import org.junit.Assert;
54  import org.junit.BeforeClass;
55  import org.junit.Test;
56  import org.junit.experimental.categories.Category;
57  
58  import com.google.protobuf.ServiceException;
59  
60  
61  /**
62   * Tests on the region server, without the master.
63   */
64  @Category(MediumTests.class)
65  public class TestRegionServerNoMaster {
66  
67    private static final Log LOG = LogFactory.getLog(TestRegionServerNoMaster.class);
68  
69    private static final int NB_SERVERS = 1;
70    private static HTable table;
71    private static final byte[] row = "ee".getBytes();
72  
73    private static HRegionInfo hri;
74  
75    private static byte[] regionName;
76    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
77  
78  
79    @BeforeClass
80    public static void before() throws Exception {
81      HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
82      HTU.startMiniCluster(NB_SERVERS);
83      final TableName tableName = TableName.valueOf(TestRegionServerNoMaster.class.getSimpleName());
84  
85      // Create table then get the single region for our new table.
86      table = HTU.createTable(tableName,HConstants.CATALOG_FAMILY);
87      Put p = new Put(row);
88      p.add(HConstants.CATALOG_FAMILY, row, row);
89      table.put(p);
90  
91      hri = table.getRegionLocation(row, false).getRegionInfo();
92      regionName = hri.getRegionName();
93  
94      stopMasterAndAssignMeta(HTU);
95    }
96  
97    public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
98        throws NodeExistsException, KeeperException, IOException, InterruptedException {
99      // Stop master
100     HMaster master = HTU.getHBaseCluster().getMaster();
101     Thread masterThread = HTU.getHBaseCluster().getMasterThread();
102     ServerName masterAddr = master.getServerName();
103     master.stopMaster();
104 
105     LOG.info("Waiting until master thread exits");
106     while (masterThread != null && masterThread.isAlive()) {
107       Threads.sleep(100);
108     }
109   }
110 
111   /** Flush the given region in the mini cluster. Since no master, we cannot use HBaseAdmin.flush() */
112   public static void flushRegion(HBaseTestingUtility HTU, HRegionInfo regionInfo) throws IOException {
113     for (RegionServerThread rst : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
114       Region region = rst.getRegionServer().getRegionByEncodedName(regionInfo.getEncodedName());
115       if (region != null) {
116         region.flush(true);
117         return;
118       }
119     }
120     throw new IOException("Region to flush cannot be found");
121   }
122 
123   @AfterClass
124   public static void afterClass() throws Exception {
125     table.close();
126     HTU.shutdownMiniCluster();
127   }
128 
129   @After
130   public void after() throws Exception {
131     // Clean the state if the test failed before cleaning the znode
132     // It does not manage all bad failures, so if there are multiple failures, only
133     //  the first one should be looked at.
134     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
135   }
136 
137 
138   private static HRegionServer getRS() {
139     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
140   }
141 
142   public static void openRegion(HBaseTestingUtility HTU, HRegionServer rs, HRegionInfo hri)
143       throws Exception {
144     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, rs.getServerName());
145     // first version is '0'
146     AdminProtos.OpenRegionRequest orr =
147         RequestConverter.buildOpenRegionRequest(rs.getServerName(), hri, 0, null, null);
148     AdminProtos.OpenRegionResponse responseOpen = rs.rpcServices.openRegion(null, orr);
149 
150     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
151     Assert.assertTrue(responseOpen.getOpeningState(0).
152         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
153 
154 
155     checkRegionIsOpened(HTU, rs, hri);
156   }
157 
158   public static void checkRegionIsOpened(HBaseTestingUtility HTU, HRegionServer rs,
159       HRegionInfo hri) throws Exception {
160     while (!rs.getRegionsInTransitionInRS().isEmpty()) {
161       Thread.sleep(1);
162     }
163 
164     Assert.assertTrue(rs.getRegion(hri.getRegionName()).isAvailable());
165 
166     Assert.assertTrue(
167       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
168         rs.getServerName()));
169   }
170 
171   public static void closeRegion(HBaseTestingUtility HTU, HRegionServer rs, HRegionInfo hri)
172       throws Exception {
173     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, rs.getServerName());
174     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
175       rs.getServerName(), hri.getEncodedName(), true);
176     AdminProtos.CloseRegionResponse responseClose = rs.rpcServices.closeRegion(null, crr);
177     Assert.assertTrue(responseClose.getClosed());
178     checkRegionIsClosed(HTU, rs, hri);
179     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null);
180   }
181 
182   public static void checkRegionIsClosed(HBaseTestingUtility HTU, HRegionServer rs,
183       HRegionInfo hri) throws Exception {
184     while (!rs.getRegionsInTransitionInRS().isEmpty()) {
185       Thread.sleep(1);
186     }
187 
188     boolean exception = false;
189     try {
190       while ((rs.getRegion(hri.getRegionName()).isAvailable())) {
191         Thread.sleep(10);
192       }
193     } catch (NotServingRegionException expected) {
194       exception = true;
195       // That's how it work: if the region is closed we have an exception.
196     }
197     assert(exception);
198     // We don't delete the znode here, because there is not always a znode.
199   }
200 
201   /**
202    * Close the region without using ZK
203    */
204   private void closeRegionNoZK() throws Exception {
205     // no transition in ZK
206     AdminProtos.CloseRegionRequest crr =
207         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
208     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
209     Assert.assertTrue(responseClose.getClosed());
210 
211     // now waiting & checking. After a while, the transition should be done and the region closed
212     checkRegionIsClosed(HTU, getRS(), hri);
213   }
214 
215 
216   @Test(timeout = 60000)
217   public void testCloseByRegionServer() throws Exception {
218     closeRegionNoZK();
219     openRegion(HTU, getRS(), hri);
220   }
221 
222   @Test(timeout = 60000)
223   public void testCloseByMasterWithoutZNode() throws Exception {
224 
225     // Transition in ZK on. This should fail, as there is no znode
226     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
227       getRS().getServerName(), regionName, true);
228     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
229     Assert.assertTrue(responseClose.getClosed());
230 
231     // now waiting. After a while, the transition should be done
232     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
233       Thread.sleep(1);
234     }
235 
236     // the region is still available, the close got rejected at the end
237     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
238   }
239 
240   @Test(timeout = 60000)
241   public void testOpenCloseByMasterWithZNode() throws Exception {
242 
243     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
244 
245     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
246       getRS().getServerName(), regionName, true);
247     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
248     Assert.assertTrue(responseClose.getClosed());
249 
250     checkRegionIsClosed(HTU, getRS(), hri);
251 
252     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
253       getRS().getServerName());
254 
255     openRegion(HTU, getRS(), hri);
256   }
257 
258   /**
259    * Test that we can send multiple openRegion to the region server.
260    * This is used when:
261    * - there is a SocketTimeout: in this case, the master does not know if the region server
262    * received the request before the timeout.
263    * - We have a socket error during the operation: same stuff: we don't know
264    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
265    * the region server has received the query or not. Only solution to be efficient: re-ask
266    * immediately.
267    */
268   @Test(timeout = 60000)
269   public void testMultipleOpen() throws Exception {
270 
271     // We close
272     closeRegionNoZK();
273     checkRegionIsClosed(HTU, getRS(), hri);
274 
275     // We reopen. We need a ZK node here, as a open is always triggered by a master.
276     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
277 
278     // We're sending multiple requests in a row. The region server must handle this nicely.
279     for (int i = 0; i < 10; i++) {
280       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
281         getRS().getServerName(), hri, 0, null, null);
282       AdminProtos.OpenRegionResponse responseOpen = getRS().rpcServices.openRegion(null, orr);
283       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
284 
285       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
286       Assert.assertTrue("request " + i + " failed",
287           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
288               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
289       );
290     }
291 
292     checkRegionIsOpened(HTU, getRS(), hri);
293   }
294 
295   @Test
296   public void testOpenClosingRegion() throws Exception {
297     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
298 
299     try {
300       // we re-opened meta so some of its data is lost
301       ServerName sn = getRS().getServerName();
302       MetaTableAccessor.updateRegionLocation(getRS().getConnection(),
303         hri, sn, getRS().getRegion(regionName).getOpenSeqNum(), -1);
304       // fake region to be closing now, need to clear state afterwards
305       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
306       AdminProtos.OpenRegionRequest orr =
307         RequestConverter.buildOpenRegionRequest(sn, hri, 0, null, null);
308       getRS().rpcServices.openRegion(null, orr);
309       Assert.fail("The closing region should not be opened");
310     } catch (ServiceException se) {
311       Assert.assertTrue("The region should be already in transition",
312         se.getCause() instanceof RegionAlreadyInTransitionException);
313     } finally {
314       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
315     }
316   }
317 
318   @Test(timeout = 60000)
319   public void testMultipleCloseFromMaster() throws Exception {
320 
321     // As opening, we must support multiple requests on the same region
322     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
323     for (int i = 0; i < 10; i++) {
324       AdminProtos.CloseRegionRequest crr =
325           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
326       try {
327         AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
328         Assert.assertEquals("The first request should succeeds", 0, i);
329         Assert.assertTrue("request " + i + " failed",
330             responseClose.getClosed() || responseClose.hasClosed());
331       } catch (ServiceException se) {
332         Assert.assertTrue("The next queries should throw an exception.", i > 0);
333       }
334     }
335 
336     checkRegionIsClosed(HTU, getRS(), hri);
337 
338     Assert.assertTrue(
339       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
340         getRS().getServerName())
341     );
342 
343     openRegion(HTU, getRS(), hri);
344   }
345 
346   /**
347    * Test that if we do a close while opening it stops the opening.
348    */
349   @Test(timeout = 60000)
350   public void testCancelOpeningWithoutZK() throws Exception {
351     // We close
352     closeRegionNoZK();
353     checkRegionIsClosed(HTU, getRS(), hri);
354 
355     // Let do the initial steps, without having a handler
356     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
357     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
358 
359     // That's a close without ZK.
360     AdminProtos.CloseRegionRequest crr =
361         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
362     try {
363       getRS().rpcServices.closeRegion(null, crr);
364       Assert.assertTrue(false);
365     } catch (ServiceException expected) {
366     }
367 
368     // The state in RIT should have changed to close
369     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
370         hri.getEncodedNameAsBytes()));
371 
372     // Let's start the open handler
373     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
374 
375     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
376     csm.initialize(getRS());
377     csm.start();
378 
379     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
380       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
381     zkCrd.setServerName(getRS().getServerName());
382     zkCrd.setVersionOfOfflineNode(0);
383 
384     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
385       -1, csm.getOpenRegionCoordination(), zkCrd));
386 
387     // The open handler should have removed the region from RIT but kept the region closed
388     checkRegionIsClosed(HTU, getRS(), hri);
389 
390     // The open handler should have updated the value in ZK.
391     Assert.assertTrue(ZKAssign.deleteNode(
392         getRS().getZooKeeper(), hri.getEncodedName(),
393         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
394     );
395 
396     openRegion(HTU, getRS(), hri);
397   }
398 
399   /**
400    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
401    * the opening will fail as well because it doesn't find what it expects in ZK.
402    */
403   @Test(timeout = 60000)
404   public void testCancelOpeningWithZK() throws Exception {
405     // We close
406     closeRegionNoZK();
407     checkRegionIsClosed(HTU, getRS(), hri);
408 
409     // Let do the initial steps, without having a handler
410     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
411 
412     // That's a close without ZK.
413     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
414     AdminProtos.CloseRegionRequest crr =
415         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
416     try {
417       getRS().rpcServices.closeRegion(null, crr);
418       Assert.assertTrue(false);
419     } catch (ServiceException expected) {
420       Assert.assertTrue(expected.getCause() instanceof RegionAlreadyInTransitionException);
421     }
422 
423     // The close should have left the ZK state as it is: it's the job the AM to delete it
424     Assert.assertTrue(ZKAssign.deleteNode(
425         getRS().getZooKeeper(), hri.getEncodedName(),
426         EventType.M_ZK_REGION_CLOSING, 0)
427     );
428 
429     // The state in RIT should have changed to close
430     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
431         hri.getEncodedNameAsBytes()));
432 
433     // Let's start the open handler
434     // It should not succeed for two reasons:
435     //  1) There is no ZK node
436     //  2) The region in RIT was changed.
437     // The order is more or less implementation dependant.
438     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
439 
440     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
441     csm.initialize(getRS());
442     csm.start();
443 
444     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
445       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
446     zkCrd.setServerName(getRS().getServerName());
447     zkCrd.setVersionOfOfflineNode(0);
448 
449     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
450       -1, csm.getOpenRegionCoordination(), zkCrd));
451 
452     // The open handler should have removed the region from RIT but kept the region closed
453     checkRegionIsClosed(HTU, getRS(), hri);
454 
455     // We should not find any znode here.
456     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
457 
458     openRegion(HTU, getRS(), hri);
459   }
460 
461   /**
462    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
463    * for openRegion. The region server should reject this RPC. (HBASE-9721)
464    */
465   @Test
466   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
467     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
468 
469     ServerName sn = getRS().getServerName();
470     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
471 
472     try {
473       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
474       getRS().getRSRpcServices().closeRegion(null, request);
475       Assert.fail("The closeRegion should have been rejected");
476     } catch (ServiceException se) {
477       Assert.assertTrue(se.getCause() instanceof IOException);
478       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
479     }
480 
481     //actual close
482     closeRegionNoZK();
483     try {
484       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
485         earlierServerName, hri, 0, null, null);
486       getRS().getRSRpcServices().openRegion(null, orr);
487       Assert.fail("The openRegion should have been rejected");
488     } catch (ServiceException se) {
489       Assert.assertTrue(se.getCause() instanceof IOException);
490       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
491     } finally {
492       openRegion(HTU, getRS(), hri);
493     }
494   }
495 }