View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.MediumTests;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.client.Put;
33  import org.apache.hadoop.hbase.executor.EventType;
34  import org.apache.hadoop.hbase.protobuf.RequestConverter;
35  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
36  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
37  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
40  import org.junit.After;
41  import org.junit.AfterClass;
42  import org.junit.Assert;
43  import org.junit.BeforeClass;
44  import org.junit.Test;
45  import org.junit.experimental.categories.Category;
46  
47  import com.google.protobuf.ServiceException;
48  
49  
50  /**
51   * Tests on the region server, without the master.
52   */
53  @Category(MediumTests.class)
54  public class TestRegionServerNoMaster {
55  
56    private static final int NB_SERVERS = 1;
57    private static HTable table;
58    private static final byte[] row = "ee".getBytes();
59  
60    private static HRegionInfo hri;
61  
62    private static byte[] regionName;
63    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
64  
65  
66    @BeforeClass
67    public static void before() throws Exception {
68      HTU.startMiniCluster(NB_SERVERS);
69      final byte[] tableName = Bytes.toBytes(TestRegionServerNoMaster.class.getSimpleName());
70  
71      // Create table then get the single region for our new table.
72      table = HTU.createTable(tableName, HConstants.CATALOG_FAMILY);
73      Put p = new Put(row);
74      p.add(HConstants.CATALOG_FAMILY, row, row);
75      table.put(p);
76  
77      hri = table.getRegionLocation(row, false).getRegionInfo();
78      regionName = hri.getRegionName();
79  
80      // No master
81      HTU.getHBaseCluster().getMaster().stopMaster();
82    }
83  
84    @AfterClass
85    public static void afterClass() throws Exception {
86      table.close();
87      HTU.shutdownMiniCluster();
88    }
89  
90    @After
91    public void after() throws Exception {
92      // Clean the state if the test failed before cleaning the znode
93      // It does not manage all bad failures, so if there are multiple failures, only
94      //  the first one should be looked at.
95      ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
96    }
97  
98  
99    private static HRegionServer getRS() {
100     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
101   }
102 
103 
104   /**
105    * Reopen the region. Reused in multiple tests as we always leave the region open after a test.
106    */
107   private void reopenRegion() throws Exception {
108     // We reopen. We need a ZK node here, as a open is always triggered by a master.
109     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
110     // first version is '0'
111     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
112       getRS().getServerName(), hri, 0, null, null);
113     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
114     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
115     Assert.assertTrue(responseOpen.getOpeningState(0).
116         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
117 
118 
119     checkRegionIsOpened();
120   }
121 
122   private void checkRegionIsOpened() throws Exception {
123 
124     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
125       Thread.sleep(1);
126     }
127 
128     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
129 
130     Assert.assertTrue(
131       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
132         getRS().getServerName()));
133   }
134 
135 
136   private void checkRegionIsClosed() throws Exception {
137 
138     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
139       Thread.sleep(1);
140     }
141 
142     try {
143       Assert.assertFalse(getRS().getRegion(regionName).isAvailable());
144     } catch (NotServingRegionException expected) {
145       // That's how it work: if the region is closed we have an exception.
146     }
147 
148     // We don't delete the znode here, because there is not always a znode.
149   }
150 
151 
152   /**
153    * Close the region without using ZK
154    */
155   private void closeNoZK() throws Exception {
156     // no transition in ZK
157     AdminProtos.CloseRegionRequest crr =
158         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
159     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
160     Assert.assertTrue(responseClose.getClosed());
161 
162     // now waiting & checking. After a while, the transition should be done and the region closed
163     checkRegionIsClosed();
164   }
165 
166 
167   @Test(timeout = 60000)
168   public void testCloseByRegionServer() throws Exception {
169     closeNoZK();
170     reopenRegion();
171   }
172 
173   @Test(timeout = 60000)
174   public void testCloseByMasterWithoutZNode() throws Exception {
175 
176     // Transition in ZK on. This should fail, as there is no znode
177     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
178       getRS().getServerName(), regionName, true);
179     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
180     Assert.assertTrue(responseClose.getClosed());
181 
182     // now waiting. After a while, the transition should be done
183     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
184       Thread.sleep(1);
185     }
186 
187     // the region is still available, the close got rejected at the end
188     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
189   }
190 
191   @Test(timeout = 60000)
192   public void testOpenCloseByMasterWithZNode() throws Exception {
193 
194     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
195 
196     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
197       getRS().getServerName(), regionName, true);
198     AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
199     Assert.assertTrue(responseClose.getClosed());
200 
201     checkRegionIsClosed();
202 
203     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
204       getRS().getServerName());
205 
206     reopenRegion();
207   }
208 
209   /**
210    * Test that we can send multiple openRegion to the region server.
211    * This is used when:
212    * - there is a SocketTimeout: in this case, the master does not know if the region server
213    * received the request before the timeout.
214    * - We have a socket error during the operation: same stuff: we don't know
215    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
216    * the region server has received the query or not. Only solution to be efficient: re-ask
217    * immediately.
218    */
219   @Test(timeout = 60000)
220   public void testMultipleOpen() throws Exception {
221 
222     // We close
223     closeNoZK();
224     checkRegionIsClosed();
225 
226     // We reopen. We need a ZK node here, as a open is always triggered by a master.
227     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
228 
229     // We're sending multiple requests in a row. The region server must handle this nicely.
230     for (int i = 0; i < 10; i++) {
231       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
232         getRS().getServerName(), hri, 0, null, null);
233       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
234       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
235 
236       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
237       Assert.assertTrue("request " + i + " failed",
238           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
239               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
240       );
241     }
242 
243     checkRegionIsOpened();
244   }
245 
246   @Test
247   public void testOpenClosingRegion() throws Exception {
248     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
249 
250     try {
251       // fake region to be closing now, need to clear state afterwards
252       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
253       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
254         getRS().getServerName(), hri, 0, null, null);
255       getRS().openRegion(null, orr);
256       Assert.fail("The closing region should not be opened");
257     } catch (ServiceException se) {
258       Assert.assertTrue("The region should be already in transition",
259         se.getCause() instanceof RegionAlreadyInTransitionException);
260     } finally {
261       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
262     }
263   }
264 
265   @Test(timeout = 60000)
266   public void testMultipleCloseFromMaster() throws Exception {
267 
268     // As opening, we must support multiple requests on the same region
269     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
270     for (int i = 0; i < 10; i++) {
271       AdminProtos.CloseRegionRequest crr =
272           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
273       try {
274         AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr);
275         Assert.assertEquals("The first request should succeeds", 0, i);
276         Assert.assertTrue("request " + i + " failed",
277             responseClose.getClosed() || responseClose.hasClosed());
278       } catch (ServiceException se) {
279         Assert.assertTrue("The next queries should throw an exception.", i > 0);
280       }
281     }
282 
283     checkRegionIsClosed();
284 
285     Assert.assertTrue(
286       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
287         getRS().getServerName())
288     );
289 
290     reopenRegion();
291   }
292 
293   /**
294    * Test that if we do a close while opening it stops the opening.
295    */
296   @Test(timeout = 60000)
297   public void testCancelOpeningWithoutZK() throws Exception {
298     // We close
299     closeNoZK();
300     checkRegionIsClosed();
301 
302     // Let do the initial steps, without having a handler
303     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
304     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
305 
306     // That's a close without ZK.
307     AdminProtos.CloseRegionRequest crr =
308         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
309     try {
310       getRS().closeRegion(null, crr);
311       Assert.assertTrue(false);
312     } catch (ServiceException expected) {
313     }
314 
315     // The state in RIT should have changed to close
316     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
317         hri.getEncodedNameAsBytes()));
318 
319     // Let's start the open handler
320     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
321     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
322 
323     // The open handler should have removed the region from RIT but kept the region closed
324     checkRegionIsClosed();
325 
326     // The open handler should have updated the value in ZK.
327     Assert.assertTrue(ZKAssign.deleteNode(
328         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
329         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
330     );
331 
332     reopenRegion();
333   }
334 
335   /**
336    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
337    * the opening will fail as well because it doesn't find what it expects in ZK.
338    */
339   @Test(timeout = 60000)
340   public void testCancelOpeningWithZK() throws Exception {
341     // We close
342     closeNoZK();
343     checkRegionIsClosed();
344 
345     // Let do the initial steps, without having a handler
346     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
347 
348     // That's a close without ZK.
349     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
350     AdminProtos.CloseRegionRequest crr =
351         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
352     try {
353       getRS().closeRegion(null, crr);
354       Assert.assertTrue(false);
355     } catch (ServiceException expected) {
356       Assert.assertTrue(expected.getCause() instanceof NotServingRegionException);
357     }
358 
359     // The close should have left the ZK state as it is: it's the job the AM to delete it
360     Assert.assertTrue(ZKAssign.deleteNode(
361         getRS().getZooKeeperWatcher(), hri.getEncodedName(),
362         EventType.M_ZK_REGION_CLOSING, 0)
363     );
364 
365     // The state in RIT should have changed to close
366     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
367         hri.getEncodedNameAsBytes()));
368 
369     // Let's start the open handler
370     // It should not succeed for two reasons:
371     //  1) There is no ZK node
372     //  2) The region in RIT was changed.
373     // The order is more or less implementation dependant.
374     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
375     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd, 0));
376 
377     // The open handler should have removed the region from RIT but kept the region closed
378     checkRegionIsClosed();
379 
380     // We should not find any znode here.
381     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
382 
383     reopenRegion();
384   }
385 
386   /**
387    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
388    * for openRegion. The region server should reject this RPC. (HBASE-9721)
389    */
390   @Test
391   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
392     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
393 
394     ServerName sn = getRS().getServerName();
395     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
396 
397     try {
398       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
399       getRS().closeRegion(null, request);
400       Assert.fail("The closeRegion should have been rejected");
401     } catch (ServiceException se) {
402       Assert.assertTrue(se.getCause() instanceof IOException);
403       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
404     }
405 
406     //actual close
407     closeNoZK();
408     try {
409       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
410         earlierServerName, hri, 0, null, null);
411       getRS().openRegion(null, orr);
412       Assert.fail("The openRegion should have been rejected");
413     } catch (ServiceException se) {
414       Assert.assertTrue(se.getCause() instanceof IOException);
415       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
416     } finally {
417       reopenRegion();
418     }
419   }
420 }