View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.fail;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.client.Admin;
27  import org.apache.hadoop.hbase.client.Connection;
28  import org.apache.hadoop.hbase.client.ConnectionFactory;
29  import org.apache.hadoop.hbase.client.RegionLocator;
30  import org.apache.hadoop.hbase.master.RegionStates;
31  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
32  import org.apache.hadoop.hbase.regionserver.HRegionServer;
33  import org.apache.hadoop.hbase.testclassification.LargeTests;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.util.JVMClusterUtil;
36  import org.apache.hadoop.hbase.util.Threads;
37  import org.junit.After;
38  import org.junit.Before;
39  import org.junit.Test;
40  import org.junit.experimental.categories.Category;
41  import org.junit.runner.RunWith;
42  import org.junit.runners.Parameterized;
43  import org.junit.runners.Parameterized.Parameters;
44  
45  import java.io.IOException;
46  import java.util.ArrayList;
47  import java.util.Arrays;
48  import java.util.Collection;
49  import java.util.List;
50  
51  /**
52   * Test whether region re-balancing works. (HBASE-71)
53   */
54  @Category(LargeTests.class)
55  @RunWith(value = Parameterized.class)
56  public class TestRegionRebalancing {
57  
58    @Parameters
59    public static Collection<Object[]> data() {
60      Object[][] balancers =
61          new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
62              { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
63      return Arrays.asList(balancers);
64    }
65  
66    private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
67    private static final Log LOG = LogFactory.getLog(TestRegionRebalancing.class);
68    private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
69    private RegionLocator regionLocator;
70    private HTableDescriptor desc;
71    private String balancerName;
72  
73    public TestRegionRebalancing(String balancerName) {
74      this.balancerName = balancerName;
75  
76    }
77  
78    @After
79    public void after() throws Exception {
80      UTIL.shutdownMiniCluster();
81    }
82  
83    @Before
84    public void before() throws Exception {
85      UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
86      UTIL.startMiniCluster(1);
87      this.desc = new HTableDescriptor(TableName.valueOf("test"));
88      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
89    }
90  
91    /**
92     * For HBASE-71. Try a few different configurations of starting and stopping
93     * region servers to see if the assignment or regions is pretty balanced.
94     * @throws IOException
95     * @throws InterruptedException
96     */
97    @Test (timeout=300000)
98    public void testRebalanceOnRegionServerNumberChange()
99    throws IOException, InterruptedException {
100     try(Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
101         Admin admin = connection.getAdmin()) {
102       admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
103           1, HBaseTestingUtility.KEYS.length));
104       this.regionLocator = connection.getRegionLocator(this.desc.getTableName());
105   
106       MetaTableAccessor.fullScanMetaAndPrint(admin.getConnection());
107   
108       assertEquals("Test table should have right number of regions",
109         HBaseTestingUtility.KEYS.length,
110         this.regionLocator.getStartKeys().length);
111   
112       // verify that the region assignments are balanced to start out
113       assertRegionsAreBalanced();
114   
115       // add a region server - total of 2
116       LOG.info("Started second server=" +
117         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
118       UTIL.getHBaseCluster().getMaster().balance();
119       assertRegionsAreBalanced();
120   
121       // On a balanced cluster, calling balance() should return true
122       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
123   
124       // if we add a server, then the balance() call should return true
125       // add a region server - total of 3
126       LOG.info("Started third server=" +
127           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
128       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
129       assertRegionsAreBalanced();
130   
131       // kill a region server - total of 2
132       LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
133       UTIL.getHBaseCluster().waitOnRegionServer(2);
134       waitOnCrashProcessing();
135       UTIL.getHBaseCluster().getMaster().balance();
136       assertRegionsAreBalanced();
137   
138       // start two more region servers - total of 4
139       LOG.info("Readding third server=" +
140           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
141       LOG.info("Added fourth server=" +
142           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
143       waitOnCrashProcessing();
144       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
145       assertRegionsAreBalanced();
146       for (int i = 0; i < 6; i++){
147         LOG.info("Adding " + (i + 5) + "th region server");
148         UTIL.getHBaseCluster().startRegionServer();
149       }
150       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
151       assertRegionsAreBalanced();
152       regionLocator.close();
153     }
154   }
155 
156   /**
157    * Wait on crash processing. Balancer won't run if processing a crashed server.
158    */
159   private void waitOnCrashProcessing() {
160     while (UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
161       LOG.info("Waiting on processing of crashed server before proceeding...");
162       Threads.sleep(1000);
163     }
164   }
165 
166   /**
167    * Determine if regions are balanced. Figure out the total, divide by the
168    * number of online servers, then test if each server is +/- 1 of average
169    * rounded up.
170    */
171   private void assertRegionsAreBalanced() throws IOException {
172     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
173     // balancer does not.
174     boolean success = false;
175     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
176     if (slop <= 0) slop = 1;
177 
178     for (int i = 0; i < 5; i++) {
179       success = true;
180       // make sure all the regions are reassigned before we test balance
181       waitForAllRegionsAssigned();
182 
183       long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
184       List<HRegionServer> servers = getOnlineRegionServers();
185       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
186       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
187       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
188       LOG.debug("There are " + servers.size() + " servers and " + regionCount
189         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
190         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
191 
192       for (HRegionServer server : servers) {
193         int serverLoad =
194           ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
195         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
196         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
197             && serverLoad >= avgLoadMinusSlop)) {
198           for (HRegionInfo hri :
199               ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
200             if (hri.isMetaRegion()) serverLoad--;
201             // LOG.debug(hri.getRegionNameAsString());
202           }
203           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
204             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
205                 " actual: " + serverLoad + " slop: " + slop);
206             success = false;
207             break;
208           }
209         }
210       }
211 
212       if (!success) {
213         // one or more servers are not balanced. sleep a little to give it a
214         // chance to catch up. then, go back to the retry loop.
215         try {
216           Thread.sleep(10000);
217         } catch (InterruptedException e) {}
218 
219         UTIL.getHBaseCluster().getMaster().balance();
220         continue;
221       }
222 
223       // if we get here, all servers were balanced, so we should just return.
224       return;
225     }
226     // if we get here, we tried 5 times and never got to short circuit out of
227     // the retry loop, so this is a failure.
228     fail("After 5 attempts, region assignments were not balanced.");
229   }
230 
231   private List<HRegionServer> getOnlineRegionServers() {
232     List<HRegionServer> list = new ArrayList<HRegionServer>();
233     for (JVMClusterUtil.RegionServerThread rst :
234         UTIL.getHBaseCluster().getRegionServerThreads()) {
235       if (rst.getRegionServer().isOnline()) {
236         list.add(rst.getRegionServer());
237       }
238     }
239     return list;
240   }
241 
242   /**
243    * Wait until all the regions are assigned.
244    */
245   private void waitForAllRegionsAssigned() throws IOException {
246     int totalRegions = HBaseTestingUtility.KEYS.length;
247     while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
248     // while (!cluster.getMaster().allRegionsAssigned()) {
249       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
250         + UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
251       try {
252         Thread.sleep(200);
253       } catch (InterruptedException e) {}
254     }
255     RegionStates regionStates = UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
256     while (!regionStates.getRegionsInTransition().isEmpty()) {
257       Threads.sleep(100);
258     }
259   }
260 
261 }
262