View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.List;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.catalog.CatalogTracker;
33  import org.apache.hadoop.hbase.catalog.MetaReader;
34  import org.apache.hadoop.hbase.client.HBaseAdmin;
35  import org.apache.hadoop.hbase.client.HTable;
36  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
37  import org.apache.hadoop.hbase.regionserver.HRegionServer;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.hbase.util.JVMClusterUtil;
40  import org.junit.After;
41  import org.junit.Before;
42  import org.junit.Test;
43  import org.junit.experimental.categories.Category;
44  import org.junit.runner.RunWith;
45  import org.junit.runners.Parameterized;
46  import org.junit.runners.Parameterized.Parameters;
47  
48  /**
49   * Test whether region re-balancing works. (HBASE-71)
50   */
51  @Category(LargeTests.class)
52  @RunWith(value = Parameterized.class)
53  public class TestRegionRebalancing {
54  
55    @Parameters
56    public static Collection<Object[]> data() {
57      Object[][] balancers =
58          new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
59              { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
60      return Arrays.asList(balancers);
61    }
62  
63    private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
64    public static final Log LOG = LogFactory.getLog(TestRegionRebalancing.class);
65    private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
66    private HTable table;
67    private HTableDescriptor desc;
68    private String balancerName;
69  
70    public TestRegionRebalancing(String balancerName) {
71      this.balancerName = balancerName;
72  
73    }
74  
75    @After
76    public void after() throws Exception {
77      UTIL.shutdownMiniCluster();
78    }
79  
80    @Before
81    public void before() throws Exception {
82      UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
83      UTIL.startMiniCluster(1);
84      this.desc = new HTableDescriptor(TableName.valueOf("test"));
85      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
86    }
87  
88    /**
89     * For HBASE-71. Try a few different configurations of starting and stopping
90     * region servers to see if the assignment or regions is pretty balanced.
91     * @throws IOException
92     * @throws InterruptedException
93     */
94    @Test (timeout=300000)
95    public void testRebalanceOnRegionServerNumberChange()
96    throws IOException, InterruptedException {
97      HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
98      admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
99          1, HBaseTestingUtility.KEYS.length));
100     this.table = new HTable(UTIL.getConfiguration(), this.desc.getTableName());
101     CatalogTracker ct = new CatalogTracker(UTIL.getConfiguration());
102     ct.start();
103     try {
104       MetaReader.fullScanMetaAndPrint(ct);
105     } finally {
106       ct.stop();
107     }
108     assertEquals("Test table should have right number of regions",
109       HBaseTestingUtility.KEYS.length,
110       this.table.getStartKeys().length);
111 
112     // verify that the region assignments are balanced to start out
113     assertRegionsAreBalanced();
114 
115     // add a region server - total of 2
116     LOG.info("Started second server=" +
117       UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
118     UTIL.getHBaseCluster().getMaster().balance();
119     assertRegionsAreBalanced();
120 
121     // add a region server - total of 3
122     LOG.info("Started third server=" +
123         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
124     UTIL.getHBaseCluster().getMaster().balance();
125     assertRegionsAreBalanced();
126 
127     // kill a region server - total of 2
128     LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
129     UTIL.getHBaseCluster().waitOnRegionServer(2);
130     UTIL.getHBaseCluster().getMaster().balance();
131     assertRegionsAreBalanced();
132 
133     // start two more region servers - total of 4
134     LOG.info("Readding third server=" +
135         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
136     LOG.info("Added fourth server=" +
137         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
138     UTIL.getHBaseCluster().getMaster().balance();
139     assertRegionsAreBalanced();
140 
141     for (int i = 0; i < 6; i++){
142       LOG.info("Adding " + (i + 5) + "th region server");
143       UTIL.getHBaseCluster().startRegionServer();
144     }
145     UTIL.getHBaseCluster().getMaster().balance();
146     assertRegionsAreBalanced();
147     table.close();
148   }
149 
150   /** figure out how many regions are currently being served. */
151   private int getRegionCount() throws IOException {
152     int total = 0;
153     for (HRegionServer server : getOnlineRegionServers()) {
154       total += ProtobufUtil.getOnlineRegions(server).size();
155     }
156     return total;
157   }
158 
159   /**
160    * Determine if regions are balanced. Figure out the total, divide by the
161    * number of online servers, then test if each server is +/- 1 of average
162    * rounded up.
163    */
164   private void assertRegionsAreBalanced() throws IOException {
165     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
166     // balancer does not.
167     boolean success = false;
168     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
169     if (slop <= 0) slop = 1;
170 
171     for (int i = 0; i < 5; i++) {
172       success = true;
173       // make sure all the regions are reassigned before we test balance
174       waitForAllRegionsAssigned();
175 
176       int regionCount = getRegionCount();
177       List<HRegionServer> servers = getOnlineRegionServers();
178       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
179       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
180       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
181       LOG.debug("There are " + servers.size() + " servers and " + regionCount
182         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
183         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
184 
185       for (HRegionServer server : servers) {
186         int serverLoad = ProtobufUtil.getOnlineRegions(server).size();
187         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
188         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
189             && serverLoad >= avgLoadMinusSlop)) {
190           for (HRegionInfo hri : ProtobufUtil.getOnlineRegions(server)) {
191             if (hri.isMetaRegion()) serverLoad--;
192             // LOG.debug(hri.getRegionNameAsString());
193           }
194           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
195             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
196                 " actual: " + serverLoad + " slop: " + slop);
197             success = false;            
198             break;
199           }
200         }
201       }
202 
203       if (!success) {
204         // one or more servers are not balanced. sleep a little to give it a
205         // chance to catch up. then, go back to the retry loop.
206         try {
207           Thread.sleep(10000);
208         } catch (InterruptedException e) {}
209 
210         UTIL.getHBaseCluster().getMaster().balance();
211         continue;
212       }
213 
214       // if we get here, all servers were balanced, so we should just return.
215       return;
216     }
217     // if we get here, we tried 5 times and never got to short circuit out of
218     // the retry loop, so this is a failure.
219     fail("After 5 attempts, region assignments were not balanced.");
220   }
221 
222   private List<HRegionServer> getOnlineRegionServers() {
223     List<HRegionServer> list = new ArrayList<HRegionServer>();
224     for (JVMClusterUtil.RegionServerThread rst :
225         UTIL.getHBaseCluster().getRegionServerThreads()) {
226       if (rst.getRegionServer().isOnline()) {
227         list.add(rst.getRegionServer());
228       }
229     }
230     return list;
231   }
232 
233   /**
234    * Wait until all the regions are assigned.
235    */
236   private void waitForAllRegionsAssigned() throws IOException {
237     int totalRegions = HBaseTestingUtility.KEYS.length+1;
238     while (getRegionCount() < totalRegions) {
239     // while (!cluster.getMaster().allRegionsAssigned()) {
240       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are " + getRegionCount() + " right now.");
241       try {
242         Thread.sleep(200);
243       } catch (InterruptedException e) {}
244     }
245   }
246 
247 }
248