View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.chaos.actions;
20  
21  import java.util.ArrayList;
22  import java.util.LinkedList;
23  import java.util.List;
24  
25  import org.apache.commons.lang.math.RandomUtils;
26  import org.apache.hadoop.hbase.ClusterStatus;
27  import org.apache.hadoop.hbase.ServerName;
28  import org.junit.Assert;
29  
30  /** This action is too specific to put in ChaosMonkey; put it here */
31  public class UnbalanceKillAndRebalanceAction extends Action {
32    /** Fractions of servers to get regions and live and die respectively; from all other
33     * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */
34    private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1;
35    private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1;
36    private static final double HOARD_FRC_OF_REGIONS = 0.8;
37    /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
38     * and restarting the servers; to make sure these events have time to impact the cluster. */
39    private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000;
40    private static final long WAIT_FOR_KILLS_MS = 2 * 1000;
41    private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000;
42  
43    @Override
44    public void perform() throws Exception {
45      ClusterStatus status = this.cluster.getClusterStatus();
46      List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
47      int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
48      int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size());
49      Assert.assertTrue((liveCount + deadCount) < victimServers.size());
50      List<ServerName> targetServers = new ArrayList<ServerName>(liveCount);
51      for (int i = 0; i < liveCount + deadCount; ++i) {
52        int victimIx = RandomUtils.nextInt(victimServers.size());
53        targetServers.add(victimServers.remove(victimIx));
54      }
55      unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
56      Thread.sleep(WAIT_FOR_UNBALANCE_MS);
57      for (int i = 0; i < liveCount; ++i) {
58        killRs(targetServers.get(i));
59      }
60      Thread.sleep(WAIT_FOR_KILLS_MS);
61      forceBalancer();
62      Thread.sleep(WAIT_AFTER_BALANCE_MS);
63      for (int i = 0; i < liveCount; ++i) {
64        startRs(targetServers.get(i));
65      }
66    }
67  }