1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.chaos.actions;
20
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24
25 import org.apache.hadoop.hbase.ServerName;
26 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
27
28
29
30
31 public class BatchRestartRsAction extends RestartActionBaseAction {
32 float ratio;
33
34 public BatchRestartRsAction(long sleepTime, float ratio) {
35 super(sleepTime);
36 this.ratio = ratio;
37 }
38
39 @Override
40 public void perform() throws Exception {
41 LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
42 (int)(ratio * 100)));
43 List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
44 ratio);
45
46 Set<ServerName> killedServers = new HashSet<ServerName>();
47
48 for (ServerName server : selectedServers) {
49
50
51 if (context.isStopping()) {
52 break;
53 }
54 LOG.info("Killing region server:" + server);
55 cluster.killRegionServer(server);
56 killedServers.add(server);
57 }
58
59 for (ServerName server : killedServers) {
60 cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
61 }
62
63 LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
64 + cluster.getClusterStatus().getServersSize());
65
66 sleep(sleepTime);
67
68 for (ServerName server : killedServers) {
69 LOG.info("Starting region server:" + server.getHostname());
70 cluster.startRegionServer(server.getHostname(), server.getPort());
71
72 }
73 for (ServerName server : killedServers) {
74 cluster.waitForRegionServerToStart(server.getHostname(),
75 server.getPort(),
76 PolicyBasedChaosMonkey.TIMEOUT);
77 }
78 LOG.info("Started " + killedServers.size() +" region servers. Reported num of rs:"
79 + cluster.getClusterStatus().getServersSize());
80 }
81 }