1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.chaos.actions;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.LinkedList;
24 import java.util.List;
25 import java.util.Queue;
26
27 import org.apache.commons.lang.math.RandomUtils;
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.ServerName;
31 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
32
33
34
35
36
37 public class RollingBatchRestartRsAction extends BatchRestartRsAction {
38 private static Log LOG = LogFactory.getLog(RollingBatchRestartRsAction.class);
39
40 public RollingBatchRestartRsAction(long sleepTime, float ratio) {
41 super(sleepTime, ratio);
42 }
43
44 @Override
45 public void perform() throws Exception {
46 LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
47 (int)(ratio * 100)));
48 List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
49 ratio);
50
51 Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
52 Queue<ServerName> deadServers = new LinkedList<ServerName>();
53
54
55 while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) {
56 boolean action = true;
57
58 if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) {
59 action = deadServers.isEmpty();
60 } else {
61 action = RandomUtils.nextBoolean();
62 }
63
64 if (action) {
65 ServerName server = serversToBeKilled.remove();
66 try {
67 killRs(server);
68 } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
69
70
71 LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
72 }
73 deadServers.add(server);
74 } else {
75 try {
76 ServerName server = deadServers.remove();
77 startRs(server);
78 } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
79
80
81 LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
82 }
83 }
84
85 sleep(RandomUtils.nextInt((int)sleepTime));
86 }
87 }
88
89
90
91
92
93
94 public static void main(final String[] args) throws Exception {
95 RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) {
96 private int invocations = 0;
97 @Override
98 protected ServerName[] getCurrentServers() throws IOException {
99 final int count = 4;
100 List<ServerName> serverNames = new ArrayList<ServerName>(count);
101 for (int i = 0; i < 4; i++) {
102 serverNames.add(ServerName.valueOf(i + ".example.org", i, i));
103 }
104 return serverNames.toArray(new ServerName [] {});
105 }
106
107 @Override
108 protected void killRs(ServerName server) throws IOException {
109 LOG.info("Killed " + server);
110 if (this.invocations++ % 3 == 0) {
111 throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
112 }
113 }
114
115 @Override
116 protected void startRs(ServerName server) throws IOException {
117 LOG.info("Started " + server);
118 if (this.invocations++ % 3 == 0) {
119 throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
120 }
121 }
122 };
123
124 action.perform();
125 }
126 }