1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.tool;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.apache.commons.lang.time.StopWatch;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.DoNotRetryIOException;
38 import org.apache.hadoop.hbase.HBaseConfiguration;
39 import org.apache.hadoop.hbase.HColumnDescriptor;
40 import org.apache.hadoop.hbase.HRegionInfo;
41 import org.apache.hadoop.hbase.HTableDescriptor;
42 import org.apache.hadoop.hbase.ServerName;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.TableNotEnabledException;
45 import org.apache.hadoop.hbase.TableNotFoundException;
46 import org.apache.hadoop.hbase.client.Get;
47 import org.apache.hadoop.hbase.client.HBaseAdmin;
48 import org.apache.hadoop.hbase.client.HTable;
49 import org.apache.hadoop.hbase.client.ResultScanner;
50 import org.apache.hadoop.hbase.client.Scan;
51 import org.apache.hadoop.util.Tool;
52 import org.apache.hadoop.util.ToolRunner;
53
54
55
56
57
58
59
60
61
62
63
64
65 public final class Canary implements Tool {
66
67 public interface Sink {
68 public void publishReadFailure(HRegionInfo region, Exception e);
69 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e);
70 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
71 }
72
73
74 public interface ExtendedSink extends Sink {
75 public void publishReadFailure(String table, String server);
76 public void publishReadTiming(String table, String server, long msTime);
77 }
78
79
80
81 public static class StdOutSink implements Sink {
82 @Override
83 public void publishReadFailure(HRegionInfo region, Exception e) {
84 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e);
85 }
86
87 @Override
88 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) {
89 LOG.error(String.format("read from region %s column family %s failed",
90 region.getRegionNameAsString(), column.getNameAsString()), e);
91 }
92
93 @Override
94 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
95 LOG.info(String.format("read from region %s column family %s in %dms",
96 region.getRegionNameAsString(), column.getNameAsString(), msTime));
97 }
98 }
99
100 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink {
101
102 @Override
103 public void publishReadFailure(String table, String server) {
104 LOG.error(String.format("Read from table:%s on region server:%s", table, server));
105 }
106
107 @Override
108 public void publishReadTiming(String table, String server, long msTime) {
109 LOG.info(String.format("Read from table:%s on region server:%s in %dms",
110 table, server, msTime));
111 }
112 }
113
114 private static final int USAGE_EXIT_CODE = 1;
115 private static final int INIT_ERROR_EXIT_CODE = 2;
116 private static final int TIMEOUT_ERROR_EXIT_CODE = 3;
117 private static final int ERROR_EXIT_CODE = 4;
118
119 private static final long DEFAULT_INTERVAL = 6000;
120
121 private static final long DEFAULT_TIMEOUT = 600000;
122
123 private static final Log LOG = LogFactory.getLog(Canary.class);
124
125 private Configuration conf = null;
126 private long interval = 0;
127 private Sink sink = null;
128
129 private boolean useRegExp;
130 private long timeout = DEFAULT_TIMEOUT;
131 private boolean failOnError = true;
132 private boolean regionServerMode = false;
133
/**
 * Creates a canary that publishes its results to {@code sink}.
 */
public Canary(final Sink sink) {
  this.sink = sink;
}

/**
 * Creates a canary that publishes its results through a {@link RegionServerStdOutSink}.
 */
public Canary() {
  this(new RegionServerStdOutSink());
}
141
142 @Override
143 public Configuration getConf() {
144 return conf;
145 }
146
147 @Override
148 public void setConf(Configuration conf) {
149 this.conf = conf;
150 }
151
152 @Override
153 public int run(String[] args) throws Exception {
154 int index = -1;
155
156
157 for (int i = 0; i < args.length; i++) {
158 String cmd = args[i];
159
160 if (cmd.startsWith("-")) {
161 if (index >= 0) {
162
163 System.err.println("Invalid command line options");
164 printUsageAndExit();
165 }
166
167 if (cmd.equals("-help")) {
168
169 printUsageAndExit();
170 } else if (cmd.equals("-daemon") && interval == 0) {
171
172 interval = DEFAULT_INTERVAL;
173 } else if (cmd.equals("-interval")) {
174
175 i++;
176
177 if (i == args.length) {
178 System.err.println("-interval needs a numeric value argument.");
179 printUsageAndExit();
180 }
181
182 try {
183 interval = Long.parseLong(args[i]) * 1000;
184 } catch (NumberFormatException e) {
185 System.err.println("-interval needs a numeric value argument.");
186 printUsageAndExit();
187 }
188 } else if(cmd.equals("-regionserver")) {
189 this.regionServerMode = true;
190 } else if (cmd.equals("-e")) {
191 this.useRegExp = true;
192 } else if (cmd.equals("-t")) {
193 i++;
194
195 if (i == args.length) {
196 System.err.println("-t needs a numeric value argument.");
197 printUsageAndExit();
198 }
199
200 try {
201 this.timeout = Long.parseLong(args[i]);
202 } catch (NumberFormatException e) {
203 System.err.println("-t needs a numeric value argument.");
204 printUsageAndExit();
205 }
206
207 } else if (cmd.equals("-f")) {
208 i++;
209
210 if (i == args.length) {
211 System.err
212 .println("-f needs a boolean value argument (true|false).");
213 printUsageAndExit();
214 }
215
216 this.failOnError = Boolean.parseBoolean(args[i]);
217 } else {
218
219 System.err.println(cmd + " options is invalid.");
220 printUsageAndExit();
221 }
222 } else if (index < 0) {
223
224 index = i;
225 }
226 }
227
228
229 Monitor monitor = null;
230 Thread monitorThread = null;
231 long startTime = 0;
232 long currentTimeLength = 0;
233
234 do {
235
236 monitor = this.newMonitor(index, args);
237 monitorThread = new Thread(monitor);
238 startTime = System.currentTimeMillis();
239 monitorThread.start();
240 while (!monitor.isDone()) {
241
242 Thread.sleep(1000);
243
244 if (this.failOnError && monitor.hasError()) {
245 monitorThread.interrupt();
246 if (monitor.initialized) {
247 System.exit(monitor.errorCode);
248 } else {
249 System.exit(INIT_ERROR_EXIT_CODE);
250 }
251 }
252 currentTimeLength = System.currentTimeMillis() - startTime;
253 if (currentTimeLength > this.timeout) {
254 LOG.error("The monitor is running too long (" + currentTimeLength
255 + ") after timeout limit:" + this.timeout
256 + " will be killed itself !!");
257 if (monitor.initialized) {
258 System.exit(TIMEOUT_ERROR_EXIT_CODE);
259 } else {
260 System.exit(INIT_ERROR_EXIT_CODE);
261 }
262 break;
263 }
264 }
265
266 if (this.failOnError && monitor.hasError()) {
267 monitorThread.interrupt();
268 System.exit(monitor.errorCode);
269 }
270
271 Thread.sleep(interval);
272 } while (interval > 0);
273
274 return(monitor.errorCode);
275 }
276
277 private void printUsageAndExit() {
278 System.err.printf(
279 "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n",
280 getClass().getName());
281 System.err.println(" where [opts] are:");
282 System.err.println(" -help Show this help and exit.");
283 System.err.println(" -regionserver replace the table argument to regionserver,");
284 System.err.println(" which means to enable regionserver mode");
285 System.err.println(" -daemon Continuous check at defined intervals.");
286 System.err.println(" -interval <N> Interval between checks (sec)");
287 System.err.println(" -e Use region/regionserver as regular expression");
288 System.err.println(" which means the region/regionserver is regular expression pattern");
289 System.err.println(" -f <B> stop whole program if first error occurs," +
290 " default is true");
291 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)");
292 System.exit(USAGE_EXIT_CODE);
293 }
294
295
296
297
298
299
300
301
302 public Monitor newMonitor(int index, String[] args) {
303 Monitor monitor = null;
304 String[] monitorTargets = null;
305
306 if(index >= 0) {
307 int length = args.length - index;
308 monitorTargets = new String[length];
309 System.arraycopy(args, index, monitorTargets, 0, length);
310 }
311
312 if(this.regionServerMode) {
313 monitor = new RegionServerMonitor(
314 this.conf,
315 monitorTargets,
316 this.useRegExp,
317 (ExtendedSink)this.sink);
318 } else {
319 monitor = new RegionMonitor(this.conf, monitorTargets, this.useRegExp, this.sink);
320 }
321 return monitor;
322 }
323
324
325 public static abstract class Monitor implements Runnable {
326
327 protected Configuration config;
328 protected HBaseAdmin admin;
329 protected String[] targets;
330 protected boolean useRegExp;
331 protected boolean initialized = false;
332
333 protected boolean done = false;
334 protected int errorCode = 0;
335 protected Sink sink;
336
337 public boolean isDone() {
338 return done;
339 }
340
341 public boolean hasError() {
342 return errorCode != 0;
343 }
344
345 protected Monitor(Configuration config, String[] monitorTargets,
346 boolean useRegExp, Sink sink) {
347 if (null == config)
348 throw new IllegalArgumentException("config shall not be null");
349
350 this.config = config;
351 this.targets = monitorTargets;
352 this.useRegExp = useRegExp;
353 this.sink = sink;
354 }
355
356 public abstract void run();
357
358 protected boolean initAdmin() {
359 if (null == this.admin) {
360 try {
361 this.admin = new HBaseAdmin(config);
362 } catch (Exception e) {
363 LOG.error("Initial HBaseAdmin failed...", e);
364 this.errorCode = INIT_ERROR_EXIT_CODE;
365 }
366 } else if (admin.isAborted()) {
367 LOG.error("HBaseAdmin aborted");
368 this.errorCode = INIT_ERROR_EXIT_CODE;
369 }
370 return !this.hasError();
371 }
372 }
373
374
375 private static class RegionMonitor extends Monitor {
376
377 public RegionMonitor(Configuration config, String[] monitorTargets,
378 boolean useRegExp, Sink sink) {
379 super(config, monitorTargets, useRegExp, sink);
380 }
381
382 @Override
383 public void run() {
384 if(this.initAdmin()) {
385 try {
386 if (this.targets != null && this.targets.length > 0) {
387 String[] tables = generateMonitorTables(this.targets);
388 this.initialized = true;
389 for (String table : tables) {
390 Canary.sniff(admin, sink, table);
391 }
392 } else {
393 sniff();
394 }
395 } catch (Exception e) {
396 LOG.error("Run regionMonitor failed", e);
397 this.errorCode = ERROR_EXIT_CODE;
398 }
399 }
400 this.done = true;
401 }
402
403 private String[] generateMonitorTables(String[] monitorTargets) throws IOException {
404 String[] returnTables = null;
405
406 if(this.useRegExp) {
407 Pattern pattern = null;
408 HTableDescriptor[] tds = null;
409 Set<String> tmpTables = new TreeSet<String>();
410 try {
411 for (String monitorTarget : monitorTargets) {
412 pattern = Pattern.compile(monitorTarget);
413 tds = this.admin.listTables(pattern);
414 if (tds != null) {
415 for (HTableDescriptor td : tds) {
416 tmpTables.add(td.getNameAsString());
417 }
418 }
419 }
420 } catch(IOException e) {
421 LOG.error("Communicate with admin failed", e);
422 throw e;
423 }
424
425 if(tmpTables.size() > 0) {
426 returnTables = tmpTables.toArray(new String[tmpTables.size()]);
427 } else {
428 String msg = "No HTable found, tablePattern:"
429 + Arrays.toString(monitorTargets);
430 LOG.error(msg);
431 this.errorCode = INIT_ERROR_EXIT_CODE;
432 throw new TableNotFoundException(msg);
433 }
434 } else {
435 returnTables = monitorTargets;
436 }
437
438 return returnTables;
439 }
440
441
442
443
444 private void sniff() throws Exception {
445 for (HTableDescriptor table : admin.listTables()) {
446 Canary.sniff(admin, sink, table);
447 }
448 }
449
450 }
451
452
453
454
455
456 public static void sniff(final HBaseAdmin admin, TableName tableName) throws Exception {
457 sniff(admin, new StdOutSink(), tableName.getNameAsString());
458 }
459
460
461
462
463
464 private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName)
465 throws Exception {
466 if (admin.isTableAvailable(tableName)) {
467 sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes()));
468 } else {
469 LOG.warn(String.format("Table %s is not available", tableName));
470 }
471 }
472
473
474
475
476 private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc)
477 throws Exception {
478 HTable table = null;
479
480 try {
481 table = new HTable(admin.getConfiguration(), tableDesc.getName());
482 } catch (TableNotFoundException e) {
483 return;
484 }
485
486 try {
487 for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
488 try {
489 sniffRegion(admin, sink, region, table);
490 } catch (Exception e) {
491 sink.publishReadFailure(region, e);
492 LOG.debug("sniffRegion failed", e);
493 }
494 }
495 } finally {
496 table.close();
497 }
498 }
499
500
501
502
503
504 private static void sniffRegion(
505 final HBaseAdmin admin,
506 final Sink sink,
507 HRegionInfo region,
508 HTable table) throws Exception {
509 HTableDescriptor tableDesc = table.getTableDescriptor();
510 byte[] startKey = null;
511 Get get = null;
512 Scan scan = null;
513 ResultScanner rs = null;
514 StopWatch stopWatch = new StopWatch();
515 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
516 stopWatch.reset();
517 startKey = region.getStartKey();
518
519 if (startKey.length > 0) {
520 get = new Get(startKey);
521 get.addFamily(column.getName());
522 } else {
523 scan = new Scan();
524 scan.setCaching(1);
525 scan.addFamily(column.getName());
526 scan.setMaxResultSize(1L);
527 }
528
529 try {
530 if (startKey.length > 0) {
531 stopWatch.start();
532 table.get(get);
533 stopWatch.stop();
534 sink.publishReadTiming(region, column, stopWatch.getTime());
535 } else {
536 stopWatch.start();
537 rs = table.getScanner(scan);
538 stopWatch.stop();
539 sink.publishReadTiming(region, column, stopWatch.getTime());
540 }
541 } catch (Exception e) {
542 sink.publishReadFailure(region, column, e);
543 } finally {
544 if (rs != null) {
545 rs.close();
546 }
547 scan = null;
548 get = null;
549 startKey = null;
550 }
551 }
552 }
553
554 private static class RegionServerMonitor extends Monitor {
555
556 public RegionServerMonitor(Configuration config, String[] monitorTargets,
557 boolean useRegExp, ExtendedSink sink) {
558 super(config, monitorTargets, useRegExp, sink);
559 }
560
561 private ExtendedSink getSink() {
562 return (ExtendedSink) this.sink;
563 }
564
565 @Override
566 public void run() {
567 if (this.initAdmin() && this.checkNoTableNames()) {
568 Map<String, List<HRegionInfo>> rsAndRMap = this.filterRegionServerByName();
569 this.initialized = true;
570 this.monitorRegionServers(rsAndRMap);
571 }
572 this.done = true;
573 }
574
575 private boolean checkNoTableNames() {
576 List<String> foundTableNames = new ArrayList<String>();
577 TableName[] tableNames = null;
578
579 try {
580 tableNames = this.admin.listTableNames();
581 } catch (IOException e) {
582 LOG.error("Get listTableNames failed", e);
583 this.errorCode = INIT_ERROR_EXIT_CODE;
584 return false;
585 }
586
587 if (this.targets == null || this.targets.length == 0) return true;
588
589 for (String target : this.targets) {
590 for (TableName tableName : tableNames) {
591 if (target.equals(tableName.getNameAsString())) {
592 foundTableNames.add(target);
593 }
594 }
595 }
596
597 if (foundTableNames.size() > 0) {
598 System.err.println("Cannot pass a tablename when using the -regionserver " +
599 "option, tablenames:" + foundTableNames.toString());
600 this.errorCode = USAGE_EXIT_CODE;
601 }
602 return foundTableNames.size() == 0;
603 }
604
605 private void monitorRegionServers(Map<String, List<HRegionInfo>> rsAndRMap) {
606 String serverName = null;
607 String tableName = null;
608 HRegionInfo region = null;
609 HTable table = null;
610 Get get = null;
611 byte[] startKey = null;
612 Scan scan = null;
613 StopWatch stopWatch = new StopWatch();
614
615 for (Map.Entry<String, List<HRegionInfo>> entry : rsAndRMap.entrySet()) {
616 stopWatch.reset();
617 serverName = entry.getKey();
618
619 region = entry.getValue().get(0);
620 try {
621 tableName = region.getTable().getNameAsString();
622 table = new HTable(this.admin.getConfiguration(), tableName);
623 startKey = region.getStartKey();
624
625 if(startKey.length > 0) {
626 get = new Get(startKey);
627 stopWatch.start();
628 table.get(get);
629 stopWatch.stop();
630 } else {
631 scan = new Scan();
632 scan.setCaching(1);
633 scan.setMaxResultSize(1L);
634 stopWatch.start();
635 table.getScanner(scan);
636 stopWatch.stop();
637 }
638 this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime());
639 } catch (TableNotFoundException tnfe) {
640
641 } catch (TableNotEnabledException tnee) {
642
643 LOG.debug("The targeted table was disabled. Assuming success.");
644 } catch (DoNotRetryIOException dnrioe) {
645 this.getSink().publishReadFailure(tableName, serverName);
646 LOG.error(dnrioe);
647 } catch (IOException e) {
648 this.getSink().publishReadFailure(tableName, serverName);
649 LOG.error(e);
650 this.errorCode = ERROR_EXIT_CODE;
651 } finally {
652 if (table != null) {
653 try {
654 table.close();
655 } catch (IOException e) {
656 }
657 }
658 scan = null;
659 get = null;
660 startKey = null;
661 }
662 }
663 }
664
665 private Map<String, List<HRegionInfo>> filterRegionServerByName() {
666 Map<String, List<HRegionInfo>> regionServerAndRegionsMap = this.getAllRegionServerByName();
667 regionServerAndRegionsMap = this.doFilterRegionServerByName(regionServerAndRegionsMap);
668 return regionServerAndRegionsMap;
669 }
670
671 private Map<String, List<HRegionInfo>> getAllRegionServerByName() {
672 Map<String, List<HRegionInfo>> rsAndRMap = new HashMap<String, List<HRegionInfo>>();
673 HTable table = null;
674 try {
675 HTableDescriptor[] tableDescs = this.admin.listTables();
676 List<HRegionInfo> regions = null;
677 for (HTableDescriptor tableDesc : tableDescs) {
678 table = new HTable(this.admin.getConfiguration(), tableDesc.getName());
679
680 for (Map.Entry<HRegionInfo, ServerName> entry : table
681 .getRegionLocations().entrySet()) {
682 ServerName rs = entry.getValue();
683 String rsName = rs.getHostname();
684 HRegionInfo r = entry.getKey();
685
686 if (rsAndRMap.containsKey(rsName)) {
687 regions = rsAndRMap.get(rsName);
688 } else {
689 regions = new ArrayList<HRegionInfo>();
690 rsAndRMap.put(rsName, regions);
691 }
692 regions.add(r);
693 }
694 table.close();
695 }
696
697 } catch (IOException e) {
698 String msg = "Get HTables info failed";
699 LOG.error(msg, e);
700 this.errorCode = INIT_ERROR_EXIT_CODE;
701 } finally {
702 if (table != null) {
703 try {
704 table.close();
705 } catch (IOException e) {
706 LOG.warn("Close table failed", e);
707 }
708 }
709 }
710
711 return rsAndRMap;
712 }
713
714 private Map<String, List<HRegionInfo>> doFilterRegionServerByName(
715 Map<String, List<HRegionInfo>> fullRsAndRMap) {
716
717 Map<String, List<HRegionInfo>> filteredRsAndRMap = null;
718
719 if (this.targets != null && this.targets.length > 0) {
720 filteredRsAndRMap = new HashMap<String, List<HRegionInfo>>();
721 Pattern pattern = null;
722 Matcher matcher = null;
723 boolean regExpFound = false;
724 for (String rsName : this.targets) {
725 if (this.useRegExp) {
726 regExpFound = false;
727 pattern = Pattern.compile(rsName);
728 for (Map.Entry<String,List<HRegionInfo>> entry : fullRsAndRMap.entrySet()) {
729 matcher = pattern.matcher(entry.getKey());
730 if (matcher.matches()) {
731 filteredRsAndRMap.put(entry.getKey(), entry.getValue());
732 regExpFound = true;
733 }
734 }
735 if (!regExpFound) {
736 LOG.info("No RegionServerInfo found, regionServerPattern:" + rsName);
737 }
738 } else {
739 if (fullRsAndRMap.containsKey(rsName)) {
740 filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName));
741 } else {
742 LOG.info("No RegionServerInfo found, regionServerName:" + rsName);
743 }
744 }
745 }
746 } else {
747 filteredRsAndRMap = fullRsAndRMap;
748 }
749 return filteredRsAndRMap;
750 }
751 }
752
753 public static void main(String[] args) throws Exception {
754 int exitCode = ToolRunner.run(HBaseConfiguration.create(), new Canary(), args);
755 System.exit(exitCode);
756 }
757 }