1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.SortedMap;
31
32 import com.yammer.metrics.core.*;
33 import com.yammer.metrics.reporting.ConsoleReporter;
34
35 import org.apache.commons.cli.CommandLine;
36 import org.apache.commons.cli.CommandLineParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Options;
39 import org.apache.commons.cli.ParseException;
40 import org.apache.commons.cli.PosixParser;
41 import org.apache.commons.logging.Log;
42 import org.apache.commons.logging.LogFactory;
43 import org.apache.hadoop.classification.InterfaceAudience;
44 import org.apache.hadoop.classification.InterfaceStability;
45 import org.apache.hadoop.conf.Configuration;
46 import org.apache.hadoop.fs.FileSystem;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.TableName;
49 import org.apache.hadoop.hbase.HBaseConfiguration;
50 import org.apache.hadoop.hbase.HRegionInfo;
51 import org.apache.hadoop.hbase.KeyValue;
52 import org.apache.hadoop.hbase.Tag;
53 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
54 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
55 import org.apache.hadoop.hbase.util.BloomFilter;
56 import org.apache.hadoop.hbase.util.BloomFilterFactory;
57 import org.apache.hadoop.hbase.util.ByteBloomFilter;
58 import org.apache.hadoop.hbase.util.Bytes;
59 import org.apache.hadoop.hbase.util.FSUtils;
60 import org.apache.hadoop.hbase.util.Writables;
61
62
63
64
65 @InterfaceAudience.Public
66 @InterfaceStability.Evolving
67 public class HFilePrettyPrinter {
68
69 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
70
71 private Options options = new Options();
72
73 private boolean verbose;
74 private boolean printValue;
75 private boolean printKey;
76 private boolean shouldPrintMeta;
77 private boolean printBlocks;
78 private boolean printStats;
79 private boolean checkRow;
80 private boolean checkFamily;
81 private boolean isSeekToRow = false;
82
83
84
85
86 private byte[] row = null;
87 private Configuration conf;
88
89 private List<Path> files = new ArrayList<Path>();
90 private int count;
91
92 private static final String FOUR_SPACES = " ";
93
/**
 * Creates a printer and registers every command-line option it accepts.
 * Options that take an argument are -f (file), -w (seekToRow) and
 * -r (region); all others are plain switches.
 */
public HFilePrettyPrinter() {
  // addOption returns the Options instance, so registrations can be chained.
  options
      .addOption("v", "verbose", false,
          "Verbose output; emits file and meta data delimiters")
      .addOption("p", "printkv", false, "Print key/value pairs")
      .addOption("e", "printkey", false, "Print keys")
      .addOption("m", "printmeta", false, "Print meta data of file")
      .addOption("b", "printblocks", false, "Print block index meta data")
      .addOption("k", "checkrow", false,
          "Enable row order check; looks for out-of-order keys")
      .addOption("a", "checkfamily", false, "Enable family check")
      .addOption("f", "file", true,
          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34")
      .addOption("w", "seekToRow", true,
          "Seek to this row and print all the kvs for this row only")
      .addOption("r", "region", true,
          "Region to scan. Pass region name; e.g. 'hbase:meta,,1'")
      .addOption("s", "stats", false, "Print statistics");
}
112
113 public boolean parseOptions(String args[]) throws ParseException,
114 IOException {
115 if (args.length == 0) {
116 HelpFormatter formatter = new HelpFormatter();
117 formatter.printHelp("HFile", options, true);
118 return false;
119 }
120 CommandLineParser parser = new PosixParser();
121 CommandLine cmd = parser.parse(options, args);
122
123 verbose = cmd.hasOption("v");
124 printValue = cmd.hasOption("p");
125 printKey = cmd.hasOption("e") || printValue;
126 shouldPrintMeta = cmd.hasOption("m");
127 printBlocks = cmd.hasOption("b");
128 printStats = cmd.hasOption("s");
129 checkRow = cmd.hasOption("k");
130 checkFamily = cmd.hasOption("a");
131
132 if (cmd.hasOption("f")) {
133 files.add(new Path(cmd.getOptionValue("f")));
134 }
135
136 if (cmd.hasOption("w")) {
137 String key = cmd.getOptionValue("w");
138 if (key != null && key.length() != 0) {
139 row = key.getBytes();
140 isSeekToRow = true;
141 } else {
142 System.err.println("Invalid row is specified.");
143 System.exit(-1);
144 }
145 }
146
147 if (cmd.hasOption("r")) {
148 String regionName = cmd.getOptionValue("r");
149 byte[] rn = Bytes.toBytes(regionName);
150 byte[][] hri = HRegionInfo.parseRegionName(rn);
151 Path rootDir = FSUtils.getRootDir(conf);
152 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
153 String enc = HRegionInfo.encodeRegionName(rn);
154 Path regionDir = new Path(tableDir, enc);
155 if (verbose)
156 System.out.println("region dir -> " + regionDir);
157 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
158 regionDir);
159 if (verbose)
160 System.out.println("Number of region files found -> "
161 + regionFiles.size());
162 if (verbose) {
163 int i = 1;
164 for (Path p : regionFiles) {
165 if (verbose)
166 System.out.println("Found file[" + i++ + "] -> " + p);
167 }
168 }
169 files.addAll(regionFiles);
170 }
171
172 return true;
173 }
174
175
176
177
178
179 public int run(String[] args) {
180 conf = HBaseConfiguration.create();
181 try {
182 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
183 if (!parseOptions(args))
184 return 1;
185 } catch (IOException ex) {
186 LOG.error("Error parsing command-line options", ex);
187 return 1;
188 } catch (ParseException ex) {
189 LOG.error("Error parsing command-line options", ex);
190 return 1;
191 }
192
193
194 for (Path fileName : files) {
195 try {
196 processFile(fileName);
197 } catch (IOException ex) {
198 LOG.error("Error reading " + fileName, ex);
199 }
200 }
201
202 if (verbose || printKey) {
203 System.out.println("Scanned kv count -> " + count);
204 }
205
206 return 0;
207 }
208
209 private void processFile(Path file) throws IOException {
210 if (verbose)
211 System.out.println("Scanning -> " + file);
212 FileSystem fs = file.getFileSystem(conf);
213 if (!fs.exists(file)) {
214 System.err.println("ERROR, file doesnt exist: " + file);
215 }
216
217 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf), conf);
218
219 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
220
221 KeyValueStatsCollector fileStats = null;
222
223 if (verbose || printKey || checkRow || checkFamily || printStats) {
224
225 HFileScanner scanner = reader.getScanner(false, false, false);
226 fileStats = new KeyValueStatsCollector();
227 boolean shouldScanKeysValues = false;
228 if (this.isSeekToRow) {
229
230 shouldScanKeysValues =
231 (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
232 } else {
233 shouldScanKeysValues = scanner.seekTo();
234 }
235 if (shouldScanKeysValues)
236 scanKeysValues(file, fileStats, scanner, row);
237 }
238
239
240 if (shouldPrintMeta) {
241 printMeta(reader, fileInfo);
242 }
243
244 if (printBlocks) {
245 System.out.println("Block Index:");
246 System.out.println(reader.getDataBlockIndexReader());
247 }
248
249 if (printStats) {
250 fileStats.finish();
251 System.out.println("Stats:\n" + fileStats);
252 }
253
254 reader.close();
255 }
256
257 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
258 HFileScanner scanner, byte[] row) throws IOException {
259 KeyValue pkv = null;
260 do {
261 KeyValue kv = scanner.getKeyValue();
262 if (row != null && row.length != 0) {
263 int result = Bytes.compareTo(kv.getRow(), row);
264 if (result > 0) {
265 break;
266 } else if (result < 0) {
267 continue;
268 }
269 }
270
271 if (printStats) {
272 fileStats.collect(kv);
273 }
274
275 if (printKey) {
276 System.out.print("K: " + kv);
277 if (printValue) {
278 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
279 int i = 0;
280 List<Tag> tags = kv.getTags();
281 for (Tag tag : tags) {
282 System.out
283 .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
284 }
285 }
286 System.out.println();
287 }
288
289 if (checkRow && pkv != null) {
290 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
291 System.err.println("WARNING, previous row is greater then"
292 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
293 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
294 + Bytes.toStringBinary(kv.getKey()));
295 }
296 }
297
298 if (checkFamily) {
299 String fam = Bytes.toString(kv.getFamily());
300 if (!file.toString().contains(fam)) {
301 System.err.println("WARNING, filename does not match kv family,"
302 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
303 + Bytes.toStringBinary(kv.getKey()));
304 }
305 if (pkv != null
306 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
307 System.err.println("WARNING, previous kv has different family"
308 + " compared to current key\n\tfilename -> " + file
309 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
310 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
311 }
312 }
313 pkv = kv;
314 ++count;
315 } while (scanner.next());
316 }
317
318
319
320
321
322 private static String asSeparateLines(String keyValueStr) {
323 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
324 ",\n" + FOUR_SPACES + "$1");
325 }
326
327 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
328 throws IOException {
329 System.out.println("Block index size as per heapsize: "
330 + reader.indexSize());
331 System.out.println(asSeparateLines(reader.toString()));
332 System.out.println("Trailer:\n "
333 + asSeparateLines(reader.getTrailer().toString()));
334 System.out.println("Fileinfo:");
335 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
336 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
337 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
338 long seqid = Bytes.toLong(e.getValue());
339 System.out.println(seqid);
340 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
341 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
342 Writables.copyWritable(e.getValue(), timeRangeTracker);
343 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
344 + timeRangeTracker.getMaximumTimestamp());
345 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
346 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
347 System.out.println(Bytes.toInt(e.getValue()));
348 } else {
349 System.out.println(Bytes.toStringBinary(e.getValue()));
350 }
351 }
352
353 try {
354 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
355 } catch (Exception e) {
356 System.out.println ("Unable to retrieve the midkey");
357 }
358
359
360 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
361 BloomFilter bloomFilter = null;
362 if (bloomMeta != null)
363 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
364
365 System.out.println("Bloom filter:");
366 if (bloomFilter != null) {
367 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
368 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
369 } else {
370 System.out.println(FOUR_SPACES + "Not present");
371 }
372
373
374 bloomMeta = reader.getDeleteBloomFilterMetadata();
375 bloomFilter = null;
376 if (bloomMeta != null)
377 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
378
379 System.out.println("Delete Family Bloom filter:");
380 if (bloomFilter != null) {
381 System.out.println(FOUR_SPACES
382 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
383 "\n" + FOUR_SPACES));
384 } else {
385 System.out.println(FOUR_SPACES + "Not present");
386 }
387 }
388
389 private static class KeyValueStatsCollector {
390 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
391 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
392 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
393 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
394 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
395 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
396 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
397
398 long curRowBytes = 0;
399 long curRowCols = 0;
400
401 byte[] biggestRow = null;
402
403 private KeyValue prevKV = null;
404 private long maxRowBytes = 0;
405 private long curRowKeyLength;
406
407 public void collect(KeyValue kv) {
408 valLen.update(kv.getValueLength());
409 if (prevKV != null &&
410 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
411
412 collectRow();
413 }
414 curRowBytes += kv.getLength();
415 curRowKeyLength = kv.getKeyLength();
416 curRowCols++;
417 prevKV = kv;
418 }
419
420 private void collectRow() {
421 rowSizeBytes.update(curRowBytes);
422 rowSizeCols.update(curRowCols);
423 keyLen.update(curRowKeyLength);
424
425 if (curRowBytes > maxRowBytes && prevKV != null) {
426 biggestRow = prevKV.getRow();
427 maxRowBytes = curRowBytes;
428 }
429
430 curRowBytes = 0;
431 curRowCols = 0;
432 }
433
434 public void finish() {
435 if (curRowCols > 0) {
436 collectRow();
437 }
438 }
439
440 @Override
441 public String toString() {
442 if (prevKV == null)
443 return "no data available for statistics";
444
445
446 simpleReporter.shutdown();
447 simpleReporter.run();
448 metricsRegistry.shutdown();
449
450 return
451 metricsOutput.toString() +
452 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
453 }
454 }
455
456 private static class SimpleReporter extends ConsoleReporter {
457 private final PrintStream out;
458
459 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
460 super(metricsRegistry, out, MetricPredicate.ALL);
461 this.out = out;
462 }
463
464 @Override
465 public void run() {
466 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
467 MetricPredicate.ALL).entrySet()) {
468 try {
469 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
470 out.print(" " + subEntry.getKey().getName());
471 out.println(':');
472
473 subEntry.getValue().processWith(this, subEntry.getKey(), out);
474 }
475 } catch (Exception e) {
476 e.printStackTrace(out);
477 }
478 }
479 }
480
481 @Override
482 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
483 super.processHistogram(name, histogram, stream);
484 stream.printf(Locale.getDefault(), " count = %d\n", histogram.count());
485 }
486 }
487 }