View Javadoc

1   
2   /*
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.io.PrintStream;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.SortedMap;
31  
32  import com.yammer.metrics.core.*;
33  import com.yammer.metrics.reporting.ConsoleReporter;
34  
35  import org.apache.commons.cli.CommandLine;
36  import org.apache.commons.cli.CommandLineParser;
37  import org.apache.commons.cli.HelpFormatter;
38  import org.apache.commons.cli.Options;
39  import org.apache.commons.cli.ParseException;
40  import org.apache.commons.cli.PosixParser;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.hadoop.classification.InterfaceAudience;
44  import org.apache.hadoop.classification.InterfaceStability;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.TableName;
49  import org.apache.hadoop.hbase.HBaseConfiguration;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.KeyValue;
52  import org.apache.hadoop.hbase.Tag;
53  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
54  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
55  import org.apache.hadoop.hbase.util.BloomFilter;
56  import org.apache.hadoop.hbase.util.BloomFilterFactory;
57  import org.apache.hadoop.hbase.util.ByteBloomFilter;
58  import org.apache.hadoop.hbase.util.Bytes;
59  import org.apache.hadoop.hbase.util.FSUtils;
60  import org.apache.hadoop.hbase.util.Writables;
61  
62  /**
63   * Implements pretty-printing functionality for {@link HFile}s.
64   */
65  @InterfaceAudience.Public
66  @InterfaceStability.Evolving
67  public class HFilePrettyPrinter {
68  
  private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);

  /** Command-line options understood by this tool; populated in the constructor. */
  private Options options = new Options();

  // Flags parsed from the command line in parseOptions(); each corresponds to
  // a switch registered in the constructor.
  private boolean verbose;         // -v: emit file/meta delimiters and scan progress
  private boolean printValue;      // -p: print key/value pairs
  private boolean printKey;        // -e (also implied by -p): print keys
  private boolean shouldPrintMeta; // -m: print file meta data
  private boolean printBlocks;     // -b: print block index meta data
  private boolean printStats;      // -s: print key/value statistics
  private boolean checkRow;        // -k: warn about out-of-order rows
  private boolean checkFamily;     // -a: warn about family inconsistencies
  private boolean isSeekToRow = false; // true when a -w seek row was supplied

  /**
   * The row which the user wants to specify and print all the KeyValues for.
   */
  private byte[] row = null;

  // Cluster configuration; assigned in run() before any file is processed.
  private Configuration conf;

  // Files to scan, accumulated from the -f and -r options.
  private List<Path> files = new ArrayList<Path>();
  // Total number of KeyValues scanned across all files.
  private int count;

  private static final String FOUR_SPACES = "    ";
93  
94    public HFilePrettyPrinter() {
95      options.addOption("v", "verbose", false,
96          "Verbose output; emits file and meta data delimiters");
97      options.addOption("p", "printkv", false, "Print key/value pairs");
98      options.addOption("e", "printkey", false, "Print keys");
99      options.addOption("m", "printmeta", false, "Print meta data of file");
100     options.addOption("b", "printblocks", false, "Print block index meta data");
101     options.addOption("k", "checkrow", false,
102         "Enable row order check; looks for out-of-order keys");
103     options.addOption("a", "checkfamily", false, "Enable family check");
104     options.addOption("f", "file", true,
105         "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34");
106     options.addOption("w", "seekToRow", true,
107       "Seek to this row and print all the kvs for this row only");
108     options.addOption("r", "region", true,
109         "Region to scan. Pass region name; e.g. 'hbase:meta,,1'");
110     options.addOption("s", "stats", false, "Print statistics");
111   }
112 
113   public boolean parseOptions(String args[]) throws ParseException,
114       IOException {
115     if (args.length == 0) {
116       HelpFormatter formatter = new HelpFormatter();
117       formatter.printHelp("HFile", options, true);
118       return false;
119     }
120     CommandLineParser parser = new PosixParser();
121     CommandLine cmd = parser.parse(options, args);
122 
123     verbose = cmd.hasOption("v");
124     printValue = cmd.hasOption("p");
125     printKey = cmd.hasOption("e") || printValue;
126     shouldPrintMeta = cmd.hasOption("m");
127     printBlocks = cmd.hasOption("b");
128     printStats = cmd.hasOption("s");
129     checkRow = cmd.hasOption("k");
130     checkFamily = cmd.hasOption("a");
131 
132     if (cmd.hasOption("f")) {
133       files.add(new Path(cmd.getOptionValue("f")));
134     }
135 
136     if (cmd.hasOption("w")) {
137       String key = cmd.getOptionValue("w");
138       if (key != null && key.length() != 0) {
139         row = key.getBytes();
140         isSeekToRow = true;
141       } else {
142         System.err.println("Invalid row is specified.");
143         System.exit(-1);
144       }
145     }
146 
147     if (cmd.hasOption("r")) {
148       String regionName = cmd.getOptionValue("r");
149       byte[] rn = Bytes.toBytes(regionName);
150       byte[][] hri = HRegionInfo.parseRegionName(rn);
151       Path rootDir = FSUtils.getRootDir(conf);
152       Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
153       String enc = HRegionInfo.encodeRegionName(rn);
154       Path regionDir = new Path(tableDir, enc);
155       if (verbose)
156         System.out.println("region dir -> " + regionDir);
157       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
158           regionDir);
159       if (verbose)
160         System.out.println("Number of region files found -> "
161             + regionFiles.size());
162       if (verbose) {
163         int i = 1;
164         for (Path p : regionFiles) {
165           if (verbose)
166             System.out.println("Found file[" + i++ + "] -> " + p);
167         }
168       }
169       files.addAll(regionFiles);
170     }
171 
172     return true;
173   }
174 
175   /**
176    * Runs the command-line pretty-printer, and returns the desired command
177    * exit code (zero for success, non-zero for failure).
178    */
179   public int run(String[] args) {
180     conf = HBaseConfiguration.create();
181     try {
182       FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
183       if (!parseOptions(args))
184         return 1;
185     } catch (IOException ex) {
186       LOG.error("Error parsing command-line options", ex);
187       return 1;
188     } catch (ParseException ex) {
189       LOG.error("Error parsing command-line options", ex);
190       return 1;
191     }
192 
193     // iterate over all files found
194     for (Path fileName : files) {
195       try {
196         processFile(fileName);
197       } catch (IOException ex) {
198         LOG.error("Error reading " + fileName, ex);
199       }
200     }
201 
202     if (verbose || printKey) {
203       System.out.println("Scanned kv count -> " + count);
204     }
205 
206     return 0;
207   }
208 
209   private void processFile(Path file) throws IOException {
210     if (verbose)
211       System.out.println("Scanning -> " + file);
212     FileSystem fs = file.getFileSystem(conf);
213     if (!fs.exists(file)) {
214       System.err.println("ERROR, file doesnt exist: " + file);
215     }
216 
217     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf), conf);
218 
219     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
220 
221     KeyValueStatsCollector fileStats = null;
222 
223     if (verbose || printKey || checkRow || checkFamily || printStats) {
224       // scan over file and read key/value's and check if requested
225       HFileScanner scanner = reader.getScanner(false, false, false);
226       fileStats = new KeyValueStatsCollector();
227       boolean shouldScanKeysValues = false;
228       if (this.isSeekToRow) {
229         // seek to the first kv on this row
230         shouldScanKeysValues = 
231           (scanner.seekTo(KeyValue.createFirstOnRow(this.row).getKey()) != -1);
232       } else {
233         shouldScanKeysValues = scanner.seekTo();
234       }
235       if (shouldScanKeysValues)
236         scanKeysValues(file, fileStats, scanner, row);
237     }
238 
239     // print meta data
240     if (shouldPrintMeta) {
241       printMeta(reader, fileInfo);
242     }
243 
244     if (printBlocks) {
245       System.out.println("Block Index:");
246       System.out.println(reader.getDataBlockIndexReader());
247     }
248 
249     if (printStats) {
250       fileStats.finish();
251       System.out.println("Stats:\n" + fileStats);
252     }
253 
254     reader.close();
255   }
256 
  /**
   * Walks the scanner from its current position to the end of the file (or
   * past the requested row), optionally printing each KeyValue, collecting
   * statistics, and warning about out-of-order rows or family mismatches.
   *
   * @param file the HFile being scanned (used only in warning messages)
   * @param fileStats collector updated per KeyValue when -s was requested
   * @param scanner scanner already positioned on the first KeyValue to visit
   * @param row when non-null and non-empty, restrict processing to this row
   * @throws IOException if the scanner fails to advance
   */
  private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
      HFileScanner scanner,  byte[] row) throws IOException {
    KeyValue pkv = null;
    do {
      KeyValue kv = scanner.getKeyValue();
      if (row != null && row.length != 0) {
        int result = Bytes.compareTo(kv.getRow(), row);
        if (result > 0) {
          // Past the requested row; nothing further can match.
          break;
        } else if (result < 0) {
          // Before the requested row; advance without printing/counting.
          continue;
        }
      }
      // collect stats
      if (printStats) {
        fileStats.collect(kv);
      }
      // dump key value
      if (printKey) {
        System.out.print("K: " + kv);
        if (printValue) {
          System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
          int i = 0;
          List<Tag> tags = kv.getTags();
          for (Tag tag : tags) {
            System.out
                .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
          }
        }
        System.out.println();
      }
      // check if rows are in order
      if (checkRow && pkv != null) {
        if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
          System.err.println("WARNING, previous row is greater then"
              + " current row\n\tfilename -> " + file + "\n\tprevious -> "
              + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
      }
      // check if families are consistent
      if (checkFamily) {
        String fam = Bytes.toString(kv.getFamily());
        if (!file.toString().contains(fam)) {
          System.err.println("WARNING, filename does not match kv family,"
              + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
              + Bytes.toStringBinary(kv.getKey()));
        }
        if (pkv != null
            && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
          System.err.println("WARNING, previous kv has different family"
              + " compared to current key\n\tfilename -> " + file
              + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
              + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
        }
      }
      // pkv is only updated for KeyValues that pass the row filter, so the
      // order/family checks compare consecutive *matching* KeyValues.
      pkv = kv;
      ++count;
    } while (scanner.next());
  }
317 
318   /**
319    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
320    * with a four-space indentation.
321    */
322   private static String asSeparateLines(String keyValueStr) {
323     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
324                                   ",\n" + FOUR_SPACES + "$1");
325   }
326 
327   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
328       throws IOException {
329     System.out.println("Block index size as per heapsize: "
330         + reader.indexSize());
331     System.out.println(asSeparateLines(reader.toString()));
332     System.out.println("Trailer:\n    "
333         + asSeparateLines(reader.getTrailer().toString()));
334     System.out.println("Fileinfo:");
335     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
336       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
337       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
338         long seqid = Bytes.toLong(e.getValue());
339         System.out.println(seqid);
340       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
341         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
342         Writables.copyWritable(e.getValue(), timeRangeTracker);
343         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
344             + timeRangeTracker.getMaximumTimestamp());
345       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
346           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
347         System.out.println(Bytes.toInt(e.getValue()));
348       } else {
349         System.out.println(Bytes.toStringBinary(e.getValue()));
350       }
351     }
352 
353     try {
354       System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
355     } catch (Exception e) {
356       System.out.println ("Unable to retrieve the midkey");
357     }
358 
359     // Printing general bloom information
360     DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
361     BloomFilter bloomFilter = null;
362     if (bloomMeta != null)
363       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
364 
365     System.out.println("Bloom filter:");
366     if (bloomFilter != null) {
367       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
368           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
369     } else {
370       System.out.println(FOUR_SPACES + "Not present");
371     }
372 
373     // Printing delete bloom information
374     bloomMeta = reader.getDeleteBloomFilterMetadata();
375     bloomFilter = null;
376     if (bloomMeta != null)
377       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
378 
379     System.out.println("Delete Family Bloom filter:");
380     if (bloomFilter != null) {
381       System.out.println(FOUR_SPACES
382           + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
383               "\n" + FOUR_SPACES));
384     } else {
385       System.out.println(FOUR_SPACES + "Not present");
386     }
387   }
388 
  /**
   * Accumulates per-KeyValue and per-row statistics during a scan and renders
   * them through a yammer-metrics console reporter. Not thread-safe; intended
   * for single-threaded use by processFile().
   */
  private static class KeyValueStatsCollector {
    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
    // Reporter output is captured in this buffer (not stdout) so toString()
    // can return the rendered statistics as one string.
    private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
    private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
    Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
    Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
    Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
    Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");

    // Accumulators for the row currently being scanned; flushed by collectRow().
    long curRowBytes = 0;
    long curRowCols = 0;

    // Row key of the largest row (by serialized bytes) seen so far.
    byte[] biggestRow = null;

    private KeyValue prevKV = null;
    private long maxRowBytes = 0;
    // Key length of the most recent KeyValue; sampled into keyLen once per row.
    private long curRowKeyLength;

    public void collect(KeyValue kv) {
      // Value length is recorded for every KeyValue.
      valLen.update(kv.getValueLength());
      if (prevKV != null &&
          KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
        // new row
        collectRow();
      }
      curRowBytes += kv.getLength();
      // NOTE(review): keyLen ends up sampled only once per row (the last
      // KeyValue's key length, see collectRow()), unlike valLen which is
      // updated per KeyValue — confirm this asymmetry is intended.
      curRowKeyLength = kv.getKeyLength();
      curRowCols++;
      prevKV = kv;
    }

    // Flushes the current row's accumulators into the per-row histograms and
    // resets them for the next row.
    private void collectRow() {
      rowSizeBytes.update(curRowBytes);
      rowSizeCols.update(curRowCols);
      keyLen.update(curRowKeyLength);

      // prevKV is the last KeyValue of the row just finished, so its row key
      // identifies the row being flushed.
      if (curRowBytes > maxRowBytes && prevKV != null) {
        biggestRow = prevKV.getRow();
        maxRowBytes = curRowBytes;
      }

      curRowBytes = 0;
      curRowCols = 0;
    }

    /** Flushes the final (possibly partial) row; call once after the scan. */
    public void finish() {
      if (curRowCols > 0) {
        collectRow();
      }
    }

    @Override
    public String toString() {
      if (prevKV == null)
        return "no data available for statistics";

      // Dump the metrics to the output stream
      // shutdown() stops the reporter's background scheduling; run() then
      // renders all registered metrics into metricsOutput exactly once.
      simpleReporter.shutdown();
      simpleReporter.run();
      metricsRegistry.shutdown();

      return
              metricsOutput.toString() +
                      "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
    }
  }
455 
456   private static class SimpleReporter extends ConsoleReporter {
457     private final PrintStream out;
458 
459     public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
460       super(metricsRegistry, out, MetricPredicate.ALL);
461       this.out = out;
462     }
463 
464     @Override
465     public void run() {
466       for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
467               MetricPredicate.ALL).entrySet()) {
468         try {
469           for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
470             out.print("   " + subEntry.getKey().getName());
471             out.println(':');
472 
473             subEntry.getValue().processWith(this, subEntry.getKey(), out);
474           }
475         } catch (Exception e) {
476           e.printStackTrace(out);
477         }
478       }
479     }
480 
481     @Override
482     public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
483       super.processHistogram(name, histogram, stream);
484       stream.printf(Locale.getDefault(), "             count = %d\n", histogram.count());
485     }
486   }
487 }