/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;

import com.google.common.base.Preconditions;

/**
 * File format for HBase: a file of sorted key/value pairs, where both keys
 * and values are byte arrays. An HFile is laid out as a sequence of blocks:
 * data blocks holding the sorted cells, optional meta blocks (for example
 * Bloom filter data), a file-info block of key/value metadata, block
 * indexes, and a fixed-size trailer that records the offsets of the other
 * sections along with the format version.
 *
 * Writers are obtained through a {@link WriterFactory} (see
 * {@link #getWriterFactory(Configuration, CacheConfig)}) and readers through
 * {@link #createReader(FileSystem, Path, CacheConfig, Configuration)}.
 */
@InterfaceAudience.Private
public class HFile {

  static final Log LOG = LogFactory.getLog(HFile.class);

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;

  /** Minimum supported HFile format version. */
  public static final int MIN_FORMAT_VERSION = 2;

  /** Maximum supported HFile format version. */
  public static final int MAX_FORMAT_VERSION = 3;

  /** Minimum HFile format version with support for persisting cell tags. */
  public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;

  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  /** Meta data block name for bloom filter bits. */
  public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";

  /**
   * We assume that HFile path ends with
   * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
   * many levels of nesting. This is needed for identifying table and CF name
   * from an HFile path.
   */
  public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;

  /** The number of bytes per checksum. */
  public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;

  // For measuring number of checksum failures
  static final AtomicLong checksumFailures = new AtomicLong();

  // For tests. Gets incremented when we read a block whether from HDFS or from cache.
  public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);

  /**
   * Number of checksum verification failures. It also clears the counter.
   */
  public static final long getChecksumFailuresCount() {
    return checksumFailures.getAndSet(0);
  }

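  /** API required to write an {@link HFile} */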
  public interface Writer extends Closeable {

    /** Add an element to the file info map. */
    void appendFileInfo(byte[] key, byte[] value) throws IOException;

    void append(Cell cell) throws IOException;

    /** @return the path to this {@link HFile} */
    Path getPath();

    /**
     * Adds an inline block writer such as a multi-level block index writer or
     * a compound Bloom filter writer.
     */
    void addInlineBlockWriter(InlineBlockWriter bloomWriter);

    /**
     * Adds a block of metadata to the file. The content is produced by the
     * supplied {@link Writable} when the file is closed.
     */
    void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);

    /**
     * Store general Bloom filter in the file. This does not deal with Bloom
     * filter internals but is necessary, since Bloom filters are stored
     * differently in HFile version 1 and version 2.
     */
    void addGeneralBloomFilter(BloomFilterWriter bfw);

    /**
     * Store delete family Bloom filter in the file, which is only supported
     * in HFile V2.
     */
    void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;

    /** Return the file context for the HFile this writer belongs to. */
    HFileContext getFileContext();
  }

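  /**
   * Builder for {@link Writer}s. This variety of ways to construct writers is
   * used throughout the code, and we want to be able to swap writer
   * implementations.
   *
   * A minimal usage sketch (assuming the caller supplies {@code conf},
   * {@code fs}, {@code path} and the cells to write; cells must be appended
   * in sorted order):
   * <pre>
   * HFile.Writer w = HFile.getWriterFactory(conf, new CacheConfig(conf))
   *     .withPath(fs, path)
   *     .withFileContext(new HFileContextBuilder().build())
   *     .create();
   * try {
   *   w.append(cell);
   * } finally {
   *   w.close();
   * }
   * </pre>
   */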
  public static abstract class WriterFactory {
    protected final Configuration conf;
    protected final CacheConfig cacheConf;
    protected FileSystem fs;
    protected Path path;
    protected FSDataOutputStream ostream;
    protected KVComparator comparator = KeyValue.COMPARATOR;
    protected InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    protected boolean shouldDropBehind = false;

    WriterFactory(Configuration conf, CacheConfig cacheConf) {
      this.conf = conf;
      this.cacheConf = cacheConf;
    }

    public WriterFactory withPath(FileSystem fs, Path path) {
      Preconditions.checkNotNull(fs);
      Preconditions.checkNotNull(path);
      this.fs = fs;
      this.path = path;
      return this;
    }

    public WriterFactory withOutputStream(FSDataOutputStream ostream) {
      Preconditions.checkNotNull(ostream);
      this.ostream = ostream;
      return this;
    }

    public WriterFactory withComparator(KVComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
      // Deliberately not checking for null here.
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterFactory withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
      this.shouldDropBehind = shouldDropBehind;
      return this;
    }

    public Writer create() throws IOException {
      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
        throw new AssertionError("Please specify exactly one of " +
            "filesystem/path or output stream");
      }
      if (path != null) {
        ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
        try {
          ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
        } catch (UnsupportedOperationException uoe) {
          if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
          else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
        }
      }
      return createWriter(fs, path, ostream, comparator, fileContext);
    }

    protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
        KVComparator comparator, HFileContext fileContext) throws IOException;
  }

  /** The configuration key for HFile version to use for new files */
  public static final String FORMAT_VERSION_KEY = "hfile.format.version";

  public static int getFormatVersion(Configuration conf) {
    int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
    checkFormatVersion(version);
    return version;
  }

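  /**
   * Returns the factory to be used to create {@link HFile} writers.
   * Disables block cache access for all writers created through the
   * returned factory.
   */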
  public static final WriterFactory getWriterFactoryNoCache(Configuration
      conf) {
    Configuration tempConf = new Configuration(conf);
    tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
    return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
  }

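  /**
   * Returns the factory to be used to create {@link HFile} writers.
   */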
  public static final WriterFactory getWriterFactory(Configuration conf,
      CacheConfig cacheConf) {
    int version = getFormatVersion(conf);
    switch (version) {
    case 2:
      return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
    case 3:
      return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
    default:
      throw new IllegalArgumentException("Cannot create writer for HFile " +
          "format version " + version);
    }
  }

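  /**
   * An abstraction used by the block index. Implementations will check cache
   * for any asked-for block and return a cached block if found. Otherwise,
   * after reading from fs, will try and cache the block before returning.
   */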
  public interface CachingBlockReader {
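    /**
     * Read in a block.
     *
     * @param offset offset to read from
     * @param onDiskBlockSize size of the block
     * @param cacheBlock whether to cache the block read
     * @param pread use positional read rather than seek+read
     * @param isCompaction is this block being read as part of a compaction
     * @param updateCacheMetrics whether to update cache metrics for this read
     * @param expectedBlockType the block type we are expecting to read with
     *          this read operation, or null to read whatever block type is
     *          available and avoid checking
     * @param expectedDataBlockEncoding the data block encoding the caller is
     *          expecting data blocks to be in, or null to not perform this
     *          check and return the block irrespective of the encoding
     * @return the block read, wrapped as an {@link HFileBlock}
     * @throws IOException
     */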
    HFileBlock readBlock(long offset, long onDiskBlockSize,
        boolean cacheBlock, final boolean pread, final boolean isCompaction,
        final boolean updateCacheMetrics, BlockType expectedBlockType,
        DataBlockEncoding expectedDataBlockEncoding)
        throws IOException;
  }

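  /** An interface used by clients to open and iterate an {@link HFile}. */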
  public interface Reader extends Closeable, CachingBlockReader {

    /**
     * Returns this reader's "name". Usually the last component of the path,
     * which approximates the HFile's name on the regionserver.
     */
    String getName();

    KVComparator getComparator();

    HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);

    ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;

    Map<byte[], byte[]> loadFileInfo() throws IOException;

    byte[] getLastKey();

    byte[] midkey() throws IOException;

    long length();

    long getEntries();

    byte[] getFirstKey();

    long indexSize();

    byte[] getFirstRowKey();

    byte[] getLastRowKey();

    FixedFileTrailer getTrailer();

    HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();

    HFileScanner getScanner(boolean cacheBlocks, boolean pread);

    Compression.Algorithm getCompressionAlgorithm();

    /**
     * Retrieves general Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getGeneralBloomFilterMetadata() throws IOException;

    /**
     * Retrieves delete family Bloom filter metadata as appropriate for each
     * {@link HFile} version. Knows nothing about how that metadata is
     * structured.
     */
    DataInput getDeleteBloomFilterMetadata() throws IOException;

    Path getPath();

    /** Close method with optional evictOnClose. */
    void close(boolean evictOnClose) throws IOException;

    DataBlockEncoding getDataBlockEncoding();

    boolean hasMVCCInfo();

    /** Return the file context of the HFile this reader belongs to. */
    HFileContext getFileContext();

    boolean isPrimaryReplicaReader();

    void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);
  }

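  /**
   * Method returns the reader given the specified arguments.
   * TODO This is a bad abstraction.  See HBASE-6635.
   *
   * @param path hfile's path
   * @param fsdis stream of path's file
   * @param size max size of the trailer
   * @param cacheConf Cache configuration values, cannot be null
   * @param hfs the {@link HFileSystem} to use
   * @return an appropriate instance of HFileReader
   * @throws IOException if the file is invalid; will throw a
   *           CorruptHFileException-flavored IOException
   */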
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
      justification="Intentional")
  private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
      long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
    FixedFileTrailer trailer = null;
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      switch (trailer.getMajorVersion()) {
      case 2:
        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
      case 3:
        return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
      default:
        throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
      }
    } catch (Throwable t) {
      try {
        fsdis.close();
      } catch (Throwable t2) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
      }
      throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
    }
  }

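  /**
   * @param fs A file system
   * @param path Path to HFile
   * @param fsdis a stream of path's file
   * @param size max size of the trailer
   * @param cacheConf Cache configuration for hfile's contents
   * @param conf Configuration
   * @return A version-specific HFile Reader
   * @throws IOException if the file is invalid; will throw a
   *           CorruptHFileException-flavored IOException
   */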
  public static Reader createReader(FileSystem fs, Path path,
      FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    HFileSystem hfs = null;

    // If the fs is not an instance of HFileSystem, then create an
    // instance of HFileSystem that wraps over the specified fs.
    // In this case, we will not be able to avoid checksumming inside
    // the filesystem.
    if (!(fs instanceof HFileSystem)) {
      hfs = new HFileSystem(fs);
    } else {
      hfs = (HFileSystem)fs;
    }
    return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
  }

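  /**
   * Open an {@link HFile} for reading.
   *
   * A minimal read sketch (assuming the caller supplies {@code fs},
   * {@code path} and {@code conf}; {@code getKeyValue} is the scanner
   * accessor in this version of the API):
   * <pre>
   * HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf);
   * reader.loadFileInfo();
   * HFileScanner scanner = reader.getScanner(false, false);
   * if (scanner.seekTo()) {      // positions the scanner at the first cell
   *   do {
   *     Cell cell = scanner.getKeyValue();
   *     // process cell
   *   } while (scanner.next());
   * }
   * reader.close();
   * </pre>
   *
   * @param fs filesystem
   * @param path Path to file to read
   * @param cacheConf cache configuration; must not be null
   * @param conf Configuration
   * @return an active Reader instance
   * @throws IOException if the file is corrupt/invalid; will throw a
   *           CorruptHFileException (DoNotRetryIOException subtype)
   */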
  public static Reader createReader(
      FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
    Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
    FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
    return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
        cacheConf, stream.getHfs(), conf);
  }

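  /**
   * This factory method is intended for unit tests only: it opens a reader
   * over an already-open input stream.
   */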
  static Reader createReaderFromStream(Path path,
      FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
      throws IOException {
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
    return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
  }

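  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param path Path to file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */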
  public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
    return isHFileFormat(fs, fs.getFileStatus(path));
  }

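  /**
   * Returns true if the specified file has a valid HFile Trailer.
   *
   * @param fs filesystem
   * @param fileStatus the file to verify
   * @return true if the file has a valid HFile Trailer, otherwise false
   * @throws IOException if failed to read from the underlying stream
   */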
  public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    final Path path = fileStatus.getPath();
    final long size = fileStatus.getLen();
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
    try {
      boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
      assert !isHBaseChecksum; // Initially we must read with FS checksum.
      FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
      return true;
    } catch (IllegalArgumentException e) {
      return false;
    } catch (IOException e) {
      throw e;
    } finally {
      try {
        fsdis.close();
      } catch (Throwable t) {
        LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
      }
    }
  }

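  /**
   * Metadata for this file. Conjured by the writer. Read in by the reader.
   */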
  public static class FileInfo implements SortedMap<byte[], byte[]> {
    static final String RESERVED_PREFIX = "hfile.";
    static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
    static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
    static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
    static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
    static final byte [] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
    static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
    static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
    public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
    private final SortedMap<byte [], byte []> map =
        new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);

    public FileInfo() {
      super();
    }

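    /**
     * Append the given key/value pair to the file info, optionally checking
     * the key prefix.
     *
     * @param k key to add
     * @param v value to add
     * @param checkPrefix whether to check that the provided key does not
     *          start with the reserved prefix
     * @return this file info object
     * @throws IOException if the key or value is invalid
     */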
    public FileInfo append(final byte[] k, final byte[] v,
        final boolean checkPrefix) throws IOException {
      if (k == null || v == null) {
        throw new NullPointerException("Neither key nor value may be null");
      }
      if (checkPrefix && isReservedFileInfoKey(k)) {
        throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
            + " are reserved");
      }
      put(k, v);
      return this;
    }

    public void clear() {
      this.map.clear();
    }

    public Comparator<? super byte[]> comparator() {
      return map.comparator();
    }

    public boolean containsKey(Object key) {
      return map.containsKey(key);
    }

    public boolean containsValue(Object value) {
      return map.containsValue(value);
    }

    public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
      return map.entrySet();
    }

    public boolean equals(Object o) {
      return map.equals(o);
    }

    public byte[] firstKey() {
      return map.firstKey();
    }

    public byte[] get(Object key) {
      return map.get(key);
    }

    public int hashCode() {
      return map.hashCode();
    }

    public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
      return this.map.headMap(toKey);
    }

    public boolean isEmpty() {
      return map.isEmpty();
    }

    public Set<byte[]> keySet() {
      return map.keySet();
    }

    public byte[] lastKey() {
      return map.lastKey();
    }

    public byte[] put(byte[] key, byte[] value) {
      return this.map.put(key, value);
    }

    public void putAll(Map<? extends byte[], ? extends byte[]> m) {
      this.map.putAll(m);
    }

    public byte[] remove(Object key) {
      return this.map.remove(key);
    }

    public int size() {
      return map.size();
    }

    public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
      return this.map.subMap(fromKey, toKey);
    }

    public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
      return this.map.tailMap(fromKey);
    }

    public Collection<byte[]> values() {
      return map.values();
    }

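    /**
     * Write out this instance on the passed in <code>out</code> stream.
     * We write it as a protobuf.
     *
     * @param out the stream to write to
     * @throws IOException if the write fails
     * @see #read(DataInputStream)
     */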
    void write(final DataOutputStream out) throws IOException {
      HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
      for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
        HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
        bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
        bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
        builder.addMapEntry(bbpBuilder.build());
      }
      out.write(ProtobufUtil.PB_MAGIC);
      builder.build().writeDelimitedTo(out);
    }

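    /**
     * Populate this instance with what we find on the passed in <code>in</code>
     * stream. Can deserialize protobuf of old Writables format.
     *
     * @param in the stream to read from
     * @throws IOException if the read fails
     * @see #write(DataOutputStream)
     */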
    void read(final DataInputStream in) throws IOException {
      // Read enough bytes to check for the protobuf magic prefix.
      int pblen = ProtobufUtil.lengthOfPBMagic();
      byte [] pbuf = new byte[pblen];
      if (in.markSupported()) in.mark(pblen);
      int read = in.read(pbuf);
      if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
      if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
        parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
      } else {
        if (in.markSupported()) {
          in.reset();
          parseWritable(in);
        } else {
          // Cannot rewind the stream, so re-prepend the bytes we already
          // consumed before handing off to the Writable parser.
          ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
          SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
          parseWritable(new DataInputStream(sis));
        }
      }
    }

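    /**
     * Parse the old Writable format: an int count of entries, then for each
     * entry a byte[] key, a one-byte type code (always byte[] in an hfile, so
     * read and discarded), and a byte[] value.
     */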
    void parseWritable(final DataInputStream in) throws IOException {
      // First clear the map. Otherwise we will just accumulate entries every
      // time this method is called.
      this.map.clear();
      // Read the number of entries in the map.
      int entries = in.readInt();
      // Then read each key/value pair.
      for (int i = 0; i < entries; i++) {
        byte [] key = Bytes.readByteArray(in);
        // We used to read a byte that encoded the class type. Read and ignore
        // it because it is always byte [] in hfile.
        in.readByte();
        byte [] value = Bytes.readByteArray(in);
        this.map.put(key, value);
      }
    }

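    /**
     * Fill our map with content of the pb we read off disk.
     *
     * @param fip protobuf message to read
     */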
    void parsePB(final HFileProtos.FileInfoProto fip) {
      this.map.clear();
      for (BytesBytesPair pair: fip.getMapEntryList()) {
        this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
      }
    }
  }

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
  }

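  /**
   * Get names of supported compression algorithms.
   *
   * @return Array of strings, each representing a supported compression
   *         algorithm
   */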
  public static String[] getSupportedCompressionAlgorithms() {
    return Compression.getSupportedAlgorithms();
  }

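  /*
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */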
  static int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB, we can safely
    // cover the smaller 2GB range by casting the long to int.
    return (int)(l & 0x00000000ffffffffL);
  }

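  /**
   * Returns all HFiles belonging to the given region directory. Could return
   * an empty list.
   *
   * @param fs The file system reference.
   * @param regionDir The region directory to scan.
   * @return The list of files found.
   * @throws IOException When scanning the files fails.
   */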
  static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> regionHFiles = new ArrayList<Path>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for (FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDirectory() &&
            (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
            (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
          regionHFiles.add(file.getPath());
        }
      }
    }
    return regionHFiles;
  }

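  /**
   * Checks the given {@link HFile} format version, and throws an exception if
   * invalid. Note that if the version number comes from an input file and has
   * not been verified, the caller needs to re-throw an {@link IOException} to
   * provide a meaningful error message.
   *
   * @param version an HFile version
   * @throws IllegalArgumentException if the version is invalid
   */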
  public static void checkFormatVersion(int version)
      throws IllegalArgumentException {
    if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
      throw new IllegalArgumentException("Invalid HFile version: " + version
          + " (expected to be between " + MIN_FORMAT_VERSION + " and "
          + MAX_FORMAT_VERSION + ")");
    }
  }

  public static void main(String[] args) throws Exception {
    // delegate to preserve old behavior
    HFilePrettyPrinter.main(args);
  }
}