1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.io.hfile; 19 20 import java.io.ByteArrayOutputStream; 21 import java.io.IOException; 22 import java.nio.ByteBuffer; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.fs.ChecksumException; 27 import org.apache.hadoop.hbase.classification.InterfaceAudience; 28 import org.apache.hadoop.fs.Path; 29 import org.apache.hadoop.hbase.util.ChecksumType; 30 import org.apache.hadoop.util.DataChecksum; 31 32 /** 33 * Utility methods to compute and validate checksums. 34 */ 35 @InterfaceAudience.Private 36 public class ChecksumUtil { 37 public static final Log LOG = LogFactory.getLog(ChecksumUtil.class); 38 39 /** This is used to reserve space in a byte buffer */ 40 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE]; 41 42 /** 43 * This is used by unit tests to make checksum failures throw an 44 * exception instead of returning null. Returning a null value from 45 * checksum validation will cause the higher layer to retry that 46 * read with hdfs-level checksums. Instead, we would like checksum 47 * failures to cause the entire unit test to fail. 48 */ 49 private static boolean generateExceptions = false; 50 51 /** 52 * Generates a checksum for all the data in indata. The checksum is 53 * written to outdata. 54 * @param indata input data stream 55 * @param startOffset starting offset in the indata stream from where to 56 * compute checkums from 57 * @param endOffset ending offset in the indata stream upto 58 * which checksums needs to be computed 59 * @param outdata the output buffer where checksum values are written 60 * @param outOffset the starting offset in the outdata where the 61 * checksum values are written 62 * @param checksumType type of checksum 63 * @param bytesPerChecksum number of bytes per checksum value 64 */ 65 static void generateChecksums(byte[] indata, int startOffset, int endOffset, 66 byte[] outdata, int outOffset, ChecksumType checksumType, 67 int bytesPerChecksum) throws IOException { 68 69 if (checksumType == ChecksumType.NULL) { 70 return; // No checksum for this block. 71 } 72 73 DataChecksum checksum = DataChecksum.newDataChecksum( 74 checksumType.getDataChecksumType(), bytesPerChecksum); 75 76 checksum.calculateChunkedSums( 77 ByteBuffer.wrap(indata, startOffset, endOffset - startOffset), 78 ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset)); 79 } 80 81 /** 82 * Validates that the data in the specified HFileBlock matches the 83 * checksum. Generates the checksum for the data and 84 * then validate that it matches the value stored in the header. 85 * If there is a checksum mismatch, then return false. Otherwise 86 * return true. 87 * The header is extracted from the specified HFileBlock while the 88 * data-to-be-verified is extracted from 'data'. 89 */ 90 static boolean validateBlockChecksum(Path path, long offset, HFileBlock block, 91 byte[] data, int hdrSize) throws IOException { 92 93 // If this is an older version of the block that does not have 94 // checksums, then return false indicating that checksum verification 95 // did not succeed. Actually, this methiod should never be called 96 // when the minorVersion is 0, thus this is a defensive check for a 97 // cannot-happen case. Since this is a cannot-happen case, it is 98 // better to return false to indicate a checksum validation failure. 99 if (!block.getHFileContext().isUseHBaseChecksum()) { 100 return false; 101 } 102 103 // Get a checksum object based on the type of checksum that is 104 // set in the HFileBlock header. A ChecksumType.NULL indicates that 105 // the caller is not interested in validating checksums, so we 106 // always return true. 107 ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType()); 108 if (cktype == ChecksumType.NULL) { 109 return true; // No checksum validations needed for this block. 110 } 111 112 // read in the stored value of the checksum size from the header. 113 int bytesPerChecksum = block.getBytesPerChecksum(); 114 115 DataChecksum dataChecksum = DataChecksum.newDataChecksum( 116 cktype.getDataChecksumType(), bytesPerChecksum); 117 assert dataChecksum != null; 118 int sizeWithHeader = block.getOnDiskDataSizeWithHeader(); 119 if (LOG.isTraceEnabled()) { 120 LOG.info("dataLength=" + data.length 121 + ", sizeWithHeader=" + sizeWithHeader 122 + ", checksumType=" + cktype.getName() 123 + ", file=" + path.toString() 124 + ", offset=" + offset 125 + ", headerSize=" + hdrSize 126 + ", bytesPerChecksum=" + bytesPerChecksum); 127 } 128 try { 129 dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader), 130 ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), 131 path.toString(), 0); 132 } catch (ChecksumException e) { 133 return false; 134 } 135 return true; // checksum is valid 136 } 137 138 /** 139 * Returns the number of bytes needed to store the checksums for 140 * a specified data size 141 * @param datasize number of bytes of data 142 * @param bytesPerChecksum number of bytes in a checksum chunk 143 * @return The number of bytes needed to store the checksum values 144 */ 145 static long numBytes(long datasize, int bytesPerChecksum) { 146 return numChunks(datasize, bytesPerChecksum) * 147 HFileBlock.CHECKSUM_SIZE; 148 } 149 150 /** 151 * Returns the number of checksum chunks needed to store the checksums for 152 * a specified data size 153 * @param datasize number of bytes of data 154 * @param bytesPerChecksum number of bytes in a checksum chunk 155 * @return The number of checksum chunks 156 */ 157 static long numChunks(long datasize, int bytesPerChecksum) { 158 long numChunks = datasize/bytesPerChecksum; 159 if (datasize % bytesPerChecksum != 0) { 160 numChunks++; 161 } 162 return numChunks; 163 } 164 165 /** 166 * Write dummy checksums to the end of the specified bytes array 167 * to reserve space for writing checksums later 168 * @param baos OutputStream to write dummy checkum values 169 * @param numBytes Number of bytes of data for which dummy checksums 170 * need to be generated 171 * @param bytesPerChecksum Number of bytes per checksum value 172 */ 173 static void reserveSpaceForChecksums(ByteArrayOutputStream baos, 174 int numBytes, int bytesPerChecksum) throws IOException { 175 long numChunks = numChunks(numBytes, bytesPerChecksum); 176 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE; 177 while (bytesLeft > 0) { 178 long count = Math.min(bytesLeft, DUMMY_VALUE.length); 179 baos.write(DUMMY_VALUE, 0, (int)count); 180 bytesLeft -= count; 181 } 182 } 183 184 /** 185 * Mechanism to throw an exception in case of hbase checksum 186 * failure. This is used by unit tests only. 187 * @param value Setting this to true will cause hbase checksum 188 * verification failures to generate exceptions. 189 */ 190 public static void generateExceptionForChecksumFailureForTest(boolean value) { 191 generateExceptions = value; 192 } 193 } 194