View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import static com.google.common.base.Preconditions.checkArgument;
21  import static com.google.common.base.Preconditions.checkNotNull;
22  import static com.google.common.base.Preconditions.checkPositionIndex;
23  
24  import java.io.DataInput;
25  import java.io.DataOutput;
26  import java.io.IOException;
27  import java.lang.reflect.Field;
28  import java.math.BigDecimal;
29  import java.math.BigInteger;
30  import java.nio.ByteBuffer;
31  import java.nio.ByteOrder;
32  import java.nio.charset.Charset;
33  import java.security.AccessController;
34  import java.security.PrivilegedAction;
35  import java.security.SecureRandom;
36  import java.util.Arrays;
37  import java.util.Collection;
38  import java.util.Comparator;
39  import java.util.Iterator;
40  import java.util.List;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.classification.InterfaceAudience;
45  import org.apache.hadoop.classification.InterfaceStability;
46  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
47  import org.apache.hadoop.io.RawComparator;
48  import org.apache.hadoop.io.WritableComparator;
49  import org.apache.hadoop.io.WritableUtils;
50  
51  import sun.misc.Unsafe;
52  
53  import com.google.common.annotations.VisibleForTesting;
54  import com.google.common.collect.Lists;
55  
56  /**
57   * Utility class that handles byte arrays, conversions to/from other types,
58   * comparisons, hash code generation, manufacturing keys for HashMaps or
59   * HashSets, etc.
60   */
61  @InterfaceAudience.Public
62  @InterfaceStability.Stable
63  public class Bytes {
  //HConstants.UTF8_ENCODING should be updated if this changed
  /** Name of the encoding used whenever strings are encoded: always UTF-8. */
  private static final String UTF8_ENCODING = "UTF-8";

  //HConstants.UTF8_CHARSET should be updated if this changed
  /** Charset looked up from {@link #UTF8_ENCODING}; used for string/byte conversions. */
  private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING);

  //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed
  /** Shared zero-length byte array. */
  private static final byte [] EMPTY_BYTE_ARRAY = new byte [0];

  /** Logger for this class. */
  private static final Log LOG = LogFactory.getLog(Bytes.class);

  /**
   * Size of boolean in bytes. Computed as one byte (Byte.SIZE / Byte.SIZE).
   */
  public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;

  /**
   * Size of byte in bytes. Same value as {@link #SIZEOF_BOOLEAN}.
   */
  public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;

  /**
   * Size of char in bytes
   */
  public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;

  /**
   * Size of double in bytes
   */
  public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;

  /**
   * Size of float in bytes
   */
  public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;

  /**
   * Size of int in bytes
   */
  public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;

  /**
   * Size of long in bytes
   */
  public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;

  /**
   * Size of short in bytes
   */
  public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;


  /**
   * Estimate of the size cost paid beyond the payload in the JVM for an
   * instance of byte []. Estimate based on study of jhat and jprofiler numbers.
   */
  // JHat says BU is 56 bytes.
  // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
  public static final int ESTIMATED_HEAP_TAX = 16;
125 
126   
127   /**
128    * Returns length of the byte array, returning 0 if the array is null.
129    * Useful for calculating sizes.
130    * @param b byte array, which can be null
131    * @return 0 if b is null, otherwise returns length
132    */
133   final public static int len(byte[] b) {
134     return b == null ? 0 : b.length;
135   }
136 
137   /**
138    * Byte array comparator class.
139    */
140   @InterfaceAudience.Public
141   @InterfaceStability.Stable
142   public static class ByteArrayComparator implements RawComparator<byte []> {
143     /**
144      * Constructor
145      */
146     public ByteArrayComparator() {
147       super();
148     }
149     @Override
150     public int compare(byte [] left, byte [] right) {
151       return compareTo(left, right);
152     }
153     @Override
154     public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) {
155       return LexicographicalComparerHolder.BEST_COMPARER.
156         compareTo(b1, s1, l1, b2, s2, l2);
157     }
158   }
159 
160   /**
161    * A {@link ByteArrayComparator} that treats the empty array as the largest value.
162    * This is useful for comparing row end keys for regions.
163    */
164   // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region
165   // boundaries. Thus semantically, we should treat empty byte array as the smallest value
166   // while comparing row keys, start keys etc; but as the largest value for comparing
167   // region boundaries for endKeys.
168   @InterfaceAudience.Public
169   @InterfaceStability.Stable
170   public static class RowEndKeyComparator extends ByteArrayComparator {
171     @Override
172     public int compare(byte[] left, byte[] right) {
173       return compare(left, 0, left.length, right, 0, right.length);
174     }
175     @Override
176     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
177       if (b1 == b2 && s1 == s2 && l1 == l2) {
178         return 0;
179       }
180       if (l1 == 0) {
181         return l2; //0 or positive
182       }
183       if (l2 == 0) {
184         return -1;
185       }
186       return super.compare(b1, s1, l1, b2, s2, l2);
187     }
188   }
189 
  /**
   * Pass this to TreeMaps where byte [] are keys.
   */
  public final static Comparator<byte []> BYTES_COMPARATOR = new ByteArrayComparator();

  /**
   * Use for comparing byte arrays, byte-by-byte, through the
   * {@link RawComparator} interface.
   */
  public final static RawComparator<byte []> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
199 
200   /**
201    * Read byte-array written with a WritableableUtils.vint prefix.
202    * @param in Input to read from.
203    * @return byte array read off <code>in</code>
204    * @throws IOException e
205    */
206   public static byte [] readByteArray(final DataInput in)
207   throws IOException {
208     int len = WritableUtils.readVInt(in);
209     if (len < 0) {
210       throw new NegativeArraySizeException(Integer.toString(len));
211     }
212     byte [] result = new byte[len];
213     in.readFully(result, 0, len);
214     return result;
215   }
216 
217   /**
218    * Read byte-array written with a WritableableUtils.vint prefix.
219    * IOException is converted to a RuntimeException.
220    * @param in Input to read from.
221    * @return byte array read off <code>in</code>
222    */
223   public static byte [] readByteArrayThrowsRuntime(final DataInput in) {
224     try {
225       return readByteArray(in);
226     } catch (Exception e) {
227       throw new RuntimeException(e);
228     }
229   }
230 
231   /**
232    * Write byte-array with a WritableableUtils.vint prefix.
233    * @param out output stream to be written to
234    * @param b array to write
235    * @throws IOException e
236    */
237   public static void writeByteArray(final DataOutput out, final byte [] b)
238   throws IOException {
239     if(b == null) {
240       WritableUtils.writeVInt(out, 0);
241     } else {
242       writeByteArray(out, b, 0, b.length);
243     }
244   }
245 
246   /**
247    * Write byte-array to out with a vint length prefix.
248    * @param out output stream
249    * @param b array
250    * @param offset offset into array
251    * @param length length past offset
252    * @throws IOException e
253    */
254   public static void writeByteArray(final DataOutput out, final byte [] b,
255       final int offset, final int length)
256   throws IOException {
257     WritableUtils.writeVInt(out, length);
258     out.write(b, offset, length);
259   }
260 
261   /**
262    * Write byte-array from src to tgt with a vint length prefix.
263    * @param tgt target array
264    * @param tgtOffset offset into target array
265    * @param src source array
266    * @param srcOffset source offset
267    * @param srcLength source length
268    * @return New offset in src array.
269    */
270   public static int writeByteArray(final byte [] tgt, final int tgtOffset,
271       final byte [] src, final int srcOffset, final int srcLength) {
272     byte [] vint = vintToBytes(srcLength);
273     System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
274     int offset = tgtOffset + vint.length;
275     System.arraycopy(src, srcOffset, tgt, offset, srcLength);
276     return offset + srcLength;
277   }
278 
279   /**
280    * Put bytes at the specified byte array position.
281    * @param tgtBytes the byte array
282    * @param tgtOffset position in the array
283    * @param srcBytes array to write out
284    * @param srcOffset source offset
285    * @param srcLength source length
286    * @return incremented offset
287    */
288   public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
289       int srcOffset, int srcLength) {
290     System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
291     return tgtOffset + srcLength;
292   }
293 
294   /**
295    * Write a single byte out to the specified byte array position.
296    * @param bytes the byte array
297    * @param offset position in the array
298    * @param b byte to write out
299    * @return incremented offset
300    */
301   public static int putByte(byte[] bytes, int offset, byte b) {
302     bytes[offset] = b;
303     return offset + 1;
304   }
305 
306   /**
307    * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified.
308    * @param bytes the byte array
309    * @param offset position in the array
310    * @param buf ByteBuffer to write out
311    * @return incremented offset
312    */
313   public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) {
314     int len = buf.remaining();
315     buf.get(bytes, offset, len);
316     return offset + len;
317   }
318 
319   /**
320    * Returns a new byte array, copied from the given {@code buf},
321    * from the index 0 (inclusive) to the limit (exclusive),
322    * regardless of the current position.
323    * The position and the other index parameters are not changed.
324    *
325    * @param buf a byte buffer
326    * @return the byte array
327    * @see #getBytes(ByteBuffer)
328    */
329   public static byte[] toBytes(ByteBuffer buf) {
330     ByteBuffer dup = buf.duplicate();
331     dup.position(0);
332     return readBytes(dup);
333   }
334 
335   private static byte[] readBytes(ByteBuffer buf) {
336     byte [] result = new byte[buf.remaining()];
337     buf.get(result);
338     return result;
339   }
340 
341   /**
342    * @param b Presumed UTF-8 encoded byte array.
343    * @return String made from <code>b</code>
344    */
345   public static String toString(final byte [] b) {
346     if (b == null) {
347       return null;
348     }
349     return toString(b, 0, b.length);
350   }
351 
352   /**
353    * Joins two byte arrays together using a separator.
354    * @param b1 The first byte array.
355    * @param sep The separator to use.
356    * @param b2 The second byte array.
357    */
358   public static String toString(final byte [] b1,
359                                 String sep,
360                                 final byte [] b2) {
361     return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
362   }
363 
364   /**
365    * This method will convert utf8 encoded bytes into a string. If
366    * the given byte array is null, this method will return null.
367    *
368    * @param b Presumed UTF-8 encoded byte array.
369    * @param off offset into array
370    * @param len length of utf-8 sequence
371    * @return String made from <code>b</code> or null
372    */
373   public static String toString(final byte [] b, int off, int len) {
374     if (b == null) {
375       return null;
376     }
377     if (len == 0) {
378       return "";
379     }
380     return new String(b, off, len, UTF8_CHARSET);
381   }
382 
383   /**
384    * Write a printable representation of a byte array.
385    *
386    * @param b byte array
387    * @return string
388    * @see #toStringBinary(byte[], int, int)
389    */
390   public static String toStringBinary(final byte [] b) {
391     if (b == null)
392       return "null";
393     return toStringBinary(b, 0, b.length);
394   }
395 
396   /**
397    * Converts the given byte buffer to a printable representation,
398    * from the index 0 (inclusive) to the limit (exclusive),
399    * regardless of the current position.
400    * The position and the other index parameters are not changed.
401    *
402    * @param buf a byte buffer
403    * @return a string representation of the buffer's binary contents
404    * @see #toBytes(ByteBuffer)
405    * @see #getBytes(ByteBuffer)
406    */
407   public static String toStringBinary(ByteBuffer buf) {
408     if (buf == null)
409       return "null";
410     if (buf.hasArray()) {
411       return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit());
412     }
413     return toStringBinary(toBytes(buf));
414   }
415 
416   /**
417    * Write a printable representation of a byte array. Non-printable
418    * characters are hex escaped in the format \\x%02X, eg:
419    * \x00 \x05 etc
420    *
421    * @param b array to write out
422    * @param off offset to start at
423    * @param len length to write
424    * @return string output
425    */
426   public static String toStringBinary(final byte [] b, int off, int len) {
427     StringBuilder result = new StringBuilder();
428     // Just in case we are passed a 'len' that is > buffer length...
429     if (off >= b.length) return result.toString();
430     if (off + len > b.length) len = b.length - off;
431     for (int i = off; i < off + len ; ++i ) {
432       int ch = b[i] & 0xFF;
433       if ( (ch >= '0' && ch <= '9')
434           || (ch >= 'A' && ch <= 'Z')
435           || (ch >= 'a' && ch <= 'z')
436           || " `~!@#$%^&*()-_=+[]{}|;:'\",.<>/?".indexOf(ch) >= 0 ) {
437         result.append((char)ch);
438       } else {
439         result.append(String.format("\\x%02X", ch));
440       }
441     }
442     return result.toString();
443   }
444 
445   private static boolean isHexDigit(char c) {
446     return
447         (c >= 'A' && c <= 'F') ||
448         (c >= '0' && c <= '9');
449   }
450 
451   /**
452    * Takes a ASCII digit in the range A-F0-9 and returns
453    * the corresponding integer/ordinal value.
454    * @param ch  The hex digit.
455    * @return The converted hex value as a byte.
456    */
457   public static byte toBinaryFromHex(byte ch) {
458     if ( ch >= 'A' && ch <= 'F' )
459       return (byte) ((byte)10 + (byte) (ch - 'A'));
460     // else
461     return (byte) (ch - '0');
462   }
463 
464   public static byte [] toBytesBinary(String in) {
465     // this may be bigger than we need, but let's be safe.
466     byte [] b = new byte[in.length()];
467     int size = 0;
468     for (int i = 0; i < in.length(); ++i) {
469       char ch = in.charAt(i);
470       if (ch == '\\' && in.length() > i+1 && in.charAt(i+1) == 'x') {
471         // ok, take next 2 hex digits.
472         char hd1 = in.charAt(i+2);
473         char hd2 = in.charAt(i+3);
474 
475         // they need to be A-F0-9:
476         if (!isHexDigit(hd1) ||
477             !isHexDigit(hd2)) {
478           // bogus escape code, ignore:
479           continue;
480         }
481         // turn hex ASCII digit -> number
482         byte d = (byte) ((toBinaryFromHex((byte)hd1) << 4) + toBinaryFromHex((byte)hd2));
483 
484         b[size++] = d;
485         i += 3; // skip 3
486       } else {
487         b[size++] = (byte) ch;
488       }
489     }
490     // resize:
491     byte [] b2 = new byte[size];
492     System.arraycopy(b, 0, b2, 0, size);
493     return b2;
494   }
495 
496   /**
497    * Converts a string to a UTF-8 byte array.
498    * @param s string
499    * @return the byte array
500    */
501   public static byte[] toBytes(String s) {
502     return s.getBytes(UTF8_CHARSET);
503   }
504 
505   /**
506    * Convert a boolean to a byte array. True becomes -1
507    * and false becomes 0.
508    *
509    * @param b value
510    * @return <code>b</code> encoded in a byte array.
511    */
512   public static byte [] toBytes(final boolean b) {
513     return new byte[] { b ? (byte) -1 : (byte) 0 };
514   }
515 
516   /**
517    * Reverses {@link #toBytes(boolean)}
518    * @param b array
519    * @return True or false.
520    */
521   public static boolean toBoolean(final byte [] b) {
522     if (b.length != 1) {
523       throw new IllegalArgumentException("Array has wrong size: " + b.length);
524     }
525     return b[0] != (byte) 0;
526   }
527 
528   /**
529    * Convert a long value to a byte array using big-endian.
530    *
531    * @param val value to convert
532    * @return the byte array
533    */
534   public static byte[] toBytes(long val) {
535     byte [] b = new byte[8];
536     for (int i = 7; i > 0; i--) {
537       b[i] = (byte) val;
538       val >>>= 8;
539     }
540     b[0] = (byte) val;
541     return b;
542   }
543 
544   /**
545    * Converts a byte array to a long value. Reverses
546    * {@link #toBytes(long)}
547    * @param bytes array
548    * @return the long value
549    */
550   public static long toLong(byte[] bytes) {
551     return toLong(bytes, 0, SIZEOF_LONG);
552   }
553 
554   /**
555    * Converts a byte array to a long value. Assumes there will be
556    * {@link #SIZEOF_LONG} bytes available.
557    *
558    * @param bytes bytes
559    * @param offset offset
560    * @return the long value
561    */
562   public static long toLong(byte[] bytes, int offset) {
563     return toLong(bytes, offset, SIZEOF_LONG);
564   }
565 
566   /**
567    * Converts a byte array to a long value.
568    *
569    * @param bytes array of bytes
570    * @param offset offset into array
571    * @param length length of data (must be {@link #SIZEOF_LONG})
572    * @return the long value
573    * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or
574    * if there's not enough room in the array at the offset indicated.
575    */
576   public static long toLong(byte[] bytes, int offset, final int length) {
577     if (length != SIZEOF_LONG || offset + length > bytes.length) {
578       throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
579     }
580     long l = 0;
581     for(int i = offset; i < offset + length; i++) {
582       l <<= 8;
583       l ^= bytes[i] & 0xFF;
584     }
585     return l;
586   }
587 
588   private static IllegalArgumentException
589     explainWrongLengthOrOffset(final byte[] bytes,
590                                final int offset,
591                                final int length,
592                                final int expectedLength) {
593     String reason;
594     if (length != expectedLength) {
595       reason = "Wrong length: " + length + ", expected " + expectedLength;
596     } else {
597      reason = "offset (" + offset + ") + length (" + length + ") exceed the"
598         + " capacity of the array: " + bytes.length;
599     }
600     return new IllegalArgumentException(reason);
601   }
602 
603   /**
604    * Put a long value out to the specified byte array position.
605    * @param bytes the byte array
606    * @param offset position in the array
607    * @param val long to write out
608    * @return incremented offset
609    * @throws IllegalArgumentException if the byte array given doesn't have
610    * enough room at the offset specified.
611    */
612   public static int putLong(byte[] bytes, int offset, long val) {
613     if (bytes.length - offset < SIZEOF_LONG) {
614       throw new IllegalArgumentException("Not enough room to put a long at"
615           + " offset " + offset + " in a " + bytes.length + " byte array");
616     }
617     for(int i = offset + 7; i > offset; i--) {
618       bytes[i] = (byte) val;
619       val >>>= 8;
620     }
621     bytes[offset] = (byte) val;
622     return offset + SIZEOF_LONG;
623   }
624 
625   /**
626    * Presumes float encoded as IEEE 754 floating-point "single format"
627    * @param bytes byte array
628    * @return Float made from passed byte array.
629    */
630   public static float toFloat(byte [] bytes) {
631     return toFloat(bytes, 0);
632   }
633 
634   /**
635    * Presumes float encoded as IEEE 754 floating-point "single format"
636    * @param bytes array to convert
637    * @param offset offset into array
638    * @return Float made from passed byte array.
639    */
640   public static float toFloat(byte [] bytes, int offset) {
641     return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
642   }
643 
644   /**
645    * @param bytes byte array
646    * @param offset offset to write to
647    * @param f float value
648    * @return New offset in <code>bytes</code>
649    */
650   public static int putFloat(byte [] bytes, int offset, float f) {
651     return putInt(bytes, offset, Float.floatToRawIntBits(f));
652   }
653 
654   /**
655    * @param f float value
656    * @return the float represented as byte []
657    */
658   public static byte [] toBytes(final float f) {
659     // Encode it as int
660     return Bytes.toBytes(Float.floatToRawIntBits(f));
661   }
662 
663   /**
664    * @param bytes byte array
665    * @return Return double made from passed bytes.
666    */
667   public static double toDouble(final byte [] bytes) {
668     return toDouble(bytes, 0);
669   }
670 
671   /**
672    * @param bytes byte array
673    * @param offset offset where double is
674    * @return Return double made from passed bytes.
675    */
676   public static double toDouble(final byte [] bytes, final int offset) {
677     return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
678   }
679 
680   /**
681    * @param bytes byte array
682    * @param offset offset to write to
683    * @param d value
684    * @return New offset into array <code>bytes</code>
685    */
686   public static int putDouble(byte [] bytes, int offset, double d) {
687     return putLong(bytes, offset, Double.doubleToLongBits(d));
688   }
689 
690   /**
691    * Serialize a double as the IEEE 754 double format output. The resultant
692    * array will be 8 bytes long.
693    *
694    * @param d value
695    * @return the double represented as byte []
696    */
697   public static byte [] toBytes(final double d) {
698     // Encode it as a long
699     return Bytes.toBytes(Double.doubleToRawLongBits(d));
700   }
701 
702   /**
703    * Convert an int value to a byte array.  Big-endian.  Same as what DataOutputStream.writeInt
704    * does.
705    *
706    * @param val value
707    * @return the byte array
708    */
709   public static byte[] toBytes(int val) {
710     byte [] b = new byte[4];
711     for(int i = 3; i > 0; i--) {
712       b[i] = (byte) val;
713       val >>>= 8;
714     }
715     b[0] = (byte) val;
716     return b;
717   }
718 
719   /**
720    * Converts a byte array to an int value
721    * @param bytes byte array
722    * @return the int value
723    */
724   public static int toInt(byte[] bytes) {
725     return toInt(bytes, 0, SIZEOF_INT);
726   }
727 
728   /**
729    * Converts a byte array to an int value
730    * @param bytes byte array
731    * @param offset offset into array
732    * @return the int value
733    */
734   public static int toInt(byte[] bytes, int offset) {
735     return toInt(bytes, offset, SIZEOF_INT);
736   }
737 
738   /**
739    * Converts a byte array to an int value
740    * @param bytes byte array
741    * @param offset offset into array
742    * @param length length of int (has to be {@link #SIZEOF_INT})
743    * @return the int value
744    * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or
745    * if there's not enough room in the array at the offset indicated.
746    */
747   public static int toInt(byte[] bytes, int offset, final int length) {
748     if (length != SIZEOF_INT || offset + length > bytes.length) {
749       throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
750     }
751     int n = 0;
752     for(int i = offset; i < (offset + length); i++) {
753       n <<= 8;
754       n ^= bytes[i] & 0xFF;
755     }
756     return n;
757   }
758 
759   /**
760    * Converts a byte array to an int value
761    * @param bytes byte array
762    * @param offset offset into array
763    * @param length how many bytes should be considered for creating int
764    * @return the int value
765    * @throws IllegalArgumentException if there's not enough room in the array at the offset
766    * indicated.
767    */
768   public static int readAsInt(byte[] bytes, int offset, final int length) {
769     if (offset + length > bytes.length) {
770       throw new IllegalArgumentException("offset (" + offset + ") + length (" + length
771           + ") exceed the" + " capacity of the array: " + bytes.length);
772     }
773     int n = 0;
774     for(int i = offset; i < (offset + length); i++) {
775       n <<= 8;
776       n ^= bytes[i] & 0xFF;
777     }
778     return n;
779   }
780 
781   /**
782    * Put an int value out to the specified byte array position.
783    * @param bytes the byte array
784    * @param offset position in the array
785    * @param val int to write out
786    * @return incremented offset
787    * @throws IllegalArgumentException if the byte array given doesn't have
788    * enough room at the offset specified.
789    */
790   public static int putInt(byte[] bytes, int offset, int val) {
791     if (bytes.length - offset < SIZEOF_INT) {
792       throw new IllegalArgumentException("Not enough room to put an int at"
793           + " offset " + offset + " in a " + bytes.length + " byte array");
794     }
795     for(int i= offset + 3; i > offset; i--) {
796       bytes[i] = (byte) val;
797       val >>>= 8;
798     }
799     bytes[offset] = (byte) val;
800     return offset + SIZEOF_INT;
801   }
802 
803   /**
804    * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
805    * @param val value
806    * @return the byte array
807    */
808   public static byte[] toBytes(short val) {
809     byte[] b = new byte[SIZEOF_SHORT];
810     b[1] = (byte) val;
811     val >>= 8;
812     b[0] = (byte) val;
813     return b;
814   }
815 
816   /**
817    * Converts a byte array to a short value
818    * @param bytes byte array
819    * @return the short value
820    */
821   public static short toShort(byte[] bytes) {
822     return toShort(bytes, 0, SIZEOF_SHORT);
823   }
824 
825   /**
826    * Converts a byte array to a short value
827    * @param bytes byte array
828    * @param offset offset into array
829    * @return the short value
830    */
831   public static short toShort(byte[] bytes, int offset) {
832     return toShort(bytes, offset, SIZEOF_SHORT);
833   }
834 
835   /**
836    * Converts a byte array to a short value
837    * @param bytes byte array
838    * @param offset offset into array
839    * @param length length, has to be {@link #SIZEOF_SHORT}
840    * @return the short value
841    * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT}
842    * or if there's not enough room in the array at the offset indicated.
843    */
844   public static short toShort(byte[] bytes, int offset, final int length) {
845     if (length != SIZEOF_SHORT || offset + length > bytes.length) {
846       throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT);
847     }
848     short n = 0;
849     n ^= bytes[offset] & 0xFF;
850     n <<= 8;
851     n ^= bytes[offset+1] & 0xFF;
852     return n;
853   }
854 
855   /**
856    * Returns a new byte array, copied from the given {@code buf},
857    * from the position (inclusive) to the limit (exclusive).
858    * The position and the other index parameters are not changed.
859    *
860    * @param buf a byte buffer
861    * @return the byte array
862    * @see #toBytes(ByteBuffer)
863    */
864   public static byte[] getBytes(ByteBuffer buf) {
865     return readBytes(buf.duplicate());
866   }
867 
868   /**
869    * Put a short value out to the specified byte array position.
870    * @param bytes the byte array
871    * @param offset position in the array
872    * @param val short to write out
873    * @return incremented offset
874    * @throws IllegalArgumentException if the byte array given doesn't have
875    * enough room at the offset specified.
876    */
877   public static int putShort(byte[] bytes, int offset, short val) {
878     if (bytes.length - offset < SIZEOF_SHORT) {
879       throw new IllegalArgumentException("Not enough room to put a short at"
880           + " offset " + offset + " in a " + bytes.length + " byte array");
881     }
882     bytes[offset+1] = (byte) val;
883     val >>= 8;
884     bytes[offset] = (byte) val;
885     return offset + SIZEOF_SHORT;
886   }
887 
888   /**
889    * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of
890    * the short will be put into the array. The caller of the API need to make sure they will not
891    * loose the value by doing so. This is useful to store an unsigned short which is represented as
892    * int in other parts.
893    * @param bytes the byte array
894    * @param offset position in the array
895    * @param val value to write out
896    * @return incremented offset
897    * @throws IllegalArgumentException if the byte array given doesn't have
898    * enough room at the offset specified.
899    */
900   public static int putAsShort(byte[] bytes, int offset, int val) {
901     if (bytes.length - offset < SIZEOF_SHORT) {
902       throw new IllegalArgumentException("Not enough room to put a short at"
903           + " offset " + offset + " in a " + bytes.length + " byte array");
904     }
905     bytes[offset+1] = (byte) val;
906     val >>= 8;
907     bytes[offset] = (byte) val;
908     return offset + SIZEOF_SHORT;
909   }
910 
911   /**
912    * Convert a BigDecimal value to a byte array
913    *
914    * @param val
915    * @return the byte array
916    */
917   public static byte[] toBytes(BigDecimal val) {
918     byte[] valueBytes = val.unscaledValue().toByteArray();
919     byte[] result = new byte[valueBytes.length + SIZEOF_INT];
920     int offset = putInt(result, 0, val.scale());
921     putBytes(result, offset, valueBytes, 0, valueBytes.length);
922     return result;
923   }
924 
925 
926   /**
927    * Converts a byte array to a BigDecimal
928    *
929    * @param bytes
930    * @return the char value
931    */
932   public static BigDecimal toBigDecimal(byte[] bytes) {
933     return toBigDecimal(bytes, 0, bytes.length);
934   }
935 
936   /**
937    * Converts a byte array to a BigDecimal value
938    *
939    * @param bytes
940    * @param offset
941    * @param length
942    * @return the char value
943    */
944   public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) {
945     if (bytes == null || length < SIZEOF_INT + 1 ||
946       (offset + length > bytes.length)) {
947       return null;
948     }
949 
950     int scale = toInt(bytes, offset);
951     byte[] tcBytes = new byte[length - SIZEOF_INT];
952     System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT);
953     return new BigDecimal(new BigInteger(tcBytes), scale);
954   }
955 
956   /**
957    * Put a BigDecimal value out to the specified byte array position.
958    *
959    * @param bytes  the byte array
960    * @param offset position in the array
961    * @param val    BigDecimal to write out
962    * @return incremented offset
963    */
964   public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) {
965     if (bytes == null) {
966       return offset;
967     }
968 
969     byte[] valueBytes = val.unscaledValue().toByteArray();
970     byte[] result = new byte[valueBytes.length + SIZEOF_INT];
971     offset = putInt(result, offset, val.scale());
972     return putBytes(result, offset, valueBytes, 0, valueBytes.length);
973   }
974 
975   /**
976    * @param vint Integer to make a vint of.
977    * @return Vint as bytes array.
978    */
979   public static byte [] vintToBytes(final long vint) {
980     long i = vint;
981     int size = WritableUtils.getVIntSize(i);
982     byte [] result = new byte[size];
983     int offset = 0;
984     if (i >= -112 && i <= 127) {
985       result[offset] = (byte) i;
986       return result;
987     }
988 
989     int len = -112;
990     if (i < 0) {
991       i ^= -1L; // take one's complement'
992       len = -120;
993     }
994 
995     long tmp = i;
996     while (tmp != 0) {
997       tmp = tmp >> 8;
998       len--;
999     }
1000 
1001     result[offset++] = (byte) len;
1002 
1003     len = (len < -120) ? -(len + 120) : -(len + 112);
1004 
1005     for (int idx = len; idx != 0; idx--) {
1006       int shiftbits = (idx - 1) * 8;
1007       long mask = 0xFFL << shiftbits;
1008       result[offset++] = (byte)((i & mask) >> shiftbits);
1009     }
1010     return result;
1011   }
1012 
1013   /**
1014    * @param buffer buffer to convert
1015    * @return vint bytes as an integer.
1016    */
1017   public static long bytesToVint(final byte [] buffer) {
1018     int offset = 0;
1019     byte firstByte = buffer[offset++];
1020     int len = WritableUtils.decodeVIntSize(firstByte);
1021     if (len == 1) {
1022       return firstByte;
1023     }
1024     long i = 0;
1025     for (int idx = 0; idx < len-1; idx++) {
1026       byte b = buffer[offset++];
1027       i = i << 8;
1028       i = i | (b & 0xFF);
1029     }
1030     return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
1031   }
1032 
1033   /**
1034    * Reads a zero-compressed encoded long from input stream and returns it.
1035    * @param buffer Binary array
1036    * @param offset Offset into array at which vint begins.
1037    * @throws java.io.IOException e
1038    * @return deserialized long from stream.
1039    */
1040   public static long readVLong(final byte [] buffer, final int offset)
1041   throws IOException {
1042     byte firstByte = buffer[offset];
1043     int len = WritableUtils.decodeVIntSize(firstByte);
1044     if (len == 1) {
1045       return firstByte;
1046     }
1047     long i = 0;
1048     for (int idx = 0; idx < len-1; idx++) {
1049       byte b = buffer[offset + 1 + idx];
1050       i = i << 8;
1051       i = i | (b & 0xFF);
1052     }
1053     return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
1054   }
1055 
1056   /**
1057    * @param left left operand
1058    * @param right right operand
1059    * @return 0 if equal, < 0 if left is less than right, etc.
1060    */
1061   public static int compareTo(final byte [] left, final byte [] right) {
1062     return LexicographicalComparerHolder.BEST_COMPARER.
1063       compareTo(left, 0, left.length, right, 0, right.length);
1064   }
1065 
1066   /**
1067    * Lexicographically compare two arrays.
1068    *
1069    * @param buffer1 left operand
1070    * @param buffer2 right operand
1071    * @param offset1 Where to start comparing in the left buffer
1072    * @param offset2 Where to start comparing in the right buffer
1073    * @param length1 How much to compare from the left buffer
1074    * @param length2 How much to compare from the right buffer
1075    * @return 0 if equal, < 0 if left is less than right, etc.
1076    */
1077   public static int compareTo(byte[] buffer1, int offset1, int length1,
1078       byte[] buffer2, int offset2, int length2) {
1079     return LexicographicalComparerHolder.BEST_COMPARER.
1080       compareTo(buffer1, offset1, length1, buffer2, offset2, length2);
1081   }
1082 
  /**
   * Strategy for comparing a region of one buffer against a region of another.
   *
   * @param <T> type of the buffers being compared
   */
  interface Comparer<T> {
    /**
     * @param buffer1 left operand
     * @param offset1 where to start comparing in the left buffer
     * @param length1 how much to compare from the left buffer
     * @param buffer2 right operand
     * @param offset2 where to start comparing in the right buffer
     * @param length2 how much to compare from the right buffer
     * @return the comparison result; the implementations in this file return
     *         0 if equal and a negative value if left sorts before right
     */
    int compareTo(
      T buffer1, int offset1, int length1, T buffer2, int offset2, int length2
    );
  }
1088 
1089   @VisibleForTesting
1090   static Comparer<byte[]> lexicographicalComparerJavaImpl() {
1091     return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
1092   }
1093 
  /**
   * Provides a lexicographical comparer implementation; either a Java
   * implementation or a faster implementation based on {@link Unsafe}.
   *
   * <p>Uses reflection to gracefully fall back to the Java implementation if
   * {@code Unsafe} isn't available.
   */
  @VisibleForTesting
  static class LexicographicalComparerHolder {
    /** Binary name of the nested Unsafe-based comparer, looked up reflectively. */
    static final String UNSAFE_COMPARER_NAME =
        LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";

    /** The comparer chosen once, when this holder class is initialized. */
    static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
    /**
     * Returns the Unsafe-using Comparer, or falls back to the pure-Java
     * implementation if unable to do so.
     */
    static Comparer<byte[]> getBestComparer() {
      try {
        // Loading by name means any failure in UnsafeComparer's static
        // initializer surfaces here, where it can be caught.
        Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);

        // yes, UnsafeComparer does implement Comparer<byte[]>
        @SuppressWarnings("unchecked")
        Comparer<byte[]> comparer =
          (Comparer<byte[]>) theClass.getEnumConstants()[0];
        return comparer;
      } catch (Throwable t) { // ensure we really catch *everything*
        return lexicographicalComparerJavaImpl();
      }
    }

    /** Byte-at-a-time comparer that treats each byte as unsigned. */
    enum PureJavaComparer implements Comparer<byte[]> {
      INSTANCE;

      /**
       * Lexicographically compare two array regions one byte at a time.
       *
       * @return 0 if equal, < 0 if left is less than right, etc.
       */
      @Override
      public int compareTo(byte[] buffer1, int offset1, int length1,
          byte[] buffer2, int offset2, int length2) {
        // Short circuit equal case
        if (buffer1 == buffer2 &&
            offset1 == offset2 &&
            length1 == length2) {
          return 0;
        }
        // Bring WritableComparator code local
        int end1 = offset1 + length1;
        int end2 = offset2 + length2;
        for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
          // Mask to 0..255 so bytes compare as unsigned values.
          int a = (buffer1[i] & 0xff);
          int b = (buffer2[j] & 0xff);
          if (a != b) {
            return a - b;
          }
        }
        // Common prefix is identical: the shorter region sorts first.
        return length1 - length2;
      }
    }

    /** Comparer that reads the arrays eight bytes at a time through Unsafe. */
    @VisibleForTesting
    enum UnsafeComparer implements Comparer<byte[]> {
      INSTANCE;

      static final Unsafe theUnsafe;

      /** The offset to the first element in a byte array. */
      static final int BYTE_ARRAY_BASE_OFFSET;

      static {
        // Fetch the Unsafe singleton from its private static field.
        theUnsafe = (Unsafe) AccessController.doPrivileged(
            new PrivilegedAction<Object>() {
              @Override
              public Object run() {
                try {
                  Field f = Unsafe.class.getDeclaredField("theUnsafe");
                  f.setAccessible(true);
                  return f.get(null);
                } catch (NoSuchFieldException e) {
                  // It doesn't matter what we throw;
                  // it's swallowed in getBestComparer().
                  throw new Error();
                } catch (IllegalAccessException e) {
                  throw new Error();
                }
              }
            });

        BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);

        // sanity check - this should never fail
        if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
          throw new AssertionError();
        }
      }

      /** True when the platform's native byte order is little-endian. */
      static final boolean littleEndian =
        ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);

      /**
       * Returns true if x1 is less than x2, when both values are treated as
       * unsigned.
       */
      static boolean lessThanUnsigned(long x1, long x2) {
        // Adding Long.MIN_VALUE flips the sign bit, so the signed comparison
        // orders the operands as if they were unsigned.
        return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
      }

      /**
       * Lexicographically compare two arrays.
       *
       * @param buffer1 left operand
       * @param buffer2 right operand
       * @param offset1 Where to start comparing in the left buffer
       * @param offset2 Where to start comparing in the right buffer
       * @param length1 How much to compare from the left buffer
       * @param length2 How much to compare from the right buffer
       * @return 0 if equal, < 0 if left is less than right, etc.
       */
      @Override
      public int compareTo(byte[] buffer1, int offset1, int length1,
          byte[] buffer2, int offset2, int length2) {
        // Short circuit equal case
        if (buffer1 == buffer2 &&
            offset1 == offset2 &&
            length1 == length2) {
          return 0;
        }
        int minLength = Math.min(length1, length2);
        int minWords = minLength / SIZEOF_LONG;
        // Offsets as Unsafe expects them: array index plus the array base offset.
        int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
        int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;

        /*
         * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
         * time is no slower than comparing 4 bytes at a time even on 32-bit.
         * On the other hand, it is substantially faster on 64-bit.
         */
        for (int i = 0; i < minWords * SIZEOF_LONG; i += SIZEOF_LONG) {
          long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
          long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
          long diff = lw ^ rw;

          if (diff != 0) {
            // Big-endian: the first array byte is the word's most significant
            // byte, so an unsigned word comparison gives the answer directly.
            if (!littleEndian) {
              return lessThanUnsigned(lw, rw) ? -1 : 1;
            }

            // Little-endian: the first differing array byte is the lowest
            // non-zero byte of diff; n ends up as that byte's bit position.
            // Use binary search
            int n = 0;
            int y;
            int x = (int) diff;
            if (x == 0) {
              // Low 32 bits agree; continue in the high half.
              x = (int) (diff >>> 32);
              n = 32;
            }

            y = x << 16;
            if (y == 0) {
              // Low 16 bits of this half agree.
              n += 16;
            } else {
              x = y;
            }

            y = x << 8;
            if (y == 0) {
              // Low 8 bits of the remaining 16 agree.
              n += 8;
            }
            return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
          }
        }

        // The epilogue to cover the last (minLength % 8) elements.
        for (int i = minWords * SIZEOF_LONG; i < minLength; i++) {
          int a = (buffer1[offset1 + i] & 0xff);
          int b = (buffer2[offset2 + i] & 0xff);
          if (a != b) {
            return a - b;
          }
        }
        // Common prefix is identical: the shorter region sorts first.
        return length1 - length2;
      }
    }
  }
1274 
1275   /**
1276    * @param left left operand
1277    * @param right right operand
1278    * @return True if equal
1279    */
1280   public static boolean equals(final byte [] left, final byte [] right) {
1281     // Could use Arrays.equals?
1282     //noinspection SimplifiableConditionalExpression
1283     if (left == right) return true;
1284     if (left == null || right == null) return false;
1285     if (left.length != right.length) return false;
1286     if (left.length == 0) return true;
1287 
1288     // Since we're often comparing adjacent sorted data,
1289     // it's usual to have equal arrays except for the very last byte
1290     // so check that first
1291     if (left[left.length - 1] != right[right.length - 1]) return false;
1292 
1293     return compareTo(left, right) == 0;
1294   }
1295 
1296   public static boolean equals(final byte[] left, int leftOffset, int leftLen,
1297                                final byte[] right, int rightOffset, int rightLen) {
1298     // short circuit case
1299     if (left == right &&
1300         leftOffset == rightOffset &&
1301         leftLen == rightLen) {
1302       return true;
1303     }
1304     // different lengths fast check
1305     if (leftLen != rightLen) {
1306       return false;
1307     }
1308     if (leftLen == 0) {
1309       return true;
1310     }
1311 
1312     // Since we're often comparing adjacent sorted data,
1313     // it's usual to have equal arrays except for the very last byte
1314     // so check that first
1315     if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) return false;
1316 
1317     return LexicographicalComparerHolder.BEST_COMPARER.
1318       compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0;
1319   }
1320 
1321 
1322   /**
1323    * @param a left operand
1324    * @param buf right operand
1325    * @return True if equal
1326    */
1327   public static boolean equals(byte[] a, ByteBuffer buf) {
1328     if (a == null) return buf == null;
1329     if (buf == null) return false;
1330     if (a.length != buf.remaining()) return false;
1331 
1332     // Thou shalt not modify the original byte buffer in what should be read only operations.
1333     ByteBuffer b = buf.duplicate();
1334     for (byte anA : a) {
1335       if (anA != b.get()) {
1336         return false;
1337       }
1338     }
1339     return true;
1340   }
1341 
1342 
1343   /**
1344    * Return true if the byte array on the right is a prefix of the byte
1345    * array on the left.
1346    */
1347   public static boolean startsWith(byte[] bytes, byte[] prefix) {
1348     return bytes != null && prefix != null &&
1349       bytes.length >= prefix.length &&
1350       LexicographicalComparerHolder.BEST_COMPARER.
1351         compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
1352   }
1353 
1354   /**
1355    * @param b bytes to hash
1356    * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
1357    * passed in array.  This method is what {@link org.apache.hadoop.io.Text} and
1358    * {@link ImmutableBytesWritable} use calculating hash code.
1359    */
1360   public static int hashCode(final byte [] b) {
1361     return hashCode(b, b.length);
1362   }
1363 
1364   /**
1365    * @param b value
1366    * @param length length of the value
1367    * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
1368    * passed in array.  This method is what {@link org.apache.hadoop.io.Text} and
1369    * {@link ImmutableBytesWritable} use calculating hash code.
1370    */
1371   public static int hashCode(final byte [] b, final int length) {
1372     return WritableComparator.hashBytes(b, length);
1373   }
1374 
1375   /**
1376    * @param b bytes to hash
1377    * @return A hash of <code>b</code> as an Integer that can be used as key in
1378    * Maps.
1379    */
1380   public static Integer mapKey(final byte [] b) {
1381     return hashCode(b);
1382   }
1383 
1384   /**
1385    * @param b bytes to hash
1386    * @param length length to hash
1387    * @return A hash of <code>b</code> as an Integer that can be used as key in
1388    * Maps.
1389    */
1390   public static Integer mapKey(final byte [] b, final int length) {
1391     return hashCode(b, length);
1392   }
1393 
1394   /**
1395    * @param a lower half
1396    * @param b upper half
1397    * @return New array that has a in lower half and b in upper half.
1398    */
1399   public static byte [] add(final byte [] a, final byte [] b) {
1400     return add(a, b, EMPTY_BYTE_ARRAY);
1401   }
1402 
1403   /**
1404    * @param a first third
1405    * @param b second third
1406    * @param c third third
1407    * @return New array made from a, b and c
1408    */
1409   public static byte [] add(final byte [] a, final byte [] b, final byte [] c) {
1410     byte [] result = new byte[a.length + b.length + c.length];
1411     System.arraycopy(a, 0, result, 0, a.length);
1412     System.arraycopy(b, 0, result, a.length, b.length);
1413     System.arraycopy(c, 0, result, a.length + b.length, c.length);
1414     return result;
1415   }
1416 
1417   /**
1418    * @param a array
1419    * @param length amount of bytes to grab
1420    * @return First <code>length</code> bytes from <code>a</code>
1421    */
1422   public static byte [] head(final byte [] a, final int length) {
1423     if (a.length < length) {
1424       return null;
1425     }
1426     byte [] result = new byte[length];
1427     System.arraycopy(a, 0, result, 0, length);
1428     return result;
1429   }
1430 
1431   /**
1432    * @param a array
1433    * @param length amount of bytes to snarf
1434    * @return Last <code>length</code> bytes from <code>a</code>
1435    */
1436   public static byte [] tail(final byte [] a, final int length) {
1437     if (a.length < length) {
1438       return null;
1439     }
1440     byte [] result = new byte[length];
1441     System.arraycopy(a, a.length - length, result, 0, length);
1442     return result;
1443   }
1444 
1445   /**
1446    * @param a array
1447    * @param length new array size
1448    * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
1449    */
1450   public static byte [] padHead(final byte [] a, final int length) {
1451     byte [] padding = new byte[length];
1452     for (int i = 0; i < length; i++) {
1453       padding[i] = 0;
1454     }
1455     return add(padding,a);
1456   }
1457 
1458   /**
1459    * @param a array
1460    * @param length new array size
1461    * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
1462    */
1463   public static byte [] padTail(final byte [] a, final int length) {
1464     byte [] padding = new byte[length];
1465     for (int i = 0; i < length; i++) {
1466       padding[i] = 0;
1467     }
1468     return add(a,padding);
1469   }
1470 
1471   /**
1472    * Split passed range.  Expensive operation relatively.  Uses BigInteger math.
1473    * Useful splitting ranges for MapReduce jobs.
1474    * @param a Beginning of range
1475    * @param b End of range
1476    * @param num Number of times to split range.  Pass 1 if you want to split
1477    * the range in two; i.e. one split.
1478    * @return Array of dividing values
1479    */
1480   public static byte [][] split(final byte [] a, final byte [] b, final int num) {
1481     return split(a, b, false, num);
1482   }
1483 
1484   /**
1485    * Split passed range.  Expensive operation relatively.  Uses BigInteger math.
1486    * Useful splitting ranges for MapReduce jobs.
1487    * @param a Beginning of range
1488    * @param b End of range
1489    * @param inclusive Whether the end of range is prefix-inclusive or is
1490    * considered an exclusive boundary.  Automatic splits are generally exclusive
1491    * and manual splits with an explicit range utilize an inclusive end of range.
1492    * @param num Number of times to split range.  Pass 1 if you want to split
1493    * the range in two; i.e. one split.
1494    * @return Array of dividing values
1495    */
1496   public static byte[][] split(final byte[] a, final byte[] b,
1497       boolean inclusive, final int num) {
1498     byte[][] ret = new byte[num + 2][];
1499     int i = 0;
1500     Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num);
1501     if (iter == null)
1502       return null;
1503     for (byte[] elem : iter) {
1504       ret[i++] = elem;
1505     }
1506     return ret;
1507   }
1508 
1509   /**
1510    * Iterate over keys within the passed range, splitting at an [a,b) boundary.
1511    */
1512   public static Iterable<byte[]> iterateOnSplits(final byte[] a,
1513       final byte[] b, final int num)
1514   {
1515     return iterateOnSplits(a, b, false, num);
1516   }
1517 
1518   /**
1519    * Iterate over keys within the passed range.
1520    */
1521   public static Iterable<byte[]> iterateOnSplits(
1522       final byte[] a, final byte[]b, boolean inclusive, final int num)
1523   {
1524     byte [] aPadded;
1525     byte [] bPadded;
1526     if (a.length < b.length) {
1527       aPadded = padTail(a, b.length - a.length);
1528       bPadded = b;
1529     } else if (b.length < a.length) {
1530       aPadded = a;
1531       bPadded = padTail(b, a.length - b.length);
1532     } else {
1533       aPadded = a;
1534       bPadded = b;
1535     }
1536     if (compareTo(aPadded,bPadded) >= 0) {
1537       throw new IllegalArgumentException("b <= a");
1538     }
1539     if (num <= 0) {
1540       throw new IllegalArgumentException("num cannot be <= 0");
1541     }
1542     byte [] prependHeader = {1, 0};
1543     final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
1544     final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
1545     BigInteger diffBI = stopBI.subtract(startBI);
1546     if (inclusive) {
1547       diffBI = diffBI.add(BigInteger.ONE);
1548     }
1549     final BigInteger splitsBI = BigInteger.valueOf(num + 1);
1550     if(diffBI.compareTo(splitsBI) < 0) {
1551       return null;
1552     }
1553     final BigInteger intervalBI;
1554     try {
1555       intervalBI = diffBI.divide(splitsBI);
1556     } catch(Exception e) {
1557       LOG.error("Exception caught during division", e);
1558       return null;
1559     }
1560 
1561     final Iterator<byte[]> iterator = new Iterator<byte[]>() {
1562       private int i = -1;
1563 
1564       @Override
1565       public boolean hasNext() {
1566         return i < num+1;
1567       }
1568 
1569       @Override
1570       public byte[] next() {
1571         i++;
1572         if (i == 0) return a;
1573         if (i == num + 1) return b;
1574 
1575         BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i)));
1576         byte [] padded = curBI.toByteArray();
1577         if (padded[1] == 0)
1578           padded = tail(padded, padded.length - 2);
1579         else
1580           padded = tail(padded, padded.length - 1);
1581         return padded;
1582       }
1583 
1584       @Override
1585       public void remove() {
1586         throw new UnsupportedOperationException();
1587       }
1588 
1589     };
1590 
1591     return new Iterable<byte[]>() {
1592       @Override
1593       public Iterator<byte[]> iterator() {
1594         return iterator;
1595       }
1596     };
1597   }
1598 
1599   /**
1600    * @param bytes array to hash
1601    * @param offset offset to start from
1602    * @param length length to hash
1603    * */
1604   public static int hashCode(byte[] bytes, int offset, int length) {
1605     int hash = 1;
1606     for (int i = offset; i < offset + length; i++)
1607       hash = (31 * hash) + (int) bytes[i];
1608     return hash;
1609   }
1610 
1611   /**
1612    * @param t operands
1613    * @return Array of byte arrays made from passed array of Text
1614    */
1615   public static byte [][] toByteArrays(final String [] t) {
1616     byte [][] result = new byte[t.length][];
1617     for (int i = 0; i < t.length; i++) {
1618       result[i] = Bytes.toBytes(t[i]);
1619     }
1620     return result;
1621   }
1622 
1623   /**
1624    * @param column operand
1625    * @return A byte array of a byte array where first and only entry is
1626    * <code>column</code>
1627    */
1628   public static byte [][] toByteArrays(final String column) {
1629     return toByteArrays(toBytes(column));
1630   }
1631 
1632   /**
1633    * @param column operand
1634    * @return A byte array of a byte array where first and only entry is
1635    * <code>column</code>
1636    */
1637   public static byte [][] toByteArrays(final byte [] column) {
1638     byte [][] result = new byte[1][];
1639     result[0] = column;
1640     return result;
1641   }
1642 
1643   /**
1644    * Binary search for keys in indexes.
1645    *
1646    * @param arr array of byte arrays to search for
1647    * @param key the key you want to find
1648    * @param offset the offset in the key you want to find
1649    * @param length the length of the key
1650    * @param comparator a comparator to compare.
1651    * @return zero-based index of the key, if the key is present in the array.
1652    *         Otherwise, a value -(i + 1) such that the key is between arr[i -
1653    *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
1654    *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
1655    *         means that this function can return 2N + 1 different values
1656    *         ranging from -(N + 1) to N - 1.
1657    */
1658   public static int binarySearch(byte [][]arr, byte []key, int offset,
1659       int length, RawComparator<?> comparator) {
1660     int low = 0;
1661     int high = arr.length - 1;
1662 
1663     while (low <= high) {
1664       int mid = (low+high) >>> 1;
1665       // we have to compare in this order, because the comparator order
1666       // has special logic when the 'left side' is a special key.
1667       int cmp = comparator.compare(key, offset, length,
1668           arr[mid], 0, arr[mid].length);
1669       // key lives above the midpoint
1670       if (cmp > 0)
1671         low = mid + 1;
1672       // key lives below the midpoint
1673       else if (cmp < 0)
1674         high = mid - 1;
1675       // BAM. how often does this really happen?
1676       else
1677         return mid;
1678     }
1679     return - (low+1);
1680   }
1681 
1682   /**
1683    * Bytewise binary increment/deincrement of long contained in byte array
1684    * on given amount.
1685    *
1686    * @param value - array of bytes containing long (length <= SIZEOF_LONG)
1687    * @param amount value will be incremented on (deincremented if negative)
1688    * @return array of bytes containing incremented long (length == SIZEOF_LONG)
1689    */
1690   public static byte [] incrementBytes(byte[] value, long amount)
1691   {
1692     byte[] val = value;
1693     if (val.length < SIZEOF_LONG) {
1694       // Hopefully this doesn't happen too often.
1695       byte [] newvalue;
1696       if (val[0] < 0) {
1697         newvalue = new byte[]{-1, -1, -1, -1, -1, -1, -1, -1};
1698       } else {
1699         newvalue = new byte[SIZEOF_LONG];
1700       }
1701       System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
1702         val.length);
1703       val = newvalue;
1704     } else if (val.length > SIZEOF_LONG) {
1705       throw new IllegalArgumentException("Increment Bytes - value too big: " +
1706         val.length);
1707     }
1708     if(amount == 0) return val;
1709     if(val[0] < 0){
1710       return binaryIncrementNeg(val, amount);
1711     }
1712     return binaryIncrementPos(val, amount);
1713   }
1714 
1715   /* increment/deincrement for positive value */
1716   private static byte [] binaryIncrementPos(byte [] value, long amount) {
1717     long amo = amount;
1718     int sign = 1;
1719     if (amount < 0) {
1720       amo = -amount;
1721       sign = -1;
1722     }
1723     for(int i=0;i<value.length;i++) {
1724       int cur = ((int)amo % 256) * sign;
1725       amo = (amo >> 8);
1726       int val = value[value.length-i-1] & 0x0ff;
1727       int total = val + cur;
1728       if(total > 255) {
1729         amo += sign;
1730         total %= 256;
1731       } else if (total < 0) {
1732         amo -= sign;
1733       }
1734       value[value.length-i-1] = (byte)total;
1735       if (amo == 0) return value;
1736     }
1737     return value;
1738   }
1739 
1740   /* increment/deincrement for negative value */
1741   private static byte [] binaryIncrementNeg(byte [] value, long amount) {
1742     long amo = amount;
1743     int sign = 1;
1744     if (amount < 0) {
1745       amo = -amount;
1746       sign = -1;
1747     }
1748     for(int i=0;i<value.length;i++) {
1749       int cur = ((int)amo % 256) * sign;
1750       amo = (amo >> 8);
1751       int val = ((~value[value.length-i-1]) & 0x0ff) + 1;
1752       int total = cur - val;
1753       if(total >= 0) {
1754         amo += sign;
1755       } else if (total < -256) {
1756         amo -= sign;
1757         total %= 256;
1758       }
1759       value[value.length-i-1] = (byte)total;
1760       if (amo == 0) return value;
1761     }
1762     return value;
1763   }
1764 
1765   /**
1766    * Writes a string as a fixed-size field, padded with zeros.
1767    */
1768   public static void writeStringFixedSize(final DataOutput out, String s,
1769       int size) throws IOException {
1770     byte[] b = toBytes(s);
1771     if (b.length > size) {
1772       throw new IOException("Trying to write " + b.length + " bytes (" +
1773           toStringBinary(b) + ") into a field of length " + size);
1774     }
1775 
1776     out.writeBytes(s);
1777     for (int i = 0; i < size - s.length(); ++i)
1778       out.writeByte(0);
1779   }
1780 
1781   /**
1782    * Reads a fixed-size field and interprets it as a string padded with zeros.
1783    */
1784   public static String readStringFixedSize(final DataInput in, int size)
1785       throws IOException {
1786     byte[] b = new byte[size];
1787     in.readFully(b);
1788     int n = b.length;
1789     while (n > 0 && b[n - 1] == 0)
1790       --n;
1791 
1792     return toString(b, 0, n);
1793   }
1794 
1795   /**
1796    * Copy the byte array given in parameter and return an instance
1797    * of a new byte array with the same length and the same content.
1798    * @param bytes the byte array to duplicate
1799    * @return a copy of the given byte array
1800    */
1801   public static byte [] copy(byte [] bytes) {
1802     if (bytes == null) return null;
1803     byte [] result = new byte[bytes.length];
1804     System.arraycopy(bytes, 0, result, 0, bytes.length);
1805     return result;
1806   }
1807 
1808   /**
1809    * Copy the byte array given in parameter and return an instance
1810    * of a new byte array with the same length and the same content.
1811    * @param bytes the byte array to copy from
1812    * @return a copy of the given designated byte array
1813    * @param offset
1814    * @param length
1815    */
1816   public static byte [] copy(byte [] bytes, final int offset, final int length) {
1817     if (bytes == null) return null;
1818     byte [] result = new byte[length];
1819     System.arraycopy(bytes, offset, result, 0, length);
1820     return result;
1821   }
1822 
1823   /**
1824    * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from
1825    * somewhere. (mcorgan)
1826    * @param a Array to search. Entries must be sorted and unique.
1827    * @param fromIndex First index inclusive of "a" to include in the search.
1828    * @param toIndex Last index exclusive of "a" to include in the search.
1829    * @param key The byte to search for.
1830    * @return The index of key if found. If not found, return -(index + 1), where negative indicates
1831    *         "not found" and the "index + 1" handles the "-0" case.
1832    */
1833   public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) {
1834     int unsignedKey = key & 0xff;
1835     int low = fromIndex;
1836     int high = toIndex - 1;
1837 
1838     while (low <= high) {
1839       int mid = (low + high) >>> 1;
1840       int midVal = a[mid] & 0xff;
1841 
1842       if (midVal < unsignedKey) {
1843         low = mid + 1;
1844       } else if (midVal > unsignedKey) {
1845         high = mid - 1;
1846       } else {
1847         return mid; // key found
1848       }
1849     }
1850     return -(low + 1); // key not found.
1851   }
1852 
1853   /**
1854    * Treat the byte[] as an unsigned series of bytes, most significant bits first.  Start by adding
1855    * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes.
1856    *
1857    * @param input The byte[] to increment.
1858    * @return The incremented copy of "in".  May be same length or 1 byte longer.
1859    */
1860   public static byte[] unsignedCopyAndIncrement(final byte[] input) {
1861     byte[] copy = copy(input);
1862     if (copy == null) {
1863       throw new IllegalArgumentException("cannot increment null array");
1864     }
1865     for (int i = copy.length - 1; i >= 0; --i) {
1866       if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum
1867         copy[i] = 0;
1868       } else {
1869         ++copy[i];
1870         return copy;
1871       }
1872     }
1873     // we maxed out the array
1874     byte[] out = new byte[copy.length + 1];
1875     out[0] = 1;
1876     System.arraycopy(copy, 0, out, 1, copy.length);
1877     return out;
1878   }
1879 
1880   public static boolean equals(List<byte[]> a, List<byte[]> b) {
1881     if (a == null) {
1882       if (b == null) {
1883         return true;
1884       }
1885       return false;
1886     }
1887     if (b == null) {
1888       return false;
1889     }
1890     if (a.size() != b.size()) {
1891       return false;
1892     }
1893     for (int i = 0; i < a.size(); ++i) {
1894       if (!Bytes.equals(a.get(i), b.get(i))) {
1895         return false;
1896       }
1897     }
1898     return true;
1899   }
1900 
1901   public static boolean isSorted(Collection<byte[]> arrays) {
1902     byte[] previous = new byte[0];
1903     for (byte[] array : IterableUtils.nullSafe(arrays)) {
1904       if (Bytes.compareTo(previous, array) > 0) {
1905         return false;
1906       }
1907       previous = array;
1908     }
1909     return true;
1910   }
1911 
1912   public static List<byte[]> getUtf8ByteArrays(List<String> strings) {
1913     List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(strings));
1914     for (String s : IterableUtils.nullSafe(strings)) {
1915       byteArrays.add(Bytes.toBytes(s));
1916     }
1917     return byteArrays;
1918   }
1919 
1920   /**
1921    * Returns the index of the first appearance of the value {@code target} in
1922    * {@code array}.
1923    *
1924    * @param array an array of {@code byte} values, possibly empty
1925    * @param target a primitive {@code byte} value
1926    * @return the least index {@code i} for which {@code array[i] == target}, or
1927    *     {@code -1} if no such index exists.
1928    */
1929   public static int indexOf(byte[] array, byte target) {
1930     for (int i = 0; i < array.length; i++) {
1931       if (array[i] == target) {
1932         return i;
1933       }
1934     }
1935     return -1;
1936   }
1937 
1938   /**
1939    * Returns the start position of the first occurrence of the specified {@code
1940    * target} within {@code array}, or {@code -1} if there is no such occurrence.
1941    *
1942    * <p>More formally, returns the lowest index {@code i} such that {@code
1943    * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly
1944    * the same elements as {@code target}.
1945    *
1946    * @param array the array to search for the sequence {@code target}
1947    * @param target the array to search for as a sub-sequence of {@code array}
1948    */
1949   public static int indexOf(byte[] array, byte[] target) {
1950     checkNotNull(array, "array");
1951     checkNotNull(target, "target");
1952     if (target.length == 0) {
1953       return 0;
1954     }
1955 
1956     outer:
1957     for (int i = 0; i < array.length - target.length + 1; i++) {
1958       for (int j = 0; j < target.length; j++) {
1959         if (array[i + j] != target[j]) {
1960           continue outer;
1961         }
1962       }
1963       return i;
1964     }
1965     return -1;
1966   }
1967 
1968   /**
1969    * @param array an array of {@code byte} values, possibly empty
1970    * @param target a primitive {@code byte} value
1971    * @return {@code true} if {@code target} is present as an element anywhere in {@code array}.
1972    */
1973   public static boolean contains(byte[] array, byte target) {
1974     return indexOf(array, target) > -1;
1975   }
1976 
1977   /**
1978    * @param array an array of {@code byte} values, possibly empty
1979    * @param target an array of {@code byte}
1980    * @return {@code true} if {@code target} is present anywhere in {@code array}
1981    */
1982   public static boolean contains(byte[] array, byte[] target) {
1983     return indexOf(array, target) > -1;
1984   }
1985 
1986   /**
1987    * Fill given array with zeros.
1988    * @param b array which needs to be filled with zeros
1989    */
1990   public static void zero(byte[] b) {
1991     zero(b, 0, b.length);
1992   }
1993 
1994   /**
1995    * Fill given array with zeros at the specified position.
1996    * @param b
1997    * @param offset
1998    * @param length
1999    */
2000   public static void zero(byte[] b, int offset, int length) {
2001     checkPositionIndex(offset, b.length, "offset");
2002     checkArgument(length > 0, "length must be greater than 0");
2003     checkPositionIndex(offset + length, b.length, "offset + length");
2004     Arrays.fill(b, offset, offset + length, (byte) 0);
2005   }
2006 
2007   private static final SecureRandom RNG = new SecureRandom();
2008 
2009   /**
2010    * Fill given array with random bytes.
2011    * @param b array which needs to be filled with random bytes
2012    */
2013   public static void random(byte[] b) {
2014     RNG.nextBytes(b);
2015   }
2016 
2017   /**
2018    * Fill given array with random bytes at the specified position.
2019    * @param b
2020    * @param offset
2021    * @param length
2022    */
2023   public static void random(byte[] b, int offset, int length) {
2024     checkPositionIndex(offset, b.length, "offset");
2025     checkArgument(length > 0, "length must be greater than 0");
2026     checkPositionIndex(offset + length, b.length, "offset + length");
2027     byte[] buf = new byte[length];
2028     RNG.nextBytes(buf);
2029     System.arraycopy(buf, 0, b, offset, length);
2030   }
2031 
2032   /**
2033    * Create a max byte array with the specified max byte count
2034    * @param maxByteCount the length of returned byte array
2035    * @return the created max byte array
2036    */
2037   public static byte[] createMaxByteArray(int maxByteCount) {
2038     byte[] maxByteArray = new byte[maxByteCount];
2039     for (int i = 0; i < maxByteArray.length; i++) {
2040       maxByteArray[i] = (byte) 0xff;
2041     }
2042     return maxByteArray;
2043   }
2044 
2045   /**
2046    * Create a byte array which is multiple given bytes
2047    * @param srcBytes
2048    * @param multiNum
2049    * @return byte array
2050    */
2051   public static byte[] multiple(byte[] srcBytes, int multiNum) {
2052     if (multiNum <= 0) {
2053       return new byte[0];
2054     }
2055     byte[] result = new byte[srcBytes.length * multiNum];
2056     for (int i = 0; i < multiNum; i++) {
2057       System.arraycopy(srcBytes, 0, result, i * srcBytes.length,
2058         srcBytes.length);
2059     }
2060     return result;
2061   }
2062   
2063   /**
2064    * Convert a byte array into a hex string
2065    * @param b
2066    */
2067   public static String toHex(byte[] b) {
2068     checkArgument(b.length > 0, "length must be greater than 0");
2069     return String.format("%x", new BigInteger(1, b));
2070   }
2071 
2072   /**
2073    * Create a byte array from a string of hash digits. The length of the
2074    * string must be a multiple of 2
2075    * @param hex
2076    */
2077   public static byte[] fromHex(String hex) {
2078     checkArgument(hex.length() > 0, "length must be greater than 0");
2079     checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2");
2080     // Make sure letters are upper case
2081     hex = hex.toUpperCase();
2082     byte[] b = new byte[hex.length() / 2];
2083     for (int i = 0; i < b.length; i++) {
2084       b[i] = (byte)((toBinaryFromHex((byte)hex.charAt(2 * i)) << 4) +
2085         toBinaryFromHex((byte)hex.charAt((2 * i + 1))));
2086     }
2087     return b;
2088   }
2089 
2090 }