View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.wal;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.EOFException;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.HashMap;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.NavigableMap;
31  import java.util.TreeMap;
32  import java.util.UUID;
33  
34  import org.apache.hadoop.hbase.util.ByteStringer;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.TableName;
41  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
42  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FamilyScope;
43  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.ScopeType;
44  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALKey;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.io.WritableComparable;
47  import org.apache.hadoop.io.WritableUtils;
48  
49  import com.google.common.annotations.VisibleForTesting;
50  import com.google.protobuf.ByteString;
51  
52  /**
53   * A Key for an entry in the change log.
54   *
55   * The log intermingles edits to many tables and rows, so each log entry
56   * identifies the appropriate table and row.  Within a table and row, they're
57   * also sorted.
58   *
59   * <p>Some Transactional edits (START, COMMIT, ABORT) will not have an
60   * associated row.
61   */
62  // TODO: Key and WALEdit are never used separately, or in one-to-many relation, for practical
63  //       purposes. They need to be merged into HLogEntry.
64  @InterfaceAudience.Private
65  public class HLogKey implements WritableComparable<HLogKey> {
  /** Logger for this class; used by the Writable (de)serialization paths to report legacy usage. */
  public static final Log LOG = LogFactory.getLog(HLogKey.class);
67  
68    // should be < 0 (@see #readFields(DataInput))
69    // version 2 supports HLog compression
70    enum Version {
71      UNVERSIONED(0),
72      // Initial number we put on HLogKey when we introduced versioning.
73      INITIAL(-1),
74      // Version -2 introduced a dictionary compression facility.  Only this
75      // dictionary-based compression is available in version -2.
76      COMPRESSED(-2);
77  
78      final int code;
79      static final Version[] byCode;
80      static {
81        byCode = Version.values();
82        for (int i = 0; i < byCode.length; i++) {
83          if (byCode[i].code != -1 * i) {
84            throw new AssertionError("Values in this enum should be descending by one");
85          }
86        }
87      }
88  
89      Version(int code) {
90        this.code = code;
91      }
92  
93      boolean atLeast(Version other) {
94        return code <= other.code;
95      }
96  
97      static Version fromCode(int code) {
98        return byCode[code * -1];
99      }
100   }
101 
102   /*
103    * This is used for reading the log entries created by the previous releases
104    * (0.94.11) which write the clusters information to the scopes of WALEdit.
105    */
106   private static final String PREFIX_CLUSTER_KEY = ".";
107 
108 
109   private static final Version VERSION = Version.COMPRESSED;
110 
111   //  The encoded region name.
112   private byte [] encodedRegionName;
113   private TableName tablename;
114   private long logSeqNum;
115   // Time at which this edit was written.
116   private long writeTime;
117 
118   // The first element in the list is the cluster id on which the change has originated
119   private List<UUID> clusterIds;
120 
121   private NavigableMap<byte[], Integer> scopes;
122 
123   private long nonceGroup = HConstants.NO_NONCE;
124   private long nonce = HConstants.NO_NONCE;
125 
126   private CompressionContext compressionContext;
127 
128   public HLogKey() {
129     init(null, null, 0L, HConstants.LATEST_TIMESTAMP,
130         new ArrayList<UUID>(), HConstants.NO_NONCE, HConstants.NO_NONCE);
131   }
132 
133   @VisibleForTesting
134   public HLogKey(final byte[] encodedRegionName, final TableName tablename, long logSeqNum,
135       final long now, UUID clusterId) {
136     List<UUID> clusterIds = new ArrayList<UUID>();
137     clusterIds.add(clusterId);
138     init(encodedRegionName, tablename, logSeqNum, now, clusterIds,
139         HConstants.NO_NONCE, HConstants.NO_NONCE);
140   }
141 
142   /**
143    * Create the log key for writing to somewhere.
144    * We maintain the tablename mainly for debugging purposes.
145    * A regionName is always a sub-table object.
146    *
147    * @param encodedRegionName Encoded name of the region as returned by
148    * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
149    * @param tablename   - name of table
150    * @param logSeqNum   - log sequence number
151    * @param now Time at which this edit was written.
152    * @param clusterIds the clusters that have consumed the change(used in Replication)
153    */
154   public HLogKey(final byte [] encodedRegionName, final TableName tablename,
155       long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
156     init(encodedRegionName, tablename, logSeqNum, now, clusterIds, nonceGroup, nonce);
157   }
158 
159   protected void init(final byte [] encodedRegionName, final TableName tablename,
160       long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
161     this.logSeqNum = logSeqNum;
162     this.writeTime = now;
163     this.clusterIds = clusterIds;
164     this.encodedRegionName = encodedRegionName;
165     this.tablename = tablename;
166     this.nonceGroup = nonceGroup;
167     this.nonce = nonce;
168   }
169 
170   /**
171    * @param compressionContext Compression context to use
172    */
173   public void setCompressionContext(CompressionContext compressionContext) {
174     this.compressionContext = compressionContext;
175   }
176 
177   /** @return encoded region name */
178   public byte [] getEncodedRegionName() {
179     return encodedRegionName;
180   }
181 
182   /** @return table name */
183   public TableName getTablename() {
184     return tablename;
185   }
186 
187   /** @return log sequence number */
188   public long getLogSeqNum() {
189     return this.logSeqNum;
190   }
191 
192   /**
193    * @return the write time
194    */
195   public long getWriteTime() {
196     return this.writeTime;
197   }
198 
199   public NavigableMap<byte[], Integer> getScopes() {
200     return scopes;
201   }
202 
203   /** @return The nonce group */
204   public long getNonceGroup() {
205     return nonceGroup;
206   }
207 
208   /** @return The nonce */
209   public long getNonce() {
210     return nonce;
211   }
212 
213   public void setScopes(NavigableMap<byte[], Integer> scopes) {
214     this.scopes = scopes;
215   }
216 
217   public void readOlderScopes(NavigableMap<byte[], Integer> scopes) {
218     if (scopes != null) {
219       Iterator<Map.Entry<byte[], Integer>> iterator = scopes.entrySet()
220           .iterator();
221       while (iterator.hasNext()) {
222         Map.Entry<byte[], Integer> scope = iterator.next();
223         String key = Bytes.toString(scope.getKey());
224         if (key.startsWith(PREFIX_CLUSTER_KEY)) {
225           addClusterId(UUID.fromString(key.substring(PREFIX_CLUSTER_KEY
226               .length())));
227           iterator.remove();
228         }
229       }
230       if (scopes.size() > 0) {
231         this.scopes = scopes;
232       }
233     }
234   }
235 
236   /**
237    * Marks that the cluster with the given clusterId has consumed the change
238    */
239   public void addClusterId(UUID clusterId) {
240     if (!clusterIds.contains(clusterId)) {
241       clusterIds.add(clusterId);
242     }
243   }
244 
245   /**
246    * @return the set of cluster Ids that have consumed the change
247    */
248   public List<UUID> getClusterIds() {
249     return clusterIds;
250   }
251 
252   /**
253    * @return the cluster id on which the change has originated. It there is no such cluster, it
254    *         returns DEFAULT_CLUSTER_ID (cases where replication is not enabled)
255    */
256   public UUID getOriginatingClusterId(){
257     return clusterIds.isEmpty() ? HConstants.DEFAULT_CLUSTER_ID : clusterIds.get(0);
258   }
259 
260   @Override
261   public String toString() {
262     return tablename + "/" + Bytes.toString(encodedRegionName) + "/" +
263       logSeqNum;
264   }
265 
266   /**
267    * Produces a string map for this key. Useful for programmatic use and
268    * manipulation of the data stored in an HLogKey, for example, printing
269    * as JSON.
270    *
271    * @return a Map containing data from this key
272    */
273   public Map<String, Object> toStringMap() {
274     Map<String, Object> stringMap = new HashMap<String, Object>();
275     stringMap.put("table", tablename);
276     stringMap.put("region", Bytes.toStringBinary(encodedRegionName));
277     stringMap.put("sequence", logSeqNum);
278     return stringMap;
279   }
280 
281   @Override
282   public boolean equals(Object obj) {
283     if (this == obj) {
284       return true;
285     }
286     if (obj == null || getClass() != obj.getClass()) {
287       return false;
288     }
289     return compareTo((HLogKey)obj) == 0;
290   }
291 
292   @Override
293   public int hashCode() {
294     int result = Bytes.hashCode(this.encodedRegionName);
295     result ^= this.logSeqNum;
296     result ^= this.writeTime;
297     return result;
298   }
299 
300   public int compareTo(HLogKey o) {
301     int result = Bytes.compareTo(this.encodedRegionName, o.encodedRegionName);
302     if (result == 0) {
303       if (this.logSeqNum < o.logSeqNum) {
304         result = -1;
305       } else if (this.logSeqNum  > o.logSeqNum ) {
306         result = 1;
307       }
308       if (result == 0) {
309         if (this.writeTime < o.writeTime) {
310           result = -1;
311         } else if (this.writeTime > o.writeTime) {
312           return 1;
313         }
314       }
315     }
316     // why isn't cluster id accounted for?
317     return result;
318   }
319 
320   /**
321    * Drop this instance's tablename byte array and instead
322    * hold a reference to the provided tablename. This is not
323    * meant to be a general purpose setter - it's only used
324    * to collapse references to conserve memory.
325    */
326   void internTableName(TableName tablename) {
327     // We should not use this as a setter - only to swap
328     // in a new reference to the same table name.
329     assert tablename.equals(this.tablename);
330     this.tablename = tablename;
331   }
332 
333   /**
334    * Drop this instance's region name byte array and instead
335    * hold a reference to the provided region name. This is not
336    * meant to be a general purpose setter - it's only used
337    * to collapse references to conserve memory.
338    */
339   void internEncodedRegionName(byte []encodedRegionName) {
340     // We should not use this as a setter - only to swap
341     // in a new reference to the same table name.
342     assert Bytes.equals(this.encodedRegionName, encodedRegionName);
343     this.encodedRegionName = encodedRegionName;
344   }
345 
346   @Override
347   @Deprecated
348   public void write(DataOutput out) throws IOException {
349     LOG.warn("HLogKey is being serialized to writable - only expected in test code");
350     WritableUtils.writeVInt(out, VERSION.code);
351     if (compressionContext == null) {
352       Bytes.writeByteArray(out, this.encodedRegionName);
353       Bytes.writeByteArray(out, this.tablename.getName());
354     } else {
355       Compressor.writeCompressed(this.encodedRegionName, 0,
356           this.encodedRegionName.length, out,
357           compressionContext.regionDict);
358       Compressor.writeCompressed(this.tablename.getName(), 0, this.tablename.getName().length, out,
359           compressionContext.tableDict);
360     }
361     out.writeLong(this.logSeqNum);
362     out.writeLong(this.writeTime);
363     // Don't need to write the clusters information as we are using protobufs from 0.95
364     // Writing only the first clusterId for testing the legacy read
365     Iterator<UUID> iterator = clusterIds.iterator();
366     if(iterator.hasNext()){
367       out.writeBoolean(true);
368       UUID clusterId = iterator.next();
369       out.writeLong(clusterId.getMostSignificantBits());
370       out.writeLong(clusterId.getLeastSignificantBits());
371     } else {
372       out.writeBoolean(false);
373     }
374   }
375 
376   @Override
377   public void readFields(DataInput in) throws IOException {
378     Version version = Version.UNVERSIONED;
379     // HLogKey was not versioned in the beginning.
380     // In order to introduce it now, we make use of the fact
381     // that encodedRegionName was written with Bytes.writeByteArray,
382     // which encodes the array length as a vint which is >= 0.
383     // Hence if the vint is >= 0 we have an old version and the vint
384     // encodes the length of encodedRegionName.
385     // If < 0 we just read the version and the next vint is the length.
386     // @see Bytes#readByteArray(DataInput)
387     this.scopes = null; // writable HLogKey does not contain scopes
388     int len = WritableUtils.readVInt(in);
389     byte[] tablenameBytes = null;
390     if (len < 0) {
391       // what we just read was the version
392       version = Version.fromCode(len);
393       // We only compress V2 of HLogkey.
394       // If compression is on, the length is handled by the dictionary
395       if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
396         len = WritableUtils.readVInt(in);
397       }
398     }
399     if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
400       this.encodedRegionName = new byte[len];
401       in.readFully(this.encodedRegionName);
402       tablenameBytes = Bytes.readByteArray(in);
403     } else {
404       this.encodedRegionName = Compressor.readCompressed(in, compressionContext.regionDict);
405       tablenameBytes = Compressor.readCompressed(in, compressionContext.tableDict);
406     }
407 
408     this.logSeqNum = in.readLong();
409     this.writeTime = in.readLong();
410 
411     this.clusterIds.clear();
412     if (version.atLeast(Version.INITIAL)) {
413       if (in.readBoolean()) {
414         // read the older log
415         // Definitely is the originating cluster
416         clusterIds.add(new UUID(in.readLong(), in.readLong()));
417       }
418     } else {
419       try {
420         // dummy read (former byte cluster id)
421         in.readByte();
422       } catch(EOFException e) {
423         // Means it's a very old key, just continue
424       }
425     }
426     try {
427       this.tablename = TableName.valueOf(tablenameBytes);
428     } catch (IllegalArgumentException iae) {
429       if (Bytes.toString(tablenameBytes).equals(TableName.OLD_META_STR)) {
430         // It is a pre-namespace meta table edit, continue with new format.
431         LOG.info("Got an old .META. edit, continuing with new format ");
432         this.tablename = TableName.META_TABLE_NAME;
433         this.encodedRegionName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
434       } else if (Bytes.toString(tablenameBytes).equals(TableName.OLD_ROOT_STR)) {
435         this.tablename = TableName.OLD_ROOT_TABLE_NAME;
436          throw iae;
437       } else throw iae;
438     }
439     // Do not need to read the clusters information as we are using protobufs from 0.95
440   }
441 
442   public WALKey.Builder getBuilder(
443       WALCellCodec.ByteStringCompressor compressor) throws IOException {
444     WALKey.Builder builder = WALKey.newBuilder();
445     if (compressionContext == null) {
446       builder.setEncodedRegionName(ByteStringer.wrap(this.encodedRegionName));
447       builder.setTableName(ByteStringer.wrap(this.tablename.getName()));
448     } else {
449       builder.setEncodedRegionName(
450           compressor.compress(this.encodedRegionName, compressionContext.regionDict));
451       builder.setTableName(compressor.compress(this.tablename.getName(),
452           compressionContext.tableDict));
453     }
454     builder.setLogSequenceNumber(this.logSeqNum);
455     builder.setWriteTime(writeTime);
456     if (this.nonce != HConstants.NO_NONCE) {
457       builder.setNonce(nonce);
458     }
459     if (this.nonceGroup != HConstants.NO_NONCE) {
460       builder.setNonceGroup(nonceGroup);
461     }
462     HBaseProtos.UUID.Builder uuidBuilder = HBaseProtos.UUID.newBuilder();
463     for (UUID clusterId : clusterIds) {
464       uuidBuilder.setLeastSigBits(clusterId.getLeastSignificantBits());
465       uuidBuilder.setMostSigBits(clusterId.getMostSignificantBits());
466       builder.addClusterIds(uuidBuilder.build());
467     }
468     if (scopes != null) {
469       for (Map.Entry<byte[], Integer> e : scopes.entrySet()) {
470         ByteString family = (compressionContext == null) ? ByteStringer.wrap(e.getKey())
471             : compressor.compress(e.getKey(), compressionContext.familyDict);
472         builder.addScopes(FamilyScope.newBuilder()
473             .setFamily(family).setScopeType(ScopeType.valueOf(e.getValue())));
474       }
475     }
476     return builder;
477   }
478 
479   public void readFieldsFromPb(
480       WALKey walKey, WALCellCodec.ByteStringUncompressor uncompressor) throws IOException {
481     if (this.compressionContext != null) {
482       this.encodedRegionName = uncompressor.uncompress(
483           walKey.getEncodedRegionName(), compressionContext.regionDict);
484       byte[] tablenameBytes = uncompressor.uncompress(
485           walKey.getTableName(), compressionContext.tableDict);
486       this.tablename = TableName.valueOf(tablenameBytes);
487     } else {
488       this.encodedRegionName = walKey.getEncodedRegionName().toByteArray();
489       this.tablename = TableName.valueOf(walKey.getTableName().toByteArray());
490     }
491     clusterIds.clear();
492     if (walKey.hasClusterId()) {
493       //When we are reading the older log (0.95.1 release)
494       //This is definitely the originating cluster
495       clusterIds.add(new UUID(walKey.getClusterId().getMostSigBits(), walKey.getClusterId()
496           .getLeastSigBits()));
497     }
498     for (HBaseProtos.UUID clusterId : walKey.getClusterIdsList()) {
499       clusterIds.add(new UUID(clusterId.getMostSigBits(), clusterId.getLeastSigBits()));
500     }
501     if (walKey.hasNonceGroup()) {
502       this.nonceGroup = walKey.getNonceGroup();
503     }
504     if (walKey.hasNonce()) {
505       this.nonce = walKey.getNonce();
506     }
507     this.scopes = null;
508     if (walKey.getScopesCount() > 0) {
509       this.scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
510       for (FamilyScope scope : walKey.getScopesList()) {
511         byte[] family = (compressionContext == null) ? scope.getFamily().toByteArray() :
512           uncompressor.uncompress(scope.getFamily(), compressionContext.familyDict);
513         this.scopes.put(family, scope.getScopeType().getNumber());
514       }
515     }
516     this.logSeqNum = walKey.getLogSequenceNumber();
517     this.writeTime = walKey.getWriteTime();
518   }
519 }