
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.ArrayList;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.UUID;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FSDataOutputStream;
36  import org.apache.hadoop.fs.FileStatus;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.FileUtil;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.PathFilter;
41  import org.apache.hadoop.fs.permission.FsPermission;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.KeyValue;
47  import org.apache.hadoop.hbase.backup.HFileArchiver;
48  import org.apache.hadoop.hbase.fs.HFileSystem;
49  import org.apache.hadoop.hbase.io.Reference;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.FSHDFSUtils;
52  import org.apache.hadoop.hbase.util.FSUtils;
53  import org.apache.hadoop.hbase.util.Threads;
54  
55  /**
56   * View to an on-disk Region.
57   * Provides the set of methods necessary to interact with the on-disk region data.
58   */
59  @InterfaceAudience.Private
60  public class HRegionFileSystem {
61    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
62  
63    /** Name of the region info file that resides just under the region directory. */
64    public final static String REGION_INFO_FILE = ".regioninfo";
65  
66    /** Temporary subdirectory of the region directory used for merges. */
67    public static final String REGION_MERGES_DIR = ".merges";
68  
69    /** Temporary subdirectory of the region directory used for splits. */
70    public static final String REGION_SPLITS_DIR = ".splits";
71  
72    /** Temporary subdirectory of the region directory used for compaction output. */
73    private static final String REGION_TEMP_DIR = ".tmp";
74  
75    private final HRegionInfo regionInfo;
76    private final Configuration conf;
77    private final Path tableDir;
78    private final FileSystem fs;
79    
80    /**
81     * To handle NameNode (NN) connectivity hiccups, non-idempotent operations need to be retried
82     * at the client level.
83     */
84    private final int hdfsClientRetriesNumber;
85    private final int baseSleepBeforeRetries;
86    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
87    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
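      // Illustrative sketch (not part of the original class): the retry knobs above are plain
      // Configuration integers, so a caller holding a Configuration "conf" could tune them
      // before constructing the region view. The values shown are arbitrary examples.
      //
      //   conf.setInt("hdfs.client.retries.number", 3);          // default 10
      //   conf.setInt("hdfs.client.sleep.before.retries", 200);  // default 1000 ms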
88  
89    /**
90     * Create a view to the on-disk region
91     * @param conf the {@link Configuration} to use
92     * @param fs {@link FileSystem} that contains the region
93     * @param tableDir {@link Path} to where the table is being stored
94     * @param regionInfo {@link HRegionInfo} for region
95     */
96    HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
97        final HRegionInfo regionInfo) {
98      this.fs = fs;
99      this.conf = conf;
100     this.tableDir = tableDir;
101     this.regionInfo = regionInfo;
102     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
103       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
104     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
105       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
106  }
107 
108   /** @return the underlying {@link FileSystem} */
109   public FileSystem getFileSystem() {
110     return this.fs;
111   }
112 
113   /** @return the {@link HRegionInfo} that describes this on-disk region view */
114   public HRegionInfo getRegionInfo() {
115     return this.regionInfo;
116   }
117 
118   /** @return {@link Path} to the table directory under which this region resides. */
119   public Path getTableDir() {
120     return this.tableDir;
121   }
122 
123   /** @return {@link Path} to the region directory. */
124   public Path getRegionDir() {
125     return new Path(this.tableDir, this.regionInfo.getEncodedName());
126   }
127 
128   // ===========================================================================
129   //  Temp Helpers
130   // ===========================================================================
131   /** @return {@link Path} to the region's temp directory, used for file creations */
132   Path getTempDir() {
133     return new Path(getRegionDir(), REGION_TEMP_DIR);
134   }
135 
136   /**
137    * Clean up any temp detritus that may have been left around from previous operation attempts.
138    */
139   void cleanupTempDir() throws IOException {
140     deleteDir(getTempDir());
141   }
142 
143   // ===========================================================================
144   //  Store/StoreFile Helpers
145   // ===========================================================================
146   /**
147    * Returns the directory path of the specified family
148    * @param familyName Column Family Name
149    * @return {@link Path} to the directory of the specified family
150    */
151   Path getStoreDir(final String familyName) {
152     return new Path(this.getRegionDir(), familyName);
153   }
154 
155   /**
156    * Create the store directory for the specified family name
157    * @param familyName Column Family Name
158    * @return {@link Path} to the directory of the specified family
159    * @throws IOException if the directory creation fails.
160    */
161   Path createStoreDir(final String familyName) throws IOException {
162     Path storeDir = getStoreDir(familyName);
163     if(!fs.exists(storeDir) && !createDir(storeDir))
164       throw new IOException("Failed creating "+storeDir);
165     return storeDir;
166   }
167 
168   /**
169    * Returns the store files available for the family.
170    * This method filters out invalid store files.
171    * @param familyName Column Family Name
172    * @return a set of {@link StoreFileInfo} for the specified family.
173    */
174   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
175     return getStoreFiles(Bytes.toString(familyName));
176   }
177 
178   /**
179    * Returns the store files available for the family.
180    * This method filters out invalid store files.
181    * @param familyName Column Family Name
182    * @return a set of {@link StoreFileInfo} for the specified family.
183    */
184   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
185     Path familyDir = getStoreDir(familyName);
186     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
187     if (files == null) return null;
188 
189     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
190     for (FileStatus status: files) {
191       if (!StoreFileInfo.isValid(status)) continue;
192 
193       storeFiles.add(new StoreFileInfo(this.conf, this.fs, status));
194     }
195     return storeFiles;
196   }
197 
198   /**
199    * Return Qualified Path of the specified family/file
200    *
201    * @param familyName Column Family Name
202    * @param fileName File Name
203    * @return The qualified Path for the specified family/file
204    */
205   Path getStoreFilePath(final String familyName, final String fileName) {
206     Path familyDir = getStoreDir(familyName);
207     return new Path(familyDir, fileName).makeQualified(this.fs);
208   }
209 
210   /**
211    * Return the store file information of the specified family/file.
212    *
213    * @param familyName Column Family Name
214    * @param fileName File Name
215    * @return The {@link StoreFileInfo} for the specified family/file
216    */
217   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
218       throws IOException {
219     Path familyDir = getStoreDir(familyName);
220     FileStatus status = fs.getFileStatus(new Path(familyDir, fileName));
221     return new StoreFileInfo(this.conf, this.fs, status);
222   }
223 
224   /**
225    * Returns true if the specified family has reference files
226    * @param familyName Column Family Name
227    * @return true if family contains reference files
228    * @throws IOException
229    */
230   public boolean hasReferences(final String familyName) throws IOException {
231     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
232       new PathFilter () {
233         public boolean accept(Path path) {
234           return StoreFileInfo.isReference(path);
235         }
236       }
237     );
238     return files != null && files.length > 0;
239   }
240 
241   /**
242    * Check whether region has Reference file
243    * @param htd table descriptor of the region
244    * @return true if region has reference file
245    * @throws IOException
246    */
247   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
248     for (HColumnDescriptor family : htd.getFamilies()) {
249       if (hasReferences(family.getNameAsString())) {
250         return true;
251       }
252     }
253     return false;
254   }
255 
256   /**
257    * @return the set of families present on disk
258    * @throws IOException
259    */
260   public Collection<String> getFamilies() throws IOException {
261     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
262     if (fds == null) return null;
263 
264     ArrayList<String> families = new ArrayList<String>(fds.length);
265     for (FileStatus status: fds) {
266       families.add(status.getPath().getName());
267     }
268 
269     return families;
270   }
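      // Illustrative sketch (not part of the original class): walking a region's on-disk layout
      // by combining getFamilies() and getStoreFiles(). "regionFs" is an assumed
      // HRegionFileSystem instance, e.g. obtained from openRegionFromFileSystem() below.
      //
      //   Collection<String> families = regionFs.getFamilies();
      //   if (families != null) {
      //     for (String family : families) {
      //       Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
      //       if (storeFiles == null) continue;  // no store files for this family
      //       for (StoreFileInfo storeFile : storeFiles) {
      //         LOG.debug("family=" + family + " storeFile=" + storeFile);
      //       }
      //     }
      //   }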
271 
272   /**
273    * Remove the region family from disk, archiving the store files.
274    * @param familyName Column Family Name
275    * @throws IOException if an error occurs during the archiving
276    */
277   public void deleteFamily(final String familyName) throws IOException {
278     // archive family store files
279     HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, Bytes.toBytes(familyName));
280 
281     // delete the family folder
282     Path familyDir = getStoreDir(familyName);
283     if(fs.exists(familyDir) && !deleteDir(familyDir))
284       throw new IOException("Could not delete family " + familyName
285           + " from FileSystem for region " + regionInfo.getRegionNameAsString() + "("
286           + regionInfo.getEncodedName() + ")");
287   }
288 
289   /**
290    * Generate a unique file name, used by createTempName() and commitStoreFile()
291    * @param suffix extra information to append to the generated name
292    * @return Unique file name
293    */
294   private static String generateUniqueName(final String suffix) {
295     String name = UUID.randomUUID().toString().replaceAll("-", "");
296     if (suffix != null) name += suffix;
297     return name;
298   }
299 
300   /**
301    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
302    * for safer file creation.
303    * <code>
304    * Path file = fs.createTempName();
305    * ...StoreFile.Writer(file)...
306    * fs.commitStoreFile("family", file);
307    * </code>
308    *
309    * @return Unique {@link Path} of the temporary file
310    */
311   public Path createTempName() {
312     return createTempName(null);
313   }
314 
315   /**
316    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
317    * for safer file creation.
318    * <code>
319    * Path file = fs.createTempName();
320    * ...StoreFile.Writer(file)...
321    * fs.commitStoreFile("family", file);
322    * </code>
323    *
324    * @param suffix extra information to append to the generated name
325    * @return Unique {@link Path} of the temporary file
326    */
327   public Path createTempName(final String suffix) {
328     return new Path(getTempDir(), generateUniqueName(suffix));
329   }
330 
331   /**
332    * Move the file from a build/temp location to the main family store directory.
333    * @param familyName Family that will gain the file
334    * @param buildPath {@link Path} to the file to commit.
335    * @return The new {@link Path} of the committed file
336    * @throws IOException
337    */
338   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
339     return commitStoreFile(familyName, buildPath, -1, false);
340   }
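      // Illustrative sketch (not part of the original class): the create-in-temp-then-commit
      // protocol described in the createTempName() javadoc. "regionFs" is an assumed
      // HRegionFileSystem instance and "f" a hypothetical family name; the writer setup is
      // elided since it lives outside this class.
      //
      //   Path tmpFile = regionFs.createTempName();
      //   // ... open a StoreFile writer on tmpFile and write the new cells ...
      //   Path committedFile = regionFs.commitStoreFile("f", tmpFile);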
341 
342   /**
343    * Move the file from a build/temp location to the main family store directory.
344    * @param familyName Family that will gain the file
345    * @param buildPath {@link Path} to the file to commit.
346    * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
347    * @param generateNewName False if you want to keep the buildPath name
348    * @return The new {@link Path} of the committed file
349    * @throws IOException
350    */
351   private Path commitStoreFile(final String familyName, final Path buildPath,
352       final long seqNum, final boolean generateNewName) throws IOException {
353     Path storeDir = getStoreDir(familyName);
354     if(!fs.exists(storeDir) && !createDir(storeDir))
355       throw new IOException("Failed creating " + storeDir);
356     
357     String name = buildPath.getName();
358     if (generateNewName) {
359       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
360     }
361     Path dstPath = new Path(storeDir, name);
362     if (!fs.exists(buildPath)) {
363       throw new FileNotFoundException(buildPath.toString());
364     }
365     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
366     // buildPath exists, therefore not doing an exists() check.
367     if (!rename(buildPath, dstPath)) {
368       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
369     }
370     return dstPath;
371   }
372 
373 
374   /**
375    * Moves multiple store files to this region's corresponding family store directories.
376    * @param storeFiles list of store files divided by family
377    * @throws IOException
378    */
379   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
380     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
381       String familyName = Bytes.toString(es.getKey());
382       for (StoreFile sf: es.getValue()) {
383         commitStoreFile(familyName, sf.getPath());
384       }
385     }
386   }
387 
388   /**
389    * Archives the specified store file from the specified family.
390    * @param familyName Family that contains the store files
391    * @param filePath {@link Path} to the store file to remove
392    * @throws IOException if the archiving fails
393    */
394   public void removeStoreFile(final String familyName, final Path filePath)
395       throws IOException {
396     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfo,
397         this.tableDir, Bytes.toBytes(familyName), filePath);
398   }
399 
400   /**
401    * Closes and archives the specified store files from the specified family.
402    * @param familyName Family that contains the store files
403    * @param storeFiles set of store files to remove
404    * @throws IOException if the archiving fails
405    */
406   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
407       throws IOException {
408     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfo,
409         this.tableDir, Bytes.toBytes(familyName), storeFiles);
410   }
411 
412   /**
413    * Bulk load: Add a specified store file to the specified family.
414    * If the source file is on the same file-system, it is moved from the
415    * source location to the destination location; otherwise it is copied over.
416    *
417    * @param familyName Family that will gain the file
418    * @param srcPath {@link Path} to the file to import
419    * @param seqNum Bulk Load sequence number
420    * @return The destination {@link Path} of the bulk loaded file
421    * @throws IOException
422    */
423   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
424       throws IOException {
425     // Copy the file if it's on another filesystem
426     FileSystem srcFs = srcPath.getFileSystem(conf);
427     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
428 
429     // We can't compare FileSystem instances as equals() includes UGI instance
430     // as part of the comparison and won't work when doing SecureBulkLoad
431     // TODO deal with viewFS
432     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
433       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
434           "the destination store. Copying file over to destination filesystem.");
435       Path tmpPath = createTempName();
436       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
437       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
438       srcPath = tmpPath;
439     }
440 
441     return commitStoreFile(familyName, srcPath, seqNum, true);
442   }
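      // Illustrative sketch (not part of the original class): bulk loading an hfile that may
      // live on a different filesystem; bulkLoadStoreFile() copies it over when needed and
      // commits it with a _SeqId_ suffix. The path, family name and sequence id are
      // hypothetical.
      //
      //   Path hfileToLoad = new Path("hdfs://other-cluster/staging/f/hfile1");
      //   Path loadedFile = regionFs.bulkLoadStoreFile("f", hfileToLoad, 42L);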
443 
444   // ===========================================================================
445   //  Splits Helpers
446   // ===========================================================================
447   /** @return {@link Path} to the temp directory used during split operations */
448   Path getSplitsDir() {
449     return new Path(getRegionDir(), REGION_SPLITS_DIR);
450   }
451 
452   Path getSplitsDir(final HRegionInfo hri) {
453     return new Path(getSplitsDir(), hri.getEncodedName());
454   }
455 
456   /**
457    * Clean up any split detritus that may have been left around from previous split attempts.
458    */
459   void cleanupSplitsDir() throws IOException {
460     deleteDir(getSplitsDir());
461   }
462 
463   /**
464    * Clean up any split detritus that may have been left around from previous
465    * split attempts.
466    * Call this method on initial region deploy.
467    * @throws IOException
468    */
469   void cleanupAnySplitDetritus() throws IOException {
470     Path splitdir = this.getSplitsDir();
471     if (!fs.exists(splitdir)) return;
472     // Look at the splitdir.  It could have the encoded names of the daughter
473     // regions we tried to make.  See if the daughter regions actually got made
474     // out under the tabledir.  If here under splitdir still, then the split did
475     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
476     // where we successfully created daughter a but regionserver crashed during
477     // the creation of region b.  In this case, there'll be an orphan daughter
478     // dir in the filesystem.  TODO: Fix.
479     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
480     if (daughters != null) {
481       for (FileStatus daughter: daughters) {
482         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
483         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
484           throw new IOException("Failed delete of " + daughterDir);
485         }
486       }
487     }
488     cleanupSplitsDir();
489     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
490   }
491 
492   /**
493    * Remove daughter region
494    * @param regionInfo daughter {@link HRegionInfo}
495    * @throws IOException
496    */
497   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
498     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
499     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
500       throw new IOException("Failed delete of " + regionDir);
501     }
502   }
503 
504   /**
505    * Commit a daughter region, moving it from the split temporary directory
506    * to the proper location in the filesystem.
507    * @param regionInfo daughter {@link HRegionInfo}
508    * @throws IOException
509    */
510   Path commitDaughterRegion(final HRegionInfo regionInfo) throws IOException {
511     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
512     Path daughterTmpDir = this.getSplitsDir(regionInfo);
513     if (fs.exists(daughterTmpDir)) {
514       // Write HRI to a file in case we need to recover hbase:meta
515       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
516       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
517       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
518 
519       // Move the daughter temp dir to the table dir
520       if (!rename(daughterTmpDir, regionDir)) {
521         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
522       }
523     }
524     return regionDir;
525   }
526 
527   /**
528    * Create the region splits directory.
529    */
530   void createSplitsDir() throws IOException {
531     Path splitdir = getSplitsDir();
532     if (fs.exists(splitdir)) {
533       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
534       if (!deleteDir(splitdir)) {
535         throw new IOException("Failed deletion of " + splitdir
536             + " before creating them again.");
537       }
538     }
539     // splitDir doesn't exist now. No need to do an exists() call for it.
540     if (!createDir(splitdir)) {
541       throw new IOException("Failed create of " + splitdir);
542     }
543   }
544 
545   /**
546    * Write out a split reference. Package-local so it doesn't leak out of
547    * the regionserver.
548    * @param hri {@link HRegionInfo} of the destination
549    * @param familyName Column Family Name
550    * @param f File to split.
551    * @param splitRow Split Row
552    * @param top True if we are referring to the top half of the hfile.
553    * @return Path to created reference.
554    * @throws IOException
555    */
556   Path splitStoreFile(final HRegionInfo hri, final String familyName,
557       final StoreFile f, final byte[] splitRow, final boolean top) throws IOException {
558     
559     // Check whether the split row lies in the range of the store file
560     // If it is outside the range, return directly.
561     if (top) {
562       //check if larger than last key.
563       KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
564       byte[] lastKey = f.createReader().getLastKey();      
565       // If lastKey is null, the storefile is empty.
566       if (lastKey == null) return null;
567       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), 
568           splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
569         return null;
570       }
571     } else {
572       //check if smaller than first key
573       KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
574       byte[] firstKey = f.createReader().getFirstKey();
575       // If firstKey is null, the storefile is empty.
576       if (firstKey == null) return null;
577       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), 
578           splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
579         return null;
580       }      
581     }
582  
583     f.getReader().close(true);
584     
585     Path splitDir = new Path(getSplitsDir(hri), familyName);
586     // A reference to the top or bottom half of the store file, depending on the requested side.
587     Reference r =
588       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
589     // Add the referred-to region's name as a dot-separated suffix.
590     // See the REF_NAME_REGEX regex.  The referred-to region's name is
591     // up in the path of the passed in <code>f</code> -- parentdir is family,
592     // then the directory above is the region name.
593     String parentRegionName = regionInfo.getEncodedName();
594     // Write reference with same file id only with the other region name as
595     // suffix and into the new region location (under same family).
596     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
597     return r.write(fs, p);
598   }
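      // Illustrative sketch (not part of the original class): the split-reference workflow using
      // the helpers in this section. "daughterA"/"daughterB" are assumed HRegionInfo instances,
      // "sf" an open StoreFile of hypothetical family "f", and "splitRow" the chosen split row;
      // error handling is elided.
      //
      //   regionFs.createSplitsDir();
      //   regionFs.splitStoreFile(daughterA, "f", sf, splitRow, false);  // bottom-half reference
      //   regionFs.splitStoreFile(daughterB, "f", sf, splitRow, true);   // top-half reference
      //   Path daughterADir = regionFs.commitDaughterRegion(daughterA);
      //   Path daughterBDir = regionFs.commitDaughterRegion(daughterB);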
599 
600   // ===========================================================================
601   //  Merge Helpers
602   // ===========================================================================
603   /** @return {@link Path} to the temp directory used during merge operations */
604   Path getMergesDir() {
605     return new Path(getRegionDir(), REGION_MERGES_DIR);
606   }
607 
608   Path getMergesDir(final HRegionInfo hri) {
609     return new Path(getMergesDir(), hri.getEncodedName());
610   }
611 
612   /**
613    * Clean up any merge detritus that may have been left around from previous merge attempts.
614    */
615   void cleanupMergesDir() throws IOException {
616     deleteDir(getMergesDir());
617   }
618 
619   /**
620    * Remove merged region
621    * @param mergedRegion {@link HRegionInfo}
622    * @throws IOException
623    */
624   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
625     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
626     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
627       throw new IOException("Failed delete of " + regionDir);
628     }
629   }
630 
631   /**
632    * Create the region merges directory.
633    * @throws IOException If merges dir already exists or we fail to create it.
634    * @see HRegionFileSystem#cleanupMergesDir()
635    */
636   void createMergesDir() throws IOException {
637     Path mergesdir = getMergesDir();
638     if (fs.exists(mergesdir)) {
639       LOG.info("The " + mergesdir
640           + " directory exists.  Hence deleting it to recreate it");
641       if (!fs.delete(mergesdir, true)) {
642         throw new IOException("Failed deletion of " + mergesdir
643             + " before creating them again.");
644       }
645     }
646     if (!fs.mkdirs(mergesdir))
647       throw new IOException("Failed create of " + mergesdir);
648   }
649 
650   /**
651    * Write out a merge reference under the given merges directory. Package-local
652    * so it doesn't leak out of the regionserver.
653    * @param mergedRegion {@link HRegionInfo} of the merged region
654    * @param familyName Column Family Name
655    * @param f File to create the reference for.
656    * @param mergedDir merges temporary directory
657    * @return Path to created reference.
658    * @throws IOException
659    */
660   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
661       final StoreFile f, final Path mergedDir)
662       throws IOException {
663     Path referenceDir = new Path(new Path(mergedDir,
664         mergedRegion.getEncodedName()), familyName);
665     // A whole reference to the store file.
666     Reference r = Reference.createTopReference(regionInfo.getStartKey());
667     // Add the referred-to region's name as a dot-separated suffix.
668     // See the REF_NAME_REGEX regex. The referred-to region's name is
669     // up in the path of the passed in <code>f</code> -- parentdir is family,
670     // then the directory above is the region name.
671     String mergingRegionName = regionInfo.getEncodedName();
672     // Write reference with same file id only with the other region name as
673     // suffix and into the new region location (under same family).
674     Path p = new Path(referenceDir, f.getPath().getName() + "."
675         + mergingRegionName);
676     return r.write(fs, p);
677   }
678 
679   /**
680    * Commit a merged region, moving it from the merges temporary directory to
681    * the proper location in the filesystem.
682    * @param mergedRegionInfo merged region {@link HRegionInfo}
683    * @throws IOException 
684    */
685   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
686     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
687     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
688     // Move the tmp dir to the expected location
689     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
690       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
691         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
692             + regionDir);
693       }
694     }
695   }
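      // Illustrative sketch (not part of the original class): the merge-reference workflow using
      // the helpers in this section. "mergedRegion" is the assumed HRegionInfo of the resulting
      // region and "sf" a StoreFile of hypothetical family "f" from one of the merging regions.
      //
      //   regionFs.createMergesDir();
      //   Path mergedDir = regionFs.getMergesDir();
      //   regionFs.mergeStoreFile(mergedRegion, "f", sf, mergedDir);
      //   regionFs.commitMergedRegion(mergedRegion);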
696 
697   // ===========================================================================
698   //  Create/Open/Delete Helpers
699   // ===========================================================================
700   /**
701    * Log the current state of the region
702    * @param LOG log to output information
703    * @throws IOException if an unexpected exception occurs
704    */
705   void logFileSystemState(final Log LOG) throws IOException {
706     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
707   }
708 
709   /**
710    * @param hri
711    * @return Content of the file we write out to the filesystem under a region
712    * @throws IOException
713    */
714   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
715     return hri.toDelimitedByteArray();
716   }
717 
718   /**
719    * Create an {@link HRegionInfo} from the serialized version on disk.
720    * @param fs {@link FileSystem} that contains the Region Info file
721    * @param regionDir {@link Path} to the Region Directory that contains the Info file
722    * @return An {@link HRegionInfo} instance read from the Region Info file.
723    * @throws IOException if an error occurred during file open/read operation.
724    */
725   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
726       throws IOException {
727     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
728     try {
729       return HRegionInfo.parseFrom(in);
730     } finally {
731       in.close();
732     }
733   }
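      // Illustrative sketch (not part of the original class): recovering the HRegionInfo of an
      // arbitrary region directory straight from its .regioninfo file, e.g. when rebuilding
      // hbase:meta. "fs" and "regionDir" are assumed to point at an existing region directory.
      //
      //   HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
      //   LOG.info("Recovered region info: " + hri.getRegionNameAsString());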
734 
735   /**
736    * Write the .regioninfo file on-disk.
737    */
738   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
739       final Path regionInfoFile, final byte[] content) throws IOException {
740     // First check to get the permissions
741     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
742     // Write the RegionInfo file content
743     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
744     try {
745       out.write(content);
746     } finally {
747       out.close();
748     }
749   }
750 
751   /**
752    * Write out an info file under the region directory. Useful for recovering mangled regions.
753    * If the region info file already exists on disk, we exit early.
754    */
755   void checkRegionInfoOnFilesystem() throws IOException {
756     // Compose the content of the file so we can compare to length in filesystem. If not same,
757     // rewrite it (it may have been written in the old format using Writables instead of pb). The
758     // pb version is much shorter -- we write now w/o the toString version -- so checking length
759     // only should be sufficient. I don't want to read the file every time to check if it is pb
760     // serialized.
761     byte[] content = getRegionInfoFileContent(regionInfo);
762     try {
763       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
764 
765       FileStatus status = fs.getFileStatus(regionInfoFile);
766       if (status != null && status.getLen() == content.length) {
767         // Then assume the content good and move on.
768       // NOTE: the length alone is not sufficient to verify that the content matches.
769         return;
770       }
771 
772       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
773       if (!fs.delete(regionInfoFile, false)) {
774         throw new IOException("Unable to remove existing " + regionInfoFile);
775       }
776     } catch (FileNotFoundException e) {
777       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfo.getEncodedName());
778     }
779 
780     // Write HRI to a file in case we need to recover hbase:meta
781     writeRegionInfoOnFilesystem(content, true);
782   }
783 
784   /**
785    * Write out an info file under the region directory. Useful for recovering mangled regions.
786    * @param useTempDir whether or not to use the region .tmp dir for safer file creation.
787    */
788   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
789     byte[] content = getRegionInfoFileContent(regionInfo);
790     writeRegionInfoOnFilesystem(content, useTempDir);
791   }
792 
793   /**
794    * Write out an info file under the region directory. Useful for recovering mangled regions.
795    * @param regionInfoContent serialized version of the {@link HRegionInfo}
796    * @param useTempDir whether or not to use the region .tmp dir for safer file creation.
797    */
798   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
799       final boolean useTempDir) throws IOException {
800     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
801     if (useTempDir) {
802       // Create in tmpDir and then move into place in case we crash after
803       // create but before close. If we don't successfully close the file,
804       // subsequent region reopens will fail below because the create is still
805       // registered in the NN.
806 
807       // And then create the file
808       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
809 
810       // If datanode crashes or if the RS goes down just before the close is called while trying to
811       // close the created regioninfo file in the .tmp directory, then on the next
812       // creation we will get an AlreadyCreatedException.
813       // Hence delete and re-create the file if it exists.
814       if (FSUtils.isExists(fs, tmpPath)) {
815         FSUtils.delete(fs, tmpPath, true);
816       }
817 
818       // Write HRI to a file in case we need to recover hbase:meta
819       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
820 
821       // Move the created file to the original path
822       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
823         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
824       }
825     } else {
826       // Write HRI to a file in case we need to recover hbase:meta
827       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
828     }
829   }
830 
831   /**
832    * Create a new Region on file-system.
833    * @param conf the {@link Configuration} to use
834    * @param fs {@link FileSystem} on which to create the region
835    * @param tableDir {@link Path} to where the table is being stored
836    * @param regionInfo {@link HRegionInfo} for region to be added
837    * @throws IOException if the region creation fails due to a FileSystem exception.
838    */
839   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
840       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
841     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
842     Path regionDir = regionFs.getRegionDir();
843 
844     if (fs.exists(regionDir)) {
845       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
846       throw new IOException("The specified region already exists on disk: " + regionDir);
847     }
848 
849     // Create the region directory
850     if (!createDirOnFileSystem(fs, conf, regionDir)) {
851       LOG.warn("Unable to create the region directory: " + regionDir);
852       throw new IOException("Unable to create region directory: " + regionDir);
853     }
854 
855     // Write HRI to a file in case we need to recover hbase:meta
856     regionFs.writeRegionInfoOnFilesystem(false);
857     return regionFs;
858   }
859 
860   /**
861    * Open Region from file-system.
862    * @param conf the {@link Configuration} to use
863    * @param fs {@link FileSystem} from which to open the region
864    * @param tableDir {@link Path} to where the table is being stored
865    * @param regionInfo {@link HRegionInfo} for region to be added
866    * @param readOnly True if you don't want to edit the region data
867    * @throws IOException if the region opening fails due to a FileSystem exception.
868    */
869   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
870       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
871       throws IOException {
872     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
873     Path regionDir = regionFs.getRegionDir();
874 
875     if (!fs.exists(regionDir)) {
876       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
877       throw new IOException("The specified region do not exists on disk: " + regionDir);
878     }
879 
880     if (!readOnly) {
881       // Cleanup temporary directories
882       regionFs.cleanupTempDir();
883       regionFs.cleanupSplitsDir();
884       regionFs.cleanupMergesDir();
885 
886       // if it doesn't exist, write the HRI to a file in case we need to recover hbase:meta
887       regionFs.checkRegionInfoOnFilesystem();
888     }
889 
890     return regionFs;
891   }
892 
893   /**
894    * Remove the region from the table directory, archiving the region's hfiles.
895    * @param conf the {@link Configuration} to use
896    * @param fs {@link FileSystem} from which to remove the region
897    * @param tableDir {@link Path} to where the table is being stored
898    * @param regionInfo {@link HRegionInfo} for region to be deleted
899    * @throws IOException if the request cannot be completed
900    */
901   public static void deleteRegionFromFileSystem(final Configuration conf,
902       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
903     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
904     Path regionDir = regionFs.getRegionDir();
905 
906     if (!fs.exists(regionDir)) {
907       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
908       return;
909     }
910 
911     if (LOG.isDebugEnabled()) {
912       LOG.debug("DELETING region " + regionDir);
913     }
914 
915     // Archive region
916     Path rootDir = FSUtils.getRootDir(conf);
917     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
918 
919     // Delete empty region dir
920     if (!fs.delete(regionDir, true)) {
921       LOG.warn("Failed delete of " + regionDir);
922     }
923   }
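      // Illustrative sketch (not part of the original class): the create/open/delete lifecycle
      // offered by the static factories above. "conf", "fs", "tableDir" and "hri" are assumed
      // to be supplied by the caller.
      //
      //   HRegionFileSystem created =
      //       HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
      //   HRegionFileSystem opened =
      //       HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, hri, false);
      //   // ... use the region view ...
      //   HRegionFileSystem.deleteRegionFromFileSystem(conf, fs, tableDir, hri);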
924 
925   /**
926    * Creates a directory. Assumes the caller has already checked for the directory's existence.
927    * @param dir
928    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
929    *         whether the directory exists or not, and returns true if it exists.
930    * @throws IOException
931    */
932   boolean createDir(Path dir) throws IOException {
933     int i = 0;
934     IOException lastIOE = null;
935     do {
936       try {
937         return fs.mkdirs(dir);
938       } catch (IOException ioe) {
939         lastIOE = ioe;
940         if (fs.exists(dir)) return true; // directory is present
941         sleepBeforeRetry("Create Directory", i+1);
942       }
943     } while (++i <= hdfsClientRetriesNumber);
944     throw new IOException("Exception in createDir", lastIOE);
945   }
946 
947   /**
948    * Renames a directory. Assumes the caller has already checked for the directory's existence.
949    * @param srcpath
950    * @param dstPath
951    * @return true if rename is successful.
952    * @throws IOException
953    */
954   boolean rename(Path srcpath, Path dstPath) throws IOException {
955     IOException lastIOE = null;
956     int i = 0;
957     do {
958       try {
959         return fs.rename(srcpath, dstPath);
960       } catch (IOException ioe) {
961         lastIOE = ioe;
962         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
963         // dir is not there, retry after some time.
964         sleepBeforeRetry("Rename Directory", i+1);
965       }
966     } while (++i <= hdfsClientRetriesNumber);
967     throw new IOException("Exception in rename", lastIOE);
968   }
969 
970   /**
971    * Deletes a directory. Assumes the caller has already checked for the directory's existence.
972    * @param dir
973    * @return true if the directory is deleted.
974    * @throws IOException
975    */
976   boolean deleteDir(Path dir) throws IOException {
977     IOException lastIOE = null;
978     int i = 0;
979     do {
980       try {
981         return fs.delete(dir, true);
982       } catch (IOException ioe) {
983         lastIOE = ioe;
984         if (!fs.exists(dir)) return true;
985         // dir is there, retry deleting after some time.
986         sleepBeforeRetry("Delete Directory", i+1);
987       }
988     } while (++i <= hdfsClientRetriesNumber);
989     throw new IOException("Exception in DeleteDir", lastIOE);
990   }
991 
992   /**
993    * Sleeping logic; handles the interrupt exception.
994    */
995   private void sleepBeforeRetry(String msg, int sleepMultiplier) {
996     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
997   }
998 
999   /**
1000    * Creates a directory using the given filesystem and configuration. Assumes the caller has
1001    * already checked for the directory's existence.
1002    * @param fs
1003    * @param conf
1004    * @param dir
1005    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1006    *         whether the directory exists or not, and returns true if it exists.
1007    * @throws IOException
1008    */
1009   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1010       throws IOException {
1011     int i = 0;
1012     IOException lastIOE = null;
1013     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1014       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1015     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1016       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1017     do {
1018       try {
1019         return fs.mkdirs(dir);
1020       } catch (IOException ioe) {
1021         lastIOE = ioe;
1022         if (fs.exists(dir)) return true; // directory is present
1023         sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1024       }
1025     } while (++i <= hdfsClientRetriesNumber);
1026     throw new IOException("Exception in createDir", lastIOE);
1027   }
1028 
1029   /**
1030    * Sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1031    * of this avoids re-reading the integer configuration values.
1032    */
1033   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1034       int hdfsClientRetriesNumber) {
1035     if (sleepMultiplier > hdfsClientRetriesNumber) {
1036       LOG.debug(msg + ", retries exhausted");
1037       return;
1038     }
1039     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1040     Threads.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1041   }
1042 }