View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.fs.RawLocalFileSystem;
43  import org.apache.hadoop.hbase.HBaseTestingUtility;
44  import org.apache.hadoop.hbase.KeyValue;
45  import org.apache.hadoop.hbase.MediumTests;
46  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
47  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
48  import org.apache.hadoop.io.BytesWritable;
49  import org.junit.experimental.categories.Category;
50  
51  /**
52   * test the performance for seek.
53   * <p>
54   * Copied from
55   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
56   * Remove after tfile is committed and use the tfile version of this class
57   * instead.</p>
58   */
59  @Category(MediumTests.class)
60  public class TestHFileSeek extends TestCase {
61    private static final boolean USE_PREAD = true;
62    private MyOptions options;
63    private Configuration conf;
64    private Path path;
65    private FileSystem fs;
66    private NanoTimer timer;
67    private Random rng;
68    private RandomDistribution.DiscreteRNG keyLenGen;
69    private KVGenerator kvGen;
70  
71    private static final Log LOG = LogFactory.getLog(TestHFileSeek.class);
72  
73    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
74  
75    @Override
76    public void setUp() throws IOException {
77      if (options == null) {
78        options = new MyOptions(new String[0]);
79      }
80  
81      conf = new Configuration();
82      
83      if (options.useRawFs) {
84        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
85      }
86      
87      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
88      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
89      path = new Path(new Path(options.rootDir), options.file);
90      fs = path.getFileSystem(conf);
91      timer = new NanoTimer(false);
92      rng = new Random(options.seed);
93      keyLenGen =
94          new RandomDistribution.Zipf(new Random(rng.nextLong()),
95              options.minKeyLen, options.maxKeyLen, 1.2);
96      RandomDistribution.DiscreteRNG valLenGen =
97          new RandomDistribution.Flat(new Random(rng.nextLong()),
98              options.minValLength, options.maxValLength);
99      RandomDistribution.DiscreteRNG wordLenGen =
100         new RandomDistribution.Flat(new Random(rng.nextLong()),
101             options.minWordLen, options.maxWordLen);
102     kvGen =
103         new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
104             options.dictSize);
105   }
106 
107   @Override
108   public void tearDown() {
109     try {
110       fs.close();
111     }
112     catch (Exception e) {
113       // Nothing
114     }
115   }
116 
117   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
118     throws IOException {
119     if (fs.exists(name)) {
120       fs.delete(name, true);
121     }
122     FSDataOutputStream fout = fs.create(name);
123     return fout;
124   }
125 
126   private void createTFile() throws IOException {
127     long totalBytes = 0;
128     FSDataOutputStream fout = createFSOutput(path, fs);
129     try {
130       HFileContext context = new HFileContextBuilder()
131                             .withBlockSize(options.minBlockSize)
132                             .withCompression(AbstractHFileWriter.compressionByName(options.compress))
133                             .build();
134       Writer writer = HFile.getWriterFactoryNoCache(conf)
135           .withOutputStream(fout)
136           .withFileContext(context)
137           .withComparator(new KeyValue.RawBytesComparator())
138           .create();
139       try {
140         BytesWritable key = new BytesWritable();
141         BytesWritable val = new BytesWritable();
142         timer.start();
143         for (long i = 0; true; ++i) {
144           if (i % 1000 == 0) { // test the size for every 1000 rows.
145             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
146               break;
147             }
148           }
149           kvGen.next(key, val, false);
150           byte [] k = new byte [key.getLength()];
151           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
152           byte [] v = new byte [val.getLength()];
153           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
154           writer.append(k, v);
155           totalBytes += key.getLength();
156           totalBytes += val.getLength();
157         }
158         timer.stop();
159       }
160       finally {
161         writer.close();
162       }
163     }
164     finally {
165       fout.close();
166     }
167     double duration = (double)timer.read()/1000; // in us.
168     long fsize = fs.getFileStatus(path).getLen();
169 
170     System.out.printf(
171         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
172         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
173             / duration);
174     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
175         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
176   }
177 
178   public void seekTFile() throws IOException {
179     int miss = 0;
180     long totalBytes = 0;
181     FSDataInputStream fsdis = fs.open(path);
182     Reader reader = HFile.createReaderFromStream(path, fsdis,
183         fs.getFileStatus(path).getLen(), new CacheConfig(conf), conf);
184     reader.loadFileInfo();
185     KeySampler kSampler =
186         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
187             keyLenGen);
188     HFileScanner scanner = reader.getScanner(false, USE_PREAD);
189     BytesWritable key = new BytesWritable();
190     timer.reset();
191     timer.start();
192     for (int i = 0; i < options.seekCount; ++i) {
193       kSampler.next(key);
194       byte [] k = new byte [key.getLength()];
195       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
196       if (scanner.seekTo(k) >= 0) {
197         ByteBuffer bbkey = scanner.getKey();
198         ByteBuffer bbval = scanner.getValue();
199         totalBytes += bbkey.limit();
200         totalBytes += bbval.limit();
201       }
202       else {
203         ++miss;
204       }
205     }
206     timer.stop();
207     System.out.printf(
208         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
209         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
210             / options.seekCount), options.seekCount - miss, miss,
211         (double) totalBytes / 1024 / (options.seekCount - miss));
212 
213   }
214 
215   public void testSeeks() throws IOException {
216     if (options.doCreate()) {
217       createTFile();
218     }
219 
220     if (options.doRead()) {
221       seekTFile();
222     }
223 
224     if (options.doCreate()) {
225       fs.delete(path, true);
226     }
227   }
228 
229   private static class IntegerRange {
230     private final int from, to;
231 
232     public IntegerRange(int from, int to) {
233       this.from = from;
234       this.to = to;
235     }
236 
237     public static IntegerRange parse(String s) throws ParseException {
238       StringTokenizer st = new StringTokenizer(s, " \t,");
239       if (st.countTokens() != 2) {
240         throw new ParseException("Bad integer specification: " + s);
241       }
242       int from = Integer.parseInt(st.nextToken());
243       int to = Integer.parseInt(st.nextToken());
244       return new IntegerRange(from, to);
245     }
246 
247     public int from() {
248       return from;
249     }
250 
251     public int to() {
252       return to;
253     }
254   }
255 
256   private static class MyOptions {
257     // hard coded constants
258     int dictSize = 1000;
259     int minWordLen = 5;
260     int maxWordLen = 20;
261 
262     private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
263     String rootDir =
264       TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
265     String file = "TestTFileSeek";
266     // String compress = "lzo"; DISABLED
267     String compress = "none";
268     int minKeyLen = 10;
269     int maxKeyLen = 50;
270     int minValLength = 1024;
271     int maxValLength = 2 * 1024;
272     int minBlockSize = 1 * 1024 * 1024;
273     int fsOutputBufferSize = 1;
274     int fsInputBufferSize = 0;
275     // Default writing 10MB.
276     long fileSize = 10 * 1024 * 1024;
277     long seekCount = 1000;
278     long trialCount = 1;
279     long seed;
280     boolean useRawFs = false;
281 
282     static final int OP_CREATE = 1;
283     static final int OP_READ = 2;
284     int op = OP_CREATE | OP_READ;
285 
286     boolean proceed = false;
287 
288     public MyOptions(String[] args) {
289       seed = System.nanoTime();
290 
291       try {
292         Options opts = buildOptions();
293         CommandLineParser parser = new GnuParser();
294         CommandLine line = parser.parse(opts, args, true);
295         processOptions(line, opts);
296         validateOptions();
297       }
298       catch (ParseException e) {
299         System.out.println(e.getMessage());
300         System.out.println("Try \"--help\" option for details.");
301         setStopProceed();
302       }
303     }
304 
305     public boolean proceed() {
306       return proceed;
307     }
308 
309     private Options buildOptions() {
310       Option compress =
311           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
312               .hasArg().withDescription("compression scheme").create('c');
313 
314       Option fileSize =
315           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
316               .hasArg().withDescription("target size of the file (in MB).")
317               .create('s');
318 
319       Option fsInputBufferSz =
320           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
321               .hasArg().withDescription(
322                   "size of the file system input buffer (in bytes).").create(
323                   'i');
324 
325       Option fsOutputBufferSize =
326           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
327               .hasArg().withDescription(
328                   "size of the file system output buffer (in bytes).").create(
329                   'o');
330 
331       Option keyLen =
332           OptionBuilder
333               .withLongOpt("key-length")
334               .withArgName("min,max")
335               .hasArg()
336               .withDescription(
337                   "the length range of the key (in bytes)")
338               .create('k');
339 
340       Option valueLen =
341           OptionBuilder
342               .withLongOpt("value-length")
343               .withArgName("min,max")
344               .hasArg()
345               .withDescription(
346                   "the length range of the value (in bytes)")
347               .create('v');
348 
349       Option blockSz =
350           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
351               .withDescription("minimum block size (in KB)").create('b');
352 
353       Option operation =
354           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
355               .withDescription(
356                   "action: seek-only, create-only, seek-after-create").create(
357                   'x');
358 
359       Option rootDir =
360           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
361               .withDescription(
362                   "specify root directory where files will be created.")
363               .create('r');
364 
365       Option file =
366           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
367               .withDescription("specify the file name to be created or read.")
368               .create('f');
369 
370       Option seekCount =
371           OptionBuilder
372               .withLongOpt("seek")
373               .withArgName("count")
374               .hasArg()
375               .withDescription(
376                   "specify how many seek operations we perform (requires -x r or -x rw.")
377               .create('n');
378       
379       Option trialCount =
380           OptionBuilder 
381               .withLongOpt("trials")
382               .withArgName("n")
383               .hasArg()
384               .withDescription(
385                   "specify how many times to run the whole benchmark")
386               .create('t');
387 
388       Option useRawFs =
389           OptionBuilder
390             .withLongOpt("rawfs")
391             .withDescription("use raw instead of checksummed file system")
392             .create();
393       
394       Option help =
395           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
396               "show this screen").create("h");
397 
398       return new Options().addOption(compress).addOption(fileSize).addOption(
399           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
400           .addOption(blockSz).addOption(rootDir).addOption(valueLen)
401           .addOption(operation).addOption(seekCount).addOption(file)
402           .addOption(trialCount).addOption(useRawFs).addOption(help);
403 
404     }
405 
406     private void processOptions(CommandLine line, Options opts)
407         throws ParseException {
408       // --help -h and --version -V must be processed first.
409       if (line.hasOption('h')) {
410         HelpFormatter formatter = new HelpFormatter();
411         System.out.println("TFile and SeqFile benchmark.");
412         System.out.println();
413         formatter.printHelp(100,
414             "java ... TestTFileSeqFileComparison [options]",
415             "\nSupported options:", opts, "");
416         return;
417       }
418 
419       if (line.hasOption('c')) {
420         compress = line.getOptionValue('c');
421       }
422 
423       if (line.hasOption('d')) {
424         dictSize = Integer.parseInt(line.getOptionValue('d'));
425       }
426 
427       if (line.hasOption('s')) {
428         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
429       }
430 
431       if (line.hasOption('i')) {
432         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
433       }
434 
435       if (line.hasOption('o')) {
436         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
437       }
438 
439       if (line.hasOption('n')) {
440         seekCount = Integer.parseInt(line.getOptionValue('n'));
441       }
442       
443       if (line.hasOption('t')) {
444         trialCount = Integer.parseInt(line.getOptionValue('t'));
445       }
446 
447       if (line.hasOption('k')) {
448         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
449         minKeyLen = ir.from();
450         maxKeyLen = ir.to();
451       }
452 
453       if (line.hasOption('v')) {
454         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
455         minValLength = ir.from();
456         maxValLength = ir.to();
457       }
458 
459       if (line.hasOption('b')) {
460         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
461       }
462 
463       if (line.hasOption('r')) {
464         rootDir = line.getOptionValue('r');
465       }
466 
467       if (line.hasOption('f')) {
468         file = line.getOptionValue('f');
469       }
470 
471       if (line.hasOption('S')) {
472         seed = Long.parseLong(line.getOptionValue('S'));
473       }
474 
475       if (line.hasOption('x')) {
476         String strOp = line.getOptionValue('x');
477         if (strOp.equals("r")) {
478           op = OP_READ;
479         }
480         else if (strOp.equals("w")) {
481           op = OP_CREATE;
482         }
483         else if (strOp.equals("rw")) {
484           op = OP_CREATE | OP_READ;
485         }
486         else {
487           throw new ParseException("Unknown action specifier: " + strOp);
488         }
489       }
490       
491       useRawFs = line.hasOption("rawfs");
492 
493       proceed = true;
494     }
495 
496     private void validateOptions() throws ParseException {
497       if (!compress.equals("none") && !compress.equals("lzo")
498           && !compress.equals("gz") && !compress.equals("snappy")) {
499         throw new ParseException("Unknown compression scheme: " + compress);
500       }
501 
502       if (minKeyLen >= maxKeyLen) {
503         throw new ParseException(
504             "Max key length must be greater than min key length.");
505       }
506 
507       if (minValLength >= maxValLength) {
508         throw new ParseException(
509             "Max value length must be greater than min value length.");
510       }
511 
512       if (minWordLen >= maxWordLen) {
513         throw new ParseException(
514             "Max word length must be greater than min word length.");
515       }
516       return;
517     }
518 
519     private void setStopProceed() {
520       proceed = false;
521     }
522 
523     public boolean doCreate() {
524       return (op & OP_CREATE) != 0;
525     }
526 
527     public boolean doRead() {
528       return (op & OP_READ) != 0;
529     }
530   }
531 
532   public static void main(String[] argv) throws IOException {
533     TestHFileSeek testCase = new TestHFileSeek();
534     MyOptions options = new MyOptions(argv);
535 
536     if (options.proceed == false) {
537       return;
538     }
539 
540     testCase.options = options;
541     for (int i = 0; i < options.trialCount; i++) {
542       LOG.info("Beginning trial " + (i+1));
543       testCase.setUp();
544       testCase.testSeeks();
545       testCase.tearDown();
546     }
547   }
548 
549 }
550