1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.BufferedReader;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.nio.ByteBuffer;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Random;
29
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.LocalFileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.fs.RawLocalFileSystem;
34 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
35 import org.apache.hadoop.hbase.util.Bytes;
36
37
38
39
40 public class RandomSeek {
41 private static List<String> slurp(String fname) throws IOException {
42 BufferedReader istream = new BufferedReader(new FileReader(fname));
43 String str;
44 List<String> l = new ArrayList<String>();
45 while ( (str=istream.readLine()) != null) {
46 String [] parts = str.split(",");
47 l.add(parts[0] + ":" + parts[1] + ":" + parts[2]);
48 }
49 istream.close();
50 return l;
51 }
52
53 private static String randKey(List<String> keys) {
54 Random r = new Random();
55
56 return "2" + Integer.toString(7+r.nextInt(2)) + Integer.toString(r.nextInt(100));
57
58 }
59
60 public static void main(String [] argv) throws IOException {
61 Configuration conf = new Configuration();
62 conf.setInt("io.file.buffer.size", 64*1024);
63 RawLocalFileSystem rlfs = new RawLocalFileSystem();
64 rlfs.setConf(conf);
65 LocalFileSystem lfs = new LocalFileSystem(rlfs);
66
67 Path path = new Path("/Users/ryan/rfile.big.txt");
68 long start = System.currentTimeMillis();
69 SimpleBlockCache cache = new SimpleBlockCache();
70
71 Reader reader = new HFile.Reader(lfs, path, cache, false);
72 reader.loadFileInfo();
73 System.out.println(reader.trailer);
74 long end = System.currentTimeMillis();
75
76 System.out.println("Index read time: " + (end - start));
77
78 List<String> keys = slurp("/Users/ryan/xaa.50k");
79
80
81 HFileScanner scanner = reader.getScanner(false, true);
82 int count;
83 long totalBytes = 0;
84 int notFound = 0;
85
86 start = System.nanoTime();
87 for(count = 0; count < 500000; ++count) {
88 String key = randKey(keys);
89 byte [] bkey = Bytes.toBytes(key);
90 int res = scanner.seekTo(bkey);
91 if (res == 0) {
92 ByteBuffer k = scanner.getKey();
93 ByteBuffer v = scanner.getValue();
94 totalBytes += k.limit();
95 totalBytes += v.limit();
96 } else {
97 ++ notFound;
98 }
99 if (res == -1) {
100 scanner.seekTo();
101 }
102
103 for (int i = 0; i < 1000; ++i) {
104 if (!scanner.next())
105 break;
106 ByteBuffer k = scanner.getKey();
107 ByteBuffer v = scanner.getValue();
108 totalBytes += k.limit();
109 totalBytes += v.limit();
110 }
111
112 if ( count % 1000 == 0 ) {
113 end = System.nanoTime();
114
115 System.out.println("Cache block count: " + cache.size() + " dumped: "+ cache.dumps);
116
117 double msTime = ((end - start) / 1000000.0);
118 System.out.println("Seeked: "+ count + " in " + msTime + " (ms) "
119 + (1000.0 / msTime ) + " seeks/ms "
120 + (msTime / 1000.0) + " ms/seek");
121
122 start = System.nanoTime();
123 }
124 }
125 System.out.println("Total bytes: " + totalBytes + " not found: " + notFound);
126 }
127 }