View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.concurrent.atomic.AtomicLong;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.*;
31  import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread;
32  import org.apache.hadoop.hbase.MultithreadedTestUtil.TestContext;
33  import org.apache.hadoop.hbase.client.HConnection;
34  import org.apache.hadoop.hbase.client.HTable;
35  import org.apache.hadoop.hbase.client.RegionServerCallable;
36  import org.apache.hadoop.hbase.client.Result;
37  import org.apache.hadoop.hbase.client.ResultScanner;
38  import org.apache.hadoop.hbase.client.RpcRetryingCaller;
39  import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.TableExistsException;
42  import org.apache.hadoop.hbase.io.compress.Compression;
43  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
44  import org.apache.hadoop.hbase.io.hfile.HFile;
45  import org.apache.hadoop.hbase.protobuf.RequestConverter;
46  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
47  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest;
48  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.Pair;
51  import org.junit.Test;
52  
53  import com.google.common.collect.Lists;
54  import org.junit.experimental.categories.Category;
55  
56  /**
57   * Tests bulk loading of HFiles and shows the atomicity or lack of atomicity of
58   * the region server's bullkLoad functionality.
59   */
60  @Category(LargeTests.class)
61  public class TestHRegionServerBulkLoad {
62    final static Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
63    private static HBaseTestingUtility UTIL = new HBaseTestingUtility();
64    private final static Configuration conf = UTIL.getConfiguration();
65    private final static byte[] QUAL = Bytes.toBytes("qual");
66    private final static int NUM_CFS = 10;
67    public static int BLOCKSIZE = 64 * 1024;
68    public static String COMPRESSION = Compression.Algorithm.NONE.getName();
69  
70    private final static byte[][] families = new byte[NUM_CFS][];
71    static {
72      for (int i = 0; i < NUM_CFS; i++) {
73        families[i] = Bytes.toBytes(family(i));
74      }
75    }
76  
77    static byte[] rowkey(int i) {
78      return Bytes.toBytes(String.format("row_%08d", i));
79    }
80  
81    static String family(int i) {
82      return String.format("family_%04d", i);
83    }
84  
85    /**
86     * Create an HFile with the given number of rows with a specified value.
87     */
88    public static void createHFile(FileSystem fs, Path path, byte[] family,
89        byte[] qualifier, byte[] value, int numRows) throws IOException {
90      HFile.Writer writer = HFile
91          .getWriterFactory(conf, new CacheConfig(conf))
92          .withPath(fs, path)
93          .withBlockSize(BLOCKSIZE)
94          .withCompression(COMPRESSION)
95          .withComparator(KeyValue.KEY_COMPARATOR)
96          .create();
97      long now = System.currentTimeMillis();
98      try {
99        // subtract 2 since iterateOnSplits doesn't include boundary keys
100       for (int i = 0; i < numRows; i++) {
101         KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
102         writer.append(kv);
103       }
104     } finally {
105       writer.close();
106     }
107   }
108 
109   /**
110    * Thread that does full scans of the table looking for any partially
111    * completed rows.
112    *
113    * Each iteration of this loads 10 hdfs files, which occupies 5 file open file
114    * handles. So every 10 iterations (500 file handles) it does a region
115    * compaction to reduce the number of open file handles.
116    */
117   public static class AtomicHFileLoader extends RepeatingTestThread {
118     final AtomicLong numBulkLoads = new AtomicLong();
119     final AtomicLong numCompactions = new AtomicLong();
120     private String tableName;
121 
122     public AtomicHFileLoader(String tableName, TestContext ctx,
123         byte targetFamilies[][]) throws IOException {
124       super(ctx);
125       this.tableName = tableName;
126     }
127 
128     public void doAnAction() throws Exception {
129       long iteration = numBulkLoads.getAndIncrement();
130       Path dir =  UTIL.getDataTestDirOnTestFS(String.format("bulkLoad_%08d",
131           iteration));
132 
133       // create HFiles for different column families
134       FileSystem fs = UTIL.getTestFileSystem();
135       byte[] val = Bytes.toBytes(String.format("%010d", iteration));
136       final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>(
137           NUM_CFS);
138       for (int i = 0; i < NUM_CFS; i++) {
139         Path hfile = new Path(dir, family(i));
140         byte[] fam = Bytes.toBytes(family(i));
141         createHFile(fs, hfile, fam, QUAL, val, 1000);
142         famPaths.add(new Pair<byte[], String>(fam, hfile.toString()));
143       }
144 
145       // bulk load HFiles
146       final HConnection conn = UTIL.getHBaseAdmin().getConnection();
147       TableName tbl = TableName.valueOf(tableName);
148       RegionServerCallable<Void> callable =
149           new RegionServerCallable<Void>(conn, tbl, Bytes.toBytes("aaa")) {
150         @Override
151         public Void call() throws Exception {
152           LOG.debug("Going to connect to server " + getLocation() + " for row "
153               + Bytes.toStringBinary(getRow()));
154           byte[] regionName = getLocation().getRegionInfo().getRegionName();
155           BulkLoadHFileRequest request =
156             RequestConverter.buildBulkLoadHFileRequest(famPaths, regionName, true);
157           getStub().bulkLoadHFile(null, request);
158           return null;
159         }
160       };
161       RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(conf);
162       RpcRetryingCaller<Void> caller = factory.<Void> newCaller();
163       caller.callWithRetries(callable);
164 
165       // Periodically do compaction to reduce the number of open file handles.
166       if (numBulkLoads.get() % 10 == 0) {
167         // 10 * 50 = 500 open file handles!
168         callable = new RegionServerCallable<Void>(conn, tbl, Bytes.toBytes("aaa")) {
169           @Override
170           public Void call() throws Exception {
171             LOG.debug("compacting " + getLocation() + " for row "
172                 + Bytes.toStringBinary(getRow()));
173             AdminProtos.AdminService.BlockingInterface server =
174               conn.getAdmin(getLocation().getServerName());
175             CompactRegionRequest request =
176               RequestConverter.buildCompactRegionRequest(
177                 getLocation().getRegionInfo().getRegionName(), true, null);
178             server.compactRegion(null, request);
179             numCompactions.incrementAndGet();
180             return null;
181           }
182         };
183         caller.callWithRetries(callable);
184       }
185     }
186   }
187 
188   /**
189    * Thread that does full scans of the table looking for any partially
190    * completed rows.
191    */
192   public static class AtomicScanReader extends RepeatingTestThread {
193     byte targetFamilies[][];
194     HTable table;
195     AtomicLong numScans = new AtomicLong();
196     AtomicLong numRowsScanned = new AtomicLong();
197     String TABLE_NAME;
198 
199     public AtomicScanReader(String TABLE_NAME, TestContext ctx,
200         byte targetFamilies[][]) throws IOException {
201       super(ctx);
202       this.TABLE_NAME = TABLE_NAME;
203       this.targetFamilies = targetFamilies;
204       table = new HTable(conf, TABLE_NAME);
205     }
206 
207     public void doAnAction() throws Exception {
208       Scan s = new Scan();
209       for (byte[] family : targetFamilies) {
210         s.addFamily(family);
211       }
212       ResultScanner scanner = table.getScanner(s);
213 
214       for (Result res : scanner) {
215         byte[] lastRow = null, lastFam = null, lastQual = null;
216         byte[] gotValue = null;
217         for (byte[] family : targetFamilies) {
218           byte qualifier[] = QUAL;
219           byte thisValue[] = res.getValue(family, qualifier);
220           if (gotValue != null && thisValue != null
221               && !Bytes.equals(gotValue, thisValue)) {
222 
223             StringBuilder msg = new StringBuilder();
224             msg.append("Failed on scan ").append(numScans)
225                 .append(" after scanning ").append(numRowsScanned)
226                 .append(" rows!\n");
227             msg.append("Current  was " + Bytes.toString(res.getRow()) + "/"
228                 + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
229                 + " = " + Bytes.toString(thisValue) + "\n");
230             msg.append("Previous  was " + Bytes.toString(lastRow) + "/"
231                 + Bytes.toString(lastFam) + ":" + Bytes.toString(lastQual)
232                 + " = " + Bytes.toString(gotValue));
233             throw new RuntimeException(msg.toString());
234           }
235 
236           lastFam = family;
237           lastQual = qualifier;
238           lastRow = res.getRow();
239           gotValue = thisValue;
240         }
241         numRowsScanned.getAndIncrement();
242       }
243       numScans.getAndIncrement();
244     }
245   }
246 
247   /**
248    * Creates a table with given table name and specified number of column
249    * families if the table does not already exist.
250    */
251   private void setupTable(String table, int cfs) throws IOException {
252     try {
253       LOG.info("Creating table " + table);
254       HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(table));
255       for (int i = 0; i < 10; i++) {
256         htd.addFamily(new HColumnDescriptor(family(i)));
257       }
258 
259       UTIL.getHBaseAdmin().createTable(htd);
260     } catch (TableExistsException tee) {
261       LOG.info("Table " + table + " already exists");
262     }
263   }
264 
265   /**
266    * Atomic bulk load.
267    */
268   @Test
269   public void testAtomicBulkLoad() throws Exception {
270     String TABLE_NAME = "atomicBulkLoad";
271 
272     int millisToRun = 30000;
273     int numScanners = 50;
274 
275     UTIL.startMiniCluster(1);
276     try {
277       runAtomicBulkloadTest(TABLE_NAME, millisToRun, numScanners);
278     } finally {
279       UTIL.shutdownMiniCluster();
280     }
281   }
282 
283   void runAtomicBulkloadTest(String tableName, int millisToRun, int numScanners)
284       throws Exception {
285     setupTable(tableName, 10);
286 
287     TestContext ctx = new TestContext(UTIL.getConfiguration());
288 
289     AtomicHFileLoader loader = new AtomicHFileLoader(tableName, ctx, null);
290     ctx.addThread(loader);
291 
292     List<AtomicScanReader> scanners = Lists.newArrayList();
293     for (int i = 0; i < numScanners; i++) {
294       AtomicScanReader scanner = new AtomicScanReader(tableName, ctx, families);
295       scanners.add(scanner);
296       ctx.addThread(scanner);
297     }
298 
299     ctx.startThreads();
300     ctx.waitFor(millisToRun);
301     ctx.stop();
302 
303     LOG.info("Loaders:");
304     LOG.info("  loaded " + loader.numBulkLoads.get());
305     LOG.info("  compations " + loader.numCompactions.get());
306 
307     LOG.info("Scanners:");
308     for (AtomicScanReader scanner : scanners) {
309       LOG.info("  scanned " + scanner.numScans.get());
310       LOG.info("  verified " + scanner.numRowsScanned.get() + " rows");
311     }
312   }
313 
314   /**
315    * Run test on an HBase instance for 5 minutes. This assumes that the table
316    * under test only has a single region.
317    */
318   public static void main(String args[]) throws Exception {
319     try {
320       Configuration c = HBaseConfiguration.create();
321       TestHRegionServerBulkLoad test = new TestHRegionServerBulkLoad();
322       test.setConf(c);
323       test.runAtomicBulkloadTest("atomicTableTest", 5 * 60 * 1000, 50);
324     } finally {
325       System.exit(0); // something hangs (believe it is lru threadpool)
326     }
327   }
328 
329   private void setConf(Configuration c) {
330     UTIL = new HBaseTestingUtility(c);
331   }
332 
333 }
334