View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.nio.ByteBuffer;
26  import java.util.Collection;
27  import java.util.Deque;
28  import java.util.List;
29  import java.util.NavigableMap;
30  import java.util.concurrent.ExecutorService;
31  import java.util.concurrent.atomic.AtomicInteger;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HColumnDescriptor;
40  import org.apache.hadoop.hbase.HConstants;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.HRegionLocation;
43  import org.apache.hadoop.hbase.HTableDescriptor;
44  import org.apache.hadoop.hbase.MetaTableAccessor;
45  import org.apache.hadoop.hbase.ServerName;
46  import org.apache.hadoop.hbase.TableExistsException;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.client.Admin;
49  import org.apache.hadoop.hbase.client.Connection;
50  import org.apache.hadoop.hbase.client.ConnectionFactory;
51  import org.apache.hadoop.hbase.client.HConnection;
52  import org.apache.hadoop.hbase.client.HTable;
53  import org.apache.hadoop.hbase.client.Result;
54  import org.apache.hadoop.hbase.client.ResultScanner;
55  import org.apache.hadoop.hbase.client.Scan;
56  import org.apache.hadoop.hbase.client.Table;
57  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
58  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
59  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
60  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
61  import org.apache.hadoop.hbase.regionserver.HRegionServer;
62  import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
63  import org.apache.hadoop.hbase.util.Bytes;
64  import org.apache.hadoop.hbase.util.Pair;
65  import org.apache.hadoop.hbase.testclassification.LargeTests;
66  import org.junit.AfterClass;
67  import org.junit.BeforeClass;
68  import org.junit.Test;
69  import org.junit.experimental.categories.Category;
70  import org.mockito.Mockito;
71  
72  import com.google.common.collect.Multimap;
73  import com.google.protobuf.RpcController;
74  import com.google.protobuf.ServiceException;
75  
76  /**
77   * Test cases for the atomic load error handling of the bulk load functionality.
78   */
79  @Category(LargeTests.class)
80  public class TestLoadIncrementalHFilesSplitRecovery {
81    final static Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
82  
83    static HBaseTestingUtility util;
84    //used by secure subclass
85    static boolean useSecure = false;
86  
87    final static int NUM_CFS = 10;
88    final static byte[] QUAL = Bytes.toBytes("qual");
89    final static int ROWCOUNT = 100;
90  
91    private final static byte[][] families = new byte[NUM_CFS][];
92    static {
93      for (int i = 0; i < NUM_CFS; i++) {
94        families[i] = Bytes.toBytes(family(i));
95      }
96    }
97  
98    static byte[] rowkey(int i) {
99      return Bytes.toBytes(String.format("row_%08d", i));
100   }
101 
102   static String family(int i) {
103     return String.format("family_%04d", i);
104   }
105 
106   static byte[] value(int i) {
107     return Bytes.toBytes(String.format("%010d", i));
108   }
109 
110   public static void buildHFiles(FileSystem fs, Path dir, int value)
111       throws IOException {
112     byte[] val = value(value);
113     for (int i = 0; i < NUM_CFS; i++) {
114       Path testIn = new Path(dir, family(i));
115 
116       TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
117           Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
118     }
119   }
120 
121   /**
122    * Creates a table with given table name and specified number of column
123    * families if the table does not already exist.
124    */
125   private void setupTable(final Connection connection, TableName table, int cfs)
126   throws IOException {
127     try {
128       LOG.info("Creating table " + table);
129       HTableDescriptor htd = new HTableDescriptor(table);
130       for (int i = 0; i < cfs; i++) {
131         htd.addFamily(new HColumnDescriptor(family(i)));
132       }
133       try (Admin admin = connection.getAdmin()) {
134         admin.createTable(htd);
135       }
136     } catch (TableExistsException tee) {
137       LOG.info("Table " + table + " already exists");
138     }
139   }
140 
141   /**
142    * Creates a table with given table name,specified number of column families<br>
143    * and splitkeys if the table does not already exist.
144    * @param table
145    * @param cfs
146    * @param SPLIT_KEYS
147    */
148   private void setupTableWithSplitkeys(TableName table, int cfs, byte[][] SPLIT_KEYS)
149       throws IOException {
150     try {
151       LOG.info("Creating table " + table);
152       HTableDescriptor htd = new HTableDescriptor(table);
153       for (int i = 0; i < cfs; i++) {
154         htd.addFamily(new HColumnDescriptor(family(i)));
155       }
156 
157       util.createTable(htd, SPLIT_KEYS);
158     } catch (TableExistsException tee) {
159       LOG.info("Table " + table + " already exists");
160     }
161   }
162 
163   private Path buildBulkFiles(TableName table, int value) throws Exception {
164     Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
165     Path bulk1 = new Path(dir, table.getNameAsString() + value);
166     FileSystem fs = util.getTestFileSystem();
167     buildHFiles(fs, bulk1, value);
168     return bulk1;
169   }
170 
171   /**
172    * Populate table with known values.
173    */
174   private void populateTable(final Connection connection, TableName table, int value)
175   throws Exception {
176     // create HFiles for different column families
177     LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
178     Path bulk1 = buildBulkFiles(table, value);
179     try (Table t = connection.getTable(table)) {
180       lih.doBulkLoad(bulk1, (HTable)t);
181     }
182   }
183 
184   /**
185    * Split the known table in half.  (this is hard coded for this test suite)
186    */
187   private void forceSplit(TableName table) {
188     try {
189       // need to call regions server to by synchronous but isn't visible.
190       HRegionServer hrs = util.getRSForFirstRegionInTable(table);
191 
192       for (HRegionInfo hri :
193           ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
194         if (hri.getTable().equals(table)) {
195           // splitRegion doesn't work if startkey/endkey are null
196           ProtobufUtil.split(hrs.getRSRpcServices(), hri, rowkey(ROWCOUNT / 2)); // hard code split
197         }
198       }
199 
200       // verify that split completed.
201       int regions;
202       do {
203         regions = 0;
204         for (HRegionInfo hri :
205             ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices())) {
206           if (hri.getTable().equals(table)) {
207             regions++;
208           }
209         }
210         if (regions != 2) {
211           LOG.info("Taking some time to complete split...");
212           Thread.sleep(250);
213         }
214       } while (regions != 2);
215     } catch (IOException e) {
216       e.printStackTrace();
217     } catch (InterruptedException e) {
218       e.printStackTrace();
219     }
220   }
221 
222   @BeforeClass
223   public static void setupCluster() throws Exception {
224     util = new HBaseTestingUtility();
225     util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,"");
226     util.startMiniCluster(1);
227   }
228 
229   @AfterClass
230   public static void teardownCluster() throws Exception {
231     util.shutdownMiniCluster();
232   }
233 
234   /**
235    * Checks that all columns have the expected value and that there is the
236    * expected number of rows.
237    * @throws IOException
238    */
239   void assertExpectedTable(TableName table, int count, int value) throws IOException {
240     HTableDescriptor [] htds = util.getHBaseAdmin().listTables(table.getNameAsString());
241     assertEquals(htds.length, 1);
242     Table t = null;
243     try {
244       t = new HTable(util.getConfiguration(), table);
245       Scan s = new Scan();
246       ResultScanner sr = t.getScanner(s);
247       int i = 0;
248       for (Result r : sr) {
249         i++;
250         for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
251           for (byte[] val : nm.values()) {
252             assertTrue(Bytes.equals(val, value(value)));
253           }
254         }
255       }
256       assertEquals(count, i);
257     } catch (IOException e) {
258       fail("Failed due to exception");
259     } finally {
260       if (t != null) t.close();
261     }
262   }
263 
264   /**
265    * Test that shows that exception thrown from the RS side will result in an
266    * exception on the LIHFile client.
267    */
268   @Test(expected=IOException.class, timeout=120000)
269   public void testBulkLoadPhaseFailure() throws Exception {
270     TableName table = TableName.valueOf("bulkLoadPhaseFailure");
271     final AtomicInteger attmptedCalls = new AtomicInteger();
272     final AtomicInteger failedCalls = new AtomicInteger();
273     util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
274     try (Connection connection = ConnectionFactory.createConnection(this.util.getConfiguration())) {
275       setupTable(connection, table, 10);
276       LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration()) {
277         protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
278             TableName tableName, final byte[] first, Collection<LoadQueueItem> lqis)
279                 throws IOException {
280           int i = attmptedCalls.incrementAndGet();
281           if (i == 1) {
282             Connection errConn = null;
283             try {
284               errConn = getMockedConnection(util.getConfiguration());
285             } catch (Exception e) {
286               LOG.fatal("mocking cruft, should never happen", e);
287               throw new RuntimeException("mocking cruft, should never happen");
288             }
289             failedCalls.incrementAndGet();
290             return super.tryAtomicRegionLoad((HConnection)errConn, tableName, first, lqis);
291           }
292 
293           return super.tryAtomicRegionLoad((HConnection)conn, tableName, first, lqis);
294         }
295       };
296       try {
297         // create HFiles for different column families
298         Path dir = buildBulkFiles(table, 1);
299         try (Table t = connection.getTable(table)) {
300           lih.doBulkLoad(dir, (HTable)t);
301         }
302       } finally {
303         util.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
304             HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
305       }
306       fail("doBulkLoad should have thrown an exception");
307     }
308   }
309 
310   @SuppressWarnings("deprecation")
311   private HConnection getMockedConnection(final Configuration conf)
312   throws IOException, ServiceException {
313     HConnection c = Mockito.mock(HConnection.class);
314     Mockito.when(c.getConfiguration()).thenReturn(conf);
315     Mockito.doNothing().when(c).close();
316     // Make it so we return a particular location when asked.
317     final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
318         ServerName.valueOf("example.org", 1234, 0));
319     Mockito.when(c.getRegionLocation((TableName) Mockito.any(),
320         (byte[]) Mockito.any(), Mockito.anyBoolean())).
321       thenReturn(loc);
322     Mockito.when(c.locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any())).
323       thenReturn(loc);
324     ClientProtos.ClientService.BlockingInterface hri =
325       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
326     Mockito.when(hri.bulkLoadHFile((RpcController)Mockito.any(), (BulkLoadHFileRequest)Mockito.any())).
327       thenThrow(new ServiceException(new IOException("injecting bulk load error")));
328     Mockito.when(c.getClient(Mockito.any(ServerName.class))).
329       thenReturn(hri);
330     return c;
331   }
332 
333   /**
334    * This test exercises the path where there is a split after initial
335    * validation but before the atomic bulk load call. We cannot use presplitting
336    * to test this path, so we actually inject a split just before the atomic
337    * region load.
338    */
339   @Test (timeout=120000)
340   public void testSplitWhileBulkLoadPhase() throws Exception {
341     final TableName table = TableName.valueOf("splitWhileBulkloadPhase");
342     try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
343       setupTable(connection, table, 10);
344       populateTable(connection, table,1);
345       assertExpectedTable(table, ROWCOUNT, 1);
346 
347       // Now let's cause trouble.  This will occur after checks and cause bulk
348       // files to fail when attempt to atomically import.  This is recoverable.
349       final AtomicInteger attemptedCalls = new AtomicInteger();
350       LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(util.getConfiguration()) {
351         protected void bulkLoadPhase(final Table htable, final HConnection conn,
352             ExecutorService pool, Deque<LoadQueueItem> queue,
353             final Multimap<ByteBuffer, LoadQueueItem> regionGroups) throws IOException {
354           int i = attemptedCalls.incrementAndGet();
355           if (i == 1) {
356             // On first attempt force a split.
357             forceSplit(table);
358           }
359           super.bulkLoadPhase(htable, conn, pool, queue, regionGroups);
360         }
361       };
362 
363       // create HFiles for different column families
364       try (Table t = connection.getTable(table)) {
365         Path bulk = buildBulkFiles(table, 2);
366         lih2.doBulkLoad(bulk, (HTable)t);
367       }
368 
369       // check that data was loaded
370       // The three expected attempts are 1) failure because need to split, 2)
371       // load of split top 3) load of split bottom
372       assertEquals(attemptedCalls.get(), 3);
373       assertExpectedTable(table, ROWCOUNT, 2);
374     }
375   }
376 
377   /**
378    * This test splits a table and attempts to bulk load.  The bulk import files
379    * should be split before atomically importing.
380    */
381   @Test (timeout=120000)
382   public void testGroupOrSplitPresplit() throws Exception {
383     final TableName table = TableName.valueOf("groupOrSplitPresplit");
384     try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
385       setupTable(connection, table, 10);
386       populateTable(connection, table, 1);
387       assertExpectedTable(connection, table, ROWCOUNT, 1);
388       forceSplit(table);
389 
390       final AtomicInteger countedLqis= new AtomicInteger();
391       LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
392           util.getConfiguration()) {
393         protected List<LoadQueueItem> groupOrSplit(
394             Multimap<ByteBuffer, LoadQueueItem> regionGroups,
395             final LoadQueueItem item, final HTable htable,
396             final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
397           List<LoadQueueItem> lqis = super.groupOrSplit(regionGroups, item, htable, startEndKeys);
398           if (lqis != null) {
399             countedLqis.addAndGet(lqis.size());
400           }
401           return lqis;
402         }
403       };
404 
405       // create HFiles for different column families
406       Path bulk = buildBulkFiles(table, 2);
407       try (Table t = connection.getTable(table)) {
408         lih.doBulkLoad(bulk, (HTable)t);
409       }
410       assertExpectedTable(connection, table, ROWCOUNT, 2);
411       assertEquals(20, countedLqis.get());
412     }
413   }
414 
415   /**
416    * This simulates an remote exception which should cause LIHF to exit with an
417    * exception.
418    */
419   @Test(expected = IOException.class, timeout=120000)
420   public void testGroupOrSplitFailure() throws Exception {
421     TableName table = TableName.valueOf("groupOrSplitFailure");
422     try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
423       setupTable(connection, table, 10);
424 
425       LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
426           util.getConfiguration()) {
427         int i = 0;
428 
429         protected List<LoadQueueItem> groupOrSplit(
430             Multimap<ByteBuffer, LoadQueueItem> regionGroups,
431             final LoadQueueItem item, final HTable table,
432             final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
433           i++;
434 
435           if (i == 5) {
436             throw new IOException("failure");
437           }
438           return super.groupOrSplit(regionGroups, item, table, startEndKeys);
439         }
440       };
441 
442       // create HFiles for different column families
443       Path dir = buildBulkFiles(table,1);
444       try (Table t = connection.getTable(table)) {
445         lih.doBulkLoad(dir, (HTable)t);
446       }
447     }
448 
449     fail("doBulkLoad should have thrown an exception");
450   }
451 
452   @Test (timeout=120000)
453   public void testGroupOrSplitWhenRegionHoleExistsInMeta() throws Exception {
454     TableName tableName = TableName.valueOf("testGroupOrSplitWhenRegionHoleExistsInMeta");
455     byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000100") };
456     // Share connection. We were failing to find the table with our new reverse scan because it
457     // looks for first region, not any region -- that is how it works now.  The below removes first
458     // region in test.  Was reliant on the Connection caching having first region.
459     Connection connection = ConnectionFactory.createConnection(util.getConfiguration());
460     Table table = connection.getTable(tableName);
461 
462     setupTableWithSplitkeys(tableName, 10, SPLIT_KEYS);
463     Path dir = buildBulkFiles(tableName, 2);
464 
465     final AtomicInteger countedLqis = new AtomicInteger();
466     LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) {
467 
468       protected List<LoadQueueItem> groupOrSplit(
469           Multimap<ByteBuffer, LoadQueueItem> regionGroups,
470           final LoadQueueItem item, final HTable htable,
471           final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
472         List<LoadQueueItem> lqis = super.groupOrSplit(regionGroups, item, htable, startEndKeys);
473         if (lqis != null) {
474           countedLqis.addAndGet(lqis.size());
475         }
476         return lqis;
477       }
478     };
479 
480     // do bulkload when there is no region hole in hbase:meta.
481     try {
482       loader.doBulkLoad(dir, (HTable)table);
483     } catch (Exception e) {
484       LOG.error("exeception=", e);
485     }
486     // check if all the data are loaded into the table.
487     this.assertExpectedTable(tableName, ROWCOUNT, 2);
488 
489     dir = buildBulkFiles(tableName, 3);
490 
491     // Mess it up by leaving a hole in the hbase:meta
492     List<HRegionInfo> regionInfos = MetaTableAccessor.getTableRegions(util.getZooKeeperWatcher(),
493       connection, tableName);
494     for (HRegionInfo regionInfo : regionInfos) {
495       if (Bytes.equals(regionInfo.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
496         MetaTableAccessor.deleteRegion(connection, regionInfo);
497         break;
498       }
499     }
500 
501     try {
502       loader.doBulkLoad(dir, (HTable)table);
503     } catch (Exception e) {
504       LOG.error("exeception=", e);
505       assertTrue("IOException expected", e instanceof IOException);
506     }
507 
508     table.close();
509 
510     // Make sure at least the one region that still exists can be found.
511     regionInfos = MetaTableAccessor.getTableRegions(util.getZooKeeperWatcher(),
512       connection, tableName);
513     assertTrue(regionInfos.size() >= 1);
514 
515     this.assertExpectedTable(connection, tableName, ROWCOUNT, 2);
516     connection.close();
517   }
518 
519   /**
520    * Checks that all columns have the expected value and that there is the
521    * expected number of rows.
522    * @throws IOException
523    */
524   void assertExpectedTable(final Connection connection, TableName table, int count, int value)
525   throws IOException {
526     HTableDescriptor [] htds = util.getHBaseAdmin().listTables(table.getNameAsString());
527     assertEquals(htds.length, 1);
528     Table t = null;
529     try {
530       t = connection.getTable(table);
531       Scan s = new Scan();
532       ResultScanner sr = t.getScanner(s);
533       int i = 0;
534       for (Result r : sr) {
535         i++;
536         for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
537           for (byte[] val : nm.values()) {
538             assertTrue(Bytes.equals(val, value(value)));
539           }
540         }
541       }
542       assertEquals(count, i);
543     } catch (IOException e) {
544       fail("Failed due to exception");
545     } finally {
546       if (t != null) t.close();
547     }
548   }
549 }