/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static java.lang.String.format;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.mutable.MutableInt;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RegionServerCallable;
import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
import org.apache.hadoop.hbase.client.coprocessor.SecureBulkLoadClient;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.FsDelegationToken;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSHDFSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Tool to load the output of HFileOutputFormat into an existing table,
 * optionally creating the table first.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(LoadIncrementalHFiles.class);
  private HBaseAdmin hbAdmin;

  public static final String NAME = "completebulkload";
  public static final String MAX_FILES_PER_REGION_PER_FAMILY
    = "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
  public final static String CREATE_TABLE_CONF_KEY = "create.table";

  private int maxFilesPerRegionPerFamily;
  private boolean assignSeqIds;

  // Source filesystem the HFiles are read from
  private FileSystem fs;
  // Delegation token for the source filesystem (used for secure bulk load)
  private FsDelegationToken fsDelegationToken;
  private String bulkToken;
  private UserProvider userProvider;

  private LoadIncrementalHFiles() {}

  public LoadIncrementalHFiles(Configuration conf) throws Exception {
    super(conf);
    initialize();
  }

  private void initialize() throws Exception {
    if (hbAdmin == null) {
      // Overlay the supplied configuration onto a fresh HBase configuration
      // so the caller's instance is not modified.
      setConf(HBaseConfiguration.create(getConf()));
      Configuration conf = getConf();
      // This tool reads each HFile only once; disable the block cache.
      conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
      this.hbAdmin = new HBaseAdmin(conf);
      this.userProvider = UserProvider.instantiate(conf);
      this.fsDelegationToken = new FsDelegationToken(userProvider, "renewer");
      assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
      maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 32);
    }
  }

  private void usage() {
    System.err.println("usage: " + NAME + " /path/to/hfileoutputformat-output tablename" + "\n -D"
        + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n"
        + " Note: if you set this to 'no', then the target table must already exist in HBase\n"
        + "\n");
  }

  /**
   * Represents an HFile waiting to be loaded. A queue of these is used so
   * that, if a region splits while the load is in progress, the HFile can be
   * split into two halves and each half re-queued; the load is complete when
   * the queue is empty.
   */
  static class LoadQueueItem {
    final byte[] family;
    final Path hfilePath;

    public LoadQueueItem(byte[] family, Path hfilePath) {
      this.family = family;
      this.hfilePath = hfilePath;
    }

    @Override
    public String toString() {
      return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
    }
  }

  /**
   * Walk the given directory for all HFiles and add them to the passed queue.
   */
  private void discoverLoadQueue(Deque<LoadQueueItem> ret, Path hfofDir)
  throws IOException {
    fs = hfofDir.getFileSystem(getConf());

    if (!fs.exists(hfofDir)) {
      throw new FileNotFoundException("HFileOutputFormat dir " +
          hfofDir + " not found");
    }

    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    if (familyDirStatuses == null) {
      throw new FileNotFoundException("No families found in " + hfofDir);
    }

    for (FileStatus stat : familyDirStatuses) {
      if (!stat.isDir()) {
        LOG.warn("Skipping non-directory " + stat.getPath());
        continue;
      }
      Path familyDir = stat.getPath();
      // Skip the "_logs" directory that MapReduce jobs may leave behind;
      // it does not correspond to a column family.
      if (familyDir.getName().equals("_logs")) {
        continue;
      }
      byte[] family = familyDir.getName().getBytes();
      FileStatus[] hfileStatuses = fs.listStatus(familyDir);
      for (FileStatus hfileStatus : hfileStatuses) {
        long length = hfileStatus.getLen();
        Path hfile = hfileStatus.getPath();
        // Files starting with '_' (e.g. "_SUCCESS") are not HFiles.
        if (hfile.getName().startsWith("_")) continue;
        if (length > getConf().getLong(HConstants.HREGION_MAX_FILESIZE,
            HConstants.DEFAULT_MAX_FILE_SIZE)) {
          LOG.warn("Trying to bulk load hfile " + hfile.toString() + " with size: " +
              length + " bytes can be problematic as it may lead to oversplitting.");
        }
        ret.add(new LoadQueueItem(family, hfile));
      }
    }
  }

  /**
   * Perform a bulk load of the given directory into the given pre-existing
   * table. This method is not threadsafe.
   *
   * @param hfofDir the directory that was provided as the output path of a
   *   job using HFileOutputFormat
   * @param table the table to load into
   * @throws TableNotFoundException if table does not yet exist
   */
  @SuppressWarnings("deprecation")
  public void doBulkLoad(Path hfofDir, final HTable table)
      throws TableNotFoundException, IOException {
    final HConnection conn = table.getConnection();

    if (!conn.isTableAvailable(table.getName())) {
      throw new TableNotFoundException("Table " +
          Bytes.toStringBinary(table.getTableName()) +
          " is not currently available.");
    }

    // Initialize the thread pool shared by the group/split and load phases.
    int nrThreads = getConf().getInt("hbase.loadincremental.threads.max",
        Runtime.getRuntime().availableProcessors());
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("LoadIncrementalHFiles-%1$d");
    ExecutorService pool = new ThreadPoolExecutor(nrThreads, nrThreads,
        60, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>(),
        builder.build());
    ((ThreadPoolExecutor) pool).allowCoreThreadTimeOut(true);

    // The LoadQueueItem queue does not need to be threadsafe -- all operations
    // on it happen in this thread.
    Deque<LoadQueueItem> queue = new LinkedList<LoadQueueItem>();
    try {
      discoverLoadQueue(queue, hfofDir);

      // Validate that every family referenced by an HFile exists in the table.
      Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
      ArrayList<String> familyNames = new ArrayList<String>();
      for (HColumnDescriptor family : families) {
        familyNames.add(family.getNameAsString());
      }
      ArrayList<String> unmatchedFamilies = new ArrayList<String>();
      for (LoadQueueItem lqi : queue) {
        String familyNameInHFile = Bytes.toString(lqi.family);
        if (!familyNames.contains(familyNameInHFile)) {
          unmatchedFamilies.add(familyNameInHFile);
        }
      }
      if (unmatchedFamilies.size() > 0) {
        String msg =
            "Unmatched family names found in HFiles to be bulkloaded: "
            + unmatchedFamilies + "; valid family names of table "
            + Bytes.toString(table.getTableName()) + " are: " + familyNames;
        LOG.error(msg);
        throw new IOException(msg);
      }
      int count = 0;

      if (queue.isEmpty()) {
        LOG.warn("Bulk load operation did not find any files to load in " +
            "directory " + hfofDir.toUri() + ". Does it contain files in " +
            "subdirectories that correspond to column family names?");
        return;
      }

      // If using secure bulk load, get the source delegation token and
      // prepare the staging directory and token.
      fsDelegationToken.acquireDelegationToken(fs);
      if (isSecureBulkLoadEndpointAvailable()) {
        bulkToken = new SecureBulkLoadClient(table).prepareBulkLoad(table.getName());
      }

      // Region splits can happen while this loop runs, so re-group on each pass.
      while (!queue.isEmpty()) {
        // Need to reload split keys each iteration.
        final Pair<byte[][], byte[][]> startEndKeys = table.getStartEndKeys();
        if (count != 0) {
          LOG.info("Split occurred while grouping HFiles, retry attempt " +
              count + " with " + queue.size() + " files remaining to group or split");
        }

        int maxRetries = getConf().getInt("hbase.bulkload.retries.number", 0);
        if (maxRetries != 0 && count >= maxRetries) {
          throw new IOException("Retry attempted " + count +
              " times without completing, bailing out");
        }
        count++;

        // Using ByteBuffer keys for byte[] content-equality semantics.
        Multimap<ByteBuffer, LoadQueueItem> regionGroups = groupOrSplitPhase(table,
            pool, queue, startEndKeys);

        if (!checkHFilesCountPerRegionPerFamily(regionGroups)) {
          // The offending region and family are logged inside the check.
          throw new IOException("Trying to load more than " + maxFilesPerRegionPerFamily
              + " hfiles to one family of one region");
        }

        bulkLoadPhase(table, conn, pool, queue, regionGroups);

        // NOTE: Any items that failed recoverably were re-queued and will be
        // re-grouped against the (possibly changed) region boundaries on the
        // next pass.
      }

    } finally {
      fsDelegationToken.releaseDelegationToken();
      if (bulkToken != null) {
        new SecureBulkLoadClient(table).cleanupBulkLoad(bulkToken);
      }
      pool.shutdown();
      if (queue != null && !queue.isEmpty()) {
        StringBuilder err = new StringBuilder();
        err.append("-------------------------------------------------\n");
        err.append("Bulk load aborted with some files not yet loaded:\n");
        err.append("-------------------------------------------------\n");
        for (LoadQueueItem q : queue) {
          err.append("  ").append(q.hfilePath).append('\n');
        }
        LOG.error(err);
      }
    }

    if (queue != null && !queue.isEmpty()) {
      throw new RuntimeException("Bulk load aborted with some files not yet loaded. "
          + "Please check log for more details.");
    }
  }

  /**
   * Takes the LoadQueueItems grouped by likely target region and attempts to
   * bulk load them. Any items that fail recoverably are re-queued for another
   * pass through the groupOrSplitPhase.
   */
  protected void bulkLoadPhase(final HTable table, final HConnection conn,
      ExecutorService pool, Deque<LoadQueueItem> queue,
      final Multimap<ByteBuffer, LoadQueueItem> regionGroups) throws IOException {
    // Atomically bulk load each group.
    Set<Future<List<LoadQueueItem>>> loadingFutures = new HashSet<Future<List<LoadQueueItem>>>();
    for (Entry<ByteBuffer, ? extends Collection<LoadQueueItem>> e : regionGroups.asMap().entrySet()) {
      final byte[] first = e.getKey().array();
      final Collection<LoadQueueItem> lqis = e.getValue();

      final Callable<List<LoadQueueItem>> call = new Callable<List<LoadQueueItem>>() {
        @Override
        public List<LoadQueueItem> call() throws Exception {
          List<LoadQueueItem> toRetry =
              tryAtomicRegionLoad(conn, table.getName(), first, lqis);
          return toRetry;
        }
      };
      loadingFutures.add(pool.submit(call));
    }

    // Collect all the results.
    for (Future<List<LoadQueueItem>> future : loadingFutures) {
      try {
        List<LoadQueueItem> toRetry = future.get();

        // Re-queue LQIs that need to be regrouped and retried.
        queue.addAll(toRetry);

      } catch (ExecutionException e1) {
        Throwable t = e1.getCause();
        if (t instanceof IOException) {
          // At this point something unrecoverable has happened.
          throw new IOException("BulkLoad encountered an unrecoverable problem", t);
        }
        LOG.error("Unexpected execution exception during bulk load", e1);
        throw new IllegalStateException(t);
      } catch (InterruptedException e1) {
        LOG.error("Unexpected interrupted exception during bulk load", e1);
        throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
      }
    }
  }

  private boolean checkHFilesCountPerRegionPerFamily(
      final Multimap<ByteBuffer, LoadQueueItem> regionGroups) {
    for (Entry<ByteBuffer,
        ? extends Collection<LoadQueueItem>> e : regionGroups.asMap().entrySet()) {
      final Collection<LoadQueueItem> lqis = e.getValue();
      HashMap<byte[], MutableInt> filesMap = new HashMap<byte[], MutableInt>();
      for (LoadQueueItem lqi : lqis) {
        MutableInt count = filesMap.get(lqi.family);
        if (count == null) {
          count = new MutableInt();
          filesMap.put(lqi.family, count);
        }
        count.increment();
        if (count.intValue() > maxFilesPerRegionPerFamily) {
          LOG.error("Trying to load more than " + maxFilesPerRegionPerFamily
              + " hfiles to family " + Bytes.toStringBinary(lqi.family)
              + " of region with start key "
              + Bytes.toStringBinary(e.getKey()));
          return false;
        }
      }
    }
    return true;
  }

  /**
   * @return a multimap that groups LoadQueueItems by the start key of the
   *   region they will likely be bulk loaded into
   */
  private Multimap<ByteBuffer, LoadQueueItem> groupOrSplitPhase(final HTable table,
      ExecutorService pool, Deque<LoadQueueItem> queue,
      final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
    // <region start key, LQI>; worker threads populate this map concurrently,
    // so synchronized access is needed only within this scope.
    Multimap<ByteBuffer, LoadQueueItem> rgs = HashMultimap.create();
    final Multimap<ByteBuffer, LoadQueueItem> regionGroups = Multimaps.synchronizedMultimap(rgs);

    // Drain the LQIs and figure out the bulk load groups.
    Set<Future<List<LoadQueueItem>>> splittingFutures = new HashSet<Future<List<LoadQueueItem>>>();
    while (!queue.isEmpty()) {
      final LoadQueueItem item = queue.remove();

      final Callable<List<LoadQueueItem>> call = new Callable<List<LoadQueueItem>>() {
        @Override
        public List<LoadQueueItem> call() throws Exception {
          List<LoadQueueItem> splits = groupOrSplit(regionGroups, item, table, startEndKeys);
          return splits;
        }
      };
      splittingFutures.add(pool.submit(call));
    }

    // Collect all the results. All grouping and splitting must finish before
    // the atomic loads can be attempted.
    for (Future<List<LoadQueueItem>> lqis : splittingFutures) {
      try {
        List<LoadQueueItem> splits = lqis.get();
        if (splits != null) {
          queue.addAll(splits);
        }
      } catch (ExecutionException e1) {
        Throwable t = e1.getCause();
        if (t instanceof IOException) {
          LOG.error("IOException during splitting", e1);
          throw (IOException) t;
        }
        LOG.error("Unexpected execution exception during splitting", e1);
        throw new IllegalStateException(t);
      } catch (InterruptedException e1) {
        LOG.error("Unexpected interrupted exception during splitting", e1);
        throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
      }
    }
    return regionGroups;
  }

  // Unique file name for a split half.
  private String getUniqueName() {
    return UUID.randomUUID().toString().replaceAll("-", "");
  }

  protected List<LoadQueueItem> splitStoreFile(final LoadQueueItem item,
      final HTable table, byte[] startKey,
      byte[] splitKey) throws IOException {
    final Path hfilePath = item.hfilePath;

    // Write the split halves under a "_tmp" subdirectory; names starting with
    // '_' are ignored when the output directory is walked for HFiles.
    final Path tmpDir = new Path(item.hfilePath.getParent(), "_tmp");

    LOG.info("HFile at " + hfilePath + " no longer fits inside a single " +
        "region. Splitting...");

    String uniqueName = getUniqueName();
    HColumnDescriptor familyDesc = table.getTableDescriptor().getFamily(item.family);
    Path botOut = new Path(tmpDir, uniqueName + ".bottom");
    Path topOut = new Path(tmpDir, uniqueName + ".top");
    splitStoreFile(getConf(), hfilePath, familyDesc, splitKey,
        botOut, topOut);

    FileSystem fs = tmpDir.getFileSystem(getConf());
    fs.setPermission(tmpDir, FsPermission.valueOf("-rwxrwxrwx"));
    fs.setPermission(botOut, FsPermission.valueOf("-rwxrwxrwx"));

    // Return the two halves so they are re-grouped (and, if necessary, split
    // again) on the next pass.
    List<LoadQueueItem> lqis = new ArrayList<LoadQueueItem>(2);
    lqis.add(new LoadQueueItem(item.family, botOut));
    lqis.add(new LoadQueueItem(item.family, topOut));

    LOG.info("Successfully split into new HFiles " + botOut + " and " + topOut);
    return lqis;
  }

  /**
   * Attempt to assign the given load queue item to its target region group.
   * If the hfile no longer fits inside a single region, physically splits it
   * at the region boundary and returns the LoadQueueItems for the resulting
   * halves so they can be re-queued.
   *
   * Protected for testing.
   * @throws IOException
   */
  protected List<LoadQueueItem> groupOrSplit(Multimap<ByteBuffer, LoadQueueItem> regionGroups,
      final LoadQueueItem item, final HTable table,
      final Pair<byte[][], byte[][]> startEndKeys)
      throws IOException {
    final Path hfilePath = item.hfilePath;
    HFile.Reader hfr = HFile.createReader(fs, hfilePath,
        new CacheConfig(getConf()), getConf());
    final byte[] first, last;
    try {
      hfr.loadFileInfo();
      first = hfr.getFirstRowKey();
      last = hfr.getLastRowKey();
    } finally {
      hfr.close();
    }

    LOG.info("Trying to load hfile=" + hfilePath +
        " first=" + Bytes.toStringBinary(first) +
        " last=" + Bytes.toStringBinary(last));
    if (first == null || last == null) {
      assert first == null && last == null;
      // An empty hfile has neither a first nor a last key.
      LOG.info("hfile " + hfilePath + " has no entries, skipping");
      return null;
    }
    if (Bytes.compareTo(first, last) > 0) {
      throw new IllegalArgumentException(
          "Invalid range: " + Bytes.toStringBinary(first) +
          " > " + Bytes.toStringBinary(last));
    }
    int idx = Arrays.binarySearch(startEndKeys.getFirst(), first,
        Bytes.BYTES_COMPARATOR);
    if (idx < 0) {
      // Not an exact match on a region start key; binarySearch returned
      // (-(insertion point) - 1), so convert to the index of the region
      // containing the first key.
      idx = -(idx + 1) - 1;
    }
    final int indexForCallable = idx;

    // A region hole exists if: (1) idx < 0, i.e. the first region is missing;
    // (2) the end key of the last region is not empty; or (3) the end key of
    // a region does not equal the start key of the next region.
    if (indexForCallable < 0) {
      throw new IOException("The first region info for table "
          + Bytes.toString(table.getTableName())
          + " can't be found in hbase:meta. Please use hbck tool to fix it first.");
    } else if ((indexForCallable == startEndKeys.getFirst().length - 1)
        && !Bytes.equals(startEndKeys.getSecond()[indexForCallable], HConstants.EMPTY_BYTE_ARRAY)) {
      throw new IOException("The last region info for table "
          + Bytes.toString(table.getTableName())
          + " can't be found in hbase:meta. Please use hbck tool to fix it first.");
    } else if (indexForCallable + 1 < startEndKeys.getFirst().length
        && !(Bytes.compareTo(startEndKeys.getSecond()[indexForCallable],
            startEndKeys.getFirst()[indexForCallable + 1]) == 0)) {
      throw new IOException("The endkey of one region for table "
          + Bytes.toString(table.getTableName())
          + " is not equal to the startkey of the next region in hbase:meta. "
          + "Please use hbck tool to fix it first.");
    }

    boolean lastKeyInRange =
        Bytes.compareTo(last, startEndKeys.getSecond()[idx]) < 0 ||
        Bytes.equals(startEndKeys.getSecond()[idx], HConstants.EMPTY_BYTE_ARRAY);
    if (!lastKeyInRange) {
      // The hfile spans a region boundary: split it at the region's end key.
      List<LoadQueueItem> lqis = splitStoreFile(item, table,
          startEndKeys.getFirst()[indexForCallable],
          startEndKeys.getSecond()[indexForCallable]);
      return lqis;
    }

    // Group the item under the start key of its target region.
    regionGroups.put(ByteBuffer.wrap(startEndKeys.getFirst()[idx]), item);
    return null;
  }

  /**
   * @deprecated Use {@link #tryAtomicRegionLoad(HConnection, TableName, byte[], Collection)}.
   */
  @Deprecated
  protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
      final byte[] tableName, final byte[] first, Collection<LoadQueueItem> lqis)
      throws IOException {
    return tryAtomicRegionLoad(conn, TableName.valueOf(tableName), first, lqis);
  }

  /**
   * Attempts to do an atomic load of many hfiles into a region. If it fails,
   * it returns a list of hfiles that need to be retried. If it is successful
   * it will return an empty list.
   *
   * NOTE: To maintain row atomicity guarantees, the region server callable
   * should succeed atomically and fail atomically.
   *
   * Protected for testing.
   *
   * @return empty list if success, list of items to retry on recoverable
   *   failure
   */
  protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
      final TableName tableName, final byte[] first, Collection<LoadQueueItem> lqis)
      throws IOException {
    final List<Pair<byte[], String>> famPaths =
        new ArrayList<Pair<byte[], String>>(lqis.size());
    for (LoadQueueItem lqi : lqis) {
      famPaths.add(Pair.newPair(lqi.family, lqi.hfilePath.toString()));
    }

    final RegionServerCallable<Boolean> svrCallable =
        new RegionServerCallable<Boolean>(conn, tableName, first) {
      @Override
      public Boolean call() throws Exception {
        SecureBulkLoadClient secureClient = null;
        boolean success = false;

        try {
          LOG.debug("Going to connect to server " + getLocation() + " for row "
              + Bytes.toStringBinary(getRow()) + " with hfile group " + famPaths);
          byte[] regionName = getLocation().getRegionInfo().getRegionName();
          if (!isSecureBulkLoadEndpointAvailable()) {
            success = ProtobufUtil.bulkLoadHFile(getStub(), famPaths, regionName, assignSeqIds);
          } else {
            HTable table = new HTable(conn.getConfiguration(), getTableName());
            secureClient = new SecureBulkLoadClient(table);
            success = secureClient.bulkLoadHFiles(famPaths, fsDelegationToken.getUserToken(),
                bulkToken, getLocation().getRegionInfo().getStartKey());
          }
          return success;
        } finally {
          // Best effort: if the secure bulk load failed, move any files that
          // were placed in the staging directory back to their original
          // location so they can be retried.
          if (secureClient != null && !success) {
            FileSystem targetFs = FileSystem.get(getConf());
            // A rename cannot cross filesystems, so only attempt the move
            // back when the source and the staging filesystem are the same
            // HDFS instance.
            if (FSHDFSUtils.isSameHdfs(getConf(), fs, targetFs)) {
              for (Pair<byte[], String> el : famPaths) {
                Path hfileStagingPath = null;
                Path hfileOrigPath = new Path(el.getSecond());
                try {
                  hfileStagingPath = new Path(secureClient.getStagingPath(bulkToken, el.getFirst()),
                      hfileOrigPath.getName());
                  if (targetFs.rename(hfileStagingPath, hfileOrigPath)) {
                    LOG.debug("Moved back file " + hfileOrigPath + " from " +
                        hfileStagingPath);
                  } else if (targetFs.exists(hfileStagingPath)) {
                    LOG.debug("Unable to move back file " + hfileOrigPath + " from " +
                        hfileStagingPath);
                  }
                } catch (Exception ex) {
                  LOG.debug("Unable to move back file " + hfileOrigPath + " from " +
                      hfileStagingPath, ex);
                }
              }
            }
          }
        }
      }
    };

    try {
      List<LoadQueueItem> toRetry = new ArrayList<LoadQueueItem>();
      Configuration conf = getConf();
      boolean success = RpcRetryingCallerFactory.instantiate(conf, null).<Boolean> newCaller()
          .callWithRetries(svrCallable);
      if (!success) {
        LOG.warn("Attempt to bulk load region containing "
            + Bytes.toStringBinary(first) + " into table "
            + tableName + " with files " + lqis
            + " failed. This is recoverable and they will be retried.");
        // Return the entire group so it is re-grouped and retried.
        toRetry.addAll(lqis);
      }

      return toRetry;
    } catch (IOException e) {
      LOG.error("Encountered unrecoverable error from region server, additional details: "
          + svrCallable.getExceptionMessageAdditionalDetail(), e);
      throw e;
    }
  }

  private boolean isSecureBulkLoadEndpointAvailable() {
    String classes = getConf().get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    return classes.contains("org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint");
  }

  /**
   * Split a storefile into a top and bottom half at the given split key,
   * copying the metadata and recreating bloom filters as needed.
   */
  static void splitStoreFile(
      Configuration conf, Path inFile,
      HColumnDescriptor familyDesc, byte[] splitKey,
      Path bottomOut, Path topOut) throws IOException {
    // Create references describing the top and bottom halves of the file.
    Reference topReference = Reference.createTopReference(splitKey);
    Reference bottomReference = Reference.createBottomReference(splitKey);

    copyHFileHalf(conf, inFile, topOut, topReference, familyDesc);
    copyHFileHalf(conf, inFile, bottomOut, bottomReference, familyDesc);
  }

  /**
   * Copy half of an HFile into a new HFile, as described by the given
   * Reference.
   */
  private static void copyHFileHalf(
      Configuration conf, Path inFile, Path outFile, Reference reference,
      HColumnDescriptor familyDescriptor)
      throws IOException {
    FileSystem fs = inFile.getFileSystem(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    HalfStoreFileReader halfReader = null;
    StoreFile.Writer halfWriter = null;
    try {
      halfReader = new HalfStoreFileReader(fs, inFile, cacheConf, reference, conf);
      Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();

      // Create the writer using the same family settings (compression, block
      // size, bloom filter, data block encoding) as the target family.
      int blocksize = familyDescriptor.getBlocksize();
      Algorithm compression = familyDescriptor.getCompression();
      BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
      HFileContext hFileContext = new HFileContextBuilder()
          .withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
          .withBlockSize(blocksize)
          .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
          .build();
      halfWriter = new StoreFile.WriterBuilder(conf, cacheConf, fs)
          .withFilePath(outFile)
          .withBloomType(bloomFilterType)
          .withFileContext(hFileContext)
          .build();
      HFileScanner scanner = halfReader.getScanner(false, false, false);
      scanner.seekTo();
      do {
        KeyValue kv = scanner.getKeyValue();
        halfWriter.append(kv);
      } while (scanner.next());

      // Carry over file info entries other than the reserved HFile keys.
      for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
        if (shouldCopyHFileMetaKey(entry.getKey())) {
          halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
        }
      }
    } finally {
      if (halfWriter != null) halfWriter.close();
      if (halfReader != null) halfReader.close(cacheConf.shouldEvictOnClose());
    }
  }

  private static boolean shouldCopyHFileMetaKey(byte[] key) {
    return !HFile.isReservedFileInfoKey(key);
  }

  private boolean doesTableExist(TableName tableName) throws Exception {
    return hbAdmin.tableExists(tableName);
  }

  /**
   * Infer split keys for a new table from the first and last row keys of the
   * HFiles to be loaded.
   *
   * bdryMap maps each row key to the number of HFiles whose first key it is,
   * minus the number of HFiles whose last key it is. Walking the keys in
   * sorted order with a running sum, each group of overlapping HFiles yields
   * one region whose start key is the group's first key; the first group is
   * skipped because the first region implicitly starts at the empty row key.
   */
  public static byte[][] inferBoundaries(TreeMap<byte[], Integer> bdryMap) {
    ArrayList<byte[]> keysArray = new ArrayList<byte[]>();
    int runningValue = 0;
    byte[] currStartKey = null;
    boolean firstBoundary = true;

    for (Map.Entry<byte[], Integer> item : bdryMap.entrySet()) {
      if (runningValue == 0) currStartKey = item.getKey();
      runningValue += item.getValue();
      if (runningValue == 0) {
        if (!firstBoundary) keysArray.add(currStartKey);
        firstBoundary = false;
      }
    }

    return keysArray.toArray(new byte[0][0]);
  }

  /*
   * Create the target table, inferring column families and region boundaries
   * from the HFiles in the given directory. Note that "completebulkload" then
   * reads every HFile a second time during the load itself.
   */
  private void createTable(TableName tableName, String dirPath) throws Exception {
    Path hfofDir = new Path(dirPath);
    FileSystem fs = hfofDir.getFileSystem(getConf());

    if (!fs.exists(hfofDir)) {
      throw new FileNotFoundException("HFileOutputFormat dir " +
          hfofDir + " not found");
    }

    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    if (familyDirStatuses == null) {
      throw new FileNotFoundException("No families found in " + hfofDir);
    }

    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor hcd;

    // Add column families and build the key map from which region boundaries
    // are inferred.
    byte[][] keys;
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    for (FileStatus stat : familyDirStatuses) {
      if (!stat.isDir()) {
        LOG.warn("Skipping non-directory " + stat.getPath());
        continue;
      }
      Path familyDir = stat.getPath();
      // Skip the "_logs" directory that MapReduce jobs may leave behind.
      if (familyDir.getName().equals("_logs")) {
        continue;
      }
      byte[] family = familyDir.getName().getBytes();

      hcd = new HColumnDescriptor(family);
      htd.addFamily(hcd);

      Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
      for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_")) continue;
        HFile.Reader reader = HFile.createReader(fs, hfile,
            new CacheConfig(getConf()), getConf());
        final byte[] first, last;
        try {
          if (hcd.getCompressionType() != reader.getFileContext().getCompression()) {
            hcd.setCompressionType(reader.getFileContext().getCompression());
            LOG.info("Setting compression " + hcd.getCompressionType().name() +
                " for family " + hcd.toString());
          }
          reader.loadFileInfo();
          first = reader.getFirstRowKey();
          last = reader.getLastRowKey();

          LOG.info("Trying to figure out region boundaries hfile=" + hfile +
              " first=" + Bytes.toStringBinary(first) +
              " last=" + Bytes.toStringBinary(last));

          // Count +1 for each first key and -1 for each last key; the region
          // boundaries are inferred from this map later.
          Integer value = map.containsKey(first) ? map.get(first) : 0;
          map.put(first, value + 1);

          value = map.containsKey(last) ? map.get(last) : 0;
          map.put(last, value - 1);
        } finally {
          reader.close();
        }
      }
    }

    keys = LoadIncrementalHFiles.inferBoundaries(map);
    this.hbAdmin.createTable(htd, keys);

    LOG.info("Table " + tableName + " is available!!");
  }

  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      usage();
      return -1;
    }

    initialize();

    String dirPath = args[0];
    TableName tableName = TableName.valueOf(args[1]);

    boolean tableExists = this.doesTableExist(tableName);
    if (!tableExists) {
      if ("yes".equalsIgnoreCase(getConf().get(CREATE_TABLE_CONF_KEY, "yes"))) {
        this.createTable(tableName, dirPath);
      } else {
        String errorMsg = format("Table '%s' does not exist.", tableName);
        LOG.error(errorMsg);
        throw new TableNotFoundException(errorMsg);
      }
    }

    Path hfofDir = new Path(dirPath);
    HTable table = new HTable(getConf(), tableName);

    doBulkLoad(hfofDir, table);
    return 0;
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int ret = ToolRunner.run(conf, new LoadIncrementalHFiles(), args);
    System.exit(ret);
  }

}