/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RegionServerCallable;
import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
import org.apache.hadoop.hbase.client.coprocessor.SecureBulkLoadClient;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Tool to load the output of HFileOutputFormat into an existing table.
 * On the command line it is invoked as
 * {@code completebulkload /path/to/hfileoutputformat-output tablename}.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class LoadIncrementalHFiles extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(LoadIncrementalHFiles.class);
  static final AtomicLong regionCount = new AtomicLong(0);
  private HBaseAdmin hbAdmin;
  private Configuration cfg;

  public static final String NAME = "completebulkload";
  private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
  private boolean assignSeqIds;

  private boolean hasForwardedToken;
  private Token<?> userToken;
  private String bulkToken;
  private UserProvider userProvider;

  public LoadIncrementalHFiles(Configuration conf) throws Exception {
    super(conf);
    this.cfg = conf;
    this.hbAdmin = new HBaseAdmin(conf);
    this.userProvider = UserProvider.instantiate(conf);
    assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
  }

  private void usage() {
    System.err.println("usage: " + NAME +
        " /path/to/hfileoutputformat-output " +
        "tablename");
  }

  /**
   * Represents an HFile waiting to be loaded. A queue is used in this class
   * because a region may split while the load is in progress; when that
   * happens the HFile is split into two physical parts across the new region
   * boundary and each part is added back to the queue. The load finishes when
   * the queue is empty.
   */
  static class LoadQueueItem {
    final byte[] family;
    final Path hfilePath;

    public LoadQueueItem(byte[] family, Path hfilePath) {
      this.family = family;
      this.hfilePath = hfilePath;
    }

    @Override
    public String toString() {
      return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
    }
  }

  /**
   * Walk the given directory for all HFiles, and add them to the passed queue,
   * grouped by the column family directory in which they sit.
   */
  private void discoverLoadQueue(Deque<LoadQueueItem> ret, Path hfofDir)
      throws IOException {
    FileSystem fs = hfofDir.getFileSystem(getConf());

    if (!fs.exists(hfofDir)) {
      throw new FileNotFoundException("HFileOutputFormat dir " +
          hfofDir + " not found");
    }

    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    if (familyDirStatuses == null) {
      throw new FileNotFoundException("No families found in " + hfofDir);
    }

    for (FileStatus stat : familyDirStatuses) {
      if (!stat.isDir()) {
        LOG.warn("Skipping non-directory " + stat.getPath());
        continue;
      }
      Path familyDir = stat.getPath();

      if (familyDir.getName().startsWith("_")) continue;
      byte[] family = familyDir.getName().getBytes();
      Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
      for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_")) continue;
        ret.add(new LoadQueueItem(family, hfile));
      }
    }
  }
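
  // Illustrative input layout (hypothetical paths, not taken from this file)
  // for discoverLoadQueue: the HFileOutputFormat output directory contains one
  // subdirectory per column family, each holding the hfiles to load.
  //
  //   /bulk/output/
  //     _SUCCESS        <- plain file, skipped with the "Skipping non-directory" warning
  //     _logs/          <- directory with a leading underscore, skipped
  //     cf1/            <- family directory
  //       2c0f83...     <- queued as LoadQueueItem(family=cf1, path=...)
  //     cf2/
  //       9ab114...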

  /**
   * Perform a bulk load of the given directory into the given
   * pre-existing table.  This method is not threadsafe.
   *
   * @param hfofDir the directory that was provided as the output path
   * of a job using HFileOutputFormat
   * @param table the table to load into
   * @throws TableNotFoundException if the table does not yet exist
   */
  @SuppressWarnings("deprecation")
  public void doBulkLoad(Path hfofDir, final HTable table)
      throws TableNotFoundException, IOException {
    final HConnection conn = table.getConnection();

    if (!conn.isTableAvailable(table.getName())) {
      throw new TableNotFoundException("Table " +
          Bytes.toStringBinary(table.getTableName()) +
          " is not currently available.");
    }

    // initialize thread pools
    int nrThreads = cfg.getInt("hbase.loadincremental.threads.max",
        Runtime.getRuntime().availableProcessors());
    ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
    builder.setNameFormat("LoadIncrementalHFiles-%1$d");
    ExecutorService pool = new ThreadPoolExecutor(nrThreads, nrThreads,
        60, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>(),
        builder.build());
    ((ThreadPoolExecutor) pool).allowCoreThreadTimeOut(true);

    // The LQI queue does not need to be threadsafe -- all operations on it
    // happen in this thread.
    Deque<LoadQueueItem> queue = new LinkedList<LoadQueueItem>();
    try {
      discoverLoadQueue(queue, hfofDir);

      // Check that every family referenced by an HFile exists in the table.
      Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
      ArrayList<String> familyNames = new ArrayList<String>();
      for (HColumnDescriptor family : families) {
        familyNames.add(family.getNameAsString());
      }
      ArrayList<String> unmatchedFamilies = new ArrayList<String>();
      for (LoadQueueItem lqi : queue) {
        String familyNameInHFile = Bytes.toString(lqi.family);
        if (!familyNames.contains(familyNameInHFile)) {
          unmatchedFamilies.add(familyNameInHFile);
        }
      }
      if (unmatchedFamilies.size() > 0) {
        String msg =
            "Unmatched family names found. HFiles to be bulk loaded reference families "
            + unmatchedFamilies + "; valid family names of table "
            + Bytes.toString(table.getTableName()) + " are: " + familyNames;
        LOG.error(msg);
        throw new IOException(msg);
      }
      int count = 0;

      if (queue.isEmpty()) {
        LOG.warn("Bulk load operation did not find any files to load in " +
            "directory " + hfofDir.toUri() + ". Does it contain files in " +
            "subdirectories that correspond to column family names?");
        return;
      }

      // If using secure bulk load, get the source delegation token and
      // prepare the staging directory and bulk load token.
      if (userProvider.isHBaseSecurityEnabled()) {
        FileSystem fs = FileSystem.get(cfg);

        // Fetch a delegation token for the source filesystem if one is needed.
        if (userProvider.isHadoopSecurityEnabled()) {
          userToken = userProvider.getCurrent().getToken("HDFS_DELEGATION_TOKEN",
              fs.getCanonicalServiceName());
          if (userToken == null) {
            hasForwardedToken = false;
            userToken = fs.getDelegationToken("renewer");
          } else {
            hasForwardedToken = true;
            LOG.info("Use the existing token: " + userToken);
          }
        }
        bulkToken = new SecureBulkLoadClient(table).prepareBulkLoad(table.getName());
      }

      // Assumes that region splits can happen while this is running.
      while (!queue.isEmpty()) {
        // Need to reload the split keys on each iteration.
        final Pair<byte[][], byte[][]> startEndKeys = table.getStartEndKeys();
        if (count != 0) {
          LOG.info("Split occurred while grouping HFiles, retry attempt " +
              count + " with " + queue.size() + " files remaining to group or split");
        }

        int maxRetries = cfg.getInt("hbase.bulkload.retries.number", 0);
        if (maxRetries != 0 && count >= maxRetries) {
          LOG.error("Retry attempted " + count + " times without completing, bailing out");
          return;
        }
        count++;

        // ByteBuffer keys give byte[] equality semantics for the grouping map.
        Multimap<ByteBuffer, LoadQueueItem> regionGroups = groupOrSplitPhase(table,
            pool, queue, startEndKeys);

        bulkLoadPhase(table, conn, pool, queue, regionGroups);

        // Items that failed recoverably are re-queued by bulkLoadPhase and are
        // grouped or split again on the next pass.
      }

    } finally {
      if (userProvider.isHBaseSecurityEnabled()) {
        if (userToken != null && !hasForwardedToken) {
          try {
            userToken.cancel(cfg);
          } catch (Exception e) {
            LOG.warn("Failed to cancel HDFS delegation token.", e);
          }
        }
        if (bulkToken != null) {
          new SecureBulkLoadClient(table).cleanupBulkLoad(bulkToken);
        }
      }
      pool.shutdown();
      if (queue != null && !queue.isEmpty()) {
        StringBuilder err = new StringBuilder();
        err.append("-------------------------------------------------\n");
        err.append("Bulk load aborted with some files not yet loaded:\n");
        err.append("-------------------------------------------------\n");
        for (LoadQueueItem q : queue) {
          err.append(" ").append(q.hfilePath).append('\n');
        }
        LOG.error(err);
      }
    }

    if (queue != null && !queue.isEmpty()) {
      throw new RuntimeException("Bulk load aborted with some files not yet loaded. "
          + "Please check log for more details.");
    }
  }
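
  // Sketch of typical programmatic use of this class (illustrative names and
  // paths, not taken from this file): load previously generated
  // HFileOutputFormat output into an existing table.
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
  //   HTable table = new HTable(conf, "mytable");
  //   loader.doBulkLoad(new Path("/bulk/output"), table);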

  /**
   * This takes the LQIs grouped by likely regions and attempts to bulk load
   * them.  Any failures are re-queued for another pass with
   * groupOrSplitPhase.
   */
  protected void bulkLoadPhase(final HTable table, final HConnection conn,
      ExecutorService pool, Deque<LoadQueueItem> queue,
      final Multimap<ByteBuffer, LoadQueueItem> regionGroups) throws IOException {
    // Atomically bulk load each group.
    Set<Future<List<LoadQueueItem>>> loadingFutures = new HashSet<Future<List<LoadQueueItem>>>();
    for (Entry<ByteBuffer, ? extends Collection<LoadQueueItem>> e : regionGroups.asMap().entrySet()) {
      final byte[] first = e.getKey().array();
      final Collection<LoadQueueItem> lqis = e.getValue();

      final Callable<List<LoadQueueItem>> call = new Callable<List<LoadQueueItem>>() {
        @Override
        public List<LoadQueueItem> call() throws Exception {
          List<LoadQueueItem> toRetry =
              tryAtomicRegionLoad(conn, table.getName(), first, lqis);
          return toRetry;
        }
      };
      loadingFutures.add(pool.submit(call));
    }

    // Gather all the results.
    for (Future<List<LoadQueueItem>> future : loadingFutures) {
      try {
        List<LoadQueueItem> toRetry = future.get();

        // LQIs that are re-queued to be regrouped.
        queue.addAll(toRetry);

      } catch (ExecutionException e1) {
        Throwable t = e1.getCause();
        if (t instanceof IOException) {
          // At this point something unrecoverable has happened.
          throw new IOException("BulkLoad encountered an unrecoverable problem", t);
        }
        LOG.error("Unexpected execution exception during bulk load", e1);
        throw new IllegalStateException(t);
      } catch (InterruptedException e1) {
        LOG.error("Unexpected interrupted exception during bulk load", e1);
        throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
      }
    }
  }

  /**
   * @return a multimap that groups each LoadQueueItem by the start key of the
   * region it is expected to load into
   */
  private Multimap<ByteBuffer, LoadQueueItem> groupOrSplitPhase(final HTable table,
      ExecutorService pool, Deque<LoadQueueItem> queue,
      final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
    // <region start key, LQI>; needs to be synchronized only within the scope
    // of this method.
    Multimap<ByteBuffer, LoadQueueItem> rgs = HashMultimap.create();
    final Multimap<ByteBuffer, LoadQueueItem> regionGroups = Multimaps.synchronizedMultimap(rgs);

    // Drain the LQIs and figure out the bulk load groups.
    Set<Future<List<LoadQueueItem>>> splittingFutures = new HashSet<Future<List<LoadQueueItem>>>();
    while (!queue.isEmpty()) {
      final LoadQueueItem item = queue.remove();

      final Callable<List<LoadQueueItem>> call = new Callable<List<LoadQueueItem>>() {
        @Override
        public List<LoadQueueItem> call() throws Exception {
          List<LoadQueueItem> splits = groupOrSplit(regionGroups, item, table, startEndKeys);
          return splits;
        }
      };
      splittingFutures.add(pool.submit(call));
    }

    // Gather all the results; grouping and splitting must finish before the
    // atomic loads can be attempted.
    for (Future<List<LoadQueueItem>> lqis : splittingFutures) {
      try {
        List<LoadQueueItem> splits = lqis.get();
        if (splits != null) {
          queue.addAll(splits);
        }
      } catch (ExecutionException e1) {
        Throwable t = e1.getCause();
        if (t instanceof IOException) {
          LOG.error("IOException during splitting", e1);
          throw (IOException) t;
        }
        LOG.error("Unexpected execution exception during splitting", e1);
        throw new IllegalStateException(t);
      } catch (InterruptedException e1) {
        LOG.error("Unexpected interrupted exception during splitting", e1);
        throw (InterruptedIOException) new InterruptedIOException().initCause(e1);
      }
    }
    return regionGroups;
  }

  String getUniqueName(TableName tableName) {
    String name = tableName + "," + regionCount.incrementAndGet();
    return name;
  }

  protected List<LoadQueueItem> splitStoreFile(final LoadQueueItem item,
      final HTable table, byte[] startKey,
      byte[] splitKey) throws IOException {
    final Path hfilePath = item.hfilePath;

    // The "_" prefix means the temporary output is ignored by
    // discoverLoadQueue if the bulk load is re-run on this directory.
    final Path tmpDir = new Path(item.hfilePath.getParent(), "_tmp");

    LOG.info("HFile at " + hfilePath + " no longer fits inside a single " +
        "region. Splitting...");

    String uniqueName = getUniqueName(table.getName());
    HColumnDescriptor familyDesc = table.getTableDescriptor().getFamily(item.family);
    Path botOut = new Path(tmpDir, uniqueName + ".bottom");
    Path topOut = new Path(tmpDir, uniqueName + ".top");
    splitStoreFile(getConf(), hfilePath, familyDesc, splitKey,
        botOut, topOut);

    // Return both halves so the caller re-queues them; they will be grouped or
    // split again on the next pass.
    List<LoadQueueItem> lqis = new ArrayList<LoadQueueItem>(2);
    lqis.add(new LoadQueueItem(item.family, botOut));
    lqis.add(new LoadQueueItem(item.family, topOut));

    LOG.info("Successfully split into new HFiles " + botOut + " and " + topOut);
    return lqis;
  }

  /**
   * Attempt to assign the given load queue item into its target region group.
   * If the HFile no longer fits into a single region, physically splits it so
   * that the new bottom half will fit, and returns the list of LQIs
   * corresponding to the resulting HFiles.
   *
   * Protected for testing.
   */
  protected List<LoadQueueItem> groupOrSplit(Multimap<ByteBuffer, LoadQueueItem> regionGroups,
      final LoadQueueItem item, final HTable table,
      final Pair<byte[][], byte[][]> startEndKeys)
      throws IOException {
    final Path hfilePath = item.hfilePath;
    final FileSystem fs = hfilePath.getFileSystem(getConf());
    HFile.Reader hfr = HFile.createReader(fs, hfilePath,
        new CacheConfig(getConf()), getConf());
    final byte[] first, last;
    try {
      hfr.loadFileInfo();
      first = hfr.getFirstRowKey();
      last = hfr.getLastRowKey();
    } finally {
      hfr.close();
    }

    LOG.info("Trying to load hfile=" + hfilePath +
        " first=" + Bytes.toStringBinary(first) +
        " last=" + Bytes.toStringBinary(last));
    if (first == null || last == null) {
      assert first == null && last == null;
      // An empty hfile has neither a first nor a last key.
      LOG.info("hfile " + hfilePath + " has no entries, skipping");
      return null;
    }
    if (Bytes.compareTo(first, last) > 0) {
      throw new IllegalArgumentException(
          "Invalid range: " + Bytes.toStringBinary(first) +
          " > " + Bytes.toStringBinary(last));
    }
    int idx = Arrays.binarySearch(startEndKeys.getFirst(), first,
        Bytes.BYTES_COMPARATOR);
    if (idx < 0) {
      // Not on a region boundary; binarySearch returns -(insertion point) - 1,
      // so compute the region the key falls into.
      idx = -(idx + 1) - 1;
    }
    final int indexForCallable = idx;
    boolean lastKeyInRange =
        Bytes.compareTo(last, startEndKeys.getSecond()[idx]) < 0 ||
        Bytes.equals(startEndKeys.getSecond()[idx], HConstants.EMPTY_BYTE_ARRAY);
    if (!lastKeyInRange) {
      List<LoadQueueItem> lqis = splitStoreFile(item, table,
          startEndKeys.getFirst()[indexForCallable],
          startEndKeys.getSecond()[indexForCallable]);
      return lqis;
    }

    // The hfile fits entirely inside one region; group it under that region's
    // start key.
    regionGroups.put(ByteBuffer.wrap(startEndKeys.getFirst()[idx]), item);
    return null;
  }
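
  // Illustrative sketch of the grouping/splitting behaviour above (hypothetical
  // keys): with regions [a,c), [c,f), [f,<end>) and an hfile whose keys span
  // b..g, the file starts in [a,c) but its last key g lies at or beyond c, so
  // groupOrSplit splits it at c into a ".bottom" half (keys < c) and a ".top"
  // half (keys >= c). Both halves are re-queued and handled again on the next
  // pass of doBulkLoad's retry loop.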

  /**
   * @deprecated Use {@link #tryAtomicRegionLoad(HConnection, TableName, byte[], Collection)}.
   */
  @Deprecated
  protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
      final byte[] tableName, final byte[] first, Collection<LoadQueueItem> lqis)
      throws IOException {
    return tryAtomicRegionLoad(conn, TableName.valueOf(tableName), first, lqis);
  }

  /**
   * Attempts to atomically load a group of hfiles into a region.  If the load
   * fails recoverably, returns the list of hfiles that need to be retried;
   * if it succeeds, returns an empty list.
   *
   * NOTE: to maintain row atomicity guarantees, the region server callable
   * must succeed atomically and fail atomically.
   *
   * Protected for testing.
   *
   * @return empty list on success, list of items to retry on recoverable
   * failure
   */
  protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
      final TableName tableName, final byte[] first, Collection<LoadQueueItem> lqis)
      throws IOException {
    final List<Pair<byte[], String>> famPaths =
        new ArrayList<Pair<byte[], String>>(lqis.size());
    for (LoadQueueItem lqi : lqis) {
      famPaths.add(Pair.newPair(lqi.family, lqi.hfilePath.toString()));
    }

    final RegionServerCallable<Boolean> svrCallable =
        new RegionServerCallable<Boolean>(conn, tableName, first) {
      @Override
      public Boolean call() throws Exception {
        SecureBulkLoadClient secureClient = null;
        boolean success = false;

        try {
          LOG.debug("Going to connect to server " + getLocation() + " for row "
              + Bytes.toStringBinary(getRow()) + " with hfile group " + famPaths);
          byte[] regionName = getLocation().getRegionInfo().getRegionName();
          if (!userProvider.isHBaseSecurityEnabled()) {
            success = ProtobufUtil.bulkLoadHFile(getStub(), famPaths, regionName, assignSeqIds);
          } else {
            HTable table = new HTable(conn.getConfiguration(), getTableName());
            secureClient = new SecureBulkLoadClient(table);
            success = secureClient.bulkLoadHFiles(famPaths, userToken, bulkToken,
                getLocation().getRegionInfo().getStartKey());
          }
          return success;
        } finally {
          // Best-effort copy of files that might not have been imported: move
          // them from the staging directory back to their original location in
          // the source cluster so a retry can find them.
          if (secureClient != null && !success) {
            FileSystem fs = FileSystem.get(cfg);
            for (Pair<byte[], String> el : famPaths) {
              Path hfileStagingPath = null;
              Path hfileOrigPath = new Path(el.getSecond());
              try {
                hfileStagingPath = new Path(secureClient.getStagingPath(bulkToken, el.getFirst()),
                    hfileOrigPath.getName());
                if (fs.rename(hfileStagingPath, hfileOrigPath)) {
                  LOG.debug("Moved back file " + hfileOrigPath + " from " +
                      hfileStagingPath);
                } else if (fs.exists(hfileStagingPath)) {
                  LOG.debug("Unable to move back file " + hfileOrigPath + " from " +
                      hfileStagingPath);
                }
              } catch (Exception ex) {
                LOG.debug("Unable to move back file " + hfileOrigPath + " from " +
                    hfileStagingPath, ex);
              }
            }
          }
        }
      }
    };

    try {
      List<LoadQueueItem> toRetry = new ArrayList<LoadQueueItem>();
      Configuration conf = getConf();
      boolean success = RpcRetryingCallerFactory.instantiate(conf).<Boolean> newCaller()
          .callWithRetries(svrCallable);
      if (!success) {
        LOG.warn("Attempt to bulk load region containing "
            + Bytes.toStringBinary(first) + " into table "
            + tableName + " with files " + lqis
            + " failed. This is recoverable and they will be retried.");
        toRetry.addAll(lqis);
      }

      return toRetry;
    } catch (IOException e) {
      LOG.error("Encountered unrecoverable error from region server", e);
      throw e;
    }
  }

  /**
   * Split a storefile into a top and a bottom half, maintaining the metadata
   * and recreating bloom filters, etc.
   */
  static void splitStoreFile(
      Configuration conf, Path inFile,
      HColumnDescriptor familyDesc, byte[] splitKey,
      Path bottomOut, Path topOut) throws IOException {

    Reference topReference = Reference.createTopReference(splitKey);
    Reference bottomReference = Reference.createBottomReference(splitKey);

    copyHFileHalf(conf, inFile, topOut, topReference, familyDesc);
    copyHFileHalf(conf, inFile, bottomOut, bottomReference, familyDesc);
  }
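
  // Background: the two halves are expressed as Reference markers, the same
  // mechanism HBase uses for region splits. The bottom reference covers keys
  // below splitKey and the top reference covers keys at or above it;
  // copyHFileHalf below materializes each referenced half into a standalone
  // HFile.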

  /**
   * Copy half of an HFile into a new HFile.
   */
  private static void copyHFileHalf(
      Configuration conf, Path inFile, Path outFile, Reference reference,
      HColumnDescriptor familyDescriptor)
      throws IOException {
    FileSystem fs = inFile.getFileSystem(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    HalfStoreFileReader halfReader = null;
    StoreFile.Writer halfWriter = null;
    try {
      halfReader = new HalfStoreFileReader(fs, inFile, cacheConf, reference, conf);
      Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();

      int blocksize = familyDescriptor.getBlocksize();
      Algorithm compression = familyDescriptor.getCompression();
      BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
      HFileContext hFileContext = new HFileContextBuilder()
          .withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
          .withBlockSize(blocksize)
          .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
          .build();
      halfWriter = new StoreFile.WriterBuilder(conf, cacheConf, fs)
          .withFilePath(outFile)
          .withBloomType(bloomFilterType)
          .withFileContext(hFileContext)
          .build();
      HFileScanner scanner = halfReader.getScanner(false, false, false);
      scanner.seekTo();
      do {
        KeyValue kv = scanner.getKeyValue();
        halfWriter.append(kv);
      } while (scanner.next());

      for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
        if (shouldCopyHFileMetaKey(entry.getKey())) {
          halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
        }
      }
    } finally {
      if (halfWriter != null) halfWriter.close();
      if (halfReader != null) halfReader.close(cacheConf.shouldEvictOnClose());
    }
  }

  private static boolean shouldCopyHFileMetaKey(byte[] key) {
    return !HFile.isReservedFileInfoKey(key);
  }

  private boolean doesTableExist(TableName tableName) throws Exception {
    return hbAdmin.tableExists(tableName);
  }

  /**
   * Infers region boundaries for a new table.
   *
   * The given map associates each candidate key with an integer: +1 for every
   * hfile that starts at that key and -1 for every hfile that ends there.
   * Walking the keys in sorted order with a running sum, the sum returns to
   * zero at the end of each group of overlapping hfiles; the start key of
   * every such group except the first is used as a split key for the new
   * table.
   */
  public static byte[][] inferBoundaries(TreeMap<byte[], Integer> bdryMap) {
    ArrayList<byte[]> keysArray = new ArrayList<byte[]>();
    int runningValue = 0;
    byte[] currStartKey = null;
    boolean firstBoundary = true;

    for (Map.Entry<byte[], Integer> item : bdryMap.entrySet()) {
      if (runningValue == 0) currStartKey = item.getKey();
      runningValue += item.getValue();
      if (runningValue == 0) {
        if (!firstBoundary) keysArray.add(currStartKey);
        firstBoundary = false;
      }
    }

    return keysArray.toArray(new byte[0][0]);
  }
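
  // Worked example for inferBoundaries (hypothetical keys): hfiles covering
  // [a,c], [b,e] and [f,g] contribute a:+1, b:+1, c:-1, e:-1, f:+1, g:-1.
  // The running sum returns to zero after e and after g, so the only split key
  // produced is f (the first group's start key, a, is skipped) and createTable
  // below ends up creating two regions, [<start>,f) and [f,<end>).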

  /**
   * Create the target table, inferring region boundaries from the hfiles in
   * the given directory.  Note that when the table is created this way,
   * "completebulkload" ends up reading each hfile twice: once here to find the
   * boundaries and once again during the load itself.
   */
  private void createTable(TableName tableName, String dirPath) throws Exception {
    Path hfofDir = new Path(dirPath);
    FileSystem fs = hfofDir.getFileSystem(getConf());

    if (!fs.exists(hfofDir)) {
      throw new FileNotFoundException("HFileOutputFormat dir " +
          hfofDir + " not found");
    }

    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    if (familyDirStatuses == null) {
      throw new FileNotFoundException("No families found in " + hfofDir);
    }

    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor hcd;

    // Add column families, and build the set of keys from which region
    // boundaries can be inferred.
    byte[][] keys;
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    for (FileStatus stat : familyDirStatuses) {
      if (!stat.isDir()) {
        LOG.warn("Skipping non-directory " + stat.getPath());
        continue;
      }
      Path familyDir = stat.getPath();

      if (familyDir.getName().startsWith("_")) continue;
      byte[] family = familyDir.getName().getBytes();

      hcd = new HColumnDescriptor(family);
      htd.addFamily(hcd);

      Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
      for (Path hfile : hfiles) {
        if (hfile.getName().startsWith("_")) continue;
        HFile.Reader reader = HFile.createReader(fs, hfile,
            new CacheConfig(getConf()), getConf());
        final byte[] first, last;
        try {
          if (hcd.getCompressionType() != reader.getFileContext().getCompression()) {
            hcd.setCompressionType(reader.getFileContext().getCompression());
            LOG.info("Setting compression " + hcd.getCompressionType().name() +
                " for family " + hcd.toString());
          }
          reader.loadFileInfo();
          first = reader.getFirstRowKey();
          last = reader.getLastRowKey();

          LOG.info("Trying to figure out region boundaries hfile=" + hfile +
              " first=" + Bytes.toStringBinary(first) +
              " last=" + Bytes.toStringBinary(last));

          // Count +1 for a start key and -1 for an end key, to later infer
          // the region boundaries.
          Integer value = map.containsKey(first) ? map.get(first) : 0;
          map.put(first, value + 1);

          value = map.containsKey(last) ? map.get(last) : 0;
          map.put(last, value - 1);
        } finally {
          reader.close();
        }
      }
    }

    keys = LoadIncrementalHFiles.inferBoundaries(map);
    this.hbAdmin.createTable(htd, keys);

    LOG.info("Table " + tableName + " is available!!");
  }

  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      usage();
      return -1;
    }

    String dirPath = args[0];
    TableName tableName = TableName.valueOf(args[1]);

    boolean tableExists = this.doesTableExist(tableName);
    if (!tableExists) this.createTable(tableName, dirPath);

    Path hfofDir = new Path(dirPath);
    HTable table = new HTable(this.cfg, tableName);

    doBulkLoad(hfofDir, table);
    return 0;
  }

  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

}