1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
22 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
23 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
24
25 import java.io.IOException;
26 import java.io.InterruptedIOException;
27 import java.security.PrivilegedExceptionAction;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.ListIterator;
31 import java.util.Map;
32 import java.util.concurrent.Callable;
33 import java.util.concurrent.ExecutionException;
34 import java.util.concurrent.Executors;
35 import java.util.concurrent.Future;
36 import java.util.concurrent.ThreadFactory;
37 import java.util.concurrent.ThreadPoolExecutor;
38 import java.util.concurrent.TimeUnit;
39
40 import org.apache.commons.logging.Log;
41 import org.apache.commons.logging.LogFactory;
42 import org.apache.hadoop.fs.Path;
43 import org.apache.hadoop.hbase.classification.InterfaceAudience;
44 import org.apache.hadoop.hbase.HConstants;
45 import org.apache.hadoop.hbase.HRegionInfo;
46 import org.apache.hadoop.hbase.RegionTransition;
47 import org.apache.hadoop.hbase.Server;
48 import org.apache.hadoop.hbase.ServerName;
49 import org.apache.hadoop.hbase.catalog.CatalogTracker;
50 import org.apache.hadoop.hbase.catalog.MetaEditor;
51 import org.apache.hadoop.hbase.client.Mutation;
52 import org.apache.hadoop.hbase.client.Put;
53 import org.apache.hadoop.hbase.executor.EventType;
54 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
55 import org.apache.hadoop.hbase.security.User;
56 import org.apache.hadoop.hbase.util.Bytes;
57 import org.apache.hadoop.hbase.util.CancelableProgressable;
58 import org.apache.hadoop.hbase.util.ConfigUtil;
59 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
60 import org.apache.hadoop.hbase.util.FSUtils;
61 import org.apache.hadoop.hbase.util.HasThread;
62 import org.apache.hadoop.hbase.util.Pair;
63 import org.apache.hadoop.hbase.util.PairOfSameType;
64 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
65 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
66 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
67 import org.apache.zookeeper.KeeperException;
68 import org.apache.zookeeper.KeeperException.NodeExistsException;
69 import org.apache.zookeeper.data.Stat;
70
71 import com.google.common.util.concurrent.ThreadFactoryBuilder;
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96 @InterfaceAudience.Private
97 public class SplitTransaction {
// Logger shared by the transaction and its nested helper classes.
98 private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
99
100
101
102
// Region being split; assigned once in the constructor.
103 private final HRegion parent;
// Descriptors of the two daughter regions; populated by prepare().
104 private HRegionInfo hri_a;
105 private HRegionInfo hri_b;
// Milliseconds to wait for the store-file splitter pool (handed to awaitTermination with
// TimeUnit.MILLISECONDS). createDaughters() may replace it with the value of
// "hbase.regionserver.fileSplitTimeout".
106 private long fileSplitTimeout = 30000;
// Version of the splitting znode as last returned by getZKNode()/transitionSplittingNode();
// stays -1 until one has been obtained.
107 private int znodeVersion = -1;
// Selects the ZooKeeper-based protocol over reportRegionStateTransition() calls to the master.
// Set in the constructor and recomputed by execute().
108 boolean useZKForAssignment;
109
110
111
112
// Row at which the parent is split; prepare() returns false when it is null.
113 private final byte [] splitrow;
114
115
116
117
118
119
/**
 * Kinds of steps recorded in the split journal. Entries are appended as the
 * transaction progresses and are walked in reverse by rollback() to undo the
 * work done so far. A nested enum is implicitly static, so no modifier is needed.
 */
enum JournalEntryType {
  /** Recorded by the constructor when the transaction object is created. */
  STARTED,

  /** Recorded at the end of a successful prepare(). */
  PREPARED,

  /** Recorded just before the coprocessor preSplit hooks are invoked. */
  BEFORE_PRE_SPLIT_HOOK,

  /** Recorded once the coprocessor preSplit hooks have returned. */
  AFTER_PRE_SPLIT_HOOK,

  /**
   * Recorded after the pending-split znode was created or the master accepted
   * READY_TO_SPLIT. Rollback removes the znode or reports SPLIT_REVERTED.
   */
  SET_SPLITTING_IN_ZK,

  /** Recorded after the splits directory was created. Rollback cleans it up. */
  CREATE_SPLIT_DIR,

  /** Recorded after closing the parent was attempted. Rollback re-initializes the parent. */
  CLOSED_PARENT_REGION,

  /** Recorded after the parent left the online regions. Rollback adds it back. */
  OFFLINED_PARENT,

  /** Recorded before daughter A is created. Rollback cleans up daughter A on disk. */
  STARTED_REGION_A_CREATION,

  /** Recorded before daughter B is created. Rollback cleans up daughter B on disk. */
  STARTED_REGION_B_CREATION,

  /** Recorded when the opener thread for daughter A finished without an exception. */
  OPENED_REGION_A,

  /** Recorded when the opener thread for daughter B finished without an exception. */
  OPENED_REGION_B,

  /** Recorded just before the coprocessor postSplit hook is invoked. */
  BEFORE_POST_SPLIT_HOOK,

  /** Recorded once the coprocessor postSplit hook has returned. */
  AFTER_POST_SPLIT_HOOK,

  /**
   * Point of no return. Once this entry is in the journal, rollback() stops
   * and returns false instead of undoing earlier steps.
   */
  PONR
}
184
185 static class JournalEntry {
186 private JournalEntryType type;
187 private long timestamp;
188
189 public JournalEntry(JournalEntryType type) {
190 this(type, EnvironmentEdgeManager.currentTimeMillis());
191 }
192
193 public JournalEntry(JournalEntryType type, long timestamp) {
194 this.type = type;
195 this.timestamp = timestamp;
196 }
197
198 @Override
199 public String toString() {
200 StringBuilder sb = new StringBuilder();
201 sb.append(type);
202 sb.append(" at ");
203 sb.append(timestamp);
204 return sb.toString();
205 }
206 }
207
208
209
210
// Ordered record of the steps taken so far; rollback() iterates it from the last entry backwards.
211 private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
212
213
214
215
216
217
218 public SplitTransaction(final HRegion r, final byte [] splitrow) {
219 this.parent = r;
220 this.splitrow = splitrow;
221 this.journal.add(new JournalEntry(JournalEntryType.STARTED));
222 this.useZKForAssignment = ConfigUtil.useZKForAssignment(r.getBaseConf());
223 }
224
225
226
227
228
229
230 public boolean prepare() {
231 if (!this.parent.isSplittable()) return false;
232
233 if (this.splitrow == null) return false;
234 HRegionInfo hri = this.parent.getRegionInfo();
235 parent.prepareToSplit();
236
237 byte [] startKey = hri.getStartKey();
238 byte [] endKey = hri.getEndKey();
239 if (Bytes.equals(startKey, splitrow) ||
240 !this.parent.getRegionInfo().containsRow(splitrow)) {
241 LOG.info("Split row is not inside region key range or is equal to " +
242 "startkey: " + Bytes.toStringBinary(this.splitrow));
243 return false;
244 }
245 long rid = getDaughterRegionIdTimestamp(hri);
246 this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid);
247 this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid);
248 this.journal.add(new JournalEntry(JournalEntryType.PREPARED));
249 return true;
250 }
251
252
253
254
255
256
257 private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
258 long rid = EnvironmentEdgeManager.currentTimeMillis();
259
260
261 if (rid < hri.getRegionId()) {
262 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
263 " but current time here is " + rid);
264 rid = hri.getRegionId() + 1;
265 }
266 return rid;
267 }
268
269 private static IOException closedByOtherException = new IOException(
270 "Failed to close region: already closed by another thread");
271
272
273
274
275
276
277
278
279
280
281
282 @Deprecated
283
284 final RegionServerServices services) throws IOException {
285 return createDaughters(server, services, null);
286 }
287
288
289 final RegionServerServices services, User user) throws IOException {
290 LOG.info("Starting split of region " + this.parent);
291 if ((server != null && server.isStopped()) ||
292 (services != null && services.isStopping())) {
293 throw new IOException("Server is stopped or stopping");
294 }
295 assert !this.parent.lock.writeLock().isHeldByCurrentThread():
296 "Unsafe to hold write lock while performing RPCs";
297
298 journal.add(new JournalEntry(JournalEntryType.BEFORE_PRE_SPLIT_HOOK));
299
300
301 if (this.parent.getCoprocessorHost() != null) {
302 if (user == null) {
303
304 parent.getCoprocessorHost().preSplit();
305 parent.getCoprocessorHost().preSplit(splitrow);
306 } else {
307 try {
308 user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
309 @Override
310 public Void run() throws Exception {
311 parent.getCoprocessorHost().preSplit();
312 parent.getCoprocessorHost().preSplit(splitrow);
313 return null;
314 }
315 });
316 } catch (InterruptedException ie) {
317 InterruptedIOException iioe = new InterruptedIOException();
318 iioe.initCause(ie);
319 throw iioe;
320 }
321 }
322 }
323
324 journal.add(new JournalEntry(JournalEntryType.AFTER_PRE_SPLIT_HOOK));
325
326
327 boolean testing = server == null? true:
328 server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
329 this.fileSplitTimeout = testing ? this.fileSplitTimeout :
330 server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
331 this.fileSplitTimeout);
332
333 PairOfSameType<HRegion> daughterRegions = stepsBeforePONR(server, services, testing);
334
335 final List<Mutation> metaEntries = new ArrayList<Mutation>();
336 boolean ret = false;
337 if (this.parent.getCoprocessorHost() != null) {
338 if (user == null) {
339 ret = parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries);
340 } else {
341 try {
342 ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
343 @Override
344 public Boolean run() throws Exception {
345 return parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries);
346 }
347 });
348 } catch (InterruptedException ie) {
349 InterruptedIOException iioe = new InterruptedIOException();
350 iioe.initCause(ie);
351 throw iioe;
352 }
353 }
354 if (ret) {
355 throw new IOException("Coprocessor bypassing region "
356 + this.parent.getRegionNameAsString() + " split.");
357 }
358 try {
359 for (Mutation p : metaEntries) {
360 HRegionInfo.parseRegionName(p.getRow());
361 }
362 } catch (IOException e) {
363 LOG.error("Row key of mutation from coprossor is not parsable as region name."
364 + "Mutations from coprocessor should only for hbase:meta table.");
365 throw e;
366 }
367 }
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384 this.journal.add(new JournalEntry(JournalEntryType.PONR));
385
386
387
388
389
390
391 if (!testing && useZKForAssignment) {
392 if (metaEntries == null || metaEntries.isEmpty()) {
393 MetaEditor.splitRegion(server.getCatalogTracker(), parent.getRegionInfo(), daughterRegions
394 .getFirst().getRegionInfo(), daughterRegions.getSecond().getRegionInfo(), server
395 .getServerName());
396 } else {
397 offlineParentInMetaAndputMetaEntries(server.getCatalogTracker(), parent.getRegionInfo(),
398 daughterRegions.getFirst().getRegionInfo(), daughterRegions.getSecond().getRegionInfo(),
399 server.getServerName(), metaEntries);
400 }
401 } else if (services != null && !useZKForAssignment) {
402 if (!services.reportRegionStateTransition(TransitionCode.SPLIT_PONR, parent.getRegionInfo(),
403 hri_a, hri_b)) {
404
405 throw new IOException("Failed to notify master that split passed PONR: "
406 + parent.getRegionInfo().getRegionNameAsString());
407 }
408 }
409 return daughterRegions;
410 }
411
/**
 * Performs every split step that can still be rolled back: announces the split (znode or
 * READY_TO_SPLIT report), creates the splits directory, closes and offlines the parent,
 * writes the reference files and creates both daughter regions on disk.
 * Each step is journaled so rollback() can undo it.
 * @param server hosting server, may be null
 * @param services region server services; dereferenced without a null check when
 *   {@code testing} is false
 * @param testing when true the parent is not removed from the online regions
 * @return the two daughter regions (first is hri_a, second is hri_b)
 * @throws IOException if any step fails
 */
412 public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
413 final RegionServerServices services, boolean testing) throws IOException {
414
415
// ZooKeeper protocol: create the ephemeral znode that requests the split.
416 if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
417 try {
418 createNodeSplitting(server.getZooKeeper(),
419 parent.getRegionInfo(), server.getServerName(), hri_a, hri_b);
420 } catch (KeeperException e) {
421 throw new IOException("Failed creating PENDING_SPLIT znode on " +
422 this.parent.getRegionNameAsString(), e);
423 }
// Non-ZooKeeper protocol: ask the master directly for permission to split.
424 } else if (services != null && !useZKForAssignment) {
425 if (!services.reportRegionStateTransition(TransitionCode.READY_TO_SPLIT,
426 parent.getRegionInfo(), hri_a, hri_b)) {
427 throw new IOException("Failed to get ok from master to split "
428 + parent.getRegionNameAsString());
429 }
430 }
// Journaled on both paths (and when neither branch ran) so rollback() undoes the announcement.
431 this.journal.add(new JournalEntry(JournalEntryType.SET_SPLITTING_IN_ZK));
432 if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
433
434
435
// Blocks until the znode reaches RS_ZK_REGION_SPLITTING and remembers its version.
436 znodeVersion = getZKNode(server, services);
437 }
438
439 this.parent.getRegionFileSystem().createSplitsDir();
440 this.journal.add(new JournalEntry(JournalEntryType.CREATE_SPLIT_DIR));
441
442 Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
443 Exception exceptionToThrow = null;
// Any exception from close() is held back so the journal can be updated before rethrowing.
// NOTE(review): the meaning of the 'false' argument is not visible in this file.
444 try{
445 hstoreFilesToSplit = this.parent.close(false);
446 } catch (Exception e) {
447 exceptionToThrow = e;
448 }
// close() returning null without throwing is mapped to the shared sentinel exception.
449 if (exceptionToThrow == null && hstoreFilesToSplit == null) {
450
451
452
453
454
455 exceptionToThrow = closedByOtherException;
456 }
// Journal the close (so rollback re-initializes the parent) unless the sentinel applies.
// Note this also journals when close() threw some other exception.
457 if (exceptionToThrow != closedByOtherException) {
458 this.journal.add(new JournalEntry(JournalEntryType.CLOSED_PARENT_REGION));
459 }
// Rethrow IOExceptions as-is; wrap anything else.
460 if (exceptionToThrow != null) {
461 if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
462 throw new IOException(exceptionToThrow);
463 }
464 if (!testing) {
465 services.removeFromOnlineRegions(this.parent, null);
466 }
467 this.journal.add(new JournalEntry(JournalEntryType.OFFLINED_PARENT));
468
469
470
471
472
473
474
// Writes reference files for both daughters; the pair holds how many were created for each.
475 Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);
476
477
478
479
480
// Journal before creating each daughter so a partial creation is still cleaned up on rollback.
// The reference-file count is verified in the splits dir before, and in the region dir after.
481 this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_A_CREATION));
482 assertReferenceFileCount(expectedReferences.getFirst(),
483 this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
484 HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
485 assertReferenceFileCount(expectedReferences.getFirst(),
486 new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));
487
488
489 this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_B_CREATION));
490 assertReferenceFileCount(expectedReferences.getSecond(),
491 this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
492 HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
493 assertReferenceFileCount(expectedReferences.getSecond(),
494 new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));
495
496 return new PairOfSameType<HRegion>(a, b);
497 }
498
499 void assertReferenceFileCount(int expectedReferenceFileCount, Path dir)
500 throws IOException {
501 if (expectedReferenceFileCount != 0 &&
502 expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(this.parent.getFilesystem(), dir)) {
503 throw new IOException("Failing split. Expected reference file count isn't equal.");
504 }
505 }
506
507
508
509
510
511
512
513
514
515
516
517
518 final RegionServerServices services, HRegion a, HRegion b)
519 throws IOException {
520 boolean stopped = server != null && server.isStopped();
521 boolean stopping = services != null && services.isStopping();
522
523 if (stopped || stopping) {
524 LOG.info("Not opening daughters " +
525 b.getRegionInfo().getRegionNameAsString() +
526 " and " +
527 a.getRegionInfo().getRegionNameAsString() +
528 " because stopping=" + stopping + ", stopped=" + stopped);
529 } else {
530
531 DaughterOpener aOpener = new DaughterOpener(server, a);
532 DaughterOpener bOpener = new DaughterOpener(server, b);
533 aOpener.start();
534 bOpener.start();
535 try {
536 aOpener.join();
537 if (aOpener.getException() == null) {
538 journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_A));
539 }
540 bOpener.join();
541 if (bOpener.getException() == null) {
542 journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_B));
543 }
544 } catch (InterruptedException e) {
545 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
546 }
547 if (aOpener.getException() != null) {
548 throw new IOException("Failed " +
549 aOpener.getName(), aOpener.getException());
550 }
551 if (bOpener.getException() != null) {
552 throw new IOException("Failed " +
553 bOpener.getName(), bOpener.getException());
554 }
555 if (services != null) {
556 try {
557 if (useZKForAssignment) {
558
559 services.postOpenDeployTasks(b, server.getCatalogTracker());
560 } else if (!services.reportRegionStateTransition(TransitionCode.SPLIT,
561 parent.getRegionInfo(), hri_a, hri_b)) {
562 throw new IOException("Failed to report split region to master: "
563 + parent.getRegionInfo().getShortNameToLog());
564 }
565
566 services.addToOnlineRegions(b);
567 if (useZKForAssignment) {
568 services.postOpenDeployTasks(a, server.getCatalogTracker());
569 }
570 services.addToOnlineRegions(a);
571 } catch (KeeperException ke) {
572 throw new IOException(ke);
573 }
574 }
575 }
576 }
577
578
579
580
581
582
583
584
585
586
587
588
589 final RegionServerServices services, HRegion a, HRegion b)
590 throws IOException {
591
592 if (server != null && server.getZooKeeper() != null) {
593 try {
594 this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
595 parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
596 server.getServerName(), this.znodeVersion,
597 RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT);
598
599 int spins = 0;
600
601
602
603 do {
604 if (spins % 10 == 0) {
605 LOG.debug("Still waiting on the master to process the split for " +
606 this.parent.getRegionInfo().getEncodedName());
607 }
608 Thread.sleep(100);
609
610 this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
611 parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
612 server.getServerName(), this.znodeVersion,
613 RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT);
614 spins++;
615 } while (this.znodeVersion != -1 && !server.isStopped()
616 && !services.isStopping());
617 } catch (Exception e) {
618 if (e instanceof InterruptedException) {
619 Thread.currentThread().interrupt();
620 }
621 throw new IOException("Failed telling master about split", e);
622 }
623 }
624
625
626
627
628
629
630 }
631
632
633
634
635
636
637
638
639
640 private int getZKNode(final Server server,
641 final RegionServerServices services) throws IOException {
642
643 try {
644 int spins = 0;
645 Stat stat = new Stat();
646 ZooKeeperWatcher zkw = server.getZooKeeper();
647 ServerName expectedServer = server.getServerName();
648 String node = parent.getRegionInfo().getEncodedName();
649 while (!(server.isStopped() || services.isStopping())) {
650 if (spins % 5 == 0) {
651 LOG.debug("Still waiting for master to process "
652 + "the pending_split for " + node);
653 transitionSplittingNode(zkw, parent.getRegionInfo(),
654 hri_a, hri_b, expectedServer, -1, RS_ZK_REQUEST_REGION_SPLIT,
655 RS_ZK_REQUEST_REGION_SPLIT);
656 }
657 Thread.sleep(100);
658 spins++;
659 byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
660 if (data == null) {
661 throw new IOException("Data is null, splitting node "
662 + node + " no longer exists");
663 }
664 RegionTransition rt = RegionTransition.parseFrom(data);
665 EventType et = rt.getEventType();
666 if (et == RS_ZK_REGION_SPLITTING) {
667 ServerName serverName = rt.getServerName();
668 if (!serverName.equals(expectedServer)) {
669 throw new IOException("Splitting node " + node + " is for "
670 + serverName + ", not us " + expectedServer);
671 }
672 byte [] payloadOfSplitting = rt.getPayload();
673 List<HRegionInfo> splittingRegions = HRegionInfo.parseDelimitedFrom(
674 payloadOfSplitting, 0, payloadOfSplitting.length);
675 assert splittingRegions.size() == 2;
676 HRegionInfo a = splittingRegions.get(0);
677 HRegionInfo b = splittingRegions.get(1);
678 if (!(hri_a.equals(a) && hri_b.equals(b))) {
679 throw new IOException("Splitting node " + node + " is for " + a + ", "
680 + b + ", not expected daughters: " + hri_a + ", " + hri_b);
681 }
682
683 return stat.getVersion();
684 }
685 if (et != RS_ZK_REQUEST_REGION_SPLIT) {
686 throw new IOException("Splitting node " + node
687 + " moved out of splitting to " + et);
688 }
689 }
690
691 throw new IOException("Server is "
692 + (services.isStopping() ? "stopping" : "stopped"));
693 } catch (Exception e) {
694 if (e instanceof InterruptedException) {
695 Thread.currentThread().interrupt();
696 }
697 throw new IOException("Failed getting SPLITTING znode on "
698 + parent.getRegionNameAsString(), e);
699 }
700 }
701
702 public PairOfSameType<HRegion> execute(final Server server,
703 final RegionServerServices services)
704 throws IOException {
705 if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
706 LOG.warn("Should use execute(Server, RegionServerServices, User)");
707 }
708 return execute(server, services, null);
709 }
710
711
712
713
714
715
716
717
718
719
720
721
722 public PairOfSameType<HRegion> execute(final Server server,
723 final RegionServerServices services, User user)
724 throws IOException {
725 useZKForAssignment =
726 server == null ? true : ConfigUtil.useZKForAssignment(server.getConfiguration());
727 PairOfSameType<HRegion> regions = createDaughters(server, services, user);
728 if (this.parent.getCoprocessorHost() != null) {
729 if (user == null) {
730 parent.getCoprocessorHost().preSplitAfterPONR();
731 } else {
732 try {
733 user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
734 @Override
735 public Void run() throws Exception {
736 parent.getCoprocessorHost().preSplitAfterPONR();
737 return null;
738 }
739 });
740 } catch (InterruptedException ie) {
741 InterruptedIOException iioe = new InterruptedIOException();
742 iioe.initCause(ie);
743 throw iioe;
744 }
745 }
746 }
747 return stepsAfterPONR(server, services, regions, user);
748 }
749
750 @Deprecated
751 public PairOfSameType<HRegion> stepsAfterPONR(final Server server,
752 final RegionServerServices services, final PairOfSameType<HRegion> regions)
753 throws IOException {
754 return stepsAfterPONR(server, services, regions, null);
755 }
756
757 public PairOfSameType<HRegion> stepsAfterPONR(final Server server,
758 final RegionServerServices services, final PairOfSameType<HRegion> regions, User user)
759 throws IOException {
760 openDaughters(server, services, regions.getFirst(), regions.getSecond());
761 if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
762 transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
763 }
764 journal.add(new JournalEntry(JournalEntryType.BEFORE_POST_SPLIT_HOOK));
765
766 if (this.parent.getCoprocessorHost() != null) {
767 if (user == null) {
768 this.parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
769 } else {
770 try {
771 user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
772 @Override
773 public Void run() throws Exception {
774 parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
775 return null;
776 }
777 });
778 } catch (InterruptedException ie) {
779 InterruptedIOException iioe = new InterruptedIOException();
780 iioe.initCause(ie);
781 throw iioe;
782 }
783 }
784 }
785 journal.add(new JournalEntry(JournalEntryType.AFTER_POST_SPLIT_HOOK));
786 return regions;
787 }
788
789 private void offlineParentInMetaAndputMetaEntries(CatalogTracker catalogTracker,
790 HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
791 ServerName serverName, List<Mutation> metaEntries) throws IOException {
792 List<Mutation> mutations = metaEntries;
793 HRegionInfo copyOfParent = new HRegionInfo(parent);
794 copyOfParent.setOffline(true);
795 copyOfParent.setSplit(true);
796
797
798 Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
799 MetaEditor.addDaughtersToPut(putParent, splitA, splitB);
800 mutations.add(putParent);
801
802
803 Put putA = MetaEditor.makePutFromRegionInfo(splitA);
804 Put putB = MetaEditor.makePutFromRegionInfo(splitB);
805
806 addLocation(putA, serverName, 1);
807 addLocation(putB, serverName, 1);
808 mutations.add(putA);
809 mutations.add(putB);
810 MetaEditor.mutateMetaTable(catalogTracker, mutations);
811 }
812
813 public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
814 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
815 Bytes.toBytes(sn.getHostAndPort()));
816 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
817 Bytes.toBytes(sn.getStartcode()));
818 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER,
819 Bytes.toBytes(openSeqNum));
820 return p;
821 }
822
823
824
825
826
827 class DaughterOpener extends HasThread {
828 private final Server server;
829 private final HRegion r;
830 private Throwable t = null;
831
832 DaughterOpener(final Server s, final HRegion r) {
833 super((s == null? "null-services": s.getServerName()) +
834 "-daughterOpener=" + r.getRegionInfo().getEncodedName());
835 setDaemon(true);
836 this.server = s;
837 this.r = r;
838 }
839
840
841
842
843
844 Throwable getException() {
845 return this.t;
846 }
847
848 @Override
849 public void run() {
850 try {
851 openDaughterRegion(this.server, r);
852 } catch (Throwable t) {
853 this.t = t;
854 }
855 }
856 }
857
858
859
860
861
862
863
864
865 void openDaughterRegion(final Server server, final HRegion daughter)
866 throws IOException, KeeperException {
867 HRegionInfo hri = daughter.getRegionInfo();
868 LoggingProgressable reporter = server == null ? null
869 : new LoggingProgressable(hri, server.getConfiguration().getLong(
870 "hbase.regionserver.split.daughter.open.log.interval", 10000));
871 daughter.openHRegion(reporter);
872 }
873
874 static class LoggingProgressable implements CancelableProgressable {
875 private final HRegionInfo hri;
876 private long lastLog = -1;
877 private final long interval;
878
879 LoggingProgressable(final HRegionInfo hri, final long interval) {
880 this.hri = hri;
881 this.interval = interval;
882 }
883
884 @Override
885 public boolean progress() {
886 long now = EnvironmentEdgeManager.currentTimeMillis();
887 if (now - lastLog > this.interval) {
888 LOG.info("Opening " + this.hri.getRegionNameAsString());
889 this.lastLog = now;
890 }
891 return true;
892 }
893 }
894
895
896
897
898
899
900
901
902 private Pair<Integer, Integer> splitStoreFiles(
903 final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
904 throws IOException {
905 if (hstoreFilesToSplit == null) {
906
907 throw new IOException("Close returned empty list of StoreFiles");
908 }
909
910
911
912 int nbFiles = 0;
913 for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
914 nbFiles += entry.getValue().size();
915 }
916 if (nbFiles == 0) {
917
918 return new Pair<Integer, Integer>(0,0);
919 }
920
921
922 int defMaxThreads = Math.min(parent.conf.getInt(HStore.BLOCKING_STOREFILES_KEY,
923 HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
924 Runtime.getRuntime().availableProcessors());
925
926 int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
927 defMaxThreads), nbFiles);
928 LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent +
929 " using " + maxThreads + " threads");
930 ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
931 builder.setNameFormat("StoreFileSplitter-%1$d");
932 ThreadFactory factory = builder.build();
933 ThreadPoolExecutor threadPool =
934 (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory);
935 List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>> (nbFiles);
936
937
938 for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
939 for (StoreFile sf: entry.getValue()) {
940 StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
941 futures.add(threadPool.submit(sfs));
942 }
943 }
944
945 threadPool.shutdown();
946
947
948 try {
949 boolean stillRunning = !threadPool.awaitTermination(
950 this.fileSplitTimeout, TimeUnit.MILLISECONDS);
951 if (stillRunning) {
952 threadPool.shutdownNow();
953
954 while (!threadPool.isTerminated()) {
955 Thread.sleep(50);
956 }
957 throw new IOException("Took too long to split the" +
958 " files and create the references, aborting split");
959 }
960 } catch (InterruptedException e) {
961 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
962 }
963
964 int created_a = 0;
965 int created_b = 0;
966
967 for (Future<Pair<Path, Path>> future : futures) {
968 try {
969 Pair<Path, Path> p = future.get();
970 created_a += p.getFirst() != null ? 1 : 0;
971 created_b += p.getSecond() != null ? 1 : 0;
972 } catch (InterruptedException e) {
973 throw (InterruptedIOException) new InterruptedIOException().initCause(e);
974 } catch (ExecutionException e) {
975 throw new IOException(e);
976 }
977 }
978
979 if (LOG.isDebugEnabled()) {
980 LOG.debug("Split storefiles for region " + this.parent + " Daugther A: " + created_a
981 + " storefiles, Daugther B: " + created_b + " storefiles.");
982 }
983 return new Pair<Integer, Integer>(created_a, created_b);
984 }
985
986 private Pair<Path, Path> splitStoreFile(final byte[] family, final StoreFile sf)
987 throws IOException {
988 if (LOG.isDebugEnabled()) {
989 LOG.debug("Splitting started for store file: " + sf.getPath() + " for region: " +
990 this.parent);
991 }
992 HRegionFileSystem fs = this.parent.getRegionFileSystem();
993 String familyName = Bytes.toString(family);
994 Path path_a =
995 fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false,
996 this.parent.getSplitPolicy());
997 Path path_b =
998 fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true,
999 this.parent.getSplitPolicy());
1000 if (LOG.isDebugEnabled()) {
1001 LOG.debug("Splitting complete for store file: " + sf.getPath() + " for region: " +
1002 this.parent);
1003 }
1004 return new Pair<Path,Path>(path_a, path_b);
1005 }
1006
1007
1008
1009
1010
1011 class StoreFileSplitter implements Callable<Pair<Path,Path>> {
1012 private final byte[] family;
1013 private final StoreFile sf;
1014
1015
1016
1017
1018
1019
1020 public StoreFileSplitter(final byte[] family, final StoreFile sf) {
1021 this.sf = sf;
1022 this.family = family;
1023 }
1024
1025 public Pair<Path,Path> call() throws IOException {
1026 return splitStoreFile(family, sf);
1027 }
1028 }
1029
1030 public boolean rollback(final Server server, final RegionServerServices services)
1031 throws IOException {
1032 if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
1033 LOG.warn("Should use rollback(Server, RegionServerServices, User)");
1034 }
1035 return rollback(server, services, null);
1036 }
1037
1038
1039
1040
1041
1042
1043
1044
/**
 * Undoes the journaled steps in reverse order, bracketed by the preRollBackSplit and
 * postRollBackSplit coprocessor hooks.
 * @param server hosting server, may be null
 * @param services region server services, may be null
 * @param user user to run the hooks as, or {@code null} to call them directly
 * @return {@code true} when every journaled step was undone; {@code false} when the
 *   journal contains PONR or the master rejected the SPLIT_REVERTED report. On either
 *   {@code false} return the remaining entries and the postRollBackSplit hook are skipped.
 * @throws IOException if a cleanup step fails or a hook is interrupted
 * @throws RuntimeException if the parent cannot be re-initialized or an unknown journal
 *   entry type is found
 */
1045 @SuppressWarnings("deprecation")
1046 public boolean rollback(final Server server, final RegionServerServices services, User user)
1047 throws IOException {
1048
// Pre-rollback coprocessor hook.
1049 if (this.parent.getCoprocessorHost() != null) {
1050 if (user == null) {
1051 this.parent.getCoprocessorHost().preRollBackSplit();
1052 } else {
1053 try {
1054 user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
1055 @Override
1056 public Void run() throws Exception {
1057 parent.getCoprocessorHost().preRollBackSplit();
1058 return null;
1059 }
1060 });
1061 } catch (InterruptedException ie) {
1062 InterruptedIOException iioe = new InterruptedIOException();
1063 iioe.initCause(ie);
1064 throw iioe;
1065 }
1066 }
1067 }
1068
// Never set to false below; the failure paths return false directly.
1069 boolean result = true;
// Start past the last entry so previous() yields the most recent step first.
1070 ListIterator<JournalEntry> iterator =
1071 this.journal.listIterator(this.journal.size());
1072
1073 while (iterator.hasPrevious()) {
1074 JournalEntry je = iterator.previous();
1075 switch(je.type) {
1076
// Withdraw the split announcement: delete the znode, or tell the master it was reverted.
1077 case SET_SPLITTING_IN_ZK:
1078 if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
1079 cleanZK(server, this.parent.getRegionInfo());
1080 } else if (services != null
1081 && !useZKForAssignment
1082 && !services.reportRegionStateTransition(TransitionCode.SPLIT_REVERTED,
1083 parent.getRegionInfo(), hri_a, hri_b)) {
1084 return false;
1085 }
1086 break;
1087
// Re-enable writes on the parent and remove the splits directory.
1088 case CREATE_SPLIT_DIR:
1089 this.parent.writestate.writesEnabled = true;
1090 this.parent.getRegionFileSystem().cleanupSplitsDir();
1091 break;
1092
// Bring the closed parent back by initializing it again.
1093 case CLOSED_PARENT_REGION:
1094 try {
1095
1096
1097
1098
1099
1100 this.parent.initialize();
1101 } catch (IOException e) {
1102 LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
1103 this.parent.getRegionNameAsString(), e);
1104 throw new RuntimeException(e);
1105 }
1106 break;
1107
1108 case STARTED_REGION_A_CREATION:
1109 this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
1110 break;
1111
1112 case STARTED_REGION_B_CREATION:
1113 this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
1114 break;
1115
// Put the parent back among the online regions.
1116 case OFFLINED_PARENT:
1117 if (services != null) services.addToOnlineRegions(this.parent);
1118 break;
1119
// Past the point of no return nothing is undone; the caller gets false.
1120 case PONR:
1121
1122
1123
1124
1125 return false;
1126
1127
// Informational entries with nothing to undo.
1128 case STARTED:
1129 case PREPARED:
1130 case BEFORE_PRE_SPLIT_HOOK:
1131 case AFTER_PRE_SPLIT_HOOK:
1132 case BEFORE_POST_SPLIT_HOOK:
1133 case AFTER_POST_SPLIT_HOOK:
1134 case OPENED_REGION_A:
1135 case OPENED_REGION_B:
1136 break;
1137
1138 default:
1139 throw new RuntimeException("Unhandled journal entry: " + je);
1140 }
1141 }
1142
// Post-rollback coprocessor hook; reached only when the whole journal was processed.
1143 if (this.parent.getCoprocessorHost() != null) {
1144 if (user == null) {
1145 this.parent.getCoprocessorHost().postRollBackSplit();
1146 } else {
1147 try {
1148 user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
1149 @Override
1150 public Void run() throws Exception {
1151 parent.getCoprocessorHost().postRollBackSplit();
1152 return null;
1153 }
1154 });
1155 } catch (InterruptedException ie) {
1156 InterruptedIOException iioe = new InterruptedIOException();
1157 iioe.initCause(ie);
1158 throw iioe;
1159 }
1160 }
1161 }
1162 return result;
1163 }
1164
1165 HRegionInfo getFirstDaughter() {
1166 return hri_a;
1167 }
1168
1169 HRegionInfo getSecondDaughter() {
1170 return hri_b;
1171 }
1172
1173 private static void cleanZK(final Server server, final HRegionInfo hri) {
1174 try {
1175
1176 if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
1177 RS_ZK_REQUEST_REGION_SPLIT, server.getServerName())) {
1178 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
1179 RS_ZK_REGION_SPLITTING, server.getServerName());
1180 }
1181 } catch (KeeperException.NoNodeException e) {
1182 LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
1183 } catch (KeeperException e) {
1184 server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
1185 }
1186 }
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201 public static void createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
1202 final ServerName serverName, final HRegionInfo a,
1203 final HRegionInfo b) throws KeeperException, IOException {
1204 LOG.debug(zkw.prefix("Creating ephemeral node for " +
1205 region.getEncodedName() + " in PENDING_SPLIT state"));
1206 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
1207 RegionTransition rt = RegionTransition.createRegionTransition(
1208 RS_ZK_REQUEST_REGION_SPLIT, region.getRegionName(), serverName, payload);
1209 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1210 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1211 throw new IOException("Failed create of ephemeral " + node);
1212 }
1213 }
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249 public static int transitionSplittingNode(ZooKeeperWatcher zkw,
1250 HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
1251 final int znodeVersion, final EventType beginState,
1252 final EventType endState) throws KeeperException, IOException {
1253 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
1254 return ZKAssign.transitionNode(zkw, parent, serverName,
1255 beginState, endState, znodeVersion, payload);
1256 }
1257
1258 List<JournalEntry> getJournal() {
1259 return journal;
1260 }
1261 }