1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
22 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
23 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
24
25 import java.io.IOException;
26 import java.io.InterruptedIOException;
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.ListIterator;
30 import java.util.Map;
31 import java.util.concurrent.Callable;
32 import java.util.concurrent.ExecutionException;
33 import java.util.concurrent.Executors;
34 import java.util.concurrent.Future;
35 import java.util.concurrent.ThreadFactory;
36 import java.util.concurrent.ThreadPoolExecutor;
37 import java.util.concurrent.TimeUnit;
38
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.hbase.classification.InterfaceAudience;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.RegionTransition;
45 import org.apache.hadoop.hbase.Server;
46 import org.apache.hadoop.hbase.ServerName;
47 import org.apache.hadoop.hbase.catalog.CatalogTracker;
48 import org.apache.hadoop.hbase.catalog.MetaEditor;
49 import org.apache.hadoop.hbase.client.Mutation;
50 import org.apache.hadoop.hbase.client.Put;
51 import org.apache.hadoop.hbase.executor.EventType;
52 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
53 import org.apache.hadoop.hbase.util.Bytes;
54 import org.apache.hadoop.hbase.util.CancelableProgressable;
55 import org.apache.hadoop.hbase.util.ConfigUtil;
56 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
57 import org.apache.hadoop.hbase.util.HasThread;
58 import org.apache.hadoop.hbase.util.PairOfSameType;
59 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
60 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
61 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
62 import org.apache.zookeeper.KeeperException;
63 import org.apache.zookeeper.KeeperException.NodeExistsException;
64 import org.apache.zookeeper.data.Stat;
65
66 import com.google.common.util.concurrent.ThreadFactoryBuilder;
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91 @InterfaceAudience.Private
92 public class SplitTransaction {
/** Logger used by this class and its nested helper classes. */
private static final Log LOG = LogFactory.getLog(SplitTransaction.class);

/** The region being split. */
private final HRegion parent;
/** Daughter covering [parent start key, split row); assigned by prepare(). */
private HRegionInfo hri_a;
/** Daughter covering [split row, parent end key); assigned by prepare(). */
private HRegionInfo hri_b;
/**
 * Milliseconds to wait for the store-file splitter pool to finish. Outside testing mode it
 * is re-read from "hbase.regionserver.fileSplitTimeout", with this value as the default.
 */
private long fileSplitTimeout = 30000;
/** Last known version of the splitting znode; starts at -1 until a transition sets it. */
private int znodeVersion = -1;
/** Whether ZooKeeper drives assignment; set at the start of execute(). */
boolean useZKForAssignment;

/** Row at which the parent is split; the start key of the second daughter. */
private final byte [] splitrow;
109
110
111
112
113
114
/**
 * Steps recorded in the transaction journal. rollback() walks the journal backwards and
 * undoes the steps that need undoing.
 */
static enum JournalEntryType {
  /**
   * Journaled by the constructor.
   */
  STARTED,
  /**
   * Journaled when prepare() succeeds.
   */
  PREPARED,
  /**
   * Journaled just before the preSplit coprocessor hooks are invoked.
   */
  BEFORE_PRE_SPLIT_HOOK,
  /**
   * Journaled just after the preSplit coprocessor hooks are invoked.
   */
  AFTER_PRE_SPLIT_HOOK,
  /**
   * Journaled after the splitting znode is created, or after the master accepted
   * READY_TO_SPLIT when ZooKeeper is not used for assignment.
   */
  SET_SPLITTING_IN_ZK,
  /**
   * Journaled after the parent's splits directory is created.
   */
  CREATE_SPLIT_DIR,
  /**
   * Journaled after the attempt to close the parent, unless the close reported that the
   * region had already been closed by another thread.
   */
  CLOSED_PARENT_REGION,
  /**
   * Journaled after the parent is removed from the online regions (skipped in testing).
   */
  OFFLINED_PARENT,
  /**
   * Journaled before the first daughter region is created from the split files.
   */
  STARTED_REGION_A_CREATION,
  /**
   * Journaled before the second daughter region is created from the split files.
   */
  STARTED_REGION_B_CREATION,
  /**
   * Journaled once the opener thread for the first daughter finished without error.
   */
  OPENED_REGION_A,
  /**
   * Journaled once the opener thread for the second daughter finished without error.
   */
  OPENED_REGION_B,
  /**
   * Journaled just before the postSplit coprocessor hook is invoked.
   */
  BEFORE_POST_SPLIT_HOOK,
  /**
   * Journaled just after the postSplit coprocessor hook is invoked.
   */
  AFTER_POST_SPLIT_HOOK,
  /**
   * Point of no return. Journaled right before hbase:meta is edited (or the master is told
   * SPLIT_PONR). When rollback() meets this entry it stops and returns false.
   */
  PONR
}
179
180 static class JournalEntry {
181 public JournalEntryType type;
182 public long timestamp;
183
184 public JournalEntry(JournalEntryType type) {
185 this(type, EnvironmentEdgeManager.currentTimeMillis());
186 }
187
188 public JournalEntry(JournalEntryType type, long timestamp) {
189 this.type = type;
190 this.timestamp = timestamp;
191 }
192
193 @Override
194 public String toString() {
195 StringBuilder sb = new StringBuilder();
196 sb.append(type);
197 sb.append(" at ");
198 sb.append(timestamp);
199 return sb.toString();
200 }
201 }
202
203
204
205
/** Steps completed so far, in order; rollback() iterates it from the end to the start. */
private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
207
208
209
210
211
212
213 public SplitTransaction(final HRegion r, final byte [] splitrow) {
214 this.parent = r;
215 this.splitrow = splitrow;
216 this.journal.add(new JournalEntry(JournalEntryType.STARTED));
217 }
218
219
220
221
222
223
224 public boolean prepare() {
225 if (!this.parent.isSplittable()) return false;
226
227 if (this.splitrow == null) return false;
228 HRegionInfo hri = this.parent.getRegionInfo();
229 parent.prepareToSplit();
230
231 byte [] startKey = hri.getStartKey();
232 byte [] endKey = hri.getEndKey();
233 if (Bytes.equals(startKey, splitrow) ||
234 !this.parent.getRegionInfo().containsRow(splitrow)) {
235 LOG.info("Split row is not inside region key range or is equal to " +
236 "startkey: " + Bytes.toStringBinary(this.splitrow));
237 return false;
238 }
239 long rid = getDaughterRegionIdTimestamp(hri);
240 this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid);
241 this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid);
242 this.journal.add(new JournalEntry(JournalEntryType.PREPARED));
243 return true;
244 }
245
246
247
248
249
250
251 private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
252 long rid = EnvironmentEdgeManager.currentTimeMillis();
253
254
255 if (rid < hri.getRegionId()) {
256 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
257 " but current time here is " + rid);
258 rid = hri.getRegionId() + 1;
259 }
260 return rid;
261 }
262
263 private static IOException closedByOtherException = new IOException(
264 "Failed to close region: already closed by another thread");
265
266
267
268
269
270
271
272
273
274
275
    final RegionServerServices services) throws IOException {
  // Runs everything up to and including the point of no return: pre-split coprocessor
  // hooks, the steps in stepsBeforePONR, the PONR journal entry, and finally the
  // hbase:meta edit (or the SPLIT_PONR report to the master). Returns the daughter pair.
  LOG.info("Starting split of region " + this.parent);
  if ((server != null && server.isStopped()) ||
      (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }
  assert !this.parent.lock.writeLock().isHeldByCurrentThread():
    "Unsafe to hold write lock while performing RPCs";

  journal.add(new JournalEntry(JournalEntryType.BEFORE_PRE_SPLIT_HOOK));

  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    // Both the no-argument hook and the split-row variant are invoked.
    this.parent.getCoprocessorHost().preSplit();
    this.parent.getCoprocessorHost().preSplit(this.splitrow);
  }

  journal.add(new JournalEntry(JournalEntryType.AFTER_PRE_SPLIT_HOOK));

  // A null server, or hbase.testing.nocluster=true, selects testing mode. In testing mode
  // the configured file split timeout is not consulted.
  boolean testing = server == null? true:
      server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
  this.fileSplitTimeout = testing ? this.fileSplitTimeout :
      server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
          this.fileSplitTimeout);

  PairOfSameType<HRegion> daughterRegions = stepsBeforePONR(server, services, testing);

  // Give the coprocessor a last chance to veto the split and to contribute extra
  // mutations, which are written together with the split edit below.
  List<Mutation> metaEntries = new ArrayList<Mutation>();
  if (this.parent.getCoprocessorHost() != null) {
    if (this.parent.getCoprocessorHost().
        preSplitBeforePONR(this.splitrow, metaEntries)) {
      throw new IOException("Coprocessor bypassing region "
          + this.parent.getRegionNameAsString() + " split.");
    }
    try {
      // Every contributed mutation must be keyed by a parsable region name.
      for (Mutation p : metaEntries) {
        HRegionInfo.parseRegionName(p.getRow());
      }
    } catch (IOException e) {
      LOG.error("Row key of mutation from coprossor is not parsable as region name."
          + "Mutations from coprocessor should only for hbase:meta table.");
      throw e;
    }
  }

  // Point of no return: once this entry is journaled, rollback() returns false instead of
  // undoing the steps recorded before it.
  this.journal.add(new JournalEntry(JournalEntryType.PONR));

  if (!testing && useZKForAssignment) {
    // ZK-based assignment: this server edits hbase:meta itself.
    if (metaEntries == null || metaEntries.isEmpty()) {
      MetaEditor.splitRegion(server.getCatalogTracker(), parent.getRegionInfo(), daughterRegions
          .getFirst().getRegionInfo(), daughterRegions.getSecond().getRegionInfo(), server
          .getServerName());
    } else {
      offlineParentInMetaAndputMetaEntries(server.getCatalogTracker(), parent.getRegionInfo(),
        daughterRegions.getFirst().getRegionInfo(), daughterRegions.getSecond().getRegionInfo(),
        server.getServerName(), metaEntries);
    }
  } else if (services != null && !useZKForAssignment) {
    // Without ZK the master is told that the split passed the point of no return.
    if (!services.reportRegionStateTransition(TransitionCode.SPLIT_PONR, parent.getRegionInfo(),
        hri_a, hri_b)) {
      // The master did not accept the report; fail the split (PONR is already journaled).
      throw new IOException("Failed to notify master that split passed PONR: "
        + parent.getRegionInfo().getRegionNameAsString());
    }
  }
  return daughterRegions;
}
365
/**
 * Performs the split steps that can still be rolled back: announces the split, creates the
 * splits directory, closes and offlines the parent, splits its store files and creates
 * (but does not open) both daughter regions. Each step is journaled for rollback().
 *
 * @param server hosting server; may be null, in which case no znode is created
 * @param services region server services; dereferenced without a null check when
 *   {@code testing} is false
 * @param testing when true the parent is not removed from the online regions
 * @return the two daughter regions, first then second
 * @throws IOException if the split cannot be announced, the parent cannot be closed, or
 *   any file system step fails
 */
public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
    final RegionServerServices services, boolean testing) throws IOException {
  // Announce the split: create the splitting znode when ZK drives assignment, otherwise
  // ask the master directly with READY_TO_SPLIT.
  if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
    try {
      createNodeSplitting(server.getZooKeeper(),
        parent.getRegionInfo(), server.getServerName(), hri_a, hri_b);
    } catch (KeeperException e) {
      throw new IOException("Failed creating PENDING_SPLIT znode on " +
        this.parent.getRegionNameAsString(), e);
    }
  } else if (services != null && !useZKForAssignment) {
    if (!services.reportRegionStateTransition(TransitionCode.READY_TO_SPLIT,
        parent.getRegionInfo(), hri_a, hri_b)) {
      throw new IOException("Failed to get ok from master to split "
        + parent.getRegionNameAsString());
    }
  }
  this.journal.add(new JournalEntry(JournalEntryType.SET_SPLITTING_IN_ZK));
  if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
    // Block until the znode reaches RS_ZK_REGION_SPLITTING and remember its version for
    // the later transitions.
    znodeVersion = getZKNode(server, services);
  }

  this.parent.getRegionFileSystem().createSplitsDir();
  this.journal.add(new JournalEntry(JournalEntryType.CREATE_SPLIT_DIR));

  Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
  Exception exceptionToThrow = null;
  try{
    hstoreFilesToSplit = this.parent.close(false);
  } catch (Exception e) {
    // Deferred so the journal entry below is still written before rethrowing.
    exceptionToThrow = e;
  }
  if (exceptionToThrow == null && hstoreFilesToSplit == null) {
    // close() returned null without throwing. This is treated as the region having been
    // closed by another thread; the sentinel keeps CLOSED_PARENT_REGION out of the
    // journal so rollback does not re-initialize the parent.
    exceptionToThrow = closedByOtherException;
  }
  if (exceptionToThrow != closedByOtherException) {
    // Journaled on success and also when close() threw, so rollback re-initializes the
    // parent in both cases.
    this.journal.add(new JournalEntry(JournalEntryType.CLOSED_PARENT_REGION));
  }
  if (exceptionToThrow != null) {
    if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
    throw new IOException(exceptionToThrow);
  }
  if (!testing) {
    // NOTE(review): services is dereferenced without a null check here -- confirm callers
    // always pass a non-null services when not testing.
    services.removeFromOnlineRegions(this.parent, null);
  }
  this.journal.add(new JournalEntry(JournalEntryType.OFFLINED_PARENT));

  // Write the daughters' store files under the splits directory. Failures here are undone
  // by the CREATE_SPLIT_DIR rollback, which cleans the splits directory.
  splitStoreFiles(hstoreFilesToSplit);

  // Journal before creating each daughter so that a partial creation is cleaned up too.
  this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_A_CREATION));
  HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);

  this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_B_CREATION));
  HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
  return new PairOfSameType<HRegion>(a, b);
}
443
444
445
446
447
448
449
450
451
452
453
454
    final RegionServerServices services, HRegion a, HRegion b)
    throws IOException {
  // Opens both daughters in parallel and, once both are open, registers them with the
  // region server. Nothing is opened when the server is stopped or stopping.
  boolean stopped = server != null && server.isStopped();
  boolean stopping = services != null && services.isStopping();
  // Skip the open entirely on a server that is going down; only log the decision.
  if (stopped || stopping) {
    LOG.info("Not opening daughters " +
        b.getRegionInfo().getRegionNameAsString() +
        " and " +
        a.getRegionInfo().getRegionNameAsString() +
        " because stopping=" + stopping + ", stopped=" + stopped);
  } else {
    // Open the daughters in parallel, one thread each.
    DaughterOpener aOpener = new DaughterOpener(server, a);
    DaughterOpener bOpener = new DaughterOpener(server, b);
    aOpener.start();
    bOpener.start();
    try {
      // Wait for both openers; each OPENED_REGION_* entry is journaled only on success.
      aOpener.join();
      if (aOpener.getException() == null) {
        journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_A));
      }
      bOpener.join();
      if (bOpener.getException() == null) {
        journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_B));
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException)new InterruptedIOException().initCause(e);
    }
    // Surface the first opener failure, naming the opener thread in the message.
    if (aOpener.getException() != null) {
      throw new IOException("Failed " +
        aOpener.getName(), aOpener.getException());
    }
    if (bOpener.getException() != null) {
      throw new IOException("Failed " +
        bOpener.getName(), bOpener.getException());
    }
    if (services != null) {
      try {
        if (useZKForAssignment) {
          // ZK-based assignment: run the post-open deploy tasks for each daughter here.
          services.postOpenDeployTasks(b, server.getCatalogTracker());
        } else if (!services.reportRegionStateTransition(TransitionCode.SPLIT,
            parent.getRegionInfo(), hri_a, hri_b)) {
          throw new IOException("Failed to report split region to master: "
            + parent.getRegionInfo().getShortNameToLog());
        }
        // The second daughter is put online before the first.
        services.addToOnlineRegions(b);
        if (useZKForAssignment) {
          services.postOpenDeployTasks(a, server.getCatalogTracker());
        }
        services.addToOnlineRegions(a);
      } catch (KeeperException ke) {
        throw new IOException(ke);
      }
    }
  }
}
514
515
516
517
518
519
520
521
522
523
524
525
    final RegionServerServices services, HRegion a, HRegion b)
    throws IOException {
  // Moves the splitting znode from RS_ZK_REGION_SPLITTING to RS_ZK_REGION_SPLIT, then
  // keeps re-asserting RS_ZK_REGION_SPLIT every 100 ms until the transition call returns
  // -1 or the server is stopped/stopping. Does nothing without a ZooKeeper watcher.
  if (server != null && server.getZooKeeper() != null) {
    try {
      this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
        parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
        server.getServerName(), this.znodeVersion,
        RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT);

      int spins = 0;
      // NOTE(review): a return of -1 presumably means the znode could no longer be
      // transitioned because the master already handled it -- confirm against
      // ZKAssign.transitionNode.
      do {
        // Log once every ten spins (about once per second at 100 ms per spin).
        if (spins % 10 == 0) {
          LOG.debug("Still waiting on the master to process the split for " +
              this.parent.getRegionInfo().getEncodedName());
        }
        Thread.sleep(100);
        // SPLIT -> SPLIT leaves the state unchanged; only the returned version is used.
        this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
          parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
          server.getServerName(), this.znodeVersion,
          RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT);
        spins++;
        // NOTE(review): services is dereferenced without a null check in this condition.
      } while (this.znodeVersion != -1 && !server.isStopped()
          && !services.isStopping());
    } catch (Exception e) {
      // Restore the interrupt flag before wrapping, so callers can still observe it.
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new IOException("Failed telling master about split", e);
    }
  }
}
568
569
570
571
572
573
574
575
576
/**
 * Polls the parent's splitting znode until its state becomes RS_ZK_REGION_SPLITTING, then
 * checks that the node names this server and the expected daughters.
 *
 * @param server provides the ZooKeeper watcher, the server name and the stopped flag
 * @param services consulted for the stopping flag
 * @return the znode version read together with the RS_ZK_REGION_SPLITTING data
 * @throws IOException on any failure. Every exception raised inside the method, including
 *   the IOExceptions it throws itself, is wrapped in an IOException whose message starts
 *   with "Failed getting SPLITTING znode on".
 */
private int getZKNode(final Server server,
    final RegionServerServices services) throws IOException {
  // Poll every 100 ms for as long as the server is neither stopped nor stopping.
  try {
    int spins = 0;
    Stat stat = new Stat();
    ZooKeeperWatcher zkw = server.getZooKeeper();
    ServerName expectedServer = server.getServerName();
    String node = parent.getRegionInfo().getEncodedName();
    while (!(server.isStopped() || services.isStopping())) {
      if (spins % 5 == 0) {
        LOG.debug("Still waiting for master to process "
          + "the pending_split for " + node);
        // Every fifth spin, re-assert the request state on the znode. The returned
        // version is ignored here.
        transitionSplittingNode(zkw, parent.getRegionInfo(),
          hri_a, hri_b, expectedServer, -1, RS_ZK_REQUEST_REGION_SPLIT,
          RS_ZK_REQUEST_REGION_SPLIT);
      }
      Thread.sleep(100);
      spins++;
      byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
      if (data == null) {
        throw new IOException("Data is null, splitting node "
          + node + " no longer exists");
      }
      RegionTransition rt = RegionTransition.parseFrom(data);
      EventType et = rt.getEventType();
      if (et == RS_ZK_REGION_SPLITTING) {
        // The node reached the awaited state: verify it belongs to this server...
        ServerName serverName = rt.getServerName();
        if (!serverName.equals(expectedServer)) {
          throw new IOException("Splitting node " + node + " is for "
            + serverName + ", not us " + expectedServer);
        }
        // ...and that its payload names exactly the two expected daughters, in order.
        byte [] payloadOfSplitting = rt.getPayload();
        List<HRegionInfo> splittingRegions = HRegionInfo.parseDelimitedFrom(
          payloadOfSplitting, 0, payloadOfSplitting.length);
        assert splittingRegions.size() == 2;
        HRegionInfo a = splittingRegions.get(0);
        HRegionInfo b = splittingRegions.get(1);
        if (!(hri_a.equals(a) && hri_b.equals(b))) {
          throw new IOException("Splitting node " + node + " is for " + a + ", "
            + b + ", not expected daughters: " + hri_a + ", " + hri_b);
        }
        // Version filled in by the getDataNoWatch call above.
        return stat.getVersion();
      }
      // Any state other than "still requested" means the node left the split flow.
      if (et != RS_ZK_REQUEST_REGION_SPLIT) {
        throw new IOException("Splitting node " + node
          + " moved out of splitting to " + et);
      }
    }
    // The loop ended because the server is going down.
    throw new IOException("Server is "
      + (services.isStopping() ? "stopping" : "stopped"));
  } catch (Exception e) {
    // Restore the interrupt flag before wrapping, so callers can still observe it.
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    throw new IOException("Failed getting SPLITTING znode on "
      + parent.getRegionNameAsString(), e);
  }
}
638
639
640
641
642
643
644
645
646
647
648
649
650 public PairOfSameType<HRegion> execute(final Server server,
651 final RegionServerServices services)
652 throws IOException {
653 useZKForAssignment =
654 server == null ? true : ConfigUtil.useZKForAssignment(server.getConfiguration());
655 PairOfSameType<HRegion> regions = createDaughters(server, services);
656 if (this.parent.getCoprocessorHost() != null) {
657 this.parent.getCoprocessorHost().preSplitAfterPONR();
658 }
659 return stepsAfterPONR(server, services, regions);
660 }
661
662 public PairOfSameType<HRegion> stepsAfterPONR(final Server server,
663 final RegionServerServices services, PairOfSameType<HRegion> regions)
664 throws IOException {
665 openDaughters(server, services, regions.getFirst(), regions.getSecond());
666 if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
667 transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
668 }
669 journal.add(new JournalEntry(JournalEntryType.BEFORE_POST_SPLIT_HOOK));
670
671 if (this.parent.getCoprocessorHost() != null) {
672 this.parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
673 }
674 journal.add(new JournalEntry(JournalEntryType.AFTER_POST_SPLIT_HOOK));
675 return regions;
676 }
677
678 private void offlineParentInMetaAndputMetaEntries(CatalogTracker catalogTracker,
679 HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
680 ServerName serverName, List<Mutation> metaEntries) throws IOException {
681 List<Mutation> mutations = metaEntries;
682 HRegionInfo copyOfParent = new HRegionInfo(parent);
683 copyOfParent.setOffline(true);
684 copyOfParent.setSplit(true);
685
686
687 Put putParent = MetaEditor.makePutFromRegionInfo(copyOfParent);
688 MetaEditor.addDaughtersToPut(putParent, splitA, splitB);
689 mutations.add(putParent);
690
691
692 Put putA = MetaEditor.makePutFromRegionInfo(splitA);
693 Put putB = MetaEditor.makePutFromRegionInfo(splitB);
694
695 addLocation(putA, serverName, 1);
696 addLocation(putB, serverName, 1);
697 mutations.add(putA);
698 mutations.add(putB);
699 MetaEditor.mutateMetaTable(catalogTracker, mutations);
700 }
701
702 public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
703 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
704 Bytes.toBytes(sn.getHostAndPort()));
705 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
706 Bytes.toBytes(sn.getStartcode()));
707 p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER,
708 Bytes.toBytes(openSeqNum));
709 return p;
710 }
711
712
713
714
715
716 class DaughterOpener extends HasThread {
717 private final Server server;
718 private final HRegion r;
719 private Throwable t = null;
720
721 DaughterOpener(final Server s, final HRegion r) {
722 super((s == null? "null-services": s.getServerName()) +
723 "-daughterOpener=" + r.getRegionInfo().getEncodedName());
724 setDaemon(true);
725 this.server = s;
726 this.r = r;
727 }
728
729
730
731
732
733 Throwable getException() {
734 return this.t;
735 }
736
737 @Override
738 public void run() {
739 try {
740 openDaughterRegion(this.server, r);
741 } catch (Throwable t) {
742 this.t = t;
743 }
744 }
745 }
746
747
748
749
750
751
752
753
754 void openDaughterRegion(final Server server, final HRegion daughter)
755 throws IOException, KeeperException {
756 HRegionInfo hri = daughter.getRegionInfo();
757 LoggingProgressable reporter = server == null ? null
758 : new LoggingProgressable(hri, server.getConfiguration().getLong(
759 "hbase.regionserver.split.daughter.open.log.interval", 10000));
760 daughter.openHRegion(reporter);
761 }
762
763 static class LoggingProgressable implements CancelableProgressable {
764 private final HRegionInfo hri;
765 private long lastLog = -1;
766 private final long interval;
767
768 LoggingProgressable(final HRegionInfo hri, final long interval) {
769 this.hri = hri;
770 this.interval = interval;
771 }
772
773 @Override
774 public boolean progress() {
775 long now = EnvironmentEdgeManager.currentTimeMillis();
776 if (now - lastLog > this.interval) {
777 LOG.info("Opening " + this.hri.getRegionNameAsString());
778 this.lastLog = now;
779 }
780 return true;
781 }
782 }
783
784 private void splitStoreFiles(final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
785 throws IOException {
786 if (hstoreFilesToSplit == null) {
787
788 throw new IOException("Close returned empty list of StoreFiles");
789 }
790
791
792
793 int nbFiles = hstoreFilesToSplit.size();
794 if (nbFiles == 0) {
795
796 return;
797 }
798 ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
799 builder.setNameFormat("StoreFileSplitter-%1$d");
800 ThreadFactory factory = builder.build();
801 ThreadPoolExecutor threadPool =
802 (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
803 List<Future<Void>> futures = new ArrayList<Future<Void>>(nbFiles);
804
805
806 for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
807 for (StoreFile sf: entry.getValue()) {
808 StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
809 futures.add(threadPool.submit(sfs));
810 }
811 }
812
813 threadPool.shutdown();
814
815
816 try {
817 boolean stillRunning = !threadPool.awaitTermination(
818 this.fileSplitTimeout, TimeUnit.MILLISECONDS);
819 if (stillRunning) {
820 threadPool.shutdownNow();
821
822 while (!threadPool.isTerminated()) {
823 Thread.sleep(50);
824 }
825 throw new IOException("Took too long to split the" +
826 " files and create the references, aborting split");
827 }
828 } catch (InterruptedException e) {
829 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
830 }
831
832
833 for (Future<Void> future: futures) {
834 try {
835 future.get();
836 } catch (InterruptedException e) {
837 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
838 } catch (ExecutionException e) {
839 throw new IOException(e);
840 }
841 }
842 }
843
844 private void splitStoreFile(final byte[] family, final StoreFile sf) throws IOException {
845 HRegionFileSystem fs = this.parent.getRegionFileSystem();
846 String familyName = Bytes.toString(family);
847 fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false);
848 fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true);
849 }
850
851
852
853
854
855 class StoreFileSplitter implements Callable<Void> {
856 private final byte[] family;
857 private final StoreFile sf;
858
859
860
861
862
863
864 public StoreFileSplitter(final byte[] family, final StoreFile sf) {
865 this.sf = sf;
866 this.family = family;
867 }
868
869 public Void call() throws IOException {
870 splitStoreFile(family, sf);
871 return null;
872 }
873 }
874
875
876
877
878
879
880
881
/**
 * Undoes the journaled steps in reverse order, bracketed by the preRollBackSplit and
 * postRollBackSplit coprocessor hooks.
 *
 * @param server hosting server; needed to clean up the splitting znode
 * @param services region server services; used to put the parent back online and to report
 *   SPLIT_REVERTED when ZooKeeper is not used for assignment
 * @return true when the whole journal was processed; false when the journal contains PONR
 *   or the master did not accept the SPLIT_REVERTED report. In both false cases the
 *   remaining entries are left untouched and postRollBackSplit is not invoked.
 * @throws IOException if a file system cleanup fails
 * @throws RuntimeException if the parent cannot be re-initialized, or an unknown journal
 *   entry type is met
 */
@SuppressWarnings("deprecation")
public boolean rollback(final Server server, final RegionServerServices services)
    throws IOException {
  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    this.parent.getCoprocessorHost().preRollBackSplit();
  }

  // Never reassigned below; the early returns are the only way to yield false.
  boolean result = true;
  ListIterator<JournalEntry> iterator =
    this.journal.listIterator(this.journal.size());
  // Walk the journal from the most recent entry back to the first.
  while (iterator.hasPrevious()) {
    JournalEntry je = iterator.previous();
    switch(je.type) {

    case SET_SPLITTING_IN_ZK:
      // Remove the splitting znode, or tell the master the split was reverted.
      if (server != null && server.getZooKeeper() != null && useZKForAssignment) {
        cleanZK(server, this.parent.getRegionInfo());
      } else if (services != null
          && !useZKForAssignment
          && !services.reportRegionStateTransition(TransitionCode.SPLIT_REVERTED,
            parent.getRegionInfo(), hri_a, hri_b)) {
        return false;
      }
      break;

    case CREATE_SPLIT_DIR:
      // Re-enable writes on the parent and remove the splits directory.
      this.parent.writestate.writesEnabled = true;
      this.parent.getRegionFileSystem().cleanupSplitsDir();
      break;

    case CLOSED_PARENT_REGION:
      try {
        // Bring the closed parent back by running its initialization again.
        // NOTE(review): presumably safe to call on an already-constructed region --
        // confirm against HRegion.initialize.
        this.parent.initialize();
      } catch (IOException e) {
        LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
          this.parent.getRegionNameAsString(), e);
        throw new RuntimeException(e);
      }
      break;

    case STARTED_REGION_A_CREATION:
      this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
      break;

    case STARTED_REGION_B_CREATION:
      this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
      break;

    case OFFLINED_PARENT:
      if (services != null) services.addToOnlineRegions(this.parent);
      break;

    case PONR:
      // Past the point of no return nothing is undone; report failure to the caller.
      return false;

    // These entries are informational only and need no undo action.
    case STARTED:
    case PREPARED:
    case BEFORE_PRE_SPLIT_HOOK:
    case AFTER_PRE_SPLIT_HOOK:
    case BEFORE_POST_SPLIT_HOOK:
    case AFTER_POST_SPLIT_HOOK:
    case OPENED_REGION_A:
    case OPENED_REGION_B:
      break;

    default:
      throw new RuntimeException("Unhandled journal entry: " + je);
    }
  }
  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    this.parent.getCoprocessorHost().postRollBackSplit();
  }
  return result;
}
969
970 HRegionInfo getFirstDaughter() {
971 return hri_a;
972 }
973
974 HRegionInfo getSecondDaughter() {
975 return hri_b;
976 }
977
978 private static void cleanZK(final Server server, final HRegionInfo hri) {
979 try {
980
981 if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
982 RS_ZK_REQUEST_REGION_SPLIT, server.getServerName())) {
983 ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
984 RS_ZK_REGION_SPLITTING, server.getServerName());
985 }
986 } catch (KeeperException.NoNodeException e) {
987 LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
988 } catch (KeeperException e) {
989 server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
990 }
991 }
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006 public static void createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
1007 final ServerName serverName, final HRegionInfo a,
1008 final HRegionInfo b) throws KeeperException, IOException {
1009 LOG.debug(zkw.prefix("Creating ephemeral node for " +
1010 region.getEncodedName() + " in PENDING_SPLIT state"));
1011 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
1012 RegionTransition rt = RegionTransition.createRegionTransition(
1013 RS_ZK_REQUEST_REGION_SPLIT, region.getRegionName(), serverName, payload);
1014 String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1015 if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1016 throw new IOException("Failed create of ephemeral " + node);
1017 }
1018 }
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054 public static int transitionSplittingNode(ZooKeeperWatcher zkw,
1055 HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
1056 final int znodeVersion, final EventType beginState,
1057 final EventType endState) throws KeeperException, IOException {
1058 byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
1059 return ZKAssign.transitionNode(zkw, parent, serverName,
1060 beginState, endState, znodeVersion, payload);
1061 }
1062
1063 List<JournalEntry> getJournal() {
1064 return journal;
1065 }
1066 }