View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.util.ArrayList;
24  import java.util.List;
25  import java.util.ListIterator;
26  import java.util.Map;
27  import java.util.concurrent.Callable;
28  import java.util.concurrent.ExecutionException;
29  import java.util.concurrent.Executors;
30  import java.util.concurrent.Future;
31  import java.util.concurrent.ThreadFactory;
32  import java.util.concurrent.ThreadPoolExecutor;
33  import java.util.concurrent.TimeUnit;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.classification.InterfaceAudience;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.Server;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.MetaTableAccessor;
44  import org.apache.hadoop.hbase.client.HConnection;
45  import org.apache.hadoop.hbase.client.Mutation;
46  import org.apache.hadoop.hbase.client.Put;
47  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
48  import org.apache.hadoop.hbase.coordination.SplitTransactionCoordination;
49  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.CancelableProgressable;
52  import org.apache.hadoop.hbase.util.ConfigUtil;
53  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
54  import org.apache.hadoop.hbase.util.FSUtils;
55  import org.apache.hadoop.hbase.util.HasThread;
56  import org.apache.hadoop.hbase.util.Pair;
57  import org.apache.hadoop.hbase.util.PairOfSameType;
58  import org.apache.zookeeper.KeeperException;
59  
60  import com.google.common.util.concurrent.ThreadFactoryBuilder;
61  
/**
 * Executes region split as a "transaction".  Call {@link #prepare()} to setup
 * the transaction, {@link #execute(Server, RegionServerServices)} to run the
 * transaction and {@link #rollback(Server, RegionServerServices)} to cleanup if execute fails.
 *
 * <p>Here is an example of how you would use this class:
 * <pre>
 *  SplitTransaction st = new SplitTransaction(parent, midKey);
 *  if (!st.prepare()) return;
 *  try {
 *    st.execute(server, services);
 *  } catch (IOException ioe) {
 *    try {
 *      st.rollback(server, services);
 *      return;
 *    } catch (RuntimeException e) {
 *      myAbortable.abort("Failed split, abort");
 *    }
 *  }
 * </pre>
 * <p>This class is not thread safe.  Callers need to ensure that a split is
 * run by one thread only.
 */
85  @InterfaceAudience.Private
86  public class SplitTransaction {
/** Logger for this class. */
private static final Log LOG = LogFactory.getLog(SplitTransaction.class);

/** Region to split. */
private final HRegion parent;
/**
 * Region info of the first daughter; assigned in {@link #prepare()} to cover
 * the parent's start key up to the split row.
 */
private HRegionInfo hri_a;
/**
 * Region info of the second daughter; assigned in {@link #prepare()} to cover
 * the split row up to the parent's end key.
 */
private HRegionInfo hri_b;
/**
 * How long to wait for the store file splitter pool to terminate; passed to
 * awaitTermination as milliseconds.  When a server is supplied and we are not
 * in testing mode, createDaughters overrides this default from
 * "hbase.regionserver.fileSplitTimeout".
 */
private long fileSplitTimeout = 30000;
/**
 * Split coordination details; populated from the coordinated state manager
 * in execute (and lazily in stepsBeforePONR when still null).
 */
public SplitTransactionCoordination.SplitTransactionDetails std;
/**
 * Whether ZK-based assignment is in use; initialised from the region's base
 * configuration in the constructor and re-evaluated in execute.
 */
boolean useZKForAssignment;

/** Row to split around. */
private final byte [] splitrow;
103 
/**
 * Kinds of entries recorded in the transaction journal.
 * Every constant marks one step of the split; the journal is consulted to
 * work out how much has to be undone on rollback.  Constants are declared in
 * the order the steps are journaled.
 */
enum JournalEntryType {
  /** The transaction object has been created. */
  STARTED,
  /** Preparation finished (after table lock). */
  PREPARED,
  /** About to invoke the preSplit coprocessor hook. */
  BEFORE_PRE_SPLIT_HOOK,
  /** The preSplit coprocessor hook has returned. */
  AFTER_PRE_SPLIT_HOOK,
  /** The region was marked as in transition and put into SPLITTING state. */
  SET_SPLITTING,
  /** The temporary split data directory was created. */
  CREATE_SPLIT_DIR,
  /** The parent region was closed. */
  CLOSED_PARENT_REGION,
  /** The parent was removed from the server's online regions list. */
  OFFLINED_PARENT,
  /** Creation of the first daughter region has begun. */
  STARTED_REGION_A_CREATION,
  /** Creation of the second daughter region has begun. */
  STARTED_REGION_B_CREATION,
  /** The first daughter region was opened. */
  OPENED_REGION_A,
  /** The second daughter region was opened. */
  OPENED_REGION_B,
  /** About to invoke the postSplit coprocessor hook. */
  BEFORE_POST_SPLIT_HOOK,
  /** The postSplit coprocessor hook has returned. */
  AFTER_POST_SPLIT_HOOK,
  /**
   * Point of no return.
   * Once journaled, the transaction cannot be recovered other than by
   * crashing out the regionserver.
   */
  PONR
}
173 
174   static class JournalEntry {
175     private JournalEntryType type;
176     private long timestamp;
177 
178     public JournalEntry(JournalEntryType type) {
179       this(type, EnvironmentEdgeManager.currentTime());
180     }
181 
182     public JournalEntry(JournalEntryType type, long timestamp) {
183       this.type = type;
184       this.timestamp = timestamp;
185     }
186 
187     @Override
188     public String toString() {
189       StringBuilder sb = new StringBuilder();
190       sb.append(type);
191       sb.append(" at ");
192       sb.append(timestamp);
193       return sb.toString();
194     }
195   }
196 
/**
 * Journal of how far the split transaction has progressed.  Entries are
 * appended in the order the steps are reached, starting with STARTED in the
 * constructor.
 */
private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
201 
202   /**
203    * Constructor
204    * @param r Region to split
205    * @param splitrow Row to split around
206    */
207   public SplitTransaction(final HRegion r, final byte [] splitrow) {
208     this.parent = r;
209     this.splitrow = splitrow;
210     this.journal.add(new JournalEntry(JournalEntryType.STARTED));
211     useZKForAssignment = ConfigUtil.useZKForAssignment(r.getBaseConf());
212   }
213 
214   /**
215    * Does checks on split inputs.
216    * @return <code>true</code> if the region is splittable else
217    * <code>false</code> if it is not (e.g. its already closed, etc.).
218    */
219   public boolean prepare() {
220     if (!this.parent.isSplittable()) return false;
221     // Split key can be null if this region is unsplittable; i.e. has refs.
222     if (this.splitrow == null) return false;
223     HRegionInfo hri = this.parent.getRegionInfo();
224     parent.prepareToSplit();
225     // Check splitrow.
226     byte [] startKey = hri.getStartKey();
227     byte [] endKey = hri.getEndKey();
228     if (Bytes.equals(startKey, splitrow) ||
229         !this.parent.getRegionInfo().containsRow(splitrow)) {
230       LOG.info("Split row is not inside region key range or is equal to " +
231           "startkey: " + Bytes.toStringBinary(this.splitrow));
232       return false;
233     }
234     long rid = getDaughterRegionIdTimestamp(hri);
235     this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid);
236     this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid);
237     this.journal.add(new JournalEntry(JournalEntryType.PREPARED));
238     return true;
239   }
240 
241   /**
242    * Calculate daughter regionid to use.
243    * @param hri Parent {@link HRegionInfo}
244    * @return Daughter region id (timestamp) to use.
245    */
246   private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
247     long rid = EnvironmentEdgeManager.currentTime();
248     // Regionid is timestamp.  Can't be less than that of parent else will insert
249     // at wrong location in hbase:meta (See HBASE-710).
250     if (rid < hri.getRegionId()) {
251       LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
252         " but current time here is " + rid);
253       rid = hri.getRegionId() + 1;
254     }
255     return rid;
256   }
257 
258   private static IOException closedByOtherException = new IOException(
259       "Failed to close region: already closed by another thread");
260 
261   /**
262    * Prepare the regions and region files.
263    * @param server Hosting server instance.  Can be null when testing (won't try
264    * and update in zk if a null server)
265    * @param services Used to online/offline regions.
266    * @throws IOException If thrown, transaction failed.
267    *    Call {@link #rollback(Server, RegionServerServices)}
268    * @return Regions created
269    */
270   /* package */PairOfSameType<HRegion> createDaughters(final Server server,
271       final RegionServerServices services) throws IOException {
272     LOG.info("Starting split of region " + this.parent);
273     if ((server != null && server.isStopped()) ||
274         (services != null && services.isStopping())) {
275       throw new IOException("Server is stopped or stopping");
276     }
277     assert !this.parent.lock.writeLock().isHeldByCurrentThread():
278       "Unsafe to hold write lock while performing RPCs";
279 
280     journal.add(new JournalEntry(JournalEntryType.BEFORE_PRE_SPLIT_HOOK));
281 
282     // Coprocessor callback
283     if (this.parent.getCoprocessorHost() != null) {
284       // TODO: Remove one of these
285       this.parent.getCoprocessorHost().preSplit();
286       this.parent.getCoprocessorHost().preSplit(this.splitrow);
287     }
288 
289     journal.add(new JournalEntry(JournalEntryType.AFTER_PRE_SPLIT_HOOK));
290 
291     // If true, no cluster to write meta edits to or to update znodes in.
292     boolean testing = server == null? true:
293         server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
294     this.fileSplitTimeout = testing ? this.fileSplitTimeout :
295         server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
296           this.fileSplitTimeout);
297 
298     PairOfSameType<HRegion> daughterRegions = stepsBeforePONR(server, services, testing);
299 
300     List<Mutation> metaEntries = new ArrayList<Mutation>();
301     if (this.parent.getCoprocessorHost() != null) {
302       if (this.parent.getCoprocessorHost().
303           preSplitBeforePONR(this.splitrow, metaEntries)) {
304         throw new IOException("Coprocessor bypassing region "
305             + this.parent.getRegionNameAsString() + " split.");
306       }
307       try {
308         for (Mutation p : metaEntries) {
309           HRegionInfo.parseRegionName(p.getRow());
310         }
311       } catch (IOException e) {
312         LOG.error("Row key of mutation from coprossor is not parsable as region name."
313             + "Mutations from coprocessor should only for hbase:meta table.");
314         throw e;
315       }
316     }
317 
318     // This is the point of no return.  Adding subsequent edits to .META. as we
319     // do below when we do the daughter opens adding each to .META. can fail in
320     // various interesting ways the most interesting of which is a timeout
321     // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
322     // then subsequent failures need to crash out this regionserver; the
323     // server shutdown processing should be able to fix-up the incomplete split.
324     // The offlined parent will have the daughters as extra columns.  If
325     // we leave the daughter regions in place and do not remove them when we
326     // crash out, then they will have their references to the parent in place
327     // still and the server shutdown fixup of .META. will point to these
328     // regions.
329     // We should add PONR JournalEntry before offlineParentInMeta,so even if
330     // OfflineParentInMeta timeout,this will cause regionserver exit,and then
331     // master ServerShutdownHandler will fix daughter & avoid data loss. (See
332     // HBase-4562).
333     this.journal.add(new JournalEntry(JournalEntryType.PONR));
334 
335     // Edit parent in meta.  Offlines parent region and adds splita and splitb
336     // as an atomic update. See HBASE-7721. This update to META makes the region
337     // will determine whether the region is split or not in case of failures.
338     // If it is successful, master will roll-forward, if not, master will rollback
339     // and assign the parent region.
340     if (!testing && useZKForAssignment) {
341       if (metaEntries == null || metaEntries.isEmpty()) {
342         MetaTableAccessor.splitRegion(server.getConnection(),
343           parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(),
344           daughterRegions.getSecond().getRegionInfo(), server.getServerName());
345       } else {
346         offlineParentInMetaAndputMetaEntries(server.getConnection(),
347           parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(), daughterRegions
348               .getSecond().getRegionInfo(), server.getServerName(), metaEntries);
349       }
350     } else if (services != null && !useZKForAssignment) {
351       if (!services.reportRegionStateTransition(TransitionCode.SPLIT_PONR,
352           parent.getRegionInfo(), hri_a, hri_b)) {
353         // Passed PONR, let SSH clean it up
354         throw new IOException("Failed to notify master that split passed PONR: "
355           + parent.getRegionInfo().getRegionNameAsString());
356       }
357     }
358     return daughterRegions;
359   }
360 
361   public PairOfSameType<HRegion> stepsBeforePONR(final Server server,
362       final RegionServerServices services, boolean testing) throws IOException {
363 
364     if (useCoordinatedStateManager(server)) {
365       if (std == null) {
366         std =
367             ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
368                 .getSplitTransactionCoordination().getDefaultDetails();
369       }
370       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
371           .getSplitTransactionCoordination().startSplitTransaction(parent, server.getServerName(),
372             hri_a, hri_b);
373     } else if (services != null && !useZKForAssignment) {
374       if (!services.reportRegionStateTransition(TransitionCode.READY_TO_SPLIT,
375           parent.getRegionInfo(), hri_a, hri_b)) {
376         throw new IOException("Failed to get ok from master to split "
377           + parent.getRegionNameAsString());
378       }
379     }
380     this.journal.add(new JournalEntry(JournalEntryType.SET_SPLITTING));
381     if (useCoordinatedStateManager(server)) {
382       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
383           .getSplitTransactionCoordination().waitForSplitTransaction(services, parent, hri_a,
384             hri_b, std);
385     }
386 
387     this.parent.getRegionFileSystem().createSplitsDir();
388     this.journal.add(new JournalEntry(JournalEntryType.CREATE_SPLIT_DIR));
389 
390     Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
391     Exception exceptionToThrow = null;
392     try{
393       hstoreFilesToSplit = this.parent.close(false);
394     } catch (Exception e) {
395       exceptionToThrow = e;
396     }
397     if (exceptionToThrow == null && hstoreFilesToSplit == null) {
398       // The region was closed by a concurrent thread.  We can't continue
399       // with the split, instead we must just abandon the split.  If we
400       // reopen or split this could cause problems because the region has
401       // probably already been moved to a different server, or is in the
402       // process of moving to a different server.
403       exceptionToThrow = closedByOtherException;
404     }
405     if (exceptionToThrow != closedByOtherException) {
406       this.journal.add(new JournalEntry(JournalEntryType.CLOSED_PARENT_REGION));
407     }
408     if (exceptionToThrow != null) {
409       if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
410       throw new IOException(exceptionToThrow);
411     }
412     if (!testing) {
413       services.removeFromOnlineRegions(this.parent, null);
414     }
415     this.journal.add(new JournalEntry(JournalEntryType.OFFLINED_PARENT));
416 
417     // TODO: If splitStoreFiles were multithreaded would we complete steps in
418     // less elapsed time?  St.Ack 20100920
419     //
420     // splitStoreFiles creates daughter region dirs under the parent splits dir
421     // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
422     // clean this up.
423     Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);
424 
425     // Log to the journal that we are creating region A, the first daughter
426     // region.  We could fail halfway through.  If we do, we could have left
427     // stuff in fs that needs cleanup -- a storefile or two.  Thats why we
428     // add entry to journal BEFORE rather than AFTER the change.
429     this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_A_CREATION));
430     assertReferenceFileCount(expectedReferences.getFirst(),
431         this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
432     HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a);
433     assertReferenceFileCount(expectedReferences.getFirst(),
434         new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));
435 
436     // Ditto
437     this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_B_CREATION));
438     assertReferenceFileCount(expectedReferences.getSecond(),
439         this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
440     HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b);
441     assertReferenceFileCount(expectedReferences.getSecond(),
442         new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));
443 
444     return new PairOfSameType<HRegion>(a, b);
445   }
446 
447   void assertReferenceFileCount(int expectedReferenceFileCount, Path dir)
448       throws IOException {
449     if (expectedReferenceFileCount != 0 &&
450         expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(this.parent.getFilesystem(), dir)) {
451       throw new IOException("Failing split. Expected reference file count isn't equal.");
452     }
453   }
454 
455   /**
456    * Perform time consuming opening of the daughter regions.
457    * @param server Hosting server instance.  Can be null when testing
458    * @param services Used to online/offline regions.
459    * @param a first daughter region
460    * @param a second daughter region
461    * @throws IOException If thrown, transaction failed.
462    *          Call {@link #rollback(Server, RegionServerServices)}
463    */
464   /* package */void openDaughters(final Server server,
465       final RegionServerServices services, HRegion a, HRegion b)
466       throws IOException {
467     boolean stopped = server != null && server.isStopped();
468     boolean stopping = services != null && services.isStopping();
469     // TODO: Is this check needed here?
470     if (stopped || stopping) {
471       LOG.info("Not opening daughters " +
472           b.getRegionInfo().getRegionNameAsString() +
473           " and " +
474           a.getRegionInfo().getRegionNameAsString() +
475           " because stopping=" + stopping + ", stopped=" + stopped);
476     } else {
477       // Open daughters in parallel.
478       DaughterOpener aOpener = new DaughterOpener(server, a);
479       DaughterOpener bOpener = new DaughterOpener(server, b);
480       aOpener.start();
481       bOpener.start();
482       try {
483         aOpener.join();
484         if (aOpener.getException() == null) {
485           journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_A));
486         }
487         bOpener.join();
488         if (bOpener.getException() == null) {
489           journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_B));
490         }
491       } catch (InterruptedException e) {
492         throw (InterruptedIOException)new InterruptedIOException().initCause(e);
493       }
494       if (aOpener.getException() != null) {
495         throw new IOException("Failed " +
496           aOpener.getName(), aOpener.getException());
497       }
498       if (bOpener.getException() != null) {
499         throw new IOException("Failed " +
500           bOpener.getName(), bOpener.getException());
501       }
502       if (services != null) {
503         try {
504           if (useZKForAssignment) {
505             // add 2nd daughter first (see HBASE-4335)
506             services.postOpenDeployTasks(b);
507           } else if (!services.reportRegionStateTransition(TransitionCode.SPLIT,
508               parent.getRegionInfo(), hri_a, hri_b)) {
509             throw new IOException("Failed to report split region to master: "
510               + parent.getRegionInfo().getShortNameToLog());
511           }
512           // Should add it to OnlineRegions
513           services.addToOnlineRegions(b);
514           if (useZKForAssignment) {
515             services.postOpenDeployTasks(a);
516           }
517           services.addToOnlineRegions(a);
518         } catch (KeeperException ke) {
519           throw new IOException(ke);
520         }
521       }
522     }
523   }
524 
525   /**
526    * Run the transaction.
527    * @param server Hosting server instance.  Can be null when testing
528    * @param services Used to online/offline regions.
529    * @throws IOException If thrown, transaction failed.
530    *          Call {@link #rollback(Server, RegionServerServices)}
531    * @return Regions created
532    * @throws IOException
533    * @see #rollback(Server, RegionServerServices)
534    */
535   public PairOfSameType<HRegion> execute(final Server server,
536       final RegionServerServices services)
537   throws IOException {
538     useZKForAssignment = server == null ? true :
539       ConfigUtil.useZKForAssignment(server.getConfiguration());
540     if (useCoordinatedStateManager(server)) {
541       std =
542           ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
543               .getSplitTransactionCoordination().getDefaultDetails();
544     }
545     PairOfSameType<HRegion> regions = createDaughters(server, services);
546     if (this.parent.getCoprocessorHost() != null) {
547       this.parent.getCoprocessorHost().preSplitAfterPONR();
548     }
549     return stepsAfterPONR(server, services, regions);
550   }
551 
552   public PairOfSameType<HRegion> stepsAfterPONR(final Server server,
553       final RegionServerServices services, PairOfSameType<HRegion> regions)
554       throws IOException {
555     openDaughters(server, services, regions.getFirst(), regions.getSecond());
556     if (useCoordinatedStateManager(server)) {
557       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
558           .getSplitTransactionCoordination().completeSplitTransaction(services, regions.getFirst(),
559             regions.getSecond(), std, parent);
560     }
561     journal.add(new JournalEntry(JournalEntryType.BEFORE_POST_SPLIT_HOOK));
562     // Coprocessor callback
563     if (parent.getCoprocessorHost() != null) {
564       parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
565     }
566     journal.add(new JournalEntry(JournalEntryType.AFTER_POST_SPLIT_HOOK));
567     return regions;
568   }
569 
570   private void offlineParentInMetaAndputMetaEntries(HConnection hConnection,
571       HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
572       ServerName serverName, List<Mutation> metaEntries) throws IOException {
573     List<Mutation> mutations = metaEntries;
574     HRegionInfo copyOfParent = new HRegionInfo(parent);
575     copyOfParent.setOffline(true);
576     copyOfParent.setSplit(true);
577 
578     //Put for parent
579     Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
580     MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB);
581     mutations.add(putParent);
582     
583     //Puts for daughters
584     Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA);
585     Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB);
586 
587     addLocation(putA, serverName, 1); //these are new regions, openSeqNum = 1 is fine.
588     addLocation(putB, serverName, 1);
589     mutations.add(putA);
590     mutations.add(putB);
591     MetaTableAccessor.mutateMetaTable(hConnection, mutations);
592   }
593 
594   public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
595     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
596       Bytes.toBytes(sn.getHostAndPort()));
597     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
598       Bytes.toBytes(sn.getStartcode()));
599     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER,
600         Bytes.toBytes(openSeqNum));
601     return p;
602   }
603 
604   /*
605    * Open daughter region in its own thread.
606    * If we fail, abort this hosting server.
607    */
608   class DaughterOpener extends HasThread {
609     private final Server server;
610     private final HRegion r;
611     private Throwable t = null;
612 
613     DaughterOpener(final Server s, final HRegion r) {
614       super((s == null? "null-services": s.getServerName()) +
615         "-daughterOpener=" + r.getRegionInfo().getEncodedName());
616       setDaemon(true);
617       this.server = s;
618       this.r = r;
619     }
620 
621     /**
622      * @return Null if open succeeded else exception that causes us fail open.
623      * Call it after this thread exits else you may get wrong view on result.
624      */
625     Throwable getException() {
626       return this.t;
627     }
628 
629     @Override
630     public void run() {
631       try {
632         openDaughterRegion(this.server, r);
633       } catch (Throwable t) {
634         this.t = t;
635       }
636     }
637   }
638 
639   /**
640    * Open daughter regions, add them to online list and update meta.
641    * @param server
642    * @param daughter
643    * @throws IOException
644    * @throws KeeperException
645    */
646   void openDaughterRegion(final Server server, final HRegion daughter)
647   throws IOException, KeeperException {
648     HRegionInfo hri = daughter.getRegionInfo();
649     LoggingProgressable reporter = server == null ? null
650         : new LoggingProgressable(hri, server.getConfiguration().getLong(
651             "hbase.regionserver.split.daughter.open.log.interval", 10000));
652     daughter.openHRegion(reporter);
653   }
654 
655   static class LoggingProgressable implements CancelableProgressable {
656     private final HRegionInfo hri;
657     private long lastLog = -1;
658     private final long interval;
659 
660     LoggingProgressable(final HRegionInfo hri, final long interval) {
661       this.hri = hri;
662       this.interval = interval;
663     }
664 
665     @Override
666     public boolean progress() {
667       long now = EnvironmentEdgeManager.currentTime();
668       if (now - lastLog > this.interval) {
669         LOG.info("Opening " + this.hri.getRegionNameAsString());
670         this.lastLog = now;
671       }
672       return true;
673     }
674   }
675 
676   private boolean useCoordinatedStateManager(final Server server) {
677     return server != null && useZKForAssignment && server.getCoordinatedStateManager() != null;
678   }
679 
680   /**
681    * Creates reference files for top and bottom half of the
682    * @param hstoreFilesToSplit map of store files to create half file references for.
683    * @return the number of reference files that were created.
684    * @throws IOException
685    */
686   private Pair<Integer, Integer> splitStoreFiles(
687       final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
688       throws IOException {
689     if (hstoreFilesToSplit == null) {
690       // Could be null because close didn't succeed -- for now consider it fatal
691       throw new IOException("Close returned empty list of StoreFiles");
692     }
693     // The following code sets up a thread pool executor with as many slots as
694     // there's files to split. It then fires up everything, waits for
695     // completion and finally checks for any exception
696     int nbFiles = hstoreFilesToSplit.size();
697     if (nbFiles == 0) {
698       // no file needs to be splitted.
699       return new Pair<Integer, Integer>(0,0);
700     }
701     LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent);
702     ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
703     builder.setNameFormat("StoreFileSplitter-%1$d");
704     ThreadFactory factory = builder.build();
705     ThreadPoolExecutor threadPool =
706       (ThreadPoolExecutor) Executors.newFixedThreadPool(nbFiles, factory);
707     List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>> (nbFiles);
708 
709     // Split each store file.
710     for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
711       for (StoreFile sf: entry.getValue()) {
712         StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
713         futures.add(threadPool.submit(sfs));
714       }
715     }
716     // Shutdown the pool
717     threadPool.shutdown();
718 
719     // Wait for all the tasks to finish
720     try {
721       boolean stillRunning = !threadPool.awaitTermination(
722           this.fileSplitTimeout, TimeUnit.MILLISECONDS);
723       if (stillRunning) {
724         threadPool.shutdownNow();
725         // wait for the thread to shutdown completely.
726         while (!threadPool.isTerminated()) {
727           Thread.sleep(50);
728         }
729         throw new IOException("Took too long to split the" +
730             " files and create the references, aborting split");
731       }
732     } catch (InterruptedException e) {
733       throw (InterruptedIOException)new InterruptedIOException().initCause(e);
734     }
735 
736     int created_a = 0;
737     int created_b = 0;
738     // Look for any exception
739     for (Future<Pair<Path, Path>> future : futures) {
740       try {
741         Pair<Path, Path> p = future.get();
742         created_a += p.getFirst() != null ? 1 : 0;
743         created_b += p.getSecond() != null ? 1 : 0;
744       } catch (InterruptedException e) {
745         throw (InterruptedIOException) new InterruptedIOException().initCause(e);
746       } catch (ExecutionException e) {
747         throw new IOException(e);
748       }
749     }
750 
751     if (LOG.isDebugEnabled()) {
752       LOG.debug("Split storefiles for region " + this.parent + " Daugther A: " + created_a
753           + " storefiles, Daugther B: " + created_b + " storefiles.");
754     }
755     return new Pair<Integer, Integer>(created_a, created_b);
756   }
757 
758   private Pair<Path, Path> splitStoreFile(final byte[] family, final StoreFile sf) throws IOException {
759     HRegionFileSystem fs = this.parent.getRegionFileSystem();
760     String familyName = Bytes.toString(family);
761 
762     Path path_a =
763         fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false,
764           this.parent.getSplitPolicy());
765     Path path_b =
766         fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true,
767           this.parent.getSplitPolicy());
768     return new Pair<Path,Path>(path_a, path_b);
769   }
770 
771   /**
772    * Utility class used to do the file splitting / reference writing
773    * in parallel instead of sequentially.
774    */
775   class StoreFileSplitter implements Callable<Pair<Path,Path>> {
776     private final byte[] family;
777     private final StoreFile sf;
778 
779     /**
780      * Constructor that takes what it needs to split
781      * @param family Family that contains the store file
782      * @param sf which file
783      */
784     public StoreFileSplitter(final byte[] family, final StoreFile sf) {
785       this.sf = sf;
786       this.family = family;
787     }
788 
789     public Pair<Path,Path> call() throws IOException {
790       return splitStoreFile(family, sf);
791     }
792   }
793 
794   /**
795    * @param server Hosting server instance (May be null when testing).
796    * @param services
797    * @throws IOException If thrown, rollback failed.  Take drastic action.
798    * @return True if we successfully rolled back, false if we got to the point
799    * of no return and so now need to abort the server to minimize damage.
800    */
801   @SuppressWarnings("deprecation")
802   public boolean rollback(final Server server, final RegionServerServices services)
803   throws IOException {
804     // Coprocessor callback
805     if (this.parent.getCoprocessorHost() != null) {
806       this.parent.getCoprocessorHost().preRollBackSplit();
807     }
808 
809     boolean result = true;
810     ListIterator<JournalEntry> iterator =
811       this.journal.listIterator(this.journal.size());
812     // Iterate in reverse.
813     while (iterator.hasPrevious()) {
814       JournalEntry je = iterator.previous();
815       switch(je.type) {
816 
817       case SET_SPLITTING:
818         if (useCoordinatedStateManager(server) && server instanceof HRegionServer) {
819           ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
820               .getSplitTransactionCoordination().clean(this.parent.getRegionInfo());
821         } else if (services != null && !useZKForAssignment
822             && !services.reportRegionStateTransition(TransitionCode.SPLIT_REVERTED,
823                 parent.getRegionInfo(), hri_a, hri_b)) {
824           return false;
825         }
826         break;
827 
828       case CREATE_SPLIT_DIR:
829         this.parent.writestate.writesEnabled = true;
830         this.parent.getRegionFileSystem().cleanupSplitsDir();
831         break;
832 
833       case CLOSED_PARENT_REGION:
834         try {
835           // So, this returns a seqid but if we just closed and then reopened, we
836           // should be ok. On close, we flushed using sequenceid obtained from
837           // hosting regionserver so no need to propagate the sequenceid returned
838           // out of initialize below up into regionserver as we normally do.
839           // TODO: Verify.
840           this.parent.initialize();
841         } catch (IOException e) {
842           LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
843             this.parent.getRegionNameAsString(), e);
844           throw new RuntimeException(e);
845         }
846         break;
847 
848       case STARTED_REGION_A_CREATION:
849         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
850         break;
851 
852       case STARTED_REGION_B_CREATION:
853         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
854         break;
855 
856       case OFFLINED_PARENT:
857         if (services != null) services.addToOnlineRegions(this.parent);
858         break;
859 
860       case PONR:
861         // We got to the point-of-no-return so we need to just abort. Return
862         // immediately.  Do not clean up created daughter regions.  They need
863         // to be in place so we don't delete the parent region mistakenly.
864         // See HBASE-3872.
865         return false;
866 
867       // Informational only cases
868       case STARTED:
869       case PREPARED:
870       case BEFORE_PRE_SPLIT_HOOK:
871       case AFTER_PRE_SPLIT_HOOK:
872       case BEFORE_POST_SPLIT_HOOK:
873       case AFTER_POST_SPLIT_HOOK:
874       case OPENED_REGION_A:
875       case OPENED_REGION_B:
876         break;
877 
878       default:
879         throw new RuntimeException("Unhandled journal entry: " + je);
880       }
881     }
882     // Coprocessor callback
883     if (this.parent.getCoprocessorHost() != null) {
884       this.parent.getCoprocessorHost().postRollBackSplit();
885     }
886     return result;
887   }
888 
889   HRegionInfo getFirstDaughter() {
890     return hri_a;
891   }
892 
893   HRegionInfo getSecondDaughter() {
894     return hri_b;
895   }
896 
897   List<JournalEntry> getJournal() {
898     return journal;
899   }
900 }