View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.ListIterator;
25  import java.util.Map;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.RegionTransition;
34  import org.apache.hadoop.hbase.Server;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.catalog.MetaEditor;
37  import org.apache.hadoop.hbase.catalog.MetaReader;
38  import org.apache.hadoop.hbase.executor.EventType;
39  import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
42  import org.apache.hadoop.hbase.util.Pair;
43  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
44  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
45  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
46  import org.apache.zookeeper.KeeperException;
47  import org.apache.zookeeper.KeeperException.NodeExistsException;
48  
/**
 * Executes region merge as a "transaction". It is similar to
 * SplitTransaction. Call {@link #prepare(RegionServerServices)} to set up the
 * transaction, {@link #execute(Server, RegionServerServices)} to run the
 * transaction and {@link #rollback(Server, RegionServerServices)} to clean up
 * if execute fails.
 *
 * <p>
 * Here is an example of how you would use this class:
 *
 * <pre>
 *  RegionMergeTransaction mt = new RegionMergeTransaction(regionA, regionB, forcible);
 *  if (!mt.prepare(services)) return;
 *  try {
 *    mt.execute(server, services);
 *  } catch (IOException ioe) {
 *    try {
 *      mt.rollback(server, services);
 *      return;
 *    } catch (RuntimeException e) {
 *      myAbortable.abort("Failed merge, abort");
 *    }
 *  }
 * </pre>
 * <p>
 * This class is not thread safe. Callers need to ensure that a merge is run
 * by one thread only.
 */
77  @InterfaceAudience.Private
78  public class RegionMergeTransaction {
79    private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
80  
81    // Merged region info
82    private HRegionInfo mergedRegionInfo;
83    // region_a sorts before region_b
84    private final HRegion region_a;
85    private final HRegion region_b;
86    // merges dir is under region_a
87    private final Path mergesdir;
88    private int znodeVersion = -1;
89    // We only merge adjacent regions if forcible is false
90    private final boolean forcible;
91  
  /**
   * Types to add to the transaction journal. Each enum is a step in the merge
   * transaction. The journal records the steps in the order they were taken;
   * {@link #rollback(Server, RegionServerServices)} walks it in reverse to
   * figure out how much needs to be undone.
   */
  enum JournalEntry {
    /**
     * Set region as in transition, set it into MERGING state. Undone on
     * rollback by deleting the MERGING znode.
     */
    SET_MERGING_IN_ZK,
    /**
     * We created the temporary merge data directory (under region A). Undone
     * on rollback by cleaning up the merges directory.
     */
    CREATED_MERGE_DIR,
    /**
     * Closed the merging region A. Undone on rollback by re-initializing the
     * region.
     */
    CLOSED_REGION_A,
    /**
     * The merging region A has been taken out of the server's online regions list.
     * Undone on rollback by adding it back to the online regions.
     */
    OFFLINED_REGION_A,
    /**
     * Closed the merging region B. Undone on rollback by re-initializing the
     * region.
     */
    CLOSED_REGION_B,
    /**
     * The merging region B has been taken out of the server's online regions list.
     * Undone on rollback by adding it back to the online regions.
     */
    OFFLINED_REGION_B,
    /**
     * Started in on creation of the merged region. Added to the journal
     * BEFORE the creation is attempted so that a half-finished creation is
     * still cleaned up on rollback.
     */
    STARTED_MERGED_REGION_CREATION,
    /**
     * Point of no return. If we got here, then transaction is not recoverable
     * other than by crashing out the regionserver. Rollback stops and returns
     * <code>false</code> as soon as it meets this entry.
     */
    PONR
  }
131 
132   /*
133    * Journal of how far the merge transaction has progressed.
134    */
135   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
136 
137   private static IOException closedByOtherException = new IOException(
138       "Failed to close region: already closed by another thread");
139 
140   /**
141    * Constructor
142    * @param a region a to merge
143    * @param b region b to merge
144    * @param forcible if false, we will only merge adjacent regions
145    */
146   public RegionMergeTransaction(final HRegion a, final HRegion b,
147       final boolean forcible) {
148     if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
149       this.region_a = a;
150       this.region_b = b;
151     } else {
152       this.region_a = b;
153       this.region_b = a;
154     }
155     this.forcible = forcible;
156     this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
157   }
158 
159   /**
160    * Does checks on merge inputs.
161    * @param services
162    * @return <code>true</code> if the regions are mergeable else
163    *         <code>false</code> if they are not (e.g. its already closed, etc.).
164    */
165   public boolean prepare(final RegionServerServices services) {
166     if (!region_a.getTableDesc().getTableName()
167         .equals(region_b.getTableDesc().getTableName())) {
168       LOG.info("Can't merge regions " + region_a + "," + region_b
169           + " because they do not belong to the same table");
170       return false;
171     }
172     if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
173       LOG.info("Can't merge the same region " + region_a);
174       return false;
175     }
176     if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
177             region_b.getRegionInfo())) {
178       String msg = "Skip merging " + this.region_a.getRegionNameAsString()
179           + " and " + this.region_b.getRegionNameAsString()
180           + ", because they are not adjacent.";
181       LOG.info(msg);
182       return false;
183     }
184     if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
185       return false;
186     }
187     try {
188       boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
189           region_a.getRegionName());
190       if (regionAHasMergeQualifier ||
191           hasMergeQualifierInMeta(services, region_b.getRegionName())) {
192         LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
193                 : region_b.getRegionNameAsString())
194             + " is not mergeable because it has merge qualifier in META");
195         return false;
196       }
197     } catch (IOException e) {
198       LOG.warn("Failed judging whether merge transaction is available for "
199               + region_a.getRegionNameAsString() + " and "
200               + region_b.getRegionNameAsString(), e);
201       return false;
202     }
203 
204     // WARN: make sure there is no parent region of the two merging regions in
205     // .META. If exists, fixing up daughters would cause daughter regions(we
206     // have merged one) online again when we restart master, so we should clear
207     // the parent region to prevent the above case
208     // Since HBASE-7721, we don't need fix up daughters any more. so here do
209     // nothing
210 
211     this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
212         region_b.getRegionInfo());
213     return true;
214   }
215 
216   /**
217    * Run the transaction.
218    * @param server Hosting server instance. Can be null when testing (won't try
219    *          and update in zk if a null server)
220    * @param services Used to online/offline regions.
221    * @throws IOException If thrown, transaction failed. Call
222    *           {@link #rollback(Server, RegionServerServices)}
223    * @return merged region
224    * @throws IOException
225    * @see #rollback(Server, RegionServerServices)
226    */
227   public HRegion execute(final Server server,
228       final RegionServerServices services) throws IOException {
229     HRegion mergedRegion = createMergedRegion(server, services);
230     openMergedRegion(server, services, mergedRegion);
231     transitionZKNode(server, services);
232     return mergedRegion;
233   }
234 
235   /**
236    * Prepare the merged region and region files.
237    * @param server Hosting server instance. Can be null when testing (won't try
238    *          and update in zk if a null server)
239    * @param services Used to online/offline regions.
240    * @return merged region
241    * @throws IOException If thrown, transaction failed. Call
242    *           {@link #rollback(Server, RegionServerServices)}
243    */
244   HRegion createMergedRegion(final Server server,
245       final RegionServerServices services) throws IOException {
246     LOG.info("Starting merge of " + region_a + " and "
247         + region_b.getRegionNameAsString() + ", forcible=" + forcible);
248     if ((server != null && server.isStopped())
249         || (services != null && services.isStopping())) {
250       throw new IOException("Server is stopped or stopping");
251     }
252 
253     // If true, no cluster to write meta edits to or to update znodes in.
254     boolean testing = server == null ? true : server.getConfiguration()
255         .getBoolean("hbase.testing.nocluster", false);
256 
257     // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
258     // have zookeeper so don't do zk stuff if server or zookeeper is null
259     if (server != null && server.getZooKeeper() != null) {
260       try {
261         createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
262           server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
263       } catch (KeeperException e) {
264         throw new IOException("Failed creating MERGING znode on "
265             + this.mergedRegionInfo.getRegionNameAsString(), e);
266       }
267     }
268     this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
269     if (server != null && server.getZooKeeper() != null) {
270       try {
271         // Transition node from MERGING to MERGING after creating the merge
272         // node. Master will get the callback for node change only if the
273         // transition is successful.
274         // Note that if the transition fails then the rollback will delete the
275         // created znode as the journal entry SET_MERGING_IN_ZK is added.
276         this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
277             this.mergedRegionInfo, server.getServerName(), -1,
278             region_a.getRegionInfo(), region_b.getRegionInfo());
279       } catch (KeeperException e) {
280         throw new IOException("Failed setting MERGING znode on "
281             + this.mergedRegionInfo.getRegionNameAsString(), e);
282       }
283     }
284 
285     this.region_a.getRegionFileSystem().createMergesDir();
286     this.journal.add(JournalEntry.CREATED_MERGE_DIR);
287 
288     Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
289         services, this.region_a, true, testing);
290     Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
291         services, this.region_b, false, testing);
292 
293     assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;
294 
295 
296     //
297     // mergeStoreFiles creates merged region dirs under the region_a merges dir
298     // Nothing to unroll here if failure -- clean up of CREATE_MERGE_DIR will
299     // clean this up.
300     mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);
301 
302     if (server != null && server.getZooKeeper() != null) {
303       try {
304         // Do one more check on the merging znode (before it is too late) in case
305         // any merging region is moved somehow. If so, the znode transition will fail.
306         this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
307             this.mergedRegionInfo, server.getServerName(), this.znodeVersion,
308             region_a.getRegionInfo(), region_b.getRegionInfo());
309       } catch (KeeperException e) {
310         throw new IOException("Failed setting MERGING znode on "
311             + this.mergedRegionInfo.getRegionNameAsString(), e);
312       }
313     }
314 
315     // Log to the journal that we are creating merged region. We could fail
316     // halfway through. If we do, we could have left
317     // stuff in fs that needs cleanup -- a storefile or two. Thats why we
318     // add entry to journal BEFORE rather than AFTER the change.
319     this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
320     HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
321         this.region_b, this.mergedRegionInfo);
322 
323 
324     // This is the point of no return. Similar with SplitTransaction.
325     // IF we reach the PONR then subsequent failures need to crash out this
326     // regionserver
327     this.journal.add(JournalEntry.PONR);
328 
329     // Add merged region and delete region_a and region_b
330     // as an atomic update. See HBASE-7721. This update to META makes the region
331     // will determine whether the region is merged or not in case of failures.
332     // If it is successful, master will roll-forward, if not, master will
333     // rollback
334     if (!testing) {
335       MetaEditor.mergeRegions(server.getCatalogTracker(),
336           mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
337           region_b.getRegionInfo(), server.getServerName());
338     }
339     return mergedRegion;
340   }
341 
342   /**
343    * Create a merged region from the merges directory under region a. In order
344    * to mock it for tests, place it with a new method.
345    * @param a hri of region a
346    * @param b hri of region b
347    * @param mergedRegion hri of merged region
348    * @return merged HRegion.
349    * @throws IOException
350    */
351   HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
352       final HRegionInfo mergedRegion) throws IOException {
353     return a.createMergedRegionFromMerges(mergedRegion, b);
354   }
355 
356   /**
357    * Close the merging region and offline it in regionserver
358    * @param services
359    * @param region
360    * @param isRegionA true if it is merging region a, false if it is region b
361    * @param testing true if it is testing
362    * @return a map of family name to list of store files
363    * @throws IOException
364    */
365   private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
366       final RegionServerServices services, final HRegion region,
367       final boolean isRegionA, final boolean testing) throws IOException {
368     Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
369     Exception exceptionToThrow = null;
370     try {
371       hstoreFilesToMerge = region.close(false);
372     } catch (Exception e) {
373       exceptionToThrow = e;
374     }
375     if (exceptionToThrow == null && hstoreFilesToMerge == null) {
376       // The region was closed by a concurrent thread. We can't continue
377       // with the merge, instead we must just abandon the merge. If we
378       // reopen or merge this could cause problems because the region has
379       // probably already been moved to a different server, or is in the
380       // process of moving to a different server.
381       exceptionToThrow = closedByOtherException;
382     }
383     if (exceptionToThrow != closedByOtherException) {
384       this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
385           : JournalEntry.CLOSED_REGION_B);
386     }
387     if (exceptionToThrow != null) {
388       if (exceptionToThrow instanceof IOException)
389         throw (IOException) exceptionToThrow;
390       throw new IOException(exceptionToThrow);
391     }
392 
393     if (!testing) {
394       services.removeFromOnlineRegions(region, null);
395     }
396     this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
397         : JournalEntry.OFFLINED_REGION_B);
398     return hstoreFilesToMerge;
399   }
400 
401   /**
402    * Get merged region info through the specified two regions
403    * @param a merging region A
404    * @param b merging region B
405    * @return the merged region info
406    */
407   public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
408       final HRegionInfo b) {
409     long rid = EnvironmentEdgeManager.currentTimeMillis();
410     // Regionid is timestamp. Merged region's id can't be less than that of
411     // merging regions else will insert at wrong location in .META.
412     if (rid < a.getRegionId() || rid < b.getRegionId()) {
413       LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
414           + " and " + b.getRegionId() + ", but current time here is " + rid);
415       rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
416     }
417 
418     byte[] startKey = null;
419     byte[] endKey = null;
420     // Choose the smaller as start key
421     if (a.compareTo(b) <= 0) {
422       startKey = a.getStartKey();
423     } else {
424       startKey = b.getStartKey();
425     }
426     // Choose the bigger as end key
427     if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
428         || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
429             && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
430       endKey = a.getEndKey();
431     } else {
432       endKey = b.getEndKey();
433     }
434 
435     // Merged region is sorted between two merging regions in META
436     HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTableName(), startKey,
437         endKey, false, rid);
438     return mergedRegionInfo;
439   }
440 
441   /**
442    * Perform time consuming opening of the merged region.
443    * @param server Hosting server instance. Can be null when testing (won't try
444    *          and update in zk if a null server)
445    * @param services Used to online/offline regions.
446    * @param merged the merged region
447    * @throws IOException If thrown, transaction failed. Call
448    *           {@link #rollback(Server, RegionServerServices)}
449    */
450   void openMergedRegion(final Server server,
451       final RegionServerServices services, HRegion merged) throws IOException {
452     boolean stopped = server != null && server.isStopped();
453     boolean stopping = services != null && services.isStopping();
454     if (stopped || stopping) {
455       LOG.info("Not opening merged region  " + merged.getRegionNameAsString()
456           + " because stopping=" + stopping + ", stopped=" + stopped);
457       return;
458     }
459     HRegionInfo hri = merged.getRegionInfo();
460     LoggingProgressable reporter = server == null ? null
461         : new LoggingProgressable(hri, server.getConfiguration().getLong(
462             "hbase.regionserver.regionmerge.open.log.interval", 10000));
463     merged.openHRegion(reporter);
464 
465     if (services != null) {
466       try {
467         services.postOpenDeployTasks(merged, server.getCatalogTracker());
468         services.addToOnlineRegions(merged);
469       } catch (KeeperException ke) {
470         throw new IOException(ke);
471       }
472     }
473 
474   }
475 
476   /**
477    * Finish off merge transaction, transition the zknode
478    * @param server Hosting server instance. Can be null when testing (won't try
479    *          and update in zk if a null server)
480    * @param services Used to online/offline regions.
481    * @throws IOException If thrown, transaction failed. Call
482    *           {@link #rollback(Server, RegionServerServices)}
483    */
484   void transitionZKNode(final Server server, final RegionServerServices services)
485       throws IOException {
486     if (server == null || server.getZooKeeper() == null) {
487       return;
488     }
489 
490     // Tell master about merge by updating zk. If we fail, abort.
491     try {
492       this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
493           this.mergedRegionInfo, region_a.getRegionInfo(),
494           region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
495 
496       long startTime = EnvironmentEdgeManager.currentTimeMillis();
497       int spins = 0;
498       // Now wait for the master to process the merge. We know it's done
499       // when the znode is deleted. The reason we keep tickling the znode is
500       // that it's possible for the master to miss an event.
501       do {
502         if (spins % 10 == 0) {
503           LOG.debug("Still waiting on the master to process the merge for "
504               + this.mergedRegionInfo.getEncodedName() + ", waited "
505               + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
506         }
507         Thread.sleep(100);
508         // When this returns -1 it means the znode doesn't exist
509         this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
510             this.mergedRegionInfo, region_a.getRegionInfo(),
511             region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
512         spins++;
513       } while (this.znodeVersion != -1 && !server.isStopped()
514           && !services.isStopping());
515     } catch (Exception e) {
516       if (e instanceof InterruptedException) {
517         Thread.currentThread().interrupt();
518       }
519       throw new IOException("Failed telling master about merge "
520           + mergedRegionInfo.getEncodedName(), e);
521     }
522 
523 
524     // Leaving here, the mergedir with its dross will be in place but since the
525     // merge was successful, just leave it; it'll be cleaned when region_a is
526     // cleaned up by CatalogJanitor on master
527   }
528 
529   /**
530    * Create reference file(s) of merging regions under the region_a merges dir
531    * @param hstoreFilesOfRegionA
532    * @param hstoreFilesOfRegionB
533    * @throws IOException
534    */
535   private void mergeStoreFiles(
536       Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
537       Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
538       throws IOException {
539     // Create reference file(s) of region A in mergdir
540     HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
541     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
542         .entrySet()) {
543       String familyName = Bytes.toString(entry.getKey());
544       for (StoreFile storeFile : entry.getValue()) {
545         fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
546             this.mergesdir);
547       }
548     }
549     // Create reference file(s) of region B in mergedir
550     HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
551     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
552         .entrySet()) {
553       String familyName = Bytes.toString(entry.getKey());
554       for (StoreFile storeFile : entry.getValue()) {
555         fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
556             this.mergesdir);
557       }
558     }
559   }
560 
561   /**
562    * @param server Hosting server instance (May be null when testing).
563    * @param services Services of regionserver, used to online regions.
564    * @throws IOException If thrown, rollback failed. Take drastic action.
565    * @return True if we successfully rolled back, false if we got to the point
566    *         of no return and so now need to abort the server to minimize
567    *         damage.
568    */
569   public boolean rollback(final Server server,
570       final RegionServerServices services) throws IOException {
571     assert this.mergedRegionInfo != null;
572     boolean result = true;
573     ListIterator<JournalEntry> iterator = this.journal
574         .listIterator(this.journal.size());
575     // Iterate in reverse.
576     while (iterator.hasPrevious()) {
577       JournalEntry je = iterator.previous();
578       switch (je) {
579 
580         case SET_MERGING_IN_ZK:
581           if (server != null && server.getZooKeeper() != null) {
582             cleanZK(server, this.mergedRegionInfo);
583           }
584           break;
585 
586         case CREATED_MERGE_DIR:
587           this.region_a.writestate.writesEnabled = true;
588           this.region_b.writestate.writesEnabled = true;
589           this.region_a.getRegionFileSystem().cleanupMergesDir();
590           break;
591 
592         case CLOSED_REGION_A:
593           try {
594             // So, this returns a seqid but if we just closed and then reopened,
595             // we should be ok. On close, we flushed using sequenceid obtained
596             // from hosting regionserver so no need to propagate the sequenceid
597             // returned out of initialize below up into regionserver as we
598             // normally do.
599             this.region_a.initialize();
600           } catch (IOException e) {
601             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
602                 + this.region_a.getRegionNameAsString(), e);
603             throw new RuntimeException(e);
604           }
605           break;
606 
607         case OFFLINED_REGION_A:
608           if (services != null)
609             services.addToOnlineRegions(this.region_a);
610           break;
611 
612         case CLOSED_REGION_B:
613           try {
614             this.region_b.initialize();
615           } catch (IOException e) {
616             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
617                 + this.region_b.getRegionNameAsString(), e);
618             throw new RuntimeException(e);
619           }
620           break;
621 
622         case OFFLINED_REGION_B:
623           if (services != null)
624             services.addToOnlineRegions(this.region_b);
625           break;
626 
627         case STARTED_MERGED_REGION_CREATION:
628           this.region_a.getRegionFileSystem().cleanupMergedRegion(
629               this.mergedRegionInfo);
630           break;
631 
632         case PONR:
633           // We got to the point-of-no-return so we need to just abort. Return
634           // immediately. Do not clean up created merged regions.
635           return false;
636 
637         default:
638           throw new RuntimeException("Unhandled journal entry: " + je);
639       }
640     }
641     return result;
642   }
643 
644   HRegionInfo getMergedRegionInfo() {
645     return this.mergedRegionInfo;
646   }
647 
648   // For unit testing.
649   Path getMergesDir() {
650     return this.mergesdir;
651   }
652 
653   private static void cleanZK(final Server server, final HRegionInfo hri) {
654     try {
655       // Only delete if its in expected state; could have been hijacked.
656       ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
657           EventType.RS_ZK_REGION_MERGING);
658     } catch (KeeperException.NoNodeException e) {
659       LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
660     } catch (KeeperException e) {
661       server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
662     }
663 
664   }
665 
666   /**
667    * Creates a new ephemeral node in the MERGING state for the merged region.
668    * Create it ephemeral in case regionserver dies mid-merge.
669    * 
670    * <p>
671    * Does not transition nodes from other states. If a node already exists for
672    * this region, a {@link NodeExistsException} will be thrown.
673    *
674    * @param zkw zk reference
675    * @param region region to be created as offline
676    * @param serverName server event originates from
677    * @return Version of znode created.
678    * @throws KeeperException
679    * @throws IOException
680    */
681   int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
682       final ServerName serverName, final HRegionInfo a,
683       final HRegionInfo b) throws KeeperException, IOException {
684     LOG.debug(zkw.prefix("Creating ephemeral node for "
685         + region.getEncodedName() + " in MERGING state"));
686     byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
687     RegionTransition rt = RegionTransition.createRegionTransition(
688         EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName, payload);
689     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
690     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
691       throw new IOException("Failed create of ephemeral " + node);
692     }
693     // Transition node from MERGING to MERGING and pick up version so we
694     // can be sure this znode is ours; version is needed deleting.
695     return transitionNodeMerging(zkw, region, serverName, -1, a, b);
696   }
697 
698   /**
699    * Transitions an existing node for the specified region which is currently in
700    * the MERGING state to be in the MERGE state. Converts the ephemeral MERGING
701    * znode to an ephemeral MERGE node. Master cleans up MERGE znode when it
702    * reads it (or if we crash, zk will clean it up).
703    *
704    * <p>
705    * Does not transition nodes from other states. If for some reason the node
706    * could not be transitioned, the method returns -1. If the transition is
707    * successful, the version of the node after transition is returned.
708    *
709    * <p>
710    * This method can fail and return false for three different reasons:
711    * <ul>
712    * <li>Node for this region does not exist</li>
713    * <li>Node for this region is not in MERGING state</li>
714    * <li>After verifying MERGING state, update fails because of wrong version
715    * (this should never actually happen since an RS only does this transition
716    * following a transition to MERGING. if two RS are conflicting, one would
717    * fail the original transition to MERGING and not this transition)</li>
718    * </ul>
719    *
720    * <p>
721    * Does not set any watches.
722    *
723    * <p>
724    * This method should only be used by a RegionServer when completing the open
725    * of merged region.
726    *
727    * @param zkw zk reference
728    * @param merged region to be transitioned to opened
729    * @param a merging region A
730    * @param b merging region B
731    * @param serverName server event originates from
732    * @param znodeVersion expected version of data before modification
733    * @return version of node after transition, -1 if unsuccessful transition
734    * @throws KeeperException if unexpected zookeeper exception
735    * @throws IOException
736    */
737   private static int transitionNodeMerge(ZooKeeperWatcher zkw,
738       HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
739       final int znodeVersion) throws KeeperException, IOException {
740     byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
741     return ZKAssign.transitionNode(zkw, merged, serverName,
742         EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGED,
743         znodeVersion, payload);
744   }
745 
746   /**
747    *
748    * @param zkw zk reference
749    * @param parent region to be transitioned to merging
750    * @param serverName server event originates from
751    * @param version znode version
752    * @return version of node after transition, -1 if unsuccessful transition
753    * @throws KeeperException
754    * @throws IOException
755    */
756   int transitionNodeMerging(final ZooKeeperWatcher zkw,
757       final HRegionInfo parent, final ServerName serverName, final int version,
758       final HRegionInfo a, final HRegionInfo b) throws KeeperException, IOException {
759     byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
760     return ZKAssign.transitionNode(zkw, parent, serverName,
761             EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING,
762         version, payload);
763   }
764 
765   private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged,
766       HRegionInfo a, HRegionInfo b, ServerName serverName,
767       final int znodeVersion) throws KeeperException, IOException {
768     byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
769     return ZKAssign.transitionNode(zkw, merged, serverName,
770         EventType.RS_ZK_REGION_MERGED, EventType.RS_ZK_REGION_MERGED,
771         znodeVersion, payload);
772   }
773 
774   /**
775    * Checks if the given region has merge qualifier in .META.
776    * @param services
777    * @param regionName name of specified region
778    * @return true if the given region has merge qualifier in META.(It will be
779    *         cleaned by CatalogJanitor)
780    * @throws IOException
781    */
782   boolean hasMergeQualifierInMeta(final RegionServerServices services,
783       final byte[] regionName) throws IOException {
784     if (services == null) return false;
785     // Get merge regions if it is a merged region and already has merge
786     // qualifier
787     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
788         .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
789     if (mergeRegions != null &&
790         (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
791       // It has merge qualifier
792       return true;
793     }
794     return false;
795   }
796 }
797