View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
22  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
23  import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.ListIterator;
29  import java.util.Map;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.RegionTransition;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.ServerName;
40  import org.apache.hadoop.hbase.catalog.MetaEditor;
41  import org.apache.hadoop.hbase.catalog.MetaReader;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
46  import org.apache.hadoop.hbase.util.Pair;
47  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
48  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
49  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
50  import org.apache.zookeeper.KeeperException;
51  import org.apache.zookeeper.KeeperException.NodeExistsException;
52  import org.apache.zookeeper.data.Stat;
53  
/**
 * Executes region merge as a "transaction". It is similar to
 * SplitTransaction. Call {@link #prepare(RegionServerServices)} to set up the
 * transaction, {@link #execute(Server, RegionServerServices)} to run the
 * transaction and {@link #rollback(Server, RegionServerServices)} to clean up if
 * execute fails.
 *
 * <p>
 * Here is an example of how you would use this class:
 *
 * <pre>
 *  RegionMergeTransaction mt = new RegionMergeTransaction(regionA, regionB, forcible);
 *  if (!mt.prepare(services)) return;
 *  try {
 *    mt.execute(server, services);
 *  } catch (IOException ioe) {
 *    try {
 *      mt.rollback(server, services);
 *      return;
 *    } catch (RuntimeException e) {
 *      myAbortable.abort("Failed merge, abort");
 *    }
 *  }
 * </pre>
 * <p>
 * This class is not thread safe. The caller needs to ensure that the merge is
 * run by one thread only.
 */
82  @InterfaceAudience.Private
83  public class RegionMergeTransaction {
private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);

// Info of the region produced by the merge; assigned by prepare() when all
// of its checks pass.
private HRegionInfo mergedRegionInfo;
// The two regions being merged. The constructor orders them so that
// region_a sorts before (or equal to) region_b.
private final HRegion region_a;
private final HRegion region_b;
// Merges dir, obtained from region_a's region file system
private final Path mergesdir;
// Version of the merging znode as last returned by a zk read/transition;
// stays -1 until one has happened.
private int znodeVersion = -1;
// We only merge adjacent regions if forcible is false
private final boolean forcible;
96  
/**
 * Steps that get recorded in the transaction journal. One constant per step
 * of the merge transaction; the recorded steps tell rollback how much work
 * has to be undone.
 */
enum JournalEntry {
  /** The region was set as in transition and put into MERGING state in zk. */
  SET_MERGING_IN_ZK,

  /** The temporary merge data directory was created. */
  CREATED_MERGE_DIR,

  /** Merging region A was closed. */
  CLOSED_REGION_A,

  /** Merging region A was taken out of the server's online regions list. */
  OFFLINED_REGION_A,

  /** Merging region B was closed. */
  CLOSED_REGION_B,

  /** Merging region B was taken out of the server's online regions list. */
  OFFLINED_REGION_B,

  /** Creation of the merged region was started. */
  STARTED_MERGED_REGION_CREATION,

  /**
   * Point of no return. Once this is journaled the transaction is not
   * recoverable other than by crashing out the regionserver.
   */
  PONR
}
136 
137   /*
138    * Journal of how far the merge transaction has progressed.
139    */
140   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
141 
142   private static IOException closedByOtherException = new IOException(
143       "Failed to close region: already closed by another thread");
144 
145   /**
146    * Constructor
147    * @param a region a to merge
148    * @param b region b to merge
149    * @param forcible if false, we will only merge adjacent regions
150    */
151   public RegionMergeTransaction(final HRegion a, final HRegion b,
152       final boolean forcible) {
153     if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
154       this.region_a = a;
155       this.region_b = b;
156     } else {
157       this.region_a = b;
158       this.region_b = a;
159     }
160     this.forcible = forcible;
161     this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
162   }
163 
164   /**
165    * Does checks on merge inputs.
166    * @param services
167    * @return <code>true</code> if the regions are mergeable else
168    *         <code>false</code> if they are not (e.g. its already closed, etc.).
169    */
170   public boolean prepare(final RegionServerServices services) {
171     if (!region_a.getTableDesc().getTableName()
172         .equals(region_b.getTableDesc().getTableName())) {
173       LOG.info("Can't merge regions " + region_a + "," + region_b
174           + " because they do not belong to the same table");
175       return false;
176     }
177     if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
178       LOG.info("Can't merge the same region " + region_a);
179       return false;
180     }
181     if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
182             region_b.getRegionInfo())) {
183       String msg = "Skip merging " + this.region_a.getRegionNameAsString()
184           + " and " + this.region_b.getRegionNameAsString()
185           + ", because they are not adjacent.";
186       LOG.info(msg);
187       return false;
188     }
189     if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
190       return false;
191     }
192     try {
193       boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
194           region_a.getRegionName());
195       if (regionAHasMergeQualifier ||
196           hasMergeQualifierInMeta(services, region_b.getRegionName())) {
197         LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
198                 : region_b.getRegionNameAsString())
199             + " is not mergeable because it has merge qualifier in META");
200         return false;
201       }
202     } catch (IOException e) {
203       LOG.warn("Failed judging whether merge transaction is available for "
204               + region_a.getRegionNameAsString() + " and "
205               + region_b.getRegionNameAsString(), e);
206       return false;
207     }
208 
209     // WARN: make sure there is no parent region of the two merging regions in
210     // hbase:meta If exists, fixing up daughters would cause daughter regions(we
211     // have merged one) online again when we restart master, so we should clear
212     // the parent region to prevent the above case
213     // Since HBASE-7721, we don't need fix up daughters any more. so here do
214     // nothing
215 
216     this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
217         region_b.getRegionInfo());
218     return true;
219   }
220 
221   /**
222    * Run the transaction.
223    * @param server Hosting server instance. Can be null when testing (won't try
224    *          and update in zk if a null server)
225    * @param services Used to online/offline regions.
226    * @throws IOException If thrown, transaction failed. Call
227    *           {@link #rollback(Server, RegionServerServices)}
228    * @return merged region
229    * @throws IOException
230    * @see #rollback(Server, RegionServerServices)
231    */
232   public HRegion execute(final Server server,
233       final RegionServerServices services) throws IOException {
234     HRegion mergedRegion = createMergedRegion(server, services);
235     openMergedRegion(server, services, mergedRegion);
236     transitionZKNode(server, services);
237     return mergedRegion;
238   }
239 
/**
 * Prepare the merged region and region files.
 * <p>
 * In order: creates the merge znode and waits for it to reach MERGING (only
 * when a server with zookeeper is present), creates the merges dir, closes
 * and offlines both regions, creates the store file references, re-checks
 * the znode, creates the merged region and, unless testing, updates
 * hbase:meta. Journal entries are added along the way so that
 * {@link #rollback(Server, RegionServerServices)} knows what to undo.
 * @param server Hosting server instance. Can be null when testing (won't try
 *          and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call
 *           {@link #rollback(Server, RegionServerServices)}
 */
HRegion createMergedRegion(final Server server,
    final RegionServerServices services) throws IOException {
  LOG.info("Starting merge of " + region_a + " and "
      + region_b.getRegionNameAsString() + ", forcible=" + forcible);
  if ((server != null && server.isStopped())
      || (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }

  // If true, no cluster to write meta edits to or to update znodes in.
  // A null server always counts as testing.
  boolean testing = server == null ? true : server.getConfiguration()
      .getBoolean("hbase.testing.nocluster", false);

  // Set the ephemeral merge znode up in zk (created in the
  // RS_ZK_REQUEST_REGION_MERGE state). Mocked servers sometimes don't
  // have zookeeper so don't do zk stuff if server or zookeeper is null
  if (server != null && server.getZooKeeper() != null) {
    try {
      createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
        server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
    } catch (KeeperException e) {
      throw new IOException("Failed creating PENDING_MERGE znode on "
          + this.mergedRegionInfo.getRegionNameAsString(), e);
    }
  }
  // Journaled even when the zk step was skipped; rollback re-checks for a
  // server and zookeeper before cleaning the znode.
  this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
  if (server != null && server.getZooKeeper() != null) {
    // After creating the merge node, wait for master to transition it
    // from PENDING_MERGE to MERGING so that we can move on. We want master
    // to know about it so that it won't transition any region which is merging.
    znodeVersion = getZKNode(server, services);
  }

  this.region_a.getRegionFileSystem().createMergesDir();
  this.journal.add(JournalEntry.CREATED_MERGE_DIR);

  Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
      services, this.region_a, true, testing);
  Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
      services, this.region_b, false, testing);

  assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;


  //
  // mergeStoreFiles creates merged region dirs under the region_a merges dir
  // Nothing to unroll here if failure -- clean up of CREATED_MERGE_DIR will
  // clean this up.
  mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);

  if (server != null && server.getZooKeeper() != null) {
    try {
      // Do one more check on the merging znode (before it is too late) in case
      // any merging region is moved somehow. If so, the znode transition will fail.
      // The transition is MERGING -> MERGING, i.e. it only verifies and
      // refreshes the version.
      this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
        this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
        server.getServerName(), this.znodeVersion,
        RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
    } catch (KeeperException e) {
      throw new IOException("Failed setting MERGING znode on "
          + this.mergedRegionInfo.getRegionNameAsString(), e);
    }
  }

  // Log to the journal that we are creating merged region. We could fail
  // halfway through. If we do, we could have left
  // stuff in fs that needs cleanup -- a storefile or two. That's why we
  // add entry to journal BEFORE rather than AFTER the change.
  this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
  HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
      this.region_b, this.mergedRegionInfo);


  // This is the point of no return. Similar to SplitTransaction.
  // If we reach the PONR then subsequent failures need to crash out this
  // regionserver
  this.journal.add(JournalEntry.PONR);

  // Add merged region and delete region_a and region_b
  // as an atomic update. See HBASE-7721. This update to hbase:meta is what
  // determines whether the region is merged or not in case of failures.
  // If it is successful, master will roll-forward, if not, master will
  // rollback
  if (!testing) {
    MetaEditor.mergeRegions(server.getCatalogTracker(),
        mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
        region_b.getRegionInfo(), server.getServerName());
  }
  return mergedRegion;
}
338 
/**
 * Create a merged region from the merges directory under region a. This is
 * kept as a separate method so that tests can mock it; it simply delegates
 * to region a.
 * @param a region a
 * @param b region b
 * @param mergedRegion hri of merged region
 * @return merged HRegion.
 * @throws IOException if the delegate call on region a fails
 */
HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
    final HRegionInfo mergedRegion) throws IOException {
  return a.createMergedRegionFromMerges(mergedRegion, b);
}
352 
353   /**
354    * Close the merging region and offline it in regionserver
355    * @param services
356    * @param region
357    * @param isRegionA true if it is merging region a, false if it is region b
358    * @param testing true if it is testing
359    * @return a map of family name to list of store files
360    * @throws IOException
361    */
362   private Map<byte[], List<StoreFile>> closeAndOfflineRegion(
363       final RegionServerServices services, final HRegion region,
364       final boolean isRegionA, final boolean testing) throws IOException {
365     Map<byte[], List<StoreFile>> hstoreFilesToMerge = null;
366     Exception exceptionToThrow = null;
367     try {
368       hstoreFilesToMerge = region.close(false);
369     } catch (Exception e) {
370       exceptionToThrow = e;
371     }
372     if (exceptionToThrow == null && hstoreFilesToMerge == null) {
373       // The region was closed by a concurrent thread. We can't continue
374       // with the merge, instead we must just abandon the merge. If we
375       // reopen or merge this could cause problems because the region has
376       // probably already been moved to a different server, or is in the
377       // process of moving to a different server.
378       exceptionToThrow = closedByOtherException;
379     }
380     if (exceptionToThrow != closedByOtherException) {
381       this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
382           : JournalEntry.CLOSED_REGION_B);
383     }
384     if (exceptionToThrow != null) {
385       if (exceptionToThrow instanceof IOException)
386         throw (IOException) exceptionToThrow;
387       throw new IOException(exceptionToThrow);
388     }
389 
390     if (!testing) {
391       services.removeFromOnlineRegions(region, null);
392     }
393     this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
394         : JournalEntry.OFFLINED_REGION_B);
395     return hstoreFilesToMerge;
396   }
397 
398   /**
399    * Get merged region info through the specified two regions
400    * @param a merging region A
401    * @param b merging region B
402    * @return the merged region info
403    */
404   public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
405       final HRegionInfo b) {
406     long rid = EnvironmentEdgeManager.currentTimeMillis();
407     // Regionid is timestamp. Merged region's id can't be less than that of
408     // merging regions else will insert at wrong location in hbase:meta
409     if (rid < a.getRegionId() || rid < b.getRegionId()) {
410       LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
411           + " and " + b.getRegionId() + ", but current time here is " + rid);
412       rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
413     }
414 
415     byte[] startKey = null;
416     byte[] endKey = null;
417     // Choose the smaller as start key
418     if (a.compareTo(b) <= 0) {
419       startKey = a.getStartKey();
420     } else {
421       startKey = b.getStartKey();
422     }
423     // Choose the bigger as end key
424     if (Bytes.equals(a.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
425         || (!Bytes.equals(b.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)
426             && Bytes.compareTo(a.getEndKey(), b.getEndKey()) > 0)) {
427       endKey = a.getEndKey();
428     } else {
429       endKey = b.getEndKey();
430     }
431 
432     // Merged region is sorted between two merging regions in META
433     HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTable(), startKey,
434         endKey, false, rid);
435     return mergedRegionInfo;
436   }
437 
438   /**
439    * Perform time consuming opening of the merged region.
440    * @param server Hosting server instance. Can be null when testing (won't try
441    *          and update in zk if a null server)
442    * @param services Used to online/offline regions.
443    * @param merged the merged region
444    * @throws IOException If thrown, transaction failed. Call
445    *           {@link #rollback(Server, RegionServerServices)}
446    */
447   void openMergedRegion(final Server server,
448       final RegionServerServices services, HRegion merged) throws IOException {
449     boolean stopped = server != null && server.isStopped();
450     boolean stopping = services != null && services.isStopping();
451     if (stopped || stopping) {
452       LOG.info("Not opening merged region  " + merged.getRegionNameAsString()
453           + " because stopping=" + stopping + ", stopped=" + stopped);
454       return;
455     }
456     HRegionInfo hri = merged.getRegionInfo();
457     LoggingProgressable reporter = server == null ? null
458         : new LoggingProgressable(hri, server.getConfiguration().getLong(
459             "hbase.regionserver.regionmerge.open.log.interval", 10000));
460     merged.openHRegion(reporter);
461 
462     if (services != null) {
463       try {
464         services.postOpenDeployTasks(merged, server.getCatalogTracker());
465         services.addToOnlineRegions(merged);
466       } catch (KeeperException ke) {
467         throw new IOException(ke);
468       }
469     }
470 
471   }
472 
473   /**
474    * Finish off merge transaction, transition the zknode
475    * @param server Hosting server instance. Can be null when testing (won't try
476    *          and update in zk if a null server)
477    * @param services Used to online/offline regions.
478    * @throws IOException If thrown, transaction failed. Call
479    *           {@link #rollback(Server, RegionServerServices)}
480    */
481   void transitionZKNode(final Server server, final RegionServerServices services)
482       throws IOException {
483     if (server == null || server.getZooKeeper() == null) {
484       return;
485     }
486 
487     // Tell master about merge by updating zk. If we fail, abort.
488     try {
489       this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
490         this.mergedRegionInfo, region_a.getRegionInfo(),
491         region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
492         RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);
493 
494       long startTime = EnvironmentEdgeManager.currentTimeMillis();
495       int spins = 0;
496       // Now wait for the master to process the merge. We know it's done
497       // when the znode is deleted. The reason we keep tickling the znode is
498       // that it's possible for the master to miss an event.
499       do {
500         if (spins % 10 == 0) {
501           LOG.debug("Still waiting on the master to process the merge for "
502               + this.mergedRegionInfo.getEncodedName() + ", waited "
503               + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
504         }
505         Thread.sleep(100);
506         // When this returns -1 it means the znode doesn't exist
507         this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
508           this.mergedRegionInfo, region_a.getRegionInfo(),
509           region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
510           RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
511         spins++;
512       } while (this.znodeVersion != -1 && !server.isStopped()
513           && !services.isStopping());
514     } catch (Exception e) {
515       if (e instanceof InterruptedException) {
516         Thread.currentThread().interrupt();
517       }
518       throw new IOException("Failed telling master about merge "
519           + mergedRegionInfo.getEncodedName(), e);
520     }
521 
522     // Leaving here, the mergedir with its dross will be in place but since the
523     // merge was successful, just leave it; it'll be cleaned when region_a is
524     // cleaned up by CatalogJanitor on master
525   }
526 
527   /**
528    * Wait for the merging node to be transitioned from pending_merge
529    * to merging by master. That's how we are sure master has processed
530    * the event and is good with us to move on. If we don't get any update,
531    * we periodically transition the node so that master gets the callback.
532    * If the node is removed or is not in pending_merge state any more,
533    * we abort the merge.
534    */
535   private int getZKNode(final Server server,
536       final RegionServerServices services) throws IOException {
537     // Wait for the master to process the pending_merge.
538     try {
539       int spins = 0;
540       Stat stat = new Stat();
541       ZooKeeperWatcher zkw = server.getZooKeeper();
542       ServerName expectedServer = server.getServerName();
543       String node = mergedRegionInfo.getEncodedName();
544       while (!(server.isStopped() || services.isStopping())) {
545         if (spins % 5 == 0) {
546           LOG.debug("Still waiting for master to process "
547             + "the pending_merge for " + node);
548           transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
549             region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
550             RS_ZK_REQUEST_REGION_MERGE);
551         }
552         Thread.sleep(100);
553         spins++;
554         byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
555         if (data == null) {
556           throw new IOException("Data is null, merging node "
557             + node + " no longer exists");
558         }
559         RegionTransition rt = RegionTransition.parseFrom(data);
560         EventType et = rt.getEventType();
561         if (et == RS_ZK_REGION_MERGING) {
562           ServerName serverName = rt.getServerName();
563           if (!serverName.equals(expectedServer)) {
564             throw new IOException("Merging node " + node + " is for "
565               + serverName + ", not us " + expectedServer);
566           }
567           byte [] payloadOfMerging = rt.getPayload();
568           List<HRegionInfo> mergingRegions = HRegionInfo.parseDelimitedFrom(
569             payloadOfMerging, 0, payloadOfMerging.length);
570           assert mergingRegions.size() == 3;
571           HRegionInfo a = mergingRegions.get(1);
572           HRegionInfo b = mergingRegions.get(2);
573           HRegionInfo hri_a = region_a.getRegionInfo();
574           HRegionInfo hri_b = region_b.getRegionInfo();
575           if (!(hri_a.equals(a) && hri_b.equals(b))) {
576             throw new IOException("Merging node " + node + " is for " + a + ", "
577               + b + ", not expected regions: " + hri_a + ", " + hri_b);
578           }
579           // Master has processed it.
580           return stat.getVersion();
581         }
582         if (et != RS_ZK_REQUEST_REGION_MERGE) {
583           throw new IOException("Merging node " + node
584             + " moved out of merging to " + et);
585         }
586       }
587       // Server is stopping/stopped
588       throw new IOException("Server is "
589         + (services.isStopping() ? "stopping" : "stopped"));
590     } catch (Exception e) {
591       if (e instanceof InterruptedException) {
592         Thread.currentThread().interrupt();
593       }
594       throw new IOException("Failed getting MERGING znode on "
595         + mergedRegionInfo.getRegionNameAsString(), e);
596     }
597   }
598 
599   /**
600    * Create reference file(s) of merging regions under the region_a merges dir
601    * @param hstoreFilesOfRegionA
602    * @param hstoreFilesOfRegionB
603    * @throws IOException
604    */
605   private void mergeStoreFiles(
606       Map<byte[], List<StoreFile>> hstoreFilesOfRegionA,
607       Map<byte[], List<StoreFile>> hstoreFilesOfRegionB)
608       throws IOException {
609     // Create reference file(s) of region A in mergdir
610     HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
611     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionA
612         .entrySet()) {
613       String familyName = Bytes.toString(entry.getKey());
614       for (StoreFile storeFile : entry.getValue()) {
615         fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
616             this.mergesdir);
617       }
618     }
619     // Create reference file(s) of region B in mergedir
620     HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
621     for (Map.Entry<byte[], List<StoreFile>> entry : hstoreFilesOfRegionB
622         .entrySet()) {
623       String familyName = Bytes.toString(entry.getKey());
624       for (StoreFile storeFile : entry.getValue()) {
625         fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
626             this.mergesdir);
627       }
628     }
629   }
630 
631   /**
632    * @param server Hosting server instance (May be null when testing).
633    * @param services Services of regionserver, used to online regions.
634    * @throws IOException If thrown, rollback failed. Take drastic action.
635    * @return True if we successfully rolled back, false if we got to the point
636    *         of no return and so now need to abort the server to minimize
637    *         damage.
638    */
639   @SuppressWarnings("deprecation")
640   public boolean rollback(final Server server,
641       final RegionServerServices services) throws IOException {
642     assert this.mergedRegionInfo != null;
643     boolean result = true;
644     ListIterator<JournalEntry> iterator = this.journal
645         .listIterator(this.journal.size());
646     // Iterate in reverse.
647     while (iterator.hasPrevious()) {
648       JournalEntry je = iterator.previous();
649       switch (je) {
650 
651         case SET_MERGING_IN_ZK:
652           if (server != null && server.getZooKeeper() != null) {
653             cleanZK(server, this.mergedRegionInfo);
654           }
655           break;
656 
657         case CREATED_MERGE_DIR:
658           this.region_a.writestate.writesEnabled = true;
659           this.region_b.writestate.writesEnabled = true;
660           this.region_a.getRegionFileSystem().cleanupMergesDir();
661           break;
662 
663         case CLOSED_REGION_A:
664           try {
665             // So, this returns a seqid but if we just closed and then reopened,
666             // we should be ok. On close, we flushed using sequenceid obtained
667             // from hosting regionserver so no need to propagate the sequenceid
668             // returned out of initialize below up into regionserver as we
669             // normally do.
670             this.region_a.initialize();
671           } catch (IOException e) {
672             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
673                 + this.region_a.getRegionNameAsString(), e);
674             throw new RuntimeException(e);
675           }
676           break;
677 
678         case OFFLINED_REGION_A:
679           if (services != null)
680             services.addToOnlineRegions(this.region_a);
681           break;
682 
683         case CLOSED_REGION_B:
684           try {
685             this.region_b.initialize();
686           } catch (IOException e) {
687             LOG.error("Failed rollbacking CLOSED_REGION_A of region "
688                 + this.region_b.getRegionNameAsString(), e);
689             throw new RuntimeException(e);
690           }
691           break;
692 
693         case OFFLINED_REGION_B:
694           if (services != null)
695             services.addToOnlineRegions(this.region_b);
696           break;
697 
698         case STARTED_MERGED_REGION_CREATION:
699           this.region_a.getRegionFileSystem().cleanupMergedRegion(
700               this.mergedRegionInfo);
701           break;
702 
703         case PONR:
704           // We got to the point-of-no-return so we need to just abort. Return
705           // immediately. Do not clean up created merged regions.
706           return false;
707 
708         default:
709           throw new RuntimeException("Unhandled journal entry: " + je);
710       }
711     }
712     return result;
713   }
714 
/**
 * @return the info of the merged region; this field is assigned by
 *         {@link #prepare(RegionServerServices)} once its checks pass
 */
HRegionInfo getMergedRegionInfo() {
  return this.mergedRegionInfo;
}
718 
/**
 * For unit testing.
 * @return the merges dir obtained from region_a's region file system in the
 *         constructor
 */
Path getMergesDir() {
  return this.mergesdir;
}
723 
724   private static void cleanZK(final Server server, final HRegionInfo hri) {
725     try {
726       // Only delete if its in expected state; could have been hijacked.
727       if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
728           RS_ZK_REQUEST_REGION_MERGE, server.getServerName())) {
729         ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
730           RS_ZK_REGION_MERGING, server.getServerName());
731       }
732     } catch (KeeperException.NoNodeException e) {
733       LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
734     } catch (KeeperException e) {
735       server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e);
736     }
737   }
738 
739   /**
740    * Creates a new ephemeral node in the PENDING_MERGE state for the merged region.
741    * Create it ephemeral in case regionserver dies mid-merge.
742    *
743    * <p>
744    * Does not transition nodes from other states. If a node already exists for
745    * this region, a {@link NodeExistsException} will be thrown.
746    *
747    * @param zkw zk reference
748    * @param region region to be created as offline
749    * @param serverName server event originates from
750    * @throws KeeperException
751    * @throws IOException
752    */
753   public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
754       final ServerName serverName, final HRegionInfo a,
755       final HRegionInfo b) throws KeeperException, IOException {
756     LOG.debug(zkw.prefix("Creating ephemeral node for "
757       + region.getEncodedName() + " in PENDING_MERGE state"));
758     byte [] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
759     RegionTransition rt = RegionTransition.createRegionTransition(
760       RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
761     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
762     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
763       throw new IOException("Failed create of ephemeral " + node);
764     }
765   }
766 
767   /**
768    * Transitions an existing ephemeral node for the specified region which is
769    * currently in the begin state to be in the end state. Master cleans up the
770    * final MERGE znode when it reads it (or if we crash, zk will clean it up).
771    *
772    * <p>
773    * Does not transition nodes from other states. If for some reason the node
774    * could not be transitioned, the method returns -1. If the transition is
775    * successful, the version of the node after transition is returned.
776    *
777    * <p>
778    * This method can fail and return false for three different reasons:
779    * <ul>
780    * <li>Node for this region does not exist</li>
781    * <li>Node for this region is not in the begin state</li>
782    * <li>After verifying the begin state, update fails because of wrong version
783    * (this should never actually happen since an RS only does this transition
784    * following a transition to the begin state. If two RS are conflicting, one would
785    * fail the original transition to the begin state and not this transition)</li>
786    * </ul>
787    *
788    * <p>
789    * Does not set any watches.
790    *
791    * <p>
792    * This method should only be used by a RegionServer when merging two regions.
793    *
794    * @param zkw zk reference
795    * @param merged region to be transitioned to opened
796    * @param a merging region A
797    * @param b merging region B
798    * @param serverName server event originates from
799    * @param znodeVersion expected version of data before modification
800    * @param beginState the expected current state the znode should be
801    * @param endState the state to be transition to
802    * @return version of node after transition, -1 if unsuccessful transition
803    * @throws KeeperException if unexpected zookeeper exception
804    * @throws IOException
805    */
806   public static int transitionMergingNode(ZooKeeperWatcher zkw,
807       HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
808       final int znodeVersion, final EventType beginState,
809       final EventType endState) throws KeeperException, IOException {
810     byte[] payload = HRegionInfo.toDelimitedByteArray(merged, a, b);
811     return ZKAssign.transitionNode(zkw, merged, serverName,
812       beginState, endState, znodeVersion, payload);
813   }
814 
815   /**
816    * Checks if the given region has merge qualifier in hbase:meta
817    * @param services
818    * @param regionName name of specified region
819    * @return true if the given region has merge qualifier in META.(It will be
820    *         cleaned by CatalogJanitor)
821    * @throws IOException
822    */
823   boolean hasMergeQualifierInMeta(final RegionServerServices services,
824       final byte[] regionName) throws IOException {
825     if (services == null) return false;
826     // Get merge regions if it is a merged region and already has merge
827     // qualifier
828     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
829         .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName);
830     if (mergeRegions != null &&
831         (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
832       // It has merge qualifier
833       return true;
834     }
835     return false;
836   }
837 }
838