1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.util.Arrays;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.LinkedList;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Set;
31  import java.util.TreeMap;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.classification.InterfaceAudience;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.backup.HFileArchiver;
44  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
45  import org.apache.hadoop.hbase.io.HFileLink;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
47  import org.apache.hadoop.hbase.regionserver.HRegion;
48  import org.apache.hadoop.hbase.regionserver.StoreFile;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.FSVisitor;
52  import org.apache.hadoop.hbase.util.ModifyRegionUtils;
53  import org.apache.hadoop.io.IOUtils;
54  
55  /**
56   * Helper to Restore/Clone a Snapshot
57   *
58   * <p>The helper assumes that the target table has already been created; by calling
59   * restoreHdfsRegions() the content present in the snapshot is restored as the new content of the table.
60   *
61   * <p>Clone from Snapshot: If the target table is empty, the restore operation
62   * is just a "clone operation", which consists of the following steps:
63   * <ul>
64   *  <li>for each region in the snapshot, create a new region
65   *    (note that the region will have a different name, since the encoded region name contains the table name)
66   *  <li>for each file in the region, create a new HFileLink pointing to the original file.
67   *  <li>restore the logs, if any
68   * </ul>
69   *
70   * <p>Restore from Snapshot:
71   * <ul>
72   *  <li>for each region in the table, check whether it is present in the snapshot
73   *    <ul>
74   *    <li>if the region is not present in the snapshot, remove it.
75   *    <li>if the region is present in the snapshot
76   *      <ul>
77   *      <li>for each file in the table region, check whether it is present in the snapshot
78   *        <ul>
79   *          <li>if the hfile is not present in the snapshot, remove it
80   *          <li>if the hfile is present, keep it (nothing to do)
81   *        </ul>
82   *      <li>for each file in the snapshot region but not in the table
83   *        <ul>
84   *          <li>create a new HFileLink that points to the original file
85   *        </ul>
86   *      </ul>
87   *    </ul>
88   *  <li>for each region in the snapshot not present in the current table state
89   *    <ul>
90   *    <li>create a new region and for each file in the region create a new HFileLink
91   *      (This is the same as the clone operation)
92   *    </ul>
93   *  <li>restore the logs, if any
94   * </ul>
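     *
     * <p>A minimal usage sketch (conf, fs, snapshotDesc, snapshotDir, tableDesc, tableDir
     * and monitor are assumed to be already available to the caller; error handling and
     * the META updates described above are omitted):
     * <pre>
     *   RestoreSnapshotHelper helper = new RestoreSnapshotHelper(conf, fs,
     *       snapshotDesc, snapshotDir, tableDesc, tableDir, monitor);
     *   RestoreMetaChanges changes = helper.restoreHdfsRegions();
     *   // 'changes' describes the regions added/removed/restored on disk; the caller
     *   // must then apply the corresponding META updates (see RestoreMetaChanges).
     * </pre>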
95   */
96  @InterfaceAudience.Private
97  public class RestoreSnapshotHelper {
98    private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class);
99  
100   private final Map<byte[], byte[]> regionsMap =
101         new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
102 
103   private final ForeignExceptionDispatcher monitor;
104 
105   private final SnapshotDescription snapshotDesc;
106   private final Path snapshotDir;
107 
108   private final HTableDescriptor tableDesc;
109   private final Path tableDir;
110 
111   private final Configuration conf;
112   private final FileSystem fs;
113 
114   public RestoreSnapshotHelper(final Configuration conf, final FileSystem fs,
115       final SnapshotDescription snapshotDescription, final Path snapshotDir,
116       final HTableDescriptor tableDescriptor, final Path tableDir,
117       final ForeignExceptionDispatcher monitor)
118   {
119     this.fs = fs;
120     this.conf = conf;
121     this.snapshotDesc = snapshotDescription;
122     this.snapshotDir = snapshotDir;
123     this.tableDesc = tableDescriptor;
124     this.tableDir = tableDir;
125     this.monitor = monitor;
126   }
127 
128   /**
129    * Restore the on-disk table to a specified snapshot state.
130    * @return the {@link RestoreMetaChanges} describing the regions touched by the restore operation
131    */
132   public RestoreMetaChanges restoreHdfsRegions() throws IOException {
133     LOG.debug("starting restore");
134     Set<String> snapshotRegionNames = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
135     if (snapshotRegionNames == null) {
136       LOG.warn("Nothing to restore. Snapshot " + snapshotDesc + " looks empty");
137       return null;
138     }
139 
140     RestoreMetaChanges metaChanges = new RestoreMetaChanges();
141 
142     // Identify which regions are still available and which are not.
143     // NOTE: we rely upon the region name as: "table name, start key, end key"
144     List<HRegionInfo> tableRegions = getTableRegions();
145     if (tableRegions != null) {
146       monitor.rethrowException();
147       for (HRegionInfo regionInfo: tableRegions) {
148         String regionName = regionInfo.getEncodedName();
149         if (snapshotRegionNames.contains(regionName)) {
150           LOG.info("region to restore: " + regionName);
151           snapshotRegionNames.remove(regionName);
152           metaChanges.addRegionToRestore(regionInfo);
153         } else {
154           LOG.info("region to remove: " + regionName);
155           metaChanges.addRegionToRemove(regionInfo);
156         }
157       }
158 
159       // Restore regions using the snapshot data
160       monitor.rethrowException();
161       restoreHdfsRegions(metaChanges.getRegionsToRestore());
162 
163       // Remove regions from the current table
164       monitor.rethrowException();
165       removeHdfsRegions(metaChanges.getRegionsToRemove());
166     }
167 
168     // Regions to Add: present in the snapshot but not in the current table
169     if (snapshotRegionNames.size() > 0) {
170       List<HRegionInfo> regionsToAdd = new LinkedList<HRegionInfo>();
171 
172       monitor.rethrowException();
173       for (String regionName: snapshotRegionNames) {
174         LOG.info("region to add: " + regionName);
175         Path regionDir = new Path(snapshotDir, regionName);
176         regionsToAdd.add(HRegion.loadDotRegionInfoFileContent(fs, regionDir));
177       }
178 
179       // Create new regions cloning from the snapshot
180       monitor.rethrowException();
181       HRegionInfo[] clonedRegions = cloneHdfsRegions(regionsToAdd);
182       metaChanges.setNewRegions(clonedRegions);
183     }
184 
185     // Restore WALs
186     monitor.rethrowException();
187     restoreWALs();
188 
189     return metaChanges;
190   }
191 
192   /**
193    * Describe the set of operations needed to update META after restore.
194    */
195   public static class RestoreMetaChanges {
196     private List<HRegionInfo> regionsToRestore = null;
197     private List<HRegionInfo> regionsToRemove = null;
198     private List<HRegionInfo> regionsToAdd = null;
199 
200     /**
201      * @return true if there are new regions
202      */
203     public boolean hasRegionsToAdd() {
204       return this.regionsToAdd != null && this.regionsToAdd.size() > 0;
205     }
206 
207     /**
208      * Returns the list of new regions added during the on-disk restore.
209      * The caller is responsible for adding the regions to META,
210      * e.g. MetaEditor.addRegionsToMeta(...)
211      * @return the list of regions to add to META
212      */
213     public List<HRegionInfo> getRegionsToAdd() {
214       return this.regionsToAdd;
215     }
216 
217     /**
218      * @return true if there are regions to restore
219      */
220     public boolean hasRegionsToRestore() {
221       return this.regionsToRestore != null && this.regionsToRestore.size() > 0;
222     }
223 
224     /**
225      * Returns the list of regions restored during the on-disk restore.
226      * The caller is responsible for adding the regions to META if they are not already present.
227      * @return the list of regions restored
228      */
229     public List<HRegionInfo> getRegionsToRestore() {
230       return this.regionsToRestore;
231     }
232 
233     /**
234      * @return true if there are regions to remove
235      */
236     public boolean hasRegionsToRemove() {
237       return this.regionsToRemove != null && this.regionsToRemove.size() > 0;
238     }
239 
240     /**
241      * Returns the list of regions removed during the on-disk restore.
242      * The caller is responsible for removing the regions from META,
243      * e.g. MetaEditor.deleteRegions(...)
244      * @return the list of regions to remove from META
245      */
246     public List<HRegionInfo> getRegionsToRemove() {
247       return this.regionsToRemove;
248     }
249 
250     void setNewRegions(final HRegionInfo[] hris) {
251       if (hris != null) {
252         regionsToAdd = Arrays.asList(hris);
253       } else {
254         regionsToAdd = null;
255       }
256     }
257 
258     void addRegionToRemove(final HRegionInfo hri) {
259       if (regionsToRemove == null) {
260         regionsToRemove = new LinkedList<HRegionInfo>();
261       }
262       regionsToRemove.add(hri);
263     }
264 
265     void addRegionToRestore(final HRegionInfo hri) {
266       if (regionsToRestore == null) {
267         regionsToRestore = new LinkedList<HRegionInfo>();
268       }
269       regionsToRestore.add(hri);
270     }
271   }
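
      /*
       * Sketch of how a caller might consume a RestoreMetaChanges instance (illustrative
       * only; the MetaEditor calls are the ones suggested by the Javadoc above, and their
       * exact signatures are intentionally not spelled out here):
       *
       *   RestoreMetaChanges changes = restoreHelper.restoreHdfsRegions();
       *   if (changes.hasRegionsToRemove()) {
       *     // remove these regions from META, e.g. MetaEditor.deleteRegions(...)
       *   }
       *   if (changes.hasRegionsToAdd()) {
       *     // add these regions to META, e.g. MetaEditor.addRegionsToMeta(...)
       *   }
       *   // regions returned by getRegionsToRestore() are added to META
       *   // only if they are not already present there.
       */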
272 
273   /**
274    * Remove specified regions from the file-system, using the archiver.
275    */
276   private void removeHdfsRegions(final List<HRegionInfo> regions) throws IOException {
277     if (regions != null && regions.size() > 0) {
278       for (HRegionInfo hri: regions) {
279         HFileArchiver.archiveRegion(conf, fs, hri);
280       }
281     }
282   }
283 
284   /**
285    * Restore the specified regions, bringing their content back to the snapshot state.
286    */
287   private void restoreHdfsRegions(final List<HRegionInfo> regions) throws IOException {
288     if (regions == null || regions.size() == 0) return;
289     for (HRegionInfo hri: regions) restoreRegion(hri);
290   }
291 
292   /**
293    * Restore region by removing files not in the snapshot
294    * and adding the missing ones from the snapshot.
295    */
296   private void restoreRegion(HRegionInfo regionInfo) throws IOException {
297     Path snapshotRegionDir = new Path(snapshotDir, regionInfo.getEncodedName());
298     Map<String, List<String>> snapshotFiles =
299                 SnapshotReferenceUtil.getRegionHFileReferences(fs, snapshotRegionDir);
300     Path regionDir = new Path(tableDir, regionInfo.getEncodedName());
301     String tableName = tableDesc.getNameAsString();
302 
303     // Restore families present in the table
304     for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
305       byte[] family = Bytes.toBytes(familyDir.getName());
306       Set<String> familyFiles = getTableRegionFamilyFiles(familyDir);
307       List<String> snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName());
308       if (snapshotFamilyFiles != null) {
309         List<String> hfilesToAdd = new LinkedList<String>();
310         for (String hfileName: snapshotFamilyFiles) {
311           if (familyFiles.contains(hfileName)) {
312             // HFile already present
313             familyFiles.remove(hfileName);
314           } else {
315             // HFile missing
316             hfilesToAdd.add(hfileName);
317           }
318         }
319 
320         // Restore Missing files
321         for (String hfileName: hfilesToAdd) {
322           LOG.trace("Adding HFileLink " + hfileName +
323             " to region=" + regionInfo.getEncodedName() + " table=" + tableName);
324           restoreStoreFile(familyDir, regionInfo, hfileName);
325         }
326 
327         // Remove hfiles not present in the snapshot
328         for (String hfileName: familyFiles) {
329           Path hfile = new Path(familyDir, hfileName);
330           LOG.trace("Removing hfile=" + hfile +
331             " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
332           HFileArchiver.archiveStoreFile(fs, regionInfo, conf, tableDir, family, hfile);
333         }
334       } else {
335         // Family doesn't exist in the snapshot
336         LOG.trace("Removing family=" + Bytes.toString(family) +
337           " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
338         HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, family);
339         fs.delete(familyDir, true);
340       }
341     }
342 
343     // Add families not present in the table
344     for (Map.Entry<String, List<String>> familyEntry: snapshotFiles.entrySet()) {
345       Path familyDir = new Path(regionDir, familyEntry.getKey());
346       if (!fs.mkdirs(familyDir)) {
347         throw new IOException("Unable to create familyDir=" + familyDir);
348       }
349 
350       for (String hfileName: familyEntry.getValue()) {
351         LOG.trace("Adding HFileLink " + hfileName + " to table=" + tableName);
352         restoreStoreFile(familyDir, regionInfo, hfileName);
353       }
354     }
355   }
356 
357   /**
358    * @return The set of files in the specified family directory.
359    */
360   private Set<String> getTableRegionFamilyFiles(final Path familyDir) throws IOException {
361     Set<String> familyFiles = new HashSet<String>();
362 
363     FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir);
364     if (hfiles == null) return familyFiles;
365 
366     for (FileStatus hfileRef: hfiles) {
367       String hfileName = hfileRef.getPath().getName();
368       familyFiles.add(hfileName);
369     }
370 
371     return familyFiles;
372   }
373 
374   /**
375    * Clone the specified regions. For each region create a new region
376    * and an HFileLink for each hfile.
377    */
378   private HRegionInfo[] cloneHdfsRegions(final List<HRegionInfo> regions) throws IOException {
379     if (regions == null || regions.size() == 0) return null;
380 
381     final Map<String, HRegionInfo> snapshotRegions =
382       new HashMap<String, HRegionInfo>(regions.size());
383 
384     // clone the region info (replacing the embedded table name with the new one)
385     HRegionInfo[] clonedRegionsInfo = new HRegionInfo[regions.size()];
386     for (int i = 0; i < clonedRegionsInfo.length; ++i) {
387       // clone the region info from the snapshot region info
388       HRegionInfo snapshotRegionInfo = regions.get(i);
389       clonedRegionsInfo[i] = cloneRegionInfo(snapshotRegionInfo);
390 
391       // add the region name mapping between snapshot and cloned
392       String snapshotRegionName = snapshotRegionInfo.getEncodedName();
393       String clonedRegionName = clonedRegionsInfo[i].getEncodedName();
394       regionsMap.put(Bytes.toBytes(snapshotRegionName), Bytes.toBytes(clonedRegionName));
395       LOG.info("clone region=" + snapshotRegionName + " as " + clonedRegionName);
396 
397       // Add mapping between cloned region name and snapshot region info
398       snapshotRegions.put(clonedRegionName, snapshotRegionInfo);
399     }
400 
401     // create the regions on disk
402     ModifyRegionUtils.createRegions(conf, tableDir.getParent(),
403       tableDesc, clonedRegionsInfo, new ModifyRegionUtils.RegionFillTask() {
404         public void fillRegion(final HRegion region) throws IOException {
405           cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName()));
406         }
407       });
408 
409     return clonedRegionsInfo;
410   }
411 
412   /**
413    * Clone region directory content from the snapshot info.
414    *
415    * Each region is encoded with the table name, so the cloned region will have
416    * a different region name.
417    *
418    * Instead of copying the hfiles, an HFileLink is created.
419    *
420    * @param region the cloned {@link HRegion}
421    * @param snapshotRegionInfo the {@link HRegionInfo} of the snapshot region to clone from
422    */
423   private void cloneRegion(final HRegion region, final HRegionInfo snapshotRegionInfo)
424       throws IOException {
425     final Path snapshotRegionDir = new Path(snapshotDir, snapshotRegionInfo.getEncodedName());
426     final Path regionDir = new Path(tableDir, region.getRegionInfo().getEncodedName());
427     final String tableName = tableDesc.getNameAsString();
428     SnapshotReferenceUtil.visitRegionStoreFiles(fs, snapshotRegionDir,
429       new FSVisitor.StoreFileVisitor() {
430         public void storeFile (final String region, final String family, final String hfile)
431             throws IOException {
432           LOG.info("Adding HFileLink " + hfile + " to table=" + tableName);
433           Path familyDir = new Path(regionDir, family);
434           restoreStoreFile(familyDir, snapshotRegionInfo, hfile);
435         }
436     });
437   }
438 
439   /**
440    * Create a new {@link HFileLink} to reference the store file.
441    * <p>The store file in the snapshot can be a simple hfile, an HFileLink or a reference.
442    * <ul>
443    *   <li>hfile: abc -> table=region-abc
444    *   <li>reference: abc.1234 -> table=region-abc.1234
445    *   <li>hfilelink: table=region-hfile -> table=region-hfile
446    * </ul>
447    * @param familyDir destination directory for the store file
448    * @param regionInfo destination region info for the table
449    * @param hfileName store file name (can be a Reference, HFileLink or simple HFile)
450    */
451   private void restoreStoreFile(final Path familyDir, final HRegionInfo regionInfo,
452       final String hfileName) throws IOException {
453     if (HFileLink.isHFileLink(hfileName)) {
454       HFileLink.createFromHFileLink(conf, fs, familyDir, hfileName);
455     } else if (StoreFile.isReference(hfileName)) {
456       restoreReferenceFile(familyDir, regionInfo, hfileName);
457     } else {
458       HFileLink.create(conf, fs, familyDir, regionInfo, hfileName);
459     }
460   }
461 
462   /**
463    * Create a new {@link Reference} as a copy of the source one.
464    * <p><blockquote><pre>
465    * The source table looks like:
466    *    1234/abc      (original file)
467    *    5678/abc.1234 (reference file)
468    *
469    * After the clone operation looks like:
470    *   wxyz/table=1234-abc
471    *   stuv/table=1234-abc.wxyz
472    *
473    * NOTE that the region name in the clone changes (md5 of regioninfo)
474    * and the reference should reflect that change.
475    * </pre></blockquote>
476    * @param familyDir destination directory for the store file
477    * @param regionInfo destination region info for the table
478    * @param hfileName reference file name
479    */
480   private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo,
481       final String hfileName) throws IOException {
482     // Extract the referred information (hfile name and parent region)
483     String tableName = snapshotDesc.getTable();
484     Path refPath = StoreFile.getReferredToFile(new Path(new Path(new Path(tableName,
485         regionInfo.getEncodedName()), familyDir.getName()), hfileName));
486     String snapshotRegionName = refPath.getParent().getParent().getName();
487     String fileName = refPath.getName();
488 
489     // The new reference should have the cloned region name as parent, if it is a clone.
490     String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName)));
491     if (clonedRegionName == null) clonedRegionName = snapshotRegionName;
492 
493     // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName
494     String refLink = fileName;
495     if (!HFileLink.isHFileLink(fileName)) {
496       refLink = HFileLink.createHFileLinkName(tableName, snapshotRegionName, fileName);
497     }
498     Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName);
499 
500     // Create the new reference
501     Path linkPath = new Path(familyDir,
502       HFileLink.createHFileLinkName(tableName, regionInfo.getEncodedName(), hfileName));
503     InputStream in = new HFileLink(conf, linkPath).open(fs);
504     OutputStream out = fs.create(outPath);
505     IOUtils.copyBytes(in, out, conf);
506   }
507 
508   /**
509    * Create a new {@link HRegionInfo} from the snapshot region info.
510    * Keep the same startKey, endKey, regionId and split information but change
511    * the table name.
512    *
513    * @param snapshotRegionInfo Info for region to clone.
514    * @return the new HRegionInfo instance
515    */
516   public HRegionInfo cloneRegionInfo(final HRegionInfo snapshotRegionInfo) {
517     return new HRegionInfo(tableDesc.getName(),
518                       snapshotRegionInfo.getStartKey(), snapshotRegionInfo.getEndKey(),
519                       snapshotRegionInfo.isSplit(), snapshotRegionInfo.getRegionId());
520   }
521 
522   /**
523    * Restore snapshot WALs.
524    *
525    * A global snapshot keeps a reference to the region server logs present during the snapshot.
526    * (/hbase/.snapshot/snapshotName/.logs/hostName/logName)
527    *
528    * Since each log contains data from different tables, the logs must be split to
529    * extract the data of the table that we are interested in.
530    */
531   private void restoreWALs() throws IOException {
532     final SnapshotLogSplitter logSplitter = new SnapshotLogSplitter(conf, fs, tableDir,
533                                 Bytes.toBytes(snapshotDesc.getTable()), regionsMap);
534     try {
535       // Recovered edits
536       SnapshotReferenceUtil.visitRecoveredEdits(fs, snapshotDir,
537           new FSVisitor.RecoveredEditsVisitor() {
538         public void recoveredEdits (final String region, final String logfile) throws IOException {
539           Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile);
540           logSplitter.splitRecoveredEdit(path);
541         }
542       });
543 
544       // Region Server Logs
545       SnapshotReferenceUtil.visitLogFiles(fs, snapshotDir, new FSVisitor.LogFileVisitor() {
546         public void logFile (final String server, final String logfile) throws IOException {
547           logSplitter.splitLog(server, logfile);
548         }
549       });
550     } finally {
551       logSplitter.close();
552     }
553   }
554 
555   /**
556    * @return the list of regions contained in the table
557    */
558   private List<HRegionInfo> getTableRegions() throws IOException {
559     LOG.debug("get table regions: " + tableDir);
560     FileStatus[] regionDirs = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs));
561     if (regionDirs == null) return null;
562 
563     List<HRegionInfo> regions = new LinkedList<HRegionInfo>();
564     for (FileStatus regionDir: regionDirs) {
565       HRegionInfo hri = HRegion.loadDotRegionInfoFileContent(fs, regionDir.getPath());
566       regions.add(hri);
567     }
568     LOG.debug("found " + regions.size() + " regions for table=" + tableDesc.getNameAsString());
569     return regions;
570   }
571 
572   /**
573    * Create a new table descriptor cloning the snapshot table schema.
574    *
575    * @param snapshotTableDescriptor the descriptor of the table in the snapshot
576    * @param tableName the name of the new table to create
577    * @return cloned table descriptor
578    * @throws IOException
579    */
580   public static HTableDescriptor cloneTableSchema(final HTableDescriptor snapshotTableDescriptor,
581       final byte[] tableName) throws IOException {
582     HTableDescriptor htd = new HTableDescriptor(tableName);
583     for (HColumnDescriptor hcd: snapshotTableDescriptor.getColumnFamilies()) {
584       htd.addFamily(hcd);
585     }
586     return htd;
587   }
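
      /*
       * Usage sketch for cloneTableSchema() ("clonedTableName" is an illustrative name):
       *
       *   HTableDescriptor clonedHtd = RestoreSnapshotHelper.cloneTableSchema(
       *       snapshotTableDescriptor, Bytes.toBytes("clonedTableName"));
       *   // clonedHtd carries the new table name but keeps every column family
       *   // (HColumnDescriptor) of the snapshot table.
       */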
588 }