View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import java.io.InputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.util.Arrays;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.LinkedList;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Set;
31  import java.util.TreeMap;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.classification.InterfaceAudience;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HColumnDescriptor;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.backup.HFileArchiver;
44  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
45  import org.apache.hadoop.hbase.io.HFileLink;
46  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
47  import org.apache.hadoop.hbase.regionserver.HRegion;
48  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
49  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.FSUtils;
52  import org.apache.hadoop.hbase.util.FSVisitor;
53  import org.apache.hadoop.hbase.util.ModifyRegionUtils;
54  import org.apache.hadoop.io.IOUtils;
55  
56  /**
57   * Helper to Restore/Clone a Snapshot
58   *
59   * <p>The helper assumes that a table is already created, and by calling restore()
60   * the content present in the snapshot will be restored as the new content of the table.
61   *
62   * <p>Clone from Snapshot: If the target table is empty, the restore operation
63   * is just a "clone operation", where the only operations are:
64   * <ul>
65   *  <li>for each region in the snapshot create a new region
66   *    (note that the region will have a different name, since the encoding contains the table name)
67   *  <li>for each file in the region create a new HFileLink to point to the original file.
68   *  <li>restore the logs, if any
69   * </ul>
70   *
71   * <p>Restore from Snapshot:
72   * <ul>
73   *  <li>for each region in the table verify which are available in the snapshot and which are not
74   *    <ul>
75   *    <li>if the region is not present in the snapshot, remove it.
76   *    <li>if the region is present in the snapshot
77   *      <ul>
78   *      <li>for each file in the table region verify which are available in the snapshot
79   *        <ul>
80   *          <li>if the hfile is not present in the snapshot, remove it
81   *          <li>if the hfile is present, keep it (nothing to do)
82   *        </ul>
83   *      <li>for each file in the snapshot region but not in the table
84   *        <ul>
85   *          <li>create a new HFileLink that points to the original file
86   *        </ul>
87   *      </ul>
88   *    </ul>
89   *  <li>for each region in the snapshot not present in the current table state
90   *    <ul>
91   *    <li>create a new region and for each file in the region create a new HFileLink
92   *      (This is the same as the clone operation)
93   *    </ul>
94   *  <li>restore the logs, if any
95   * </ul>
96   */
97  @InterfaceAudience.Private
98  public class RestoreSnapshotHelper {
99    private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class);
100 
101   private final Map<byte[], byte[]> regionsMap =
102         new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
103 
104   private final ForeignExceptionDispatcher monitor;
105 
106   private final SnapshotDescription snapshotDesc;
107   private final Path snapshotDir;
108 
109   private final HTableDescriptor tableDesc;
110   private final Path tableDir;
111 
112   private final Configuration conf;
113   private final FileSystem fs;
114 
115   public RestoreSnapshotHelper(final Configuration conf, final FileSystem fs,
116       final SnapshotDescription snapshotDescription, final Path snapshotDir,
117       final HTableDescriptor tableDescriptor, final Path tableDir,
118       final ForeignExceptionDispatcher monitor)
119   {
120     this.fs = fs;
121     this.conf = conf;
122     this.snapshotDesc = snapshotDescription;
123     this.snapshotDir = snapshotDir;
124     this.tableDesc = tableDescriptor;
125     this.tableDir = tableDir;
126     this.monitor = monitor;
127   }
128 
129   /**
130    * Restore the on-disk table to a specified snapshot state.
131    * @return the set of regions touched by the restore operation
132    */
133   public RestoreMetaChanges restoreHdfsRegions() throws IOException {
134     LOG.debug("starting restore");
135     Set<String> snapshotRegionNames = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
136     if (snapshotRegionNames == null) {
137       LOG.warn("Nothing to restore. Snapshot " + snapshotDesc + " looks empty");
138       return null;
139     }
140 
141     RestoreMetaChanges metaChanges = new RestoreMetaChanges();
142 
143     // Identify which region are still available and which not.
144     // NOTE: we rely upon the region name as: "table name, start key, end key"
145     List<HRegionInfo> tableRegions = getTableRegions();
146     if (tableRegions != null) {
147       monitor.rethrowException();
148       for (HRegionInfo regionInfo: tableRegions) {
149         String regionName = regionInfo.getEncodedName();
150         if (snapshotRegionNames.contains(regionName)) {
151           LOG.info("region to restore: " + regionName);
152           snapshotRegionNames.remove(regionName);
153           metaChanges.addRegionToRestore(regionInfo);
154         } else {
155           LOG.info("region to remove: " + regionName);
156           metaChanges.addRegionToRemove(regionInfo);
157         }
158       }
159 
160       // Restore regions using the snapshot data
161       monitor.rethrowException();
162       restoreHdfsRegions(metaChanges.getRegionsToRestore());
163 
164       // Remove regions from the current table
165       monitor.rethrowException();
166       removeHdfsRegions(metaChanges.getRegionsToRemove());
167     }
168 
169     // Regions to Add: present in the snapshot but not in the current table
170     if (snapshotRegionNames.size() > 0) {
171       List<HRegionInfo> regionsToAdd = new LinkedList<HRegionInfo>();
172 
173       monitor.rethrowException();
174       for (String regionName: snapshotRegionNames) {
175         LOG.info("region to add: " + regionName);
176         Path regionDir = new Path(snapshotDir, regionName);
177         regionsToAdd.add(HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir));
178       }
179 
180       // Create new regions cloning from the snapshot
181       monitor.rethrowException();
182       HRegionInfo[] clonedRegions = cloneHdfsRegions(regionsToAdd);
183       metaChanges.setNewRegions(clonedRegions);
184     }
185 
186     // Restore WALs
187     monitor.rethrowException();
188     restoreWALs();
189 
190     return metaChanges;
191   }
192 
193   /**
194    * Describe the set of operations needed to update META after restore.
195    */
196   public static class RestoreMetaChanges {
197     private List<HRegionInfo> regionsToRestore = null;
198     private List<HRegionInfo> regionsToRemove = null;
199     private List<HRegionInfo> regionsToAdd = null;
200 
201     /**
202      * @return true if there're new regions
203      */
204     public boolean hasRegionsToAdd() {
205       return this.regionsToAdd != null && this.regionsToAdd.size() > 0;
206     }
207 
208     /**
209      * Returns the list of new regions added during the on-disk restore.
210      * The caller is responsible to add the regions to META.
211      * e.g MetaEditor.addRegionsToMeta(...)
212      * @return the list of regions to add to META
213      */
214     public List<HRegionInfo> getRegionsToAdd() {
215       return this.regionsToAdd;
216     }
217 
218     /**
219      * @return true if there're regions to restore
220      */
221     public boolean hasRegionsToRestore() {
222       return this.regionsToRestore != null && this.regionsToRestore.size() > 0;
223     }
224 
225     /**
226      * Returns the list of 'restored regions' during the on-disk restore.
227      * The caller is responsible to add the regions to META if not present.
228      * @return the list of regions restored
229      */
230     public List<HRegionInfo> getRegionsToRestore() {
231       return this.regionsToRestore;
232     }
233 
234     /**
235      * @return true if there're regions to remove
236      */
237     public boolean hasRegionsToRemove() {
238       return this.regionsToRemove != null && this.regionsToRemove.size() > 0;
239     }
240 
241     /**
242      * Returns the list of regions removed during the on-disk restore.
243      * The caller is responsible to remove the regions from META.
244      * e.g. MetaEditor.deleteRegions(...)
245      * @return the list of regions to remove from META
246      */
247     public List<HRegionInfo> getRegionsToRemove() {
248       return this.regionsToRemove;
249     }
250 
251     void setNewRegions(final HRegionInfo[] hris) {
252       if (hris != null) {
253         regionsToAdd = Arrays.asList(hris);
254       } else {
255         regionsToAdd = null;
256       }
257     }
258 
259     void addRegionToRemove(final HRegionInfo hri) {
260       if (regionsToRemove == null) {
261         regionsToRemove = new LinkedList<HRegionInfo>();
262       }
263       regionsToRemove.add(hri);
264     }
265 
266     void addRegionToRestore(final HRegionInfo hri) {
267       if (regionsToRestore == null) {
268         regionsToRestore = new LinkedList<HRegionInfo>();
269       }
270       regionsToRestore.add(hri);
271     }
272   }
273 
274   /**
275    * Remove specified regions from the file-system, using the archiver.
276    */
277   private void removeHdfsRegions(final List<HRegionInfo> regions) throws IOException {
278     if (regions != null && regions.size() > 0) {
279       for (HRegionInfo hri: regions) {
280         HFileArchiver.archiveRegion(conf, fs, hri);
281       }
282     }
283   }
284 
285   /**
286    * Restore specified regions by restoring content to the snapshot state.
287    */
288   private void restoreHdfsRegions(final List<HRegionInfo> regions) throws IOException {
289     if (regions == null || regions.size() == 0) return;
290     for (HRegionInfo hri: regions) restoreRegion(hri);
291   }
292 
293   /**
294    * Restore region by removing files not in the snapshot
295    * and adding the missing ones from the snapshot.
296    */
297   private void restoreRegion(HRegionInfo regionInfo) throws IOException {
298     Path snapshotRegionDir = new Path(snapshotDir, regionInfo.getEncodedName());
299     Map<String, List<String>> snapshotFiles =
300                 SnapshotReferenceUtil.getRegionHFileReferences(fs, snapshotRegionDir);
301     Path regionDir = new Path(tableDir, regionInfo.getEncodedName());
302     String tableName = tableDesc.getNameAsString();
303 
304     // Restore families present in the table
305     for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
306       byte[] family = Bytes.toBytes(familyDir.getName());
307       Set<String> familyFiles = getTableRegionFamilyFiles(familyDir);
308       List<String> snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName());
309       if (snapshotFamilyFiles != null) {
310         List<String> hfilesToAdd = new LinkedList<String>();
311         for (String hfileName: snapshotFamilyFiles) {
312           if (familyFiles.contains(hfileName)) {
313             // HFile already present
314             familyFiles.remove(hfileName);
315           } else {
316             // HFile missing
317             hfilesToAdd.add(hfileName);
318           }
319         }
320 
321         // Restore Missing files
322         for (String hfileName: hfilesToAdd) {
323           LOG.trace("Adding HFileLink " + hfileName +
324             " to region=" + regionInfo.getEncodedName() + " table=" + tableName);
325           restoreStoreFile(familyDir, regionInfo, hfileName);
326         }
327 
328         // Remove hfiles not present in the snapshot
329         for (String hfileName: familyFiles) {
330           Path hfile = new Path(familyDir, hfileName);
331           LOG.trace("Removing hfile=" + hfile +
332             " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
333           HFileArchiver.archiveStoreFile(conf, fs, regionInfo, tableDir, family, hfile);
334         }
335       } else {
336         // Family doesn't exists in the snapshot
337         LOG.trace("Removing family=" + Bytes.toString(family) +
338           " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
339         HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, family);
340         fs.delete(familyDir, true);
341       }
342     }
343 
344     // Add families not present in the table
345     for (Map.Entry<String, List<String>> familyEntry: snapshotFiles.entrySet()) {
346       Path familyDir = new Path(regionDir, familyEntry.getKey());
347       if (!fs.mkdirs(familyDir)) {
348         throw new IOException("Unable to create familyDir=" + familyDir);
349       }
350 
351       for (String hfileName: familyEntry.getValue()) {
352         LOG.trace("Adding HFileLink " + hfileName + " to table=" + tableName);
353         restoreStoreFile(familyDir, regionInfo, hfileName);
354       }
355     }
356   }
357 
358   /**
359    * @return The set of files in the specified family directory.
360    */
361   private Set<String> getTableRegionFamilyFiles(final Path familyDir) throws IOException {
362     Set<String> familyFiles = new HashSet<String>();
363 
364     FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir);
365     if (hfiles == null) return familyFiles;
366 
367     for (FileStatus hfileRef: hfiles) {
368       String hfileName = hfileRef.getPath().getName();
369       familyFiles.add(hfileName);
370     }
371 
372     return familyFiles;
373   }
374 
375   /**
376    * Clone specified regions. For each region create a new region
377    * and create a HFileLink for each hfile.
378    */
379   private HRegionInfo[] cloneHdfsRegions(final List<HRegionInfo> regions) throws IOException {
380     if (regions == null || regions.size() == 0) return null;
381 
382     final Map<String, HRegionInfo> snapshotRegions =
383       new HashMap<String, HRegionInfo>(regions.size());
384 
385     // clone region info (change embedded tableName with the new one)
386     HRegionInfo[] clonedRegionsInfo = new HRegionInfo[regions.size()];
387     for (int i = 0; i < clonedRegionsInfo.length; ++i) {
388       // clone the region info from the snapshot region info
389       HRegionInfo snapshotRegionInfo = regions.get(i);
390       clonedRegionsInfo[i] = cloneRegionInfo(snapshotRegionInfo);
391 
392       // add the region name mapping between snapshot and cloned
393       String snapshotRegionName = snapshotRegionInfo.getEncodedName();
394       String clonedRegionName = clonedRegionsInfo[i].getEncodedName();
395       regionsMap.put(Bytes.toBytes(snapshotRegionName), Bytes.toBytes(clonedRegionName));
396       LOG.info("clone region=" + snapshotRegionName + " as " + clonedRegionName);
397 
398       // Add mapping between cloned region name and snapshot region info
399       snapshotRegions.put(clonedRegionName, snapshotRegionInfo);
400     }
401 
402     // create the regions on disk
403     ModifyRegionUtils.createRegions(conf, tableDir.getParent(),
404       tableDesc, clonedRegionsInfo, new ModifyRegionUtils.RegionFillTask() {
405         public void fillRegion(final HRegion region) throws IOException {
406           cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName()));
407         }
408       });
409 
410     return clonedRegionsInfo;
411   }
412 
413   /**
414    * Clone region directory content from the snapshot info.
415    *
416    * Each region is encoded with the table name, so the cloned region will have
417    * a different region name.
418    *
419    * Instead of copying the hfiles a HFileLink is created.
420    *
421    * @param region {@link HRegion} cloned
422    * @param snapshotRegionInfo
423    */
424   private void cloneRegion(final HRegion region, final HRegionInfo snapshotRegionInfo)
425       throws IOException {
426     final Path snapshotRegionDir = new Path(snapshotDir, snapshotRegionInfo.getEncodedName());
427     final Path regionDir = new Path(tableDir, region.getRegionInfo().getEncodedName());
428     final String tableName = tableDesc.getNameAsString();
429     SnapshotReferenceUtil.visitRegionStoreFiles(fs, snapshotRegionDir,
430       new FSVisitor.StoreFileVisitor() {
431         public void storeFile (final String region, final String family, final String hfile)
432             throws IOException {
433           LOG.info("Adding HFileLink " + hfile + " to table=" + tableName);
434           Path familyDir = new Path(regionDir, family);
435           restoreStoreFile(familyDir, snapshotRegionInfo, hfile);
436         }
437     });
438   }
439 
440   /**
441    * Create a new {@link HFileLink} to reference the store file.
442    * <p>The store file in the snapshot can be a simple hfile, an HFileLink or a reference.
443    * <ul>
444    *   <li>hfile: abc -> table=region-abc
445    *   <li>reference: abc.1234 -> table=region-abc.1234
446    *   <li>hfilelink: table=region-hfile -> table=region-hfile
447    * </ul>
448    * @param familyDir destination directory for the store file
449    * @param regionInfo destination region info for the table
450    * @param hfileName store file name (can be a Reference, HFileLink or simple HFile)
451    */
452   private void restoreStoreFile(final Path familyDir, final HRegionInfo regionInfo,
453       final String hfileName) throws IOException {
454     if (HFileLink.isHFileLink(hfileName)) {
455       HFileLink.createFromHFileLink(conf, fs, familyDir, hfileName);
456     } else if (StoreFileInfo.isReference(hfileName)) {
457       restoreReferenceFile(familyDir, regionInfo, hfileName);
458     } else {
459       HFileLink.create(conf, fs, familyDir, regionInfo, hfileName);
460     }
461   }
462 
463   /**
464    * Create a new {@link Reference} as copy of the source one.
465    * <p><blockquote><pre>
466    * The source table looks like:
467    *    1234/abc      (original file)
468    *    5678/abc.1234 (reference file)
469    *
470    * After the clone operation looks like:
471    *   wxyz/table=1234-abc
472    *   stuv/table=1234-abc.wxyz
473    *
474    * NOTE that the region name in the clone changes (md5 of regioninfo)
475    * and the reference should reflect that change.
476    * </pre></blockquote>
477    * @param familyDir destination directory for the store file
478    * @param regionInfo destination region info for the table
479    * @param hfileName reference file name
480    */
481   private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo,
482       final String hfileName) throws IOException {
483     // Extract the referred information (hfile name and parent region)
484     String tableName = snapshotDesc.getTable();
485     Path refPath = StoreFileInfo.getReferredToFile(new Path(new Path(new Path(tableName,
486         regionInfo.getEncodedName()), familyDir.getName()), hfileName));
487     String snapshotRegionName = refPath.getParent().getParent().getName();
488     String fileName = refPath.getName();
489 
490     // The new reference should have the cloned region name as parent, if it is a clone.
491     String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName)));
492     if (clonedRegionName == null) clonedRegionName = snapshotRegionName;
493 
494     // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName
495     String refLink = fileName;
496     if (!HFileLink.isHFileLink(fileName)) {
497       refLink = HFileLink.createHFileLinkName(tableName, snapshotRegionName, fileName);
498     }
499     Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName);
500 
501     // Create the new reference
502     Path linkPath = new Path(familyDir,
503       HFileLink.createHFileLinkName(tableName, regionInfo.getEncodedName(), hfileName));
504     InputStream in = new HFileLink(conf, linkPath).open(fs);
505     OutputStream out = fs.create(outPath);
506     IOUtils.copyBytes(in, out, conf);
507   }
508 
509   /**
510    * Create a new {@link HRegionInfo} from the snapshot region info.
511    * Keep the same startKey, endKey, regionId and split information but change
512    * the table name.
513    *
514    * @param snapshotRegionInfo Info for region to clone.
515    * @return the new HRegion instance
516    */
517   public HRegionInfo cloneRegionInfo(final HRegionInfo snapshotRegionInfo) {
518     return new HRegionInfo(tableDesc.getName(),
519                       snapshotRegionInfo.getStartKey(), snapshotRegionInfo.getEndKey(),
520                       snapshotRegionInfo.isSplit(), snapshotRegionInfo.getRegionId());
521   }
522 
523   /**
524    * Restore snapshot WALs.
525    *
526    * Global Snapshot keep a reference to region servers logs present during the snapshot.
527    * (/hbase/.snapshot/snapshotName/.logs/hostName/logName)
528    *
529    * Since each log contains different tables data, logs must be split to
530    * extract the table that we are interested in.
531    */
532   private void restoreWALs() throws IOException {
533     final SnapshotLogSplitter logSplitter = new SnapshotLogSplitter(conf, fs, tableDir,
534                                 Bytes.toBytes(snapshotDesc.getTable()), regionsMap);
535     try {
536       // Recover.Edits
537       SnapshotReferenceUtil.visitRecoveredEdits(fs, snapshotDir,
538           new FSVisitor.RecoveredEditsVisitor() {
539         public void recoveredEdits (final String region, final String logfile) throws IOException {
540           Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile);
541           logSplitter.splitRecoveredEdit(path);
542         }
543       });
544 
545       // Region Server Logs
546       SnapshotReferenceUtil.visitLogFiles(fs, snapshotDir, new FSVisitor.LogFileVisitor() {
547         public void logFile (final String server, final String logfile) throws IOException {
548           logSplitter.splitLog(server, logfile);
549         }
550       });
551     } finally {
552       logSplitter.close();
553     }
554   }
555 
556   /**
557    * @return the set of the regions contained in the table
558    */
559   private List<HRegionInfo> getTableRegions() throws IOException {
560     LOG.debug("get table regions: " + tableDir);
561     FileStatus[] regionDirs = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs));
562     if (regionDirs == null) return null;
563 
564     List<HRegionInfo> regions = new LinkedList<HRegionInfo>();
565     for (FileStatus regionDir: regionDirs) {
566       HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir.getPath());
567       regions.add(hri);
568     }
569     LOG.debug("found " + regions.size() + " regions for table=" + tableDesc.getNameAsString());
570     return regions;
571   }
572 
573   /**
574    * Create a new table descriptor cloning the snapshot table schema.
575    *
576    * @param snapshotTableDescriptor
577    * @param tableName
578    * @return cloned table descriptor
579    * @throws IOException
580    */
581   public static HTableDescriptor cloneTableSchema(final HTableDescriptor snapshotTableDescriptor,
582       final byte[] tableName) throws IOException {
583     HTableDescriptor htd = new HTableDescriptor(tableName);
584     for (HColumnDescriptor hcd: snapshotTableDescriptor.getColumnFamilies()) {
585       htd.addFamily(hcd);
586     }
587     return htd;
588   }
589 }