View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.IOException;
21  import java.util.List;
22  import java.util.Set;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.fs.FSDataInputStream;
27  import org.apache.hadoop.fs.FileStatus;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.fs.PathFilter;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.ServerName;
34  import org.apache.hadoop.hbase.catalog.MetaReader;
35  import org.apache.hadoop.hbase.master.MasterServices;
36  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
37  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
38  import org.apache.hadoop.hbase.regionserver.HRegion;
39  import org.apache.hadoop.hbase.regionserver.StoreFile;
40  import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
41  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
42  import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.hbase.util.FSTableDescriptors;
45  import org.apache.hadoop.hbase.util.FSUtils;
46  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
47  
48  /**
49   * General snapshot verification on the master.
50   * <p>
51   * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't
52   * attempt to verify that the files are exact copies (that would be paramount to taking the
53   * snapshot again!), but instead just attempts to ensure that the files match the expected
54   * files and are the same length.
55   * <p>
56   * Taking an online snapshots can race against other operations and this is an last line of
57   * defense.  For example, if meta changes between when snapshots are taken not all regions of a
58   * table may be present.  This can be caused by a region split (daughters present on this scan,
59   * but snapshot took parent), or move (snapshots only checks lists of region servers, a move could
60   * have caused a region to be skipped or done twice).
61   * <p>
62   * Current snapshot files checked:
63   * <ol>
64   * <li>SnapshotDescription is readable</li>
65   * <li>Table info is readable</li>
66   * <li>Regions</li>
67   * <ul>
68   * <li>Matching regions in the snapshot as currently in the table</li>
69   * <li>{@link HRegionInfo} matches the current and stored regions</li>
70   * <li>All referenced hfiles have valid names</li>
71   * <li>All the hfiles are present (either in .archive directory in the region)</li>
72   * <li>All recovered.edits files are present (by name) and have the correct file size</li>
73   * </ul>
74   * </ol>
75   */
76  @InterfaceAudience.Private
77  @InterfaceStability.Unstable
78  public final class MasterSnapshotVerifier {
79  
80    private SnapshotDescription snapshot;
81    private FileSystem fs;
82    private Path rootDir;
83    private String tableName;
84    private MasterServices services;
85  
86    /**
87     * @param services services for the master
88     * @param snapshot snapshot to check
89     * @param rootDir root directory of the hbase installation.
90     */
91    public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, Path rootDir) {
92      this.fs = services.getMasterFileSystem().getFileSystem();
93      this.services = services;
94      this.snapshot = snapshot;
95      this.rootDir = rootDir;
96      this.tableName = snapshot.getTable();
97    }
98  
99    /**
100    * Verify that the snapshot in the directory is a valid snapshot
101    * @param snapshotDir snapshot directory to check
102    * @param snapshotServers {@link ServerName} of the servers that are involved in the snapshot
103    * @throws CorruptedSnapshotException if the snapshot is invalid
104    * @throws IOException if there is an unexpected connection issue to the filesystem
105    */
106   public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers)
107       throws CorruptedSnapshotException, IOException {
108     // verify snapshot info matches
109     verifySnapshotDescription(snapshotDir);
110 
111     // check that tableinfo is a valid table description
112     verifyTableInfo(snapshotDir);
113 
114     // check that each region is valid
115     verifyRegions(snapshotDir);
116   }
117 
118   /**
119    * Check that the snapshot description written in the filesystem matches the current snapshot
120    * @param snapshotDir snapshot directory to check
121    */
122   private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException {
123     SnapshotDescription found = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
124     if (!this.snapshot.equals(found)) {
125       throw new CorruptedSnapshotException("Snapshot read (" + found
126           + ") doesn't equal snapshot we ran (" + snapshot + ").", snapshot);
127     }
128   }
129 
130   /**
131    * Check that the table descriptor for the snapshot is a valid table descriptor
132    * @param snapshotDir snapshot directory to check
133    */
134   private void verifyTableInfo(Path snapshotDir) throws IOException {
135     FSTableDescriptors.getTableDescriptor(fs, snapshotDir);
136   }
137 
138   /**
139    * Check that all the regions in the snapshot are valid, and accounted for.
140    * @param snapshotDir snapshot directory to check
141    * @throws IOException if we can't reach .META. or read the files from the FS
142    */
143   private void verifyRegions(Path snapshotDir) throws IOException {
144     List<HRegionInfo> regions = MetaReader.getTableRegions(this.services.getCatalogTracker(),
145       Bytes.toBytes(tableName));
146     for (HRegionInfo region : regions) {
147       // if offline split parent, skip it
148       if (region.isOffline() && (region.isSplit() || region.isSplitParent())) {
149         continue;
150       }
151 
152       verifyRegion(fs, snapshotDir, region);
153     }
154   }
155 
156   /**
157    * Verify that the region (regioninfo, hfiles) are valid
158    * @param fs the FileSystem instance
159    * @param snapshotDir snapshot directory to check
160    * @param region the region to check
161    */
162   private void verifyRegion(FileSystem fs, Path snapshotDir, HRegionInfo region) throws IOException {
163     // make sure we have region in the snapshot
164     Path regionDir = new Path(snapshotDir, region.getEncodedName());
165     if (!fs.exists(regionDir)) {
166       // could happen due to a move or split race.
167       throw new CorruptedSnapshotException("No region directory found for region:" + region,
168           snapshot);
169     }
170     // make sure we have the region info in the snapshot
171     Path regionInfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
172     // make sure the file exists
173     if (!fs.exists(regionInfo)) {
174       throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot);
175     }
176     FSDataInputStream in = fs.open(regionInfo);
177     HRegionInfo found = new HRegionInfo();
178     try {
179       found.readFields(in);
180       if (!region.equals(found)) {
181         throw new CorruptedSnapshotException("Found region info (" + found
182            + ") doesn't match expected region:" + region, snapshot);
183       }
184     } finally {
185       in.close();
186     }
187 
188     // make sure we have the expected recovered edits files
189     TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot);
190 
191     // check for the existance of each hfile
192     PathFilter familiesDirs = new FSUtils.FamilyDirFilter(fs);
193     FileStatus[] columnFamilies = FSUtils.listStatus(fs, regionDir, familiesDirs);
194     // should we do some checking here to make sure the cfs are correct?
195     if (columnFamilies == null) return;
196 
197     // setup the suffixes for the snapshot directories
198     Path tableNameSuffix = new Path(tableName);
199     Path regionNameSuffix = new Path(tableNameSuffix, region.getEncodedName());
200 
201     // get the potential real paths
202     Path archivedRegion = new Path(HFileArchiveUtil.getArchivePath(services.getConfiguration()),
203         regionNameSuffix);
204     Path realRegion = new Path(rootDir, regionNameSuffix);
205 
206     // loop through each cf and check we can find each of the hfiles
207     for (FileStatus cf : columnFamilies) {
208       FileStatus[] hfiles = FSUtils.listStatus(fs, cf.getPath(), null);
209       // should we check if there should be hfiles?
210       if (hfiles == null || hfiles.length == 0) continue;
211 
212       Path realCfDir = new Path(realRegion, cf.getPath().getName());
213       Path archivedCfDir = new Path(archivedRegion, cf.getPath().getName());
214       for (FileStatus hfile : hfiles) {
215         // make sure the name is correct
216         if (!StoreFile.validateStoreFileName(hfile.getPath().getName())) {
217           throw new CorruptedSnapshotException("HFile: " + hfile.getPath()
218               + " is not a valid hfile name.", snapshot);
219         }
220 
221         // check to see if hfile is present in the real table
222         String fileName = hfile.getPath().getName();
223         Path file = new Path(realCfDir, fileName);
224         Path archived = new Path(archivedCfDir, fileName);
225         if (!fs.exists(file) && !fs.exists(archived)) {
226           throw new CorruptedSnapshotException("Can't find hfile: " + hfile.getPath()
227               + " in the real (" + realCfDir + ") or archive (" + archivedCfDir
228               + ") directory for the primary table.", snapshot);
229         }
230       }
231     }
232   }
233 
234   /**
235    * Check that the logs stored in the log directory for the snapshot are valid - it contains all
236    * the expected logs for all servers involved in the snapshot.
237    * @param snapshotDir snapshot directory to check
238    * @param snapshotServers list of the names of servers involved in the snapshot.
239    * @throws CorruptedSnapshotException if the hlogs in the snapshot are not correct
240    * @throws IOException if we can't reach the filesystem
241    */
242   private void verifyLogs(Path snapshotDir, Set<String> snapshotServers)
243       throws CorruptedSnapshotException, IOException {
244     Path snapshotLogDir = new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME);
245     Path logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
246     TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshot,
247       snapshotLogDir);
248   }
249 }