/*
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.fs;

import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Proxy;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.URI;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.io.Closeable;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * An encapsulation for the FileSystem object that hbase uses to access
 * data. This class allows the flexibility of using
 * separate filesystem objects for reading and writing hfiles and hlogs.
 * In the future, if we want to make hlogs live in a different filesystem,
 * this is the place to make it happen.
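 *
 * <p>A minimal usage sketch (it assumes an HBase configuration created via
 * {@code org.apache.hadoop.hbase.HBaseConfiguration}; the variable names are
 * illustrative only):
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * // Checksum verification for hfile blocks is done inside HBase:
 * FileSystem fs = HFileSystem.get(conf);
 * }</pre>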
 */
public class HFileSystem extends FilterFileSystem {
  public static final Log LOG = LogFactory.getLog(HFileSystem.class);

  private final FileSystem noChecksumFs;   // read hfile data from storage
  private final boolean useHBaseChecksum;

  /**
   * Create a FileSystem object for HBase regionservers.
   * @param conf The configuration to be used for the filesystem
   * @param useHBaseChecksum if true, then use
   *        checksum verification in hbase, otherwise
   *        delegate checksum verification to the FileSystem.
   */
  public HFileSystem(Configuration conf, boolean useHBaseChecksum)
    throws IOException {

    // Create the default filesystem with checksum verification switched on.
    // By default, any operation to this FilterFileSystem occurs on
    // the underlying filesystem that has checksums switched on.
    this.fs = FileSystem.get(conf);
    this.useHBaseChecksum = useHBaseChecksum;

    fs.initialize(getDefaultUri(conf), conf);
    addLocationsOrderInterceptor(conf);

    // If hbase checksum verification is switched on, then create a new
    // filesystem object that has checksum verification turned off.
    // We will avoid verifying checksums in the fs client, and instead do it
    // inside of hbase.
    // If this is the local filesystem, hadoop has a bug where seeks
    // do not go to the correct location if setVerifyChecksum(false) is called.
    // This manifests itself in that incorrect data is read and HFileBlocks won't be able to read
    // their header magic numbers. See HBASE-5885.
    if (useHBaseChecksum && !(fs instanceof LocalFileSystem)) {
      conf = new Configuration(conf);
      conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);
      this.noChecksumFs = newInstanceFileSystem(conf);
      this.noChecksumFs.setVerifyChecksum(false);
    } else {
      this.noChecksumFs = fs;
    }
  }

  /**
   * Wrap a FileSystem object within an HFileSystem. The noChecksumFs and
   * the backing fs are both set to the same specified fs.
   * Do not verify hbase-checksums while reading data from the filesystem.
   * @param fs Set the noChecksumFs and the backing fs to this specified filesystem.
   */
  public HFileSystem(FileSystem fs) {
    this.fs = fs;
    this.noChecksumFs = fs;
    this.useHBaseChecksum = false;
  }

  /**
   * Returns the filesystem that is specially set up for
   * doing reads from storage. This object avoids doing
   * checksum verifications for reads.
   * @return The FileSystem object that can be used to read data
   *         from files.
   */
  public FileSystem getNoChecksumFs() {
    return noChecksumFs;
  }

  /**
   * Returns the underlying filesystem
   * @return The underlying FileSystem for this FilterFileSystem object.
   */
  public FileSystem getBackingFs() throws IOException {
    return fs;
  }

  /**
   * Are we verifying checksums in HBase?
   * @return True, if hbase is configured to verify checksums,
   *         otherwise false.
   */
  public boolean useHBaseChecksum() {
    return useHBaseChecksum;
  }

  /**
   * Close this filesystem object
   */
  @Override
  public void close() throws IOException {
    super.close();
    if (this.noChecksumFs != fs) {
      this.noChecksumFs.close();
    }
  }

  /**
   * Returns a brand new instance of the FileSystem. It does not use
   * the FileSystem.Cache. In newer versions of HDFS, we can directly
   * invoke FileSystem.newInstance(Configuration).
   *
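   * <p>On those newer versions, this helper is roughly a sketch of the direct call
   * (shown here only as an illustration):
   * <pre>{@code
   * FileSystem fs = FileSystem.newInstance(conf);  // bypasses the FileSystem cache
   * }</pre>
   *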
   * @param conf Configuration
   * @return A new instance of the filesystem
   */
  private static FileSystem newInstanceFileSystem(Configuration conf)
    throws IOException {
    URI uri = FileSystem.getDefaultUri(conf);
    FileSystem fs = null;
    Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
    if (clazz != null) {
      // This will be true for Hadoop 1.0, or 0.20.
      fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
      fs.initialize(uri, conf);
    } else {
      // For Hadoop 2.0, we have to go through FileSystem for the filesystem
      // implementation to be loaded by the service loader in case it has not
      // been loaded yet.
      Configuration clone = new Configuration(conf);
      clone.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true);
      fs = FileSystem.get(uri, clone);
    }
    if (fs == null) {
      throw new IOException("No FileSystem for scheme: " + uri.getScheme());
    }

    return fs;
  }

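  /**
   * Adds the default block reordering interceptor, {@link ReorderWALBlocks}, to the
   * DFSClient linked to the FileSystem obtained from this configuration. See
   * {@link #addLocationsOrderInterceptor(Configuration, ReorderBlocks)}.
   */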
  public static boolean addLocationsOrderInterceptor(Configuration conf) throws IOException {
    return addLocationsOrderInterceptor(conf, new ReorderWALBlocks());
  }

  /**
   * Add an interceptor on the calls to namenode#getBlockLocations made by the DFSClient
   * linked to this FileSystem. See HBASE-6435 for the background.
   * <p/>
   * There should be no reason, except testing, to create a specific ReorderBlocks.
   *
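   * <p>Reordering is active by default. As a sketch, it can be switched off through the
   * configuration key checked below:
   * <pre>{@code
   * conf.setBoolean("hbase.filesystem.reorder.blocks", false);
   * }</pre>
   *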
   * @return true if the interceptor was added, false otherwise.
   */
  static boolean addLocationsOrderInterceptor(Configuration conf, final ReorderBlocks lrb) {
    if (!conf.getBoolean("hbase.filesystem.reorder.blocks", true)) {  // activated by default
      LOG.debug("addLocationsOrderInterceptor configured to false");
      return false;
    }

    FileSystem fs;
    try {
      fs = FileSystem.get(conf);
    } catch (IOException e) {
      LOG.warn("Can't get the file system from the conf.", e);
      return false;
    }

    if (!(fs instanceof DistributedFileSystem)) {
      LOG.debug("The file system is not a DistributedFileSystem. " +
          "Skipping on block location reordering");
      return false;
    }

    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    DFSClient dfsc = dfs.getClient();
    if (dfsc == null) {
      LOG.warn("The DistributedFileSystem does not contain a DFSClient. Can't add the location " +
          "block reordering interceptor. Continuing, but this is unexpected."
      );
      return false;
    }

    try {
      Field nf = DFSClient.class.getDeclaredField("namenode");
      nf.setAccessible(true);
      Field modifiersField = Field.class.getDeclaredField("modifiers");
      modifiersField.setAccessible(true);
      modifiersField.setInt(nf, nf.getModifiers() & ~Modifier.FINAL);

      ClientProtocol namenode = (ClientProtocol) nf.get(dfsc);
      if (namenode == null) {
        LOG.warn("The DFSClient is not linked to a namenode. Can't add the location block" +
            " reordering interceptor. Continuing, but this is unexpected."
        );
        return false;
      }

      ClientProtocol cp1 = createReorderingProxy(namenode, lrb, conf);
      nf.set(dfsc, cp1);
      LOG.info("Added intercepting call to namenode#getBlockLocations so can do block reordering" +
        " using class " + lrb.getClass());
    } catch (NoSuchFieldException e) {
      LOG.warn("Can't modify the DFSClient#namenode field to add the location reorder.", e);
      return false;
    } catch (IllegalAccessException e) {
      LOG.warn("Can't modify the DFSClient#namenode field to add the location reorder.", e);
      return false;
    }

    return true;
  }

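  /**
   * Wraps the given namenode {@link ClientProtocol} in a dynamic proxy that, after a
   * successful getBlockLocations call, hands the returned {@link LocatedBlocks} to the
   * supplied {@link ReorderBlocks} before returning the result to the caller.
   */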
  private static ClientProtocol createReorderingProxy(final ClientProtocol cp,
      final ReorderBlocks lrb, final Configuration conf) {
    return (ClientProtocol) Proxy.newProxyInstance
        (cp.getClass().getClassLoader(),
            new Class[]{ClientProtocol.class, Closeable.class},
            new InvocationHandler() {
              public Object invoke(Object proxy, Method method,
                                   Object[] args) throws Throwable {
                try {
                  Object res = method.invoke(cp, args);
                  if (res != null && args != null && args.length == 3
                      && "getBlockLocations".equals(method.getName())
                      && res instanceof LocatedBlocks
                      && args[0] instanceof String
                      && args[0] != null) {
                    lrb.reorderBlocks(conf, (LocatedBlocks) res, (String) args[0]);
                  }
                  return res;
                } catch (InvocationTargetException ite) {
                  // We will get this for all exceptions, checked or not, thrown
                  //  by any layer, including the functional ones.
                  Throwable cause = ite.getCause();
                  if (cause == null) {
                    throw new RuntimeException(
                      "Proxy invocation failed and getCause is null", ite);
                  }
                  if (cause instanceof UndeclaredThrowableException) {
                    Throwable causeCause = cause.getCause();
                    if (causeCause == null) {
                      throw new RuntimeException("UndeclaredThrowableException had null cause!");
                    }
                    cause = causeCause;
                  }
                  throw cause;
                }
              }
            });
  }

  /**
   * Interface to implement to add specific reordering logic in HDFS.
   */
  interface ReorderBlocks {
    /**
     * Reorder the blocks of the given file.
     * @param conf - the conf to use
     * @param lbs - the LocatedBlocks to reorder
     * @param src - the file name currently read
     * @throws IOException - if something went wrong
     */
    void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src) throws IOException;
  }

  /**
   * We're putting at lowest priority the hlog file blocks that are on the same datanode
   * as the original regionserver which created these files. This is because we fear that the
   * datanode is actually dead, so if we use it, the read will time out.
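   * <p>
   * For example (with hypothetical hostnames): if a block is located on
   * {@code [rs1, dn2, dn3]} and the HLog was written by the regionserver running on
   * {@code rs1}, the locations are reordered to {@code [dn2, dn3, rs1]}.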
   */
  static class ReorderWALBlocks implements ReorderBlocks {
    public void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src)
        throws IOException {

      ServerName sn = HLogUtil.getServerNameFromHLogDirectoryName(conf, src);
      if (sn == null) {
        // It's not an HLog
        return;
      }

      // Ok, so it's an HLog
      String hostName = sn.getHostname();
      if (LOG.isTraceEnabled()) {
        LOG.trace(src +
            " is an HLog file, so reordering blocks, last hostname will be: " + hostName);
      }

      // Just check all the blocks
      for (LocatedBlock lb : lbs.getLocatedBlocks()) {
        DatanodeInfo[] dnis = lb.getLocations();
        if (dnis != null && dnis.length > 1) {
          boolean found = false;
          for (int i = 0; i < dnis.length - 1 && !found; i++) {
            if (hostName.equals(dnis[i].getHostName())) {
              // advance the other locations by one and put this one at the last place.
              DatanodeInfo toLast = dnis[i];
              System.arraycopy(dnis, i + 1, dnis, i, dnis.length - i - 1);
              dnis[dnis.length - 1] = toLast;
              found = true;
            }
          }
        }
      }
    }
  }

  /**
   * Create a new HFileSystem object, similar to FileSystem.get().
   * This returns a filesystem object that avoids checksum
   * verification in the filesystem for hfile block reads.
   * For these blocks, checksum verification is done by HBase.
   */
  public static FileSystem get(Configuration conf) throws IOException {
    return new HFileSystem(conf, true);
  }

  /**
   * Wrap a LocalFileSystem within an HFileSystem.
   */
  public static FileSystem getLocalFs(Configuration conf) throws IOException {
    return new HFileSystem(FileSystem.getLocal(conf));
  }

  /**
   * The org.apache.hadoop.fs.FilterFileSystem does not yet support
   * createNonRecursive. This is a Hadoop bug, and when it is fixed in Hadoop,
   * this definition will go away.
   */
  public FSDataOutputStream createNonRecursive(Path f,
      boolean overwrite,
      int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    return fs.createNonRecursive(f, overwrite, bufferSize, replication,
                                 blockSize, progress);
  }
}