/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compression-related utilities: the supported algorithms, codec lookup, and
 * factories for buffered compression/decompression streams.
 * Copied from the TFile code contributed in HADOOP-3315.
 */
public final class Compression {
  static final Log LOG = LogFactory.getLog(Compression.class);

  /**
   * Prevents instantiation of this utility class.
   */
  private Compression() {
    super();
  }

  /**
   * An OutputStream wrapper that, on flush(), finishes the underlying
   * {@link CompressionOutputStream} so everything written so far becomes a
   * complete, independently decompressible unit, then resets the codec state
   * so writing can continue.
   */
  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      // Delegate directly; FilterOutputStream would otherwise copy
      // byte-by-byte.
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      // Finish the current compressed unit, push it downstream, then reset
      // the codec so subsequent writes start a fresh unit.
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }

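  // A minimal sketch of why finish-on-flush matters (illustrative only; codec
  // and socketOut below are placeholders, not members of this class): each
  // flush() emits a self-contained compressed unit, so a reader can decompress
  // everything written up to the last flush without seeing end-of-stream.
  //
  //   CompressionOutputStream cos = codec.createOutputStream(socketOut);
  //   OutputStream os = new FinishOnFlushCompressionStream(cos);
  //   os.write(record);
  //   os.flush();  // record is now fully decompressible on the remote side
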
  /**
   * Compression algorithms.
   */
  public enum Algorithm {
    LZO("lzo") {
      // Use the base CompressionCodec type so this class has no compile-time
      // dependency on the externally provided LZO codec classes.
      private transient CompressionCodec lzoCodec;

      @Override
      CompressionCodec getCodec() {
        if (lzoCodec == null) {
          Configuration conf = new Configuration();
          conf.setBoolean("hadoop.native.lib", true);
          try {
            Class<?> externalCodec = ClassLoader.getSystemClassLoader()
                .loadClass("com.hadoop.compression.lzo.LzoCodec");
            lzoCodec = (CompressionCodec) ReflectionUtils.newInstance(
                externalCodec, conf);
          } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
          }
        }
        return lzoCodec;
      }
    },
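
    // Note: resolving "com.hadoop.compression.lzo.LzoCodec" at runtime assumes
    // the hadoop-lzo jar (and its native library) is on the system classpath;
    // otherwise getCodec() throws a RuntimeException on first use. A sketch of
    // a defensive probe (isLzoAvailable is a hypothetical helper, not part of
    // this class):
    //
    //   static boolean isLzoAvailable() {
    //     try {
    //       ClassLoader.getSystemClassLoader()
    //           .loadClass("com.hadoop.compression.lzo.LzoCodec");
    //       return true;
    //     } catch (ClassNotFoundException e) {
    //       return false;
    //     }
    //   }
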
    GZ("gz") {
      private transient GzipCodec codec;

      @Override
      DefaultCodec getCodec() {
        if (codec == null) {
          Configuration conf = new Configuration();
          conf.setBoolean("hadoop.native.lib", true);
          codec = new GzipCodec();
          codec.setConf(conf);
        }

        return codec;
      }
    },

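    // GzipCodec (above) extends DefaultCodec, which is why getCodec() can
    // narrow its return type; callers still see it through the
    // CompressionCodec signature of the abstract method below.
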
    NONE("none") {
      @Override
      DefaultCodec getCodec() {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        // Otherwise return the stream as-is; no codec wrapping is needed.
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }

        return downStream;
      }
    };

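    // NONE is a pure pass-through: streams come back unwrapped (or merely
    // buffered), getCodec() returns null, and consequently getCompressor()
    // and getDecompressor() below return null as well.
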
    private final String compressName;
    // Data input buffer size, to absorb small reads from the application.
    private static final int DATA_IBUF_SIZE = 1 * 1024;
    // Data output buffer size, to absorb small writes from the application.
    private static final int DATA_OBUF_SIZE = 4 * 1024;

    Algorithm(String name) {
      this.compressName = name;
    }

    abstract CompressionCodec getCodec();

    public InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException {
      CompressionCodec codec = getCodec();
      // Set the codec's internal buffer size for reading from the downstream.
      if (downStreamBufferSize > 0) {
        Configurable c = (Configurable) codec;
        c.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
      }
      CompressionInputStream cis =
          codec.createInputStream(downStream, decompressor);
      return new BufferedInputStream(cis, DATA_IBUF_SIZE);
    }

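    // A sketch of the read path (rawIn stands for any InputStream positioned
    // at compressed data; it is not part of this class):
    //
    //   Algorithm algo = Compression.getCompressionAlgorithmByName("gz");
    //   Decompressor d = algo.getDecompressor();
    //   try {
    //     InputStream in = algo.createDecompressionStream(rawIn, d, 0);
    //     // ... read uncompressed bytes from in ...
    //   } finally {
    //     algo.returnDecompressor(d);
    //   }
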
    public OutputStream createCompressionStream(
        OutputStream downStream, Compressor compressor, int downStreamBufferSize)
        throws IOException {
      CompressionCodec codec = getCodec();
      OutputStream bos1;
      if (downStreamBufferSize > 0) {
        bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
      } else {
        bos1 = downStream;
      }
      Configurable c = (Configurable) codec;
      c.getConf().setInt("io.file.buffer.size", 32 * 1024);
      CompressionOutputStream cos =
          codec.createOutputStream(bos1, compressor);
      return new BufferedOutputStream(
          new FinishOnFlushCompressionStream(cos), DATA_OBUF_SIZE);
    }

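    // The matching write path (rawOut stands for any destination OutputStream;
    // it is not part of this class). Each flush() on the returned stream seals
    // a compressed unit, courtesy of FinishOnFlushCompressionStream:
    //
    //   Compressor cmp = algo.getCompressor();
    //   try {
    //     OutputStream out = algo.createCompressionStream(rawOut, cmp, 0);
    //     // ... write bytes, then flush() to seal the compressed block ...
    //   } finally {
    //     algo.returnCompressor(cmp);
    //   }
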
    public Compressor getCompressor() {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Compressor compressor = CodecPool.getCompressor(codec);
        if (compressor != null) {
          if (compressor.finished()) {
            // Somebody returned the compressor to the CodecPool while still
            // using it.
            LOG.warn("Compressor obtained from CodecPool is already finished()");
          }
          compressor.reset();
        }
        return compressor;
      }
      return null;
    }

    public void returnCompressor(Compressor compressor) {
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }

    public Decompressor getDecompressor() {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returned the decompressor to the CodecPool while still
            // using it.
            LOG.warn("Decompressor obtained from CodecPool is already finished()");
          }
          decompressor.reset();
        }
        return decompressor;
      }

      return null;
    }

    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
      }
    }

    public String getName() {
      return compressName;
    }
  }

  public static Algorithm getCompressionAlgorithmByName(String compressName) {
    for (Algorithm a : Algorithm.values()) {
      if (a.getName().equals(compressName)) {
        return a;
      }
    }

    throw new IllegalArgumentException(
        "Unsupported compression algorithm name: " + compressName);
  }

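  // Lookup is by the short names defined on the enum constants, e.g. (a usage
  // sketch):
  //
  //   Compression.Algorithm algo =
  //       Compression.getCompressionAlgorithmByName("gz");  // Algorithm.GZ
  //   Compression.getCompressionAlgorithmByName("zstd");    // throws IAE
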
  static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.values();

    String[] ret = new String[algos.length];
    int i = 0;
    for (Algorithm a : algos) {
      ret[i++] = a.getName();
    }

    return ret;
  }
}
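
// A self-contained round-trip sketch (assumes only this class plus Hadoop's
// compression libraries on the classpath; GZ avoids the external LZO
// dependency). It compresses a byte[] into memory and reads it back:
//
//   import java.io.*;
//   import org.apache.hadoop.io.compress.*;
//
//   Compression.Algorithm algo =
//       Compression.getCompressionAlgorithmByName("gz");
//   byte[] payload = "hello hfile".getBytes();
//
//   ByteArrayOutputStream baos = new ByteArrayOutputStream();
//   Compressor cmp = algo.getCompressor();
//   OutputStream out = algo.createCompressionStream(baos, cmp, 0);
//   out.write(payload);
//   out.flush();                       // seals the compressed block
//   algo.returnCompressor(cmp);
//
//   Decompressor dec = algo.getDecompressor();
//   InputStream in = algo.createDecompressionStream(
//       new ByteArrayInputStream(baos.toByteArray()), dec, 0);
//   // ... read payload.length bytes back from in ...
//   algo.returnDecompressor(dec);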