1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.io.hfile;
18
19 import java.io.BufferedInputStream;
20 import java.io.BufferedOutputStream;
21 import java.io.FilterOutputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.OutputStream;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.conf.Configurable;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.io.compress.CodecPool;
31 import org.apache.hadoop.io.compress.CompressionCodec;
32 import org.apache.hadoop.io.compress.CompressionInputStream;
33 import org.apache.hadoop.io.compress.CompressionOutputStream;
34 import org.apache.hadoop.io.compress.Compressor;
35 import org.apache.hadoop.io.compress.Decompressor;
36 import org.apache.hadoop.io.compress.DoNotPool;
37 import org.apache.hadoop.io.compress.GzipCodec;
38 import org.apache.hadoop.io.compress.DefaultCodec;
39 import org.apache.hadoop.util.ReflectionUtils;
40
41
42
43
44
45 public final class Compression {
46 static final Log LOG = LogFactory.getLog(Compression.class);
47
48
49
50
51 private Compression() {
52 super();
53 }
54
55 static class FinishOnFlushCompressionStream extends FilterOutputStream {
56 public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
57 super(cout);
58 }
59
60 @Override
61 public void write(byte b[], int off, int len) throws IOException {
62 out.write(b, off, len);
63 }
64
65 @Override
66 public void flush() throws IOException {
67 CompressionOutputStream cout = (CompressionOutputStream) out;
68 cout.finish();
69 cout.flush();
70 cout.resetState();
71 }
72 }
73
74
75
76
77
78 private static ClassLoader getClassLoaderForCodec() {
79 ClassLoader cl = Thread.currentThread().getContextClassLoader();
80 if (cl == null) {
81 cl = Compression.class.getClassLoader();
82 }
83 if (cl == null) {
84 cl = ClassLoader.getSystemClassLoader();
85 }
86 if (cl == null) {
87 throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
88 }
89 return cl;
90 }
91
92
93
94
95
96
97 public static enum Algorithm {
98 LZO("lzo") {
99
100 private volatile transient CompressionCodec lzoCodec;
101 private transient Object lock = new Object();
102
103 @Override
104 CompressionCodec getCodec(Configuration conf) {
105 if (lzoCodec == null) {
106 synchronized (lock) {
107 if (lzoCodec == null) {
108 lzoCodec = buildCodec(conf);
109 }
110 }
111 }
112 return lzoCodec;
113 }
114
115 private CompressionCodec buildCodec(Configuration conf) {
116 try {
117 Class<?> externalCodec =
118 ClassLoader.getSystemClassLoader()
119 .loadClass("com.hadoop.compression.lzo.LzoCodec");
120 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
121 new Configuration(conf));
122 } catch (ClassNotFoundException e) {
123 throw new RuntimeException(e);
124 }
125 }
126 },
127 GZ("gz") {
128 private volatile transient GzipCodec codec;
129 private transient Object lock = new Object();
130
131 @Override
132 DefaultCodec getCodec(Configuration conf) {
133 if (codec == null) {
134 synchronized (lock) {
135 if (codec == null) {
136 codec = buildCodec(conf);
137 }
138 }
139 }
140
141 return codec;
142 }
143
144 private GzipCodec buildCodec(Configuration conf) {
145 GzipCodec gzcodec = new ReusableStreamGzipCodec();
146 gzcodec.setConf(new Configuration(conf));
147 return gzcodec;
148 }
149 },
150
151 NONE("none") {
152 @Override
153 DefaultCodec getCodec(Configuration conf) {
154 return null;
155 }
156
157 @Override
158 public synchronized InputStream createDecompressionStream(
159 InputStream downStream, Decompressor decompressor,
160 int downStreamBufferSize) throws IOException {
161 if (downStreamBufferSize > 0) {
162 return new BufferedInputStream(downStream, downStreamBufferSize);
163 }
164
165
166
167
168
169 return downStream;
170 }
171
172 @Override
173 public synchronized OutputStream createCompressionStream(
174 OutputStream downStream, Compressor compressor,
175 int downStreamBufferSize) throws IOException {
176 if (downStreamBufferSize > 0) {
177 return new BufferedOutputStream(downStream, downStreamBufferSize);
178 }
179
180 return downStream;
181 }
182 },
183 SNAPPY("snappy") {
184
185 private volatile transient CompressionCodec snappyCodec;
186 private transient Object lock = new Object();
187
188 @Override
189 CompressionCodec getCodec(Configuration conf) {
190 if (snappyCodec == null) {
191 synchronized (lock) {
192 if (snappyCodec == null) {
193 snappyCodec = buildCodec(conf);
194 }
195 }
196 }
197 return snappyCodec;
198 }
199
200 private CompressionCodec buildCodec(Configuration conf) {
201 try {
202 Class<?> externalCodec =
203 ClassLoader.getSystemClassLoader()
204 .loadClass("org.apache.hadoop.io.compress.SnappyCodec");
205 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
206 conf);
207 } catch (ClassNotFoundException e) {
208 throw new RuntimeException(e);
209 }
210 }
211 },
212 LZ4("lz4") {
213
214 private volatile transient CompressionCodec lz4Codec;
215 private transient Object lock = new Object();
216
217 @Override
218 CompressionCodec getCodec(Configuration conf) {
219 if (lz4Codec == null) {
220 synchronized (lock) {
221 if (lz4Codec == null) {
222 lz4Codec = buildCodec(conf);
223 }
224 }
225 buildCodec(conf);
226 }
227 return lz4Codec;
228 }
229
230 private CompressionCodec buildCodec(Configuration conf) {
231 try {
232 Class<?> externalCodec =
233 getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.Lz4Codec");
234 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
235 conf);
236 } catch (ClassNotFoundException e) {
237 throw new RuntimeException(e);
238 }
239 }
240 };
241
242 private final Configuration conf;
243 private final String compressName;
244
245 private static final int DATA_IBUF_SIZE = 1 * 1024;
246
247 private static final int DATA_OBUF_SIZE = 4 * 1024;
248
249 Algorithm(String name) {
250 this.conf = new Configuration();
251 this.conf.setBoolean("hadoop.native.lib", true);
252 this.compressName = name;
253 }
254
255 abstract CompressionCodec getCodec(Configuration conf);
256
257 public InputStream createDecompressionStream(
258 InputStream downStream, Decompressor decompressor,
259 int downStreamBufferSize) throws IOException {
260 CompressionCodec codec = getCodec(conf);
261
262 if (downStreamBufferSize > 0) {
263 ((Configurable)codec).getConf().setInt("io.file.buffer.size",
264 downStreamBufferSize);
265 }
266 CompressionInputStream cis =
267 codec.createInputStream(downStream, decompressor);
268 BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
269 return bis2;
270
271 }
272
273 public OutputStream createCompressionStream(
274 OutputStream downStream, Compressor compressor, int downStreamBufferSize)
275 throws IOException {
276 OutputStream bos1 = null;
277 if (downStreamBufferSize > 0) {
278 bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
279 }
280 else {
281 bos1 = downStream;
282 }
283 CompressionOutputStream cos =
284 createPlainCompressionStream(bos1, compressor);
285 BufferedOutputStream bos2 =
286 new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
287 DATA_OBUF_SIZE);
288 return bos2;
289 }
290
291
292
293
294
295 CompressionOutputStream createPlainCompressionStream(
296 OutputStream downStream, Compressor compressor) throws IOException {
297 CompressionCodec codec = getCodec(conf);
298 ((Configurable)codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
299 return codec.createOutputStream(downStream, compressor);
300 }
301
302 public Compressor getCompressor() {
303 CompressionCodec codec = getCodec(conf);
304 if (codec != null) {
305 Compressor compressor = CodecPool.getCompressor(codec);
306 if (compressor != null) {
307 if (compressor.finished()) {
308
309
310 LOG
311 .warn("Compressor obtained from CodecPool is already finished()");
312
313
314 }
315 compressor.reset();
316 }
317 return compressor;
318 }
319 return null;
320 }
321
322 public void returnCompressor(Compressor compressor) {
323 if (compressor != null) {
324 CodecPool.returnCompressor(compressor);
325 }
326 }
327
328 public Decompressor getDecompressor() {
329 CompressionCodec codec = getCodec(conf);
330 if (codec != null) {
331 Decompressor decompressor = CodecPool.getDecompressor(codec);
332 if (decompressor != null) {
333 if (decompressor.finished()) {
334
335
336 LOG
337 .warn("Deompressor obtained from CodecPool is already finished()");
338
339
340 }
341 decompressor.reset();
342 }
343 return decompressor;
344 }
345
346 return null;
347 }
348
349 public void returnDecompressor(Decompressor decompressor) {
350 if (decompressor != null) {
351 CodecPool.returnDecompressor(decompressor);
352 if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
353 decompressor.end();
354 }
355 }
356 }
357
358 public String getName() {
359 return compressName;
360 }
361 }
362
363 public static Algorithm getCompressionAlgorithmByName(String compressName) {
364 Algorithm[] algos = Algorithm.class.getEnumConstants();
365
366 for (Algorithm a : algos) {
367 if (a.getName().equals(compressName)) {
368 return a;
369 }
370 }
371
372 throw new IllegalArgumentException(
373 "Unsupported compression algorithm name: " + compressName);
374 }
375
376 static String[] getSupportedAlgorithms() {
377 Algorithm[] algos = Algorithm.class.getEnumConstants();
378
379 String[] ret = new String[algos.length];
380 int i = 0;
381 for (Algorithm a : algos) {
382 ret[i++] = a.getName();
383 }
384
385 return ret;
386 }
387 }