/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.compress;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.DoNotPool;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compression related stuff.
 * Copied from hadoop-3315 tfile.
 */
@InterfaceAudience.Private
public final class Compression {
  static final Log LOG = LogFactory.getLog(Compression.class);

  /**
   * Prevent the instantiation of class.
   */
  private Compression() {
    super();
  }

  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte b[], int off, int len) throws IOException {
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }
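
  // Illustrative sketch, not part of the original file: the wrapper above
  // turns flush() into finish() + flush() + resetState(), so every flush
  // emits a self-contained compressed block that a reader can decode without
  // waiting for close(). The names below are hypothetical:
  //
  //   CompressionOutputStream cout = codec.createOutputStream(rawOut, compressor);
  //   OutputStream os = new FinishOnFlushCompressionStream(cout);
  //   os.write(payload);
  //   os.flush(); // payload is now fully decodable downstream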

  /**
   * Returns the classloader to load the Codec class from.
   */
  private static ClassLoader getClassLoaderForCodec() {
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    if (cl == null) {
      cl = Compression.class.getClassLoader();
    }
    if (cl == null) {
      cl = ClassLoader.getSystemClassLoader();
    }
    if (cl == null) {
      throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
    }
    return cl;
  }

  /**
   * Compression algorithms. The ordinal of these cannot change or else you
   * risk breaking all existing data.
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="SE_TRANSIENT_FIELD_NOT_RESTORED",
      justification="We are not serializing so doesn't apply (not sure why transient though)")
  @InterfaceAudience.Public
  @InterfaceStability.Evolving
  public static enum Algorithm {
    LZO("lzo") {
      // Use base type to avoid compile-time dependencies on the LZO library.
      private volatile transient CompressionCodec lzoCodec;
      private transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (lzoCodec == null) {
          synchronized (lock) {
            if (lzoCodec == null) {
              lzoCodec = buildCodec(conf);
            }
          }
        }
        return lzoCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class<?> externalCodec =
              getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
              new Configuration(conf));
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    GZ("gz") {
      private volatile transient GzipCodec codec;
      private transient Object lock = new Object();

      @Override
      DefaultCodec getCodec(Configuration conf) {
        if (codec == null) {
          synchronized (lock) {
            if (codec == null) {
              codec = buildCodec(conf);
            }
          }
        }
        return codec;
      }

      private GzipCodec buildCodec(Configuration conf) {
        GzipCodec gzcodec = new ReusableStreamGzipCodec();
        gzcodec.setConf(new Configuration(conf));
        return gzcodec;
      }
    },

    NONE("none") {
      @Override
      DefaultCodec getCodec(Configuration conf) {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }
    },
    SNAPPY("snappy") {
      // Use base type to avoid compile-time dependencies on the Snappy codec.
      private volatile transient CompressionCodec snappyCodec;
      private transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (snappyCodec == null) {
          synchronized (lock) {
            if (snappyCodec == null) {
              snappyCodec = buildCodec(conf);
            }
          }
        }
        return snappyCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class<?> externalCodec =
              getClassLoaderForCodec()
                  .loadClass("org.apache.hadoop.io.compress.SnappyCodec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
              conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    LZ4("lz4") {
      // Use base type to avoid compile-time dependencies on the LZ4 codec.
      private volatile transient CompressionCodec lz4Codec;
      private transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (lz4Codec == null) {
          synchronized (lock) {
            if (lz4Codec == null) {
              lz4Codec = buildCodec(conf);
            }
          }
        }
        return lz4Codec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class<?> externalCodec =
              getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.Lz4Codec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
              conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    };

    private final Configuration conf;
    private final String compressName;

    // Data input buffer size to absorb small reads from application.
    private static final int DATA_IBUF_SIZE = 1 * 1024;

    // Data output buffer size to absorb small writes from application.
    private static final int DATA_OBUF_SIZE = 4 * 1024;

    Algorithm(String name) {
      this.conf = new Configuration();
      this.conf.setBoolean("hadoop.native.lib", true);
      this.compressName = name;
    }

    abstract CompressionCodec getCodec(Configuration conf);

    public InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException {
      CompressionCodec codec = getCodec(conf);
      // Set the internal buffer size to read from the down stream.
      if (downStreamBufferSize > 0) {
        ((Configurable) codec).getConf().setInt("io.file.buffer.size",
            downStreamBufferSize);
      }
      CompressionInputStream cis =
          codec.createInputStream(downStream, decompressor);
      BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
      return bis2;
    }

    public OutputStream createCompressionStream(
        OutputStream downStream, Compressor compressor, int downStreamBufferSize)
        throws IOException {
      OutputStream bos1 = null;
      if (downStreamBufferSize > 0) {
        bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
      } else {
        bos1 = downStream;
      }
      CompressionOutputStream cos =
          createPlainCompressionStream(bos1, compressor);
      BufferedOutputStream bos2 =
          new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
              DATA_OBUF_SIZE);
      return bos2;
    }

    /**
     * Creates a compression stream without any additional wrapping into
     * buffering streams.
     */
    public CompressionOutputStream createPlainCompressionStream(
        OutputStream downStream, Compressor compressor) throws IOException {
      CompressionCodec codec = getCodec(conf);
      ((Configurable) codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
      return codec.createOutputStream(downStream, compressor);
    }

    public Compressor getCompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Compressor compressor = CodecPool.getCompressor(codec);
        if (compressor != null) {
          if (compressor.finished()) {
            // Somebody returned the compressor to the CodecPool but is still
            // using it.
            LOG.warn("Compressor obtained from CodecPool is already finished()");
          }
          compressor.reset();
        }
        return compressor;
      }
      return null;
    }

    public void returnCompressor(Compressor compressor) {
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }

    public Decompressor getDecompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returned the decompressor to the CodecPool but is
            // still using it.
            LOG.warn("Decompressor obtained from CodecPool is already finished()");
          }
          decompressor.reset();
        }
        return decompressor;
      }
      return null;
    }

    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
        if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
          decompressor.end();
        }
      }
    }

    public String getName() {
      return compressName;
    }
  }
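
  // Illustrative sketch, not part of the original file: a typical round trip
  // through an Algorithm's stream factories, leasing the (de)compressor from
  // the codec pool and returning it afterwards. "data", "baos" and "bais"
  // are hypothetical names used only for this example:
  //
  //   Algorithm algo = Compression.getCompressionAlgorithmByName("gz");
  //   Compressor c = algo.getCompressor();
  //   ByteArrayOutputStream baos = new ByteArrayOutputStream();
  //   OutputStream os = algo.createCompressionStream(baos, c, 0);
  //   os.write(data);
  //   os.close();
  //   algo.returnCompressor(c);
  //
  //   Decompressor d = algo.getDecompressor();
  //   ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  //   InputStream is = algo.createDecompressionStream(bais, d, 0);
  //   // read the decompressed bytes from is, then:
  //   is.close();
  //   algo.returnDecompressor(d);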

  public static Algorithm getCompressionAlgorithmByName(String compressName) {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    for (Algorithm a : algos) {
      if (a.getName().equals(compressName)) {
        return a;
      }
    }

    throw new IllegalArgumentException(
        "Unsupported compression algorithm name: " + compressName);
  }

  /**
   * Get names of supported compression algorithms.
   *
   * @return Array of strings, each represents a supported compression
   *         algorithm.
   */
  public static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    String[] ret = new String[algos.length];
    int i = 0;
    for (Algorithm a : algos) {
      ret[i++] = a.getName();
    }

    return ret;
  }

  /**
   * Decompresses data from the given stream using the configured compression
   * algorithm. It will throw an exception if the dest buffer does not have
   * enough space to hold the decompressed data.
   *
   * @param dest
   *          the output buffer to write the decompressed bytes into
   * @param destOffset
   *          start writing position of the output buffer
   * @param bufferedBoundedStream
   *          a stream to read compressed data from, bounded to the exact
   *          amount of compressed data
   * @param compressedSize
   *          compressed data size, header not included
   * @param uncompressedSize
   *          uncompressed data size, header not included
   * @param compressAlgo
   *          compression algorithm used
   * @throws IOException
   */
  public static void decompress(byte[] dest, int destOffset,
      InputStream bufferedBoundedStream, int compressedSize,
      int uncompressedSize, Compression.Algorithm compressAlgo)
      throws IOException {

    if (dest.length - destOffset < uncompressedSize) {
      throw new IllegalArgumentException(
          "Output buffer does not have enough space to hold "
              + uncompressedSize + " decompressed bytes, available: "
              + (dest.length - destOffset));
    }

    Decompressor decompressor = null;
    try {
      decompressor = compressAlgo.getDecompressor();
      InputStream is = compressAlgo.createDecompressionStream(
          bufferedBoundedStream, decompressor, 0);

      IOUtils.readFully(is, dest, destOffset, uncompressedSize);
      is.close();
    } finally {
      if (decompressor != null) {
        compressAlgo.returnDecompressor(decompressor);
      }
    }
  }
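
  // Illustrative sketch, not part of the original file: decompressing one
  // compressed block into a preallocated array. "boundedIn" stands for a
  // stream already limited to exactly the compressed bytes, and the sizes
  // are hypothetical values a caller would read from block metadata:
  //
  //   byte[] dest = new byte[uncompressedSize];
  //   Compression.decompress(dest, 0, boundedIn, compressedSize,
  //       uncompressedSize, Compression.Algorithm.GZ);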

}