001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Arrays;
021
022import org.apache.commons.codec.BinaryDecoder;
023import org.apache.commons.codec.BinaryEncoder;
024import org.apache.commons.codec.DecoderException;
025import org.apache.commons.codec.EncoderException;
026
027/**
028 * Abstract superclass for Base-N encoders and decoders.
029 *
030 * <p>
031 * This class is thread-safe.
032 * </p>
033 *
034 */
035public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
036
037    /**
038     * Holds thread context so classes can be thread-safe.
039     *
040     * This class is not itself thread-safe; each thread must allocate its own copy.
041     *
042     * @since 1.7
043     */
044    static class Context {
045
046        /**
047         * Place holder for the bytes we're dealing with for our based logic.
048         * Bitwise operations store and extract the encoding or decoding from this variable.
049         */
050        int ibitWorkArea;
051
052        /**
053         * Place holder for the bytes we're dealing with for our based logic.
054         * Bitwise operations store and extract the encoding or decoding from this variable.
055         */
056        long lbitWorkArea;
057
058        /**
059         * Buffer for streaming.
060         */
061        byte[] buffer;
062
063        /**
064         * Position where next character should be written in the buffer.
065         */
066        int pos;
067
068        /**
069         * Position where next character should be read from the buffer.
070         */
071        int readPos;
072
073        /**
074         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
075         * and must be thrown away.
076         */
077        boolean eof;
078
079        /**
080         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
081         * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
082         */
083        int currentLinePos;
084
085        /**
086         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
087         * variable helps track that.
088         */
089        int modulus;
090
091        Context() {
092        }
093
094        /**
095         * Returns a String useful for debugging (especially within a debugger.)
096         *
097         * @return a String useful for debugging.
098         */
099        @SuppressWarnings("boxing") // OK to ignore boxing here
100        @Override
101        public String toString() {
102            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
103                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
104                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
105        }
106    }
107
108    /**
109     * EOF
110     *
111     * @since 1.7
112     */
113    static final int EOF = -1;
114
115    /**
116     *  MIME chunk size per RFC 2045 section 6.8.
117     *
118     * <p>
119     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
120     * equal signs.
121     * </p>
122     *
123     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
124     */
125    public static final int MIME_CHUNK_SIZE = 76;
126
127    /**
128     * PEM chunk size per RFC 1421 section 4.3.2.4.
129     *
130     * <p>
131     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
132     * equal signs.
133     * </p>
134     *
135     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
136     */
137    public static final int PEM_CHUNK_SIZE = 64;
138
139    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
140
141    /**
142     * Defines the default buffer size - currently {@value}
143     * - must be large enough for at least one encoded block+separator
144     */
145    private static final int DEFAULT_BUFFER_SIZE = 8192;
146
147    /**
148     * The maximum size buffer to allocate.
149     *
150     * <p>This is set to the same size used in the JDK {@code java.util.ArrayList}:</p>
151     * <blockquote>
152     * Some VMs reserve some header words in an array.
153     * Attempts to allocate larger arrays may result in
154     * OutOfMemoryError: Requested array size exceeds VM limit.
155     * </blockquote>
156     */
157    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
158
159    /** Mask used to extract 8 bits, used in decoding bytes */
160    protected static final int MASK_8BITS = 0xff;
161
162    /**
163     * Byte used to pad output.
164     */
165    protected static final byte PAD_DEFAULT = '='; // Allow static access to default
166
167    /**
168     * @deprecated Use {@link #pad}. Will be removed in 2.0.
169     */
170    @Deprecated
171    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
172
173    protected final byte pad; // instance variable just in case it needs to vary later
174
    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
176    private final int unencodedBlockSize;
177
    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
179    private final int encodedBlockSize;
180
181    /**
182     * Chunksize for encoding. Not used when decoding.
183     * A value of zero or less implies no chunking of the encoded data.
184     * Rounded down to nearest multiple of encodedBlockSize.
185     */
186    protected final int lineLength;
187
188    /**
189     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
190     */
191    private final int chunkSeparatorLength;
192
193    /**
194     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
195     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
196     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
197     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
198     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
199     * @param chunkSeparatorLength the chunk separator length, if relevant
200     */
201    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
202                         final int lineLength, final int chunkSeparatorLength) {
203        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
204    }
205
206    /**
207     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
208     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
209     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
210     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
211     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
212     * @param chunkSeparatorLength the chunk separator length, if relevant
213     * @param pad byte used as padding byte.
214     */
215    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
216                         final int lineLength, final int chunkSeparatorLength, final byte pad) {
217        this.unencodedBlockSize = unencodedBlockSize;
218        this.encodedBlockSize = encodedBlockSize;
219        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
220        this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
221        this.chunkSeparatorLength = chunkSeparatorLength;
222
223        this.pad = pad;
224    }
225
226    /**
227     * Returns true if this object has buffered data for reading.
228     *
229     * @param context the context to be used
230     * @return true if there is data still available for reading.
231     */
232    boolean hasData(final Context context) {  // package protected for access from I/O streams
233        return context.buffer != null;
234    }
235
236    /**
237     * Returns the amount of buffered data available for reading.
238     *
239     * @param context the context to be used
240     * @return The amount of buffered data available for reading.
241     */
242    int available(final Context context) {  // package protected for access from I/O streams
243        return context.buffer != null ? context.pos - context.readPos : 0;
244    }
245
246    /**
247     * Get the default buffer size. Can be overridden.
248     *
249     * @return the default buffer size.
250     */
251    protected int getDefaultBufferSize() {
252        return DEFAULT_BUFFER_SIZE;
253    }
254
255    /**
256     * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
257     * @param context the context to be used
258     * @param minCapacity the minimum required capacity
259     * @return the resized byte[] buffer
260     * @throws OutOfMemoryError if the {@code minCapacity} is negative
261     */
262    private static byte[] resizeBuffer(final Context context, final int minCapacity) {
263        // Overflow-conscious code treats the min and new capacity as unsigned.
264        final int oldCapacity = context.buffer.length;
265        int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
266        if (compareUnsigned(newCapacity, minCapacity) < 0) {
267            newCapacity = minCapacity;
268        }
269        if (compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
270            newCapacity = createPositiveCapacity(minCapacity);
271        }
272
273        final byte[] b = new byte[newCapacity];
274        System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
275        context.buffer = b;
276        return b;
277    }
278
279    /**
280     * Compares two {@code int} values numerically treating the values
281     * as unsigned. Taken from JDK 1.8.
282     *
283     * <p>TODO: Replace with JDK 1.8 Integer::compareUnsigned(int, int).</p>
284     *
285     * @param  x the first {@code int} to compare
286     * @param  y the second {@code int} to compare
287     * @return the value {@code 0} if {@code x == y}; a value less
288     *         than {@code 0} if {@code x < y} as unsigned values; and
289     *         a value greater than {@code 0} if {@code x > y} as
290     *         unsigned values
291     */
292    private static int compareUnsigned(final int x, final int y) {
293        return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE);
294    }
295
296    /**
297     * Create a positive capacity at least as large the minimum required capacity.
298     * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
299     * can be allocated.
300     *
301     * @param minCapacity the minimum capacity
302     * @return the capacity
303     * @throws OutOfMemoryError if the {@code minCapacity} is negative
304     */
305    private static int createPositiveCapacity(final int minCapacity) {
306        if (minCapacity < 0) {
307            // overflow
308            throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
309        }
310        // This is called when we require buffer expansion to a very big array.
311        // Use the conservative maximum buffer size if possible, otherwise the biggest required.
312        //
313        // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
314        // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
315        // Integer.MAX_VALUE length array.
316        // The result is that we may have to allocate an array of this size more than once if
317        // the capacity must be expanded again.
318        return (minCapacity > MAX_BUFFER_SIZE) ?
319            minCapacity :
320            MAX_BUFFER_SIZE;
321    }
322
323    /**
324     * Ensure that the buffer has room for {@code size} bytes
325     *
326     * @param size minimum spare space required
327     * @param context the context to be used
328     * @return the buffer
329     */
330    protected byte[] ensureBufferSize(final int size, final Context context){
331        if (context.buffer == null) {
332            context.buffer = new byte[getDefaultBufferSize()];
333            context.pos = 0;
334            context.readPos = 0;
335
336            // Overflow-conscious:
337            // x + y > z  ==  x + y - z > 0
338        } else if (context.pos + size - context.buffer.length > 0) {
339            return resizeBuffer(context, context.pos + size);
340        }
341        return context.buffer;
342    }
343
344    /**
345     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
346     * bytes. Returns how many bytes were actually extracted.
347     * <p>
348     * Package protected for access from I/O streams.
349     *
350     * @param b
351     *            byte[] array to extract the buffered data into.
352     * @param bPos
353     *            position in byte[] array to start extraction at.
354     * @param bAvail
355     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
356     * @param context
357     *            the context to be used
358     * @return The number of bytes successfully extracted into the provided byte[] array.
359     */
360    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
361        if (context.buffer != null) {
362            final int len = Math.min(available(context), bAvail);
363            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
364            context.readPos += len;
365            if (context.readPos >= context.pos) {
366                context.buffer = null; // so hasData() will return false, and this method can return -1
367            }
368            return len;
369        }
370        return context.eof ? EOF : 0;
371    }
372
373    /**
374     * Checks if a byte value is whitespace or not.
375     * Whitespace is taken to mean: space, tab, CR, LF
376     * @param byteToCheck
377     *            the byte to check
378     * @return true if byte is whitespace, false otherwise
379     */
380    protected static boolean isWhiteSpace(final byte byteToCheck) {
381        switch (byteToCheck) {
382            case ' ' :
383            case '\n' :
384            case '\r' :
385            case '\t' :
386                return true;
387            default :
388                return false;
389        }
390    }
391
392    /**
393     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
394     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
395     *
396     * @param obj
397     *            Object to encode
398     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
399     * @throws EncoderException
400     *             if the parameter supplied is not of type byte[]
401     */
402    @Override
403    public Object encode(final Object obj) throws EncoderException {
404        if (!(obj instanceof byte[])) {
405            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
406        }
407        return encode((byte[]) obj);
408    }
409
410    /**
411     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
412     * Uses UTF8 encoding.
413     *
414     * @param pArray
415     *            a byte array containing binary data
416     * @return A String containing only Base-N character data
417     */
418    public String encodeToString(final byte[] pArray) {
419        return StringUtils.newStringUtf8(encode(pArray));
420    }
421
422    /**
423     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
424     * Uses UTF8 encoding.
425     *
426     * @param pArray a byte array containing binary data
427     * @return String containing only character data in the appropriate alphabet.
428     * @since 1.5
429     * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
430    */
431    public String encodeAsString(final byte[] pArray){
432        return StringUtils.newStringUtf8(encode(pArray));
433    }
434
435    /**
436     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
437     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
438     *
439     * @param obj
440     *            Object to decode
441     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
442     *         supplied.
443     * @throws DecoderException
444     *             if the parameter supplied is not of type byte[]
445     */
446    @Override
447    public Object decode(final Object obj) throws DecoderException {
448        if (obj instanceof byte[]) {
449            return decode((byte[]) obj);
450        } else if (obj instanceof String) {
451            return decode((String) obj);
452        } else {
453            throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
454        }
455    }
456
457    /**
458     * Decodes a String containing characters in the Base-N alphabet.
459     *
460     * @param pArray
461     *            A String containing Base-N character data
462     * @return a byte array containing binary data
463     */
464    public byte[] decode(final String pArray) {
465        return decode(StringUtils.getBytesUtf8(pArray));
466    }
467
468    /**
469     * Decodes a byte[] containing characters in the Base-N alphabet.
470     *
471     * @param pArray
472     *            A byte array containing Base-N character data
473     * @return a byte array containing binary data
474     */
475    @Override
476    public byte[] decode(final byte[] pArray) {
477        if (pArray == null || pArray.length == 0) {
478            return pArray;
479        }
480        final Context context = new Context();
481        decode(pArray, 0, pArray.length, context);
482        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
483        final byte[] result = new byte[context.pos];
484        readResults(result, 0, result.length, context);
485        return result;
486    }
487
488    /**
489     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
490     *
491     * @param pArray
492     *            a byte array containing binary data
493     * @return A byte array containing only the base N alphabetic character data
494     */
495    @Override
496    public byte[] encode(final byte[] pArray) {
497        if (pArray == null || pArray.length == 0) {
498            return pArray;
499        }
500        return encode(pArray, 0, pArray.length);
501    }
502
503    /**
504     * Encodes a byte[] containing binary data, into a byte[] containing
505     * characters in the alphabet.
506     *
507     * @param pArray
508     *            a byte array containing binary data
509     * @param offset
510     *            initial offset of the subarray.
511     * @param length
512     *            length of the subarray.
513     * @return A byte array containing only the base N alphabetic character data
514     * @since 1.11
515     */
516    public byte[] encode(final byte[] pArray, final int offset, final int length) {
517        if (pArray == null || pArray.length == 0) {
518            return pArray;
519        }
520        final Context context = new Context();
521        encode(pArray, offset, length, context);
522        encode(pArray, offset, EOF, context); // Notify encoder of EOF.
523        final byte[] buf = new byte[context.pos - context.readPos];
524        readResults(buf, 0, buf.length, context);
525        return buf;
526    }
527
    // package protected for access from I/O streams
    // Streaming encode step implemented by subclasses. Callers in this class pass the input array, a start
    // offset and a length, then call once more with length EOF to signal end of input; output is afterwards
    // read back from the context via readResults().
    abstract void encode(byte[] pArray, int i, int length, Context context);
530
    // package protected for access from I/O streams
    // Streaming decode step implemented by subclasses. Callers in this class pass the input array, a start
    // offset and a length, then call once more with length EOF to signal end of input; output is afterwards
    // read back from the context via readResults().
    abstract void decode(byte[] pArray, int i, int length, Context context);
533
    /**
     * Returns whether or not the {@code value} is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);
543
544    /**
545     * Tests a given byte array to see if it contains only valid characters within the alphabet.
546     * The method optionally treats whitespace and pad as valid.
547     *
548     * @param arrayOctet byte array to test
549     * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
550     *
551     * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
552     *         {@code false}, otherwise
553     */
554    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
555        for (final byte octet : arrayOctet) {
556            if (!isInAlphabet(octet) &&
557                    (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
558                return false;
559            }
560        }
561        return true;
562    }
563
564    /**
565     * Tests a given String to see if it contains only valid characters within the alphabet.
566     * The method treats whitespace and PAD as valid.
567     *
568     * @param basen String to test
569     * @return {@code true} if all characters in the String are valid characters in the alphabet or if
570     *         the String is empty; {@code false}, otherwise
571     * @see #isInAlphabet(byte[], boolean)
572     */
573    public boolean isInAlphabet(final String basen) {
574        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
575    }
576
577    /**
578     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
579     *
580     * Intended for use in checking line-ending arrays
581     *
582     * @param arrayOctet
583     *            byte array to test
584     * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
585     */
586    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
587        if (arrayOctet == null) {
588            return false;
589        }
590        for (final byte element : arrayOctet) {
591            if (pad == element || isInAlphabet(element)) {
592                return true;
593            }
594        }
595        return false;
596    }
597
598    /**
599     * Calculates the amount of space needed to encode the supplied array.
600     *
601     * @param pArray byte[] array which will later be encoded
602     *
603     * @return amount of space needed to encoded the supplied array.
604     * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
605     */
606    public long getEncodedLength(final byte[] pArray) {
607        // Calculate non-chunked size - rounded up to allow for padding
608        // cast to long is needed to avoid possibility of overflow
609        long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
610        if (lineLength > 0) { // We're using chunking
611            // Round up to nearest multiple
612            len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
613        }
614        return len;
615    }
616}