001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.util.Arrays; 021 022import org.apache.commons.codec.BinaryDecoder; 023import org.apache.commons.codec.BinaryEncoder; 024import org.apache.commons.codec.DecoderException; 025import org.apache.commons.codec.EncoderException; 026 027/** 028 * Abstract superclass for Base-N encoders and decoders. 029 * 030 * <p> 031 * This class is thread-safe. 032 * </p> 033 * 034 */ 035public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 036 037 /** 038 * Holds thread context so classes can be thread-safe. 039 * 040 * This class is not itself thread-safe; each thread must allocate its own copy. 041 * 042 * @since 1.7 043 */ 044 static class Context { 045 046 /** 047 * Place holder for the bytes we're dealing with for our based logic. 048 * Bitwise operations store and extract the encoding or decoding from this variable. 049 */ 050 int ibitWorkArea; 051 052 /** 053 * Place holder for the bytes we're dealing with for our based logic. 054 * Bitwise operations store and extract the encoding or decoding from this variable. 055 */ 056 long lbitWorkArea; 057 058 /** 059 * Buffer for streaming. 060 */ 061 byte[] buffer; 062 063 /** 064 * Position where next character should be written in the buffer. 065 */ 066 int pos; 067 068 /** 069 * Position where next character should be read from the buffer. 070 */ 071 int readPos; 072 073 /** 074 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 075 * and must be thrown away. 076 */ 077 boolean eof; 078 079 /** 080 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use 081 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). 082 */ 083 int currentLinePos; 084 085 /** 086 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This 087 * variable helps track that. 088 */ 089 int modulus; 090 091 Context() { 092 } 093 094 /** 095 * Returns a String useful for debugging (especially within a debugger.) 096 * 097 * @return a String useful for debugging. 098 */ 099 @SuppressWarnings("boxing") // OK to ignore boxing here 100 @Override 101 public String toString() { 102 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + 103 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), 104 currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); 105 } 106 } 107 108 /** 109 * EOF 110 * 111 * @since 1.7 112 */ 113 static final int EOF = -1; 114 115 /** 116 * MIME chunk size per RFC 2045 section 6.8. 117 * 118 * <p> 119 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 120 * equal signs. 121 * </p> 122 * 123 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 124 */ 125 public static final int MIME_CHUNK_SIZE = 76; 126 127 /** 128 * PEM chunk size per RFC 1421 section 4.3.2.4. 129 * 130 * <p> 131 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 132 * equal signs. 133 * </p> 134 * 135 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 136 */ 137 public static final int PEM_CHUNK_SIZE = 64; 138 139 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 140 141 /** 142 * Defines the default buffer size - currently {@value} 143 * - must be large enough for at least one encoded block+separator 144 */ 145 private static final int DEFAULT_BUFFER_SIZE = 8192; 146 147 /** 148 * The maximum size buffer to allocate. 149 * 150 * <p>This is set to the same size used in the JDK {@code java.util.ArrayList}:</p> 151 * <blockquote> 152 * Some VMs reserve some header words in an array. 153 * Attempts to allocate larger arrays may result in 154 * OutOfMemoryError: Requested array size exceeds VM limit. 155 * </blockquote> 156 */ 157 private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; 158 159 /** Mask used to extract 8 bits, used in decoding bytes */ 160 protected static final int MASK_8BITS = 0xff; 161 162 /** 163 * Byte used to pad output. 164 */ 165 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 166 167 /** 168 * @deprecated Use {@link #pad}. Will be removed in 2.0. 169 */ 170 @Deprecated 171 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 172 173 protected final byte pad; // instance variable just in case it needs to vary later 174 175 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 176 private final int unencodedBlockSize; 177 178 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 179 private final int encodedBlockSize; 180 181 /** 182 * Chunksize for encoding. Not used when decoding. 183 * A value of zero or less implies no chunking of the encoded data. 184 * Rounded down to nearest multiple of encodedBlockSize. 185 */ 186 protected final int lineLength; 187 188 /** 189 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 190 */ 191 private final int chunkSeparatorLength; 192 193 /** 194 * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size. 195 * If {@code chunkSeparatorLength} is zero, then chunking is disabled. 196 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 197 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 198 * @param lineLength if > 0, use chunking with a length {@code lineLength} 199 * @param chunkSeparatorLength the chunk separator length, if relevant 200 */ 201 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, 202 final int lineLength, final int chunkSeparatorLength) { 203 this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT); 204 } 205 206 /** 207 * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size. 208 * If {@code chunkSeparatorLength} is zero, then chunking is disabled. 209 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 210 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 211 * @param lineLength if > 0, use chunking with a length {@code lineLength} 212 * @param chunkSeparatorLength the chunk separator length, if relevant 213 * @param pad byte used as padding byte. 214 */ 215 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, 216 final int lineLength, final int chunkSeparatorLength, final byte pad) { 217 this.unencodedBlockSize = unencodedBlockSize; 218 this.encodedBlockSize = encodedBlockSize; 219 final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; 220 this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 221 this.chunkSeparatorLength = chunkSeparatorLength; 222 223 this.pad = pad; 224 } 225 226 /** 227 * Returns true if this object has buffered data for reading. 228 * 229 * @param context the context to be used 230 * @return true if there is data still available for reading. 231 */ 232 boolean hasData(final Context context) { // package protected for access from I/O streams 233 return context.buffer != null; 234 } 235 236 /** 237 * Returns the amount of buffered data available for reading. 238 * 239 * @param context the context to be used 240 * @return The amount of buffered data available for reading. 241 */ 242 int available(final Context context) { // package protected for access from I/O streams 243 return context.buffer != null ? context.pos - context.readPos : 0; 244 } 245 246 /** 247 * Get the default buffer size. Can be overridden. 248 * 249 * @return the default buffer size. 250 */ 251 protected int getDefaultBufferSize() { 252 return DEFAULT_BUFFER_SIZE; 253 } 254 255 /** 256 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. 257 * @param context the context to be used 258 * @param minCapacity the minimum required capacity 259 * @return the resized byte[] buffer 260 * @throws OutOfMemoryError if the {@code minCapacity} is negative 261 */ 262 private static byte[] resizeBuffer(final Context context, final int minCapacity) { 263 // Overflow-conscious code treats the min and new capacity as unsigned. 264 final int oldCapacity = context.buffer.length; 265 int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR; 266 if (compareUnsigned(newCapacity, minCapacity) < 0) { 267 newCapacity = minCapacity; 268 } 269 if (compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) { 270 newCapacity = createPositiveCapacity(minCapacity); 271 } 272 273 final byte[] b = new byte[newCapacity]; 274 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); 275 context.buffer = b; 276 return b; 277 } 278 279 /** 280 * Compares two {@code int} values numerically treating the values 281 * as unsigned. Taken from JDK 1.8. 282 * 283 * <p>TODO: Replace with JDK 1.8 Integer::compareUnsigned(int, int).</p> 284 * 285 * @param x the first {@code int} to compare 286 * @param y the second {@code int} to compare 287 * @return the value {@code 0} if {@code x == y}; a value less 288 * than {@code 0} if {@code x < y} as unsigned values; and 289 * a value greater than {@code 0} if {@code x > y} as 290 * unsigned values 291 */ 292 private static int compareUnsigned(final int x, final int y) { 293 return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE); 294 } 295 296 /** 297 * Create a positive capacity at least as large the minimum required capacity. 298 * If the minimum capacity is negative then this throws an OutOfMemoryError as no array 299 * can be allocated. 300 * 301 * @param minCapacity the minimum capacity 302 * @return the capacity 303 * @throws OutOfMemoryError if the {@code minCapacity} is negative 304 */ 305 private static int createPositiveCapacity(final int minCapacity) { 306 if (minCapacity < 0) { 307 // overflow 308 throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL)); 309 } 310 // This is called when we require buffer expansion to a very big array. 311 // Use the conservative maximum buffer size if possible, otherwise the biggest required. 312 // 313 // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE. 314 // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full 315 // Integer.MAX_VALUE length array. 316 // The result is that we may have to allocate an array of this size more than once if 317 // the capacity must be expanded again. 318 return (minCapacity > MAX_BUFFER_SIZE) ? 319 minCapacity : 320 MAX_BUFFER_SIZE; 321 } 322 323 /** 324 * Ensure that the buffer has room for {@code size} bytes 325 * 326 * @param size minimum spare space required 327 * @param context the context to be used 328 * @return the buffer 329 */ 330 protected byte[] ensureBufferSize(final int size, final Context context){ 331 if (context.buffer == null) { 332 context.buffer = new byte[getDefaultBufferSize()]; 333 context.pos = 0; 334 context.readPos = 0; 335 336 // Overflow-conscious: 337 // x + y > z == x + y - z > 0 338 } else if (context.pos + size - context.buffer.length > 0) { 339 return resizeBuffer(context, context.pos + size); 340 } 341 return context.buffer; 342 } 343 344 /** 345 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 346 * bytes. Returns how many bytes were actually extracted. 347 * <p> 348 * Package protected for access from I/O streams. 349 * 350 * @param b 351 * byte[] array to extract the buffered data into. 352 * @param bPos 353 * position in byte[] array to start extraction at. 354 * @param bAvail 355 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 356 * @param context 357 * the context to be used 358 * @return The number of bytes successfully extracted into the provided byte[] array. 359 */ 360 int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { 361 if (context.buffer != null) { 362 final int len = Math.min(available(context), bAvail); 363 System.arraycopy(context.buffer, context.readPos, b, bPos, len); 364 context.readPos += len; 365 if (context.readPos >= context.pos) { 366 context.buffer = null; // so hasData() will return false, and this method can return -1 367 } 368 return len; 369 } 370 return context.eof ? EOF : 0; 371 } 372 373 /** 374 * Checks if a byte value is whitespace or not. 375 * Whitespace is taken to mean: space, tab, CR, LF 376 * @param byteToCheck 377 * the byte to check 378 * @return true if byte is whitespace, false otherwise 379 */ 380 protected static boolean isWhiteSpace(final byte byteToCheck) { 381 switch (byteToCheck) { 382 case ' ' : 383 case '\n' : 384 case '\r' : 385 case '\t' : 386 return true; 387 default : 388 return false; 389 } 390 } 391 392 /** 393 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 394 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 395 * 396 * @param obj 397 * Object to encode 398 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 399 * @throws EncoderException 400 * if the parameter supplied is not of type byte[] 401 */ 402 @Override 403 public Object encode(final Object obj) throws EncoderException { 404 if (!(obj instanceof byte[])) { 405 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 406 } 407 return encode((byte[]) obj); 408 } 409 410 /** 411 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 412 * Uses UTF8 encoding. 413 * 414 * @param pArray 415 * a byte array containing binary data 416 * @return A String containing only Base-N character data 417 */ 418 public String encodeToString(final byte[] pArray) { 419 return StringUtils.newStringUtf8(encode(pArray)); 420 } 421 422 /** 423 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 424 * Uses UTF8 encoding. 425 * 426 * @param pArray a byte array containing binary data 427 * @return String containing only character data in the appropriate alphabet. 428 * @since 1.5 429 * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring. 430 */ 431 public String encodeAsString(final byte[] pArray){ 432 return StringUtils.newStringUtf8(encode(pArray)); 433 } 434 435 /** 436 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 437 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 438 * 439 * @param obj 440 * Object to decode 441 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String 442 * supplied. 443 * @throws DecoderException 444 * if the parameter supplied is not of type byte[] 445 */ 446 @Override 447 public Object decode(final Object obj) throws DecoderException { 448 if (obj instanceof byte[]) { 449 return decode((byte[]) obj); 450 } else if (obj instanceof String) { 451 return decode((String) obj); 452 } else { 453 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 454 } 455 } 456 457 /** 458 * Decodes a String containing characters in the Base-N alphabet. 459 * 460 * @param pArray 461 * A String containing Base-N character data 462 * @return a byte array containing binary data 463 */ 464 public byte[] decode(final String pArray) { 465 return decode(StringUtils.getBytesUtf8(pArray)); 466 } 467 468 /** 469 * Decodes a byte[] containing characters in the Base-N alphabet. 470 * 471 * @param pArray 472 * A byte array containing Base-N character data 473 * @return a byte array containing binary data 474 */ 475 @Override 476 public byte[] decode(final byte[] pArray) { 477 if (pArray == null || pArray.length == 0) { 478 return pArray; 479 } 480 final Context context = new Context(); 481 decode(pArray, 0, pArray.length, context); 482 decode(pArray, 0, EOF, context); // Notify decoder of EOF. 483 final byte[] result = new byte[context.pos]; 484 readResults(result, 0, result.length, context); 485 return result; 486 } 487 488 /** 489 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 490 * 491 * @param pArray 492 * a byte array containing binary data 493 * @return A byte array containing only the base N alphabetic character data 494 */ 495 @Override 496 public byte[] encode(final byte[] pArray) { 497 if (pArray == null || pArray.length == 0) { 498 return pArray; 499 } 500 return encode(pArray, 0, pArray.length); 501 } 502 503 /** 504 * Encodes a byte[] containing binary data, into a byte[] containing 505 * characters in the alphabet. 506 * 507 * @param pArray 508 * a byte array containing binary data 509 * @param offset 510 * initial offset of the subarray. 511 * @param length 512 * length of the subarray. 513 * @return A byte array containing only the base N alphabetic character data 514 * @since 1.11 515 */ 516 public byte[] encode(final byte[] pArray, final int offset, final int length) { 517 if (pArray == null || pArray.length == 0) { 518 return pArray; 519 } 520 final Context context = new Context(); 521 encode(pArray, offset, length, context); 522 encode(pArray, offset, EOF, context); // Notify encoder of EOF. 523 final byte[] buf = new byte[context.pos - context.readPos]; 524 readResults(buf, 0, buf.length, context); 525 return buf; 526 } 527 528 // package protected for access from I/O streams 529 abstract void encode(byte[] pArray, int i, int length, Context context); 530 531 // package protected for access from I/O streams 532 abstract void decode(byte[] pArray, int i, int length, Context context); 533 534 /** 535 * Returns whether or not the {@code octet} is in the current alphabet. 536 * Does not allow whitespace or pad. 537 * 538 * @param value The value to test 539 * 540 * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise. 541 */ 542 protected abstract boolean isInAlphabet(byte value); 543 544 /** 545 * Tests a given byte array to see if it contains only valid characters within the alphabet. 546 * The method optionally treats whitespace and pad as valid. 547 * 548 * @param arrayOctet byte array to test 549 * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed 550 * 551 * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty; 552 * {@code false}, otherwise 553 */ 554 public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) { 555 for (final byte octet : arrayOctet) { 556 if (!isInAlphabet(octet) && 557 (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) { 558 return false; 559 } 560 } 561 return true; 562 } 563 564 /** 565 * Tests a given String to see if it contains only valid characters within the alphabet. 566 * The method treats whitespace and PAD as valid. 567 * 568 * @param basen String to test 569 * @return {@code true} if all characters in the String are valid characters in the alphabet or if 570 * the String is empty; {@code false}, otherwise 571 * @see #isInAlphabet(byte[], boolean) 572 */ 573 public boolean isInAlphabet(final String basen) { 574 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 575 } 576 577 /** 578 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 579 * 580 * Intended for use in checking line-ending arrays 581 * 582 * @param arrayOctet 583 * byte array to test 584 * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise 585 */ 586 protected boolean containsAlphabetOrPad(final byte[] arrayOctet) { 587 if (arrayOctet == null) { 588 return false; 589 } 590 for (final byte element : arrayOctet) { 591 if (pad == element || isInAlphabet(element)) { 592 return true; 593 } 594 } 595 return false; 596 } 597 598 /** 599 * Calculates the amount of space needed to encode the supplied array. 600 * 601 * @param pArray byte[] array which will later be encoded 602 * 603 * @return amount of space needed to encoded the supplied array. 604 * Returns a long since a max-len array will require > Integer.MAX_VALUE 605 */ 606 public long getEncodedLength(final byte[] pArray) { 607 // Calculate non-chunked size - rounded up to allow for padding 608 // cast to long is needed to avoid possibility of overflow 609 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; 610 if (lineLength > 0) { // We're using chunking 611 // Round up to nearest multiple 612 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; 613 } 614 return len; 615 } 616}