/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.math.BigInteger;
import java.util.Objects;

/**
 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
 *
 * <p>
 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
 * </p>
 * <p>
 * The class can be parameterized in the following manner with various constructors:
 * </p>
 * <ul>
 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being
 * multiples of 4 in the encoded data.</li>
 * <li>Line separator: Default is CRLF ("\r\n")</li>
 * </ul>
 * <p>
 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
 * </p>
 * <p>
 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
 * UTF-8, etc).
 * </p>
 * <p>
 * This class is thread-safe.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @since 1.0
 */
public class Base64 extends BaseNCodec {

    /**
     * BASE64 characters are 6 bits in length.
     * They are formed by taking a block of 3 octets to form a 24-bit string,
     * which is converted into 4 BASE64 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 6;
    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
    private static final int BYTES_PER_ENCODED_BLOCK = 4;

    /**
     * Chunk separator per RFC 2045 section 2.1.
     *
     * <p>
     * N.B. The next major release may break compatibility and make this field private.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};

    /**
     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
     * equivalents as specified in Table 1 of RFC 2045.
     *
     * Thanks to "commons" project in ws.apache.org for this code.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     */
    private static final byte[] STANDARD_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };

    /**
     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
     * changed to - and _ to make the encoded Base64 results more URL-SAFE.
     * This table is only used when the Base64's mode is set to URL-SAFE.
     */
    private static final byte[] URL_SAFE_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
     * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
     * alphabet but fall within the bounds of the array are translated to -1.
     *
     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
     *
     * Thanks to "commons" project in ws.apache.org for this code.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     */
    private static final byte[] DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
    };

    /** Mask used to extract 6 bits (Base64 uses 6-bit fields), used when encoding. */
    private static final int MASK_6BITS = 0x3f;
    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final int MASK_4BITS = 0xf;
    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final int MASK_2BITS = 0x3;

    // The static final fields above are used for the original static byte[] methods on Base64.
    // The private member fields below are used with the new streaming approach, which requires
    // some state be preserved between calls of encode() and decode().

    /**
     * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
     * between the two modes.
     */
    private final byte[] encodeTable;

    // Only one decode table currently; keep for consistency with Base32 code
    private final byte[] decodeTable = DECODE_TABLE;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0;
     * {@code null} when the output is not chunked.
     */
    private final byte[] lineSeparator;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * {@code decodeSize = 3 + lineSeparator.length;}
     */
    private final int decodeSize;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * {@code encodeSize = 4 + lineSeparator.length;}
     */
    private final int encodeSize;

    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     *
     * <p>
     * When decoding all variants are supported.
     * </p>
     */
    public Base64() {
        this(0);
    }

    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is
     * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
     * </p>
     *
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param urlSafe
     *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
     *            {@code false}.
     * @since 1.4
     */
    public Base64(final boolean urlSafe) {
        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
    }

    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @since 1.4
     */
    public Base64(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException
     *             Thrown when the provided lineSeparator included some base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator) {
        this(lineLength, lineSeparator, false);
    }

    /**
     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes. A {@code null} separator
     *            disables chunking rather than raising an exception.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
     * @throws IllegalArgumentException
     *             The provided lineSeparator included some base64 characters. That's not going to work!
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
                lineLength,
                lineSeparator == null ? 0 : lineSeparator.length);
        // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
        // @see test case Base64Test.testConstructors()
        if (lineSeparator != null && containsAlphabetOrPad(lineSeparator)) {
            final String sep = StringUtils.newStringUtf8(lineSeparator);
            throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
        }
        // A null line separator forces no chunking rather than throwing IAE.
        if (lineSeparator != null && lineLength > 0) {
            this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
            // Defensive copy so later changes to the caller's array cannot affect this codec.
            this.lineSeparator = lineSeparator.clone();
        } else {
            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
            this.lineSeparator = null;
        }
        this.decodeSize = this.encodeSize - 1;
        this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
    }

    /**
     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
     *
     * @return true if we're in URL-SAFE mode, false otherwise.
     * @since 1.4
     */
    public boolean isUrlSafe() {
        // Identity comparison is intended: encodeTable is always one of the two static tables.
        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
    }

    /**
     * <p>
     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
     * remaining bytes (if not multiple of 3).
     * </p>
     * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     *
     * @param in
     *            byte[] array of binary data to base64 encode.
     * @param inPos
     *            Position to start reading data from.
     * @param inAvail
     *            Amount of bytes available from input for encoding.
     * @param context
     *            the context to be used
     */
    @Override
    void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
        if (context.eof) {
            return;
        }
        // inAvail < 0 is how we're informed of EOF in the underlying data we're
        // encoding.
        if (inAvail < 0) {
            context.eof = true;
            if (0 == context.modulus && lineLength == 0) {
                return; // no leftovers to process and not using chunking
            }
            final byte[] buffer = ensureBufferSize(encodeSize, context);
            final int savedPos = context.pos;
            switch (context.modulus) { // 0-2
                case 0 : // nothing to do here
                    break;
                case 1 : // 8 bits = 6 + 2
                    // top 6 bits:
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
                    // remaining 2:
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
                    // URL-SAFE skips the padding to further reduce size.
                    if (encodeTable == STANDARD_ENCODE_TABLE) {
                        buffer[context.pos++] = pad;
                        buffer[context.pos++] = pad;
                    }
                    break;

                case 2 : // 16 bits = 6 + 6 + 4
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
                    // URL-SAFE skips the padding to further reduce size.
                    if (encodeTable == STANDARD_ENCODE_TABLE) {
                        buffer[context.pos++] = pad;
                    }
                    break;
                default:
                    throw new IllegalStateException("Impossible modulus "+context.modulus);
            }
            context.currentLinePos += context.pos - savedPos; // keep track of current line position
            // if currentPos == 0 we are at the start of a line, so don't add CRLF
            if (lineLength > 0 && context.currentLinePos > 0) {
                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                context.pos += lineSeparator.length;
            }
        } else {
            for (int i = 0; i < inAvail; i++) {
                final byte[] buffer = ensureBufferSize(encodeSize, context);
                context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
                // Treat the input byte as unsigned (0..255) before shifting it into the work area.
                final int b = in[inPos++] & MASK_8BITS;
                context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
                if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
                    buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                        context.pos += lineSeparator.length;
                        context.currentLinePos = 0;
                    }
                }
            }
        }
    }

    /**
     * <p>
     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
     * call is not necessary when decoding, but it doesn't hurt, either.
     * </p>
     * <p>
     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
     * silently ignored, but has implications for other bytes, too. Apart from the trailing-bits check described
     * below, this method subscribes to the garbage-in, garbage-out philosophy: it does not otherwise check the
     * provided data for validity.
     * </p>
     * <p>
     * When the input ends (EOF or first pad character) with a partial block of 2 or 3 characters, the bits of the
     * last character that would be discarded must be zero; otherwise an {@link IllegalArgumentException} is thrown.
     * </p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     *
     * @param in
     *            byte[] array of ascii data to base64 decode.
     * @param inPos
     *            Position to start reading data from.
     * @param inAvail
     *            Amount of bytes available from input for decoding.
     * @param context
     *            the context to be used
     * @throws IllegalArgumentException
     *             if the final trailing character carries non-zero bits that would be discarded
     */
    @Override
    void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
        if (context.eof) {
            return;
        }
        if (inAvail < 0) {
            context.eof = true;
        }
        for (int i = 0; i < inAvail; i++) {
            final byte[] buffer = ensureBufferSize(decodeSize, context);
            final byte b = in[inPos++];
            if (b == pad) {
                // We're done.
                context.eof = true;
                break;
            }
            if (b >= 0 && b < decodeTable.length) {
                final int result = decodeTable[b];
                if (result >= 0) {
                    context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
                    context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
                    if (context.modulus == 0) {
                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
                    }
                }
            }
        }

        // Two forms of EOF as far as base64 decoder is concerned: actual
        // EOF (-1) and first time '=' character is encountered in stream.
        // This approach makes the '=' padding characters completely optional.
        if (context.eof && context.modulus != 0) {
            final byte[] buffer = ensureBufferSize(decodeSize, context);

            // We have some spare bits remaining
            // Output all whole multiples of 8 bits and ignore the rest
            switch (context.modulus) {
                // case 0 is impossible, as excluded above
                case 1 : // 6 bits - ignore entirely
                    // TODO not currently tested; perhaps it is impossible?
                    break;
                case 2 : // 12 bits = 8 + 4
                    validateCharacter(MASK_4BITS, context);
                    context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
                    break;
                case 3 : // 18 bits = 8 + 8 + 2
                    validateCharacter(MASK_2BITS, context);
                    context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
                    buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
                    break;
                default:
                    throw new IllegalStateException("Impossible modulus "+context.modulus);
            }
        }
    }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
     */
    @Deprecated
    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
        return isBase64(arrayOctet);
    }

    /**
     * Returns whether or not the {@code octet} is in the base 64 alphabet. The pad character is accepted as well.
     *
     * @param octet
     *            The value to test
     * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
     * @since 1.4
     */
    public static boolean isBase64(final byte octet) {
        return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
    }

    /**
     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     *
     * @param base64
     *            String to test
     * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @since 1.5
     */
    public static boolean isBase64(final String base64) {
        return isBase64(StringUtils.getBytesUtf8(base64));
    }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     * @since 1.5
     */
    public static boolean isBase64(final byte[] arrayOctet) {
        for (final byte octet : arrayOctet) {
            if (!isBase64(octet) && !isWhiteSpace(octet)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     *
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     */
    public static byte[] encodeBase64(final byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     *
     * NOTE:  We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to
     * single-line non-chunking (commons-codec-1.5).
     *
     * @param binaryData
     *            binary data to encode
     * @return String containing Base64 characters.
     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
     */
    public static String encodeBase64String(final byte[] binaryData) {
        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
    }

    /**
     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
     * url-safe variation emits - and _ instead of + and / characters.
     * <b>Note: no padding is added.</b>
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     * @since 1.4
     */
    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
        return encodeBase64(binaryData, false, true);
    }

    /**
     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
     * url-safe variation emits - and _ instead of + and / characters.
     * <b>Note: no padding is added.</b>
     * @param binaryData
     *            binary data to encode
     * @return String containing Base64 characters
     * @since 1.4
     */
    public static String encodeBase64URLSafeString(final byte[] binaryData) {
        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
     *
     * @param binaryData
     *            binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
        return encodeBase64(binaryData, isChunked, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @param urlSafe
     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     * @since 1.4
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     *
     * @param binaryData
     *            Array containing binary data to encode. A {@code null} or empty array is returned as is.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @param urlSafe
     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
     * @param maxResultSize
     *            The maximum result size to accept.
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than maxResultSize
     * @since 1.4
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
                                      final boolean urlSafe, final int maxResultSize) {
        if (binaryData == null || binaryData.length == 0) {
            return binaryData;
        }

        // Create this so can use the super-class method
        // Also ensures that the same roundings are performed by the ctor and the code
        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
        final long len = b64.getEncodedLength(binaryData);
        if (len > maxResultSize) {
            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
                len +
                ") than the specified maximum size of " +
                maxResultSize);
        }

        return b64.encode(binaryData);
    }

    /**
     * Decodes a Base64 String into octets.
     * <p>
     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
     * </p>
     *
     * @param base64String
     *            String containing Base64 data
     * @return Array containing decoded data.
     * @since 1.4
     */
    public static byte[] decodeBase64(final String base64String) {
        return new Base64().decode(base64String);
    }

    /**
     * Decodes Base64 data into octets.
     * <p>
     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
     * </p>
     *
     * @param base64Data
     *            Byte array containing Base64 data
     * @return Array containing decoded data.
     */
    public static byte[] decodeBase64(final byte[] base64Data) {
        return new Base64().decode(base64Data);
    }

    // Implementation of the Encoder Interface

    // Implementation of integer encoding used for crypto
    /**
     * Decodes a base64-encoded integer according to crypto standards such as W3C's XML-Signature.
     *
     * @param pArray
     *            a byte array containing base64 character data
     * @return A BigInteger
     * @since 1.4
     */
    public static BigInteger decodeInteger(final byte[] pArray) {
        // Signum 1: the decoded octets are interpreted as an unsigned magnitude.
        return new BigInteger(1, decodeBase64(pArray));
    }

    /**
     * Encodes to a base64-encoded integer according to crypto standards such as W3C's XML-Signature.
     *
     * @param bigInteger
     *            a BigInteger
     * @return A byte array containing base64 character data
     * @throws NullPointerException
     *             if null is passed in
     * @since 1.4
     */
    public static byte[] encodeInteger(final BigInteger bigInteger) {
        Objects.requireNonNull(bigInteger, "bigInteger");
        return encodeBase64(toIntegerBytes(bigInteger), false);
    }

    /**
     * Returns a byte-array representation of a {@code BigInteger} without sign bit.
     *
     * @param bigInt
     *            {@code BigInteger} to be converted
     * @return a byte array representation of the BigInteger parameter
     */
    static byte[] toIntegerBytes(final BigInteger bigInt) {
        final int rawBitLength = bigInt.bitLength();
        // round the bit length up to the next multiple of 8
        final int bitlen = ((rawBitLength + 7) >> 3) << 3;
        final byte[] bigBytes = bigInt.toByteArray();

        if (((rawBitLength % 8) != 0) && (((rawBitLength / 8) + 1) == (bitlen / 8))) {
            return bigBytes;
        }
        // set up params for copying everything but sign bit
        int startSrc = 0;
        int len = bigBytes.length;

        // if bigInt is exactly byte-aligned, just skip signbit in copy
        if ((rawBitLength % 8) == 0) {
            startSrc = 1;
            len--;
        }
        final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
        final byte[] resizedBytes = new byte[bitlen / 8];
        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
        return resizedBytes;
    }

    /**
     * Returns whether or not the {@code octet} is in the Base64 alphabet.
     *
     * @param octet
     *            The value to test
     * @return {@code true} if the value is defined in the Base64 alphabet, {@code false} otherwise.
     */
    @Override
    protected boolean isInAlphabet(final byte octet) {
        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
    }

    /**
     * Validates whether decoding the final trailing character is possible in the context
     * of the set of possible base 64 values.
     *
     * <p>The character is valid if the lower bits within the provided mask are zero. This
     * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
     * </p>
     *
     * @param emptyBitsMask The mask of the lower bits that should be empty
     * @param context the context to be used
     *
     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
     */
    private static void validateCharacter(final int emptyBitsMask, final Context context) {
        if ((context.ibitWorkArea & emptyBitsMask) != 0) {
            throw new IllegalArgumentException(
                "Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible value. " +
                "Expected the discarded bits to be zero.");
        }
    }
}