1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.fileupload;
17
18 import java.io.ByteArrayOutputStream;
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.io.OutputStream;
22 import java.io.UnsupportedEncodingException;
23
/**
 * <p> Low level API for processing file uploads.
 *
 * <p> This class can be used to process data streams conforming to MIME
 * 'multipart' format as defined in
 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p> The format of the stream is defined in the following way:<br>
 *
 * <code>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 *   encapsulation := delimiter body CRLF<br>
 *   delimiter := "--" boundary CRLF<br>
 *   close-delimiter := "--" boundary "--"<br>
 *   preamble := &lt;ignore&gt;<br>
 *   epilogue := &lt;ignore&gt;<br>
 *   body := header-part CRLF body-data<br>
 *   header-part := 1*header CRLF<br>
 *   header := header-name ":" header-value<br>
 *   header-name := &lt;printable ascii characters except ":"&gt;<br>
 *   header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 *   body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 *    try {
 *        MultipartStream multipartStream = new MultipartStream(input,
 *                                                              boundary);
 *        boolean nextPart = multipartStream.skipPreamble();
 *        OutputStream output;
 *        while (nextPart) {
 *            String headers = multipartStream.readHeaders();
 *            // process headers
 *            // create some output stream
 *            multipartStream.readBodyData(output);
 *            nextPart = multipartStream.readBoundary();
 *        }
 *    } catch (MultipartStream.MalformedStreamException e) {
 *        // the stream failed to follow required syntax
 *    } catch (IOException e) {
 *        // a read or write error occurred
 *    }
 * </pre>
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java 353822 2005-12-04 06:33:55Z martinc $
 */
public class MultipartStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = {CR, LF, CR, LF};

    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = {CR, LF};

    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = {DASH, DASH};

    /**
     * A byte sequence that precedes a boundary (<code>CRLF--</code>).
     */
    protected static final byte[] BOUNDARY_PREFIX = {CR, LF, DASH, DASH};

    /**
     * The number of bytes, over and above the boundary size, to use for the
     * keep region.
     */
    private static final int KEEP_REGION_PAD = 3;

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream
     * (<code>CRLF--</code> followed by the boundary token).
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. It leaves the input stream, the boundary and the
     * buffer unset, so an instance created this way cannot parse anything.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream() {
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p> Note that the buffer must be at least big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data. Too small a buffer size setting will degrade
     * performance.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     * @param bufSize  The size of the buffer to be used, in bytes.
     *
     * @throws IllegalArgumentException if <code>boundary</code> is
     *         <code>null</code>, or if <code>bufSize</code> is smaller than
     *         the boundary length plus 5 bytes.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream(InputStream input,
                           byte[] boundary,
                           int bufSize) {
        if (boundary == null) {
            throw new IllegalArgumentException("boundary may not be null");
        }
        int delimiterLength = boundary.length + BOUNDARY_PREFIX.length;
        // A buffer that cannot hold a whole delimiter can never match one,
        // and a buffer no larger than the keep region makes the body reader
        // issue zero-length reads forever. Fail fast instead.
        if (bufSize < delimiterLength + 1) {
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartStream is "
                    + "too small: " + bufSize + " < " + (delimiterLength + 1));
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop the trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[delimiterLength];
        this.boundaryLength = delimiterLength;
        this.keepRegion = boundary.length + KEEP_REGION_PAD;
        System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0,
                BOUNDARY_PREFIX.length);
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length,
                boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a default size buffer.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     *
     * @throws IOException when an error occurs.
     * @throws IllegalArgumentException if <code>boundary</code> is
     *         <code>null</code> or too long for the default buffer.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     */
    public MultipartStream(InputStream input,
                           byte[] boundary)
        throws IOException {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * <p>Once the underlying stream is exhausted the buffer is left empty,
     * so every further call fails in the same way.
     *
     * @return The next byte from the input stream.
     *
     * @throws IOException if there is no more data available.
     */
    public byte readByte()
        throws IOException {
        // Buffer depleted?
        if (head == tail) {
            // Read into a local first: storing -1 straight into tail would
            // make head != tail and let later calls return stale bytes.
            int count = input.read(buffer, 0, bufSize);
            head = 0;
            if (count == -1) {
                tail = 0;
                throw new IOException("No more data is available");
            }
            tail = count;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * <p>The stream is expected to be positioned at the start of a
     * delimiter, as it is after {@link #readBodyData(OutputStream)} or
     * {@link #discardBodyData()} return normally. A single <code>LF</code>
     * directly after the boundary is leniently accepted as a part separator.
     *
     * @return <code>true</code> if there are more encapsulations in
     *         this stream; <code>false</code> otherwise.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly or
     *                                  fails to follow required syntax.
     */
    public boolean readBoundary()
        throws MalformedStreamException {
        head += boundaryLength;

        byte first;
        byte second;
        try {
            first = readByte();
            if (first == LF) {
                // Lenient: a lone LF (missing CR) still starts the next part.
                return true;
            }
            second = readByte();
        } catch (IOException e) {
            throw streamEnded(e);
        }

        // The syntax check lives outside the try block on purpose:
        // MalformedStreamException is an IOException, so throwing it inside
        // would have it caught above and its message replaced.
        byte[] marker = {first, second};
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     *                 stream.
     *
     * @throws IllegalBoundaryException if the <code>boundary</code>
     *                                  has a different length than the one
     *                                  being currently parsed.
     */
    public void setBoundary(byte[] boundary)
        throws IllegalBoundaryException {
        if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length,
                boundary.length);
    }

    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>At most {@link #HEADER_PART_SIZE_MAX} bytes are retained. A longer
     * header part is still consumed up to its terminating
     * <code>CRLFCRLF</code>, but the excess bytes are dropped from the
     * returned string.
     *
     * <p>The bytes are decoded with the configured header encoding; if that
     * encoding is <code>null</code> or not supported, the platform default
     * encoding is used.
     *
     * @return The <code>header-part</code> of the current encapsulation.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders()
        throws MalformedStreamException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        int matched = 0;   // number of HEADER_SEPARATOR bytes matched so far
        int stored = 0;    // number of bytes retained in baos
        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEnded(e);
            }
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else {
                // A mismatching CR may itself begin the separator (for
                // example CR CR LF CR LF), so restart at 1 rather than 0.
                matched = (b == HEADER_SEPARATOR[0]) ? 1 : 0;
            }
            // Counting only stored bytes keeps the counter from overflowing
            // on pathologically long header parts.
            if (stored < HEADER_PART_SIZE_MAX) {
                baos.write(b);
                stored++;
            }
        }

        if (headerEncoding != null) {
            try {
                return baos.toString(headerEncoding);
            } catch (UnsupportedEncodingException e) {
                // Fall back to platform default if specified encoding is not
                // supported.
                return baos.toString();
            }
        }
        return baos.toString();
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * <p>If the stream ends before a delimiter is found, everything read so
     * far is still written to <code>output</code> before the exception is
     * thrown.
     *
     * @param output The <code>Stream</code> to write data into.
     *
     * @return the amount of data written, in bytes. The counter is an
     *         <code>int</code> and wraps for bodies larger than
     *         <code>Integer.MAX_VALUE</code> bytes.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
        throws MalformedStreamException,
               IOException {
        if (output == null) {
            // The shared helper treats null as "discard"; keep rejecting it
            // here, as the original did on its first write.
            throw new NullPointerException("output");
        }
        int total = consumeBodyData(output);
        output.flush();
        return total;
    }

    /**
     * <p> Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded, in bytes.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public int discardBodyData()
        throws MalformedStreamException,
               IOException {
        return consumeBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     *         the stream.
     *
     * @throws IOException if an i/o error occurs.
     */
    public boolean skipPreamble()
        throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // search for the boundary without its leading CR/LF.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a,
                                      byte[] b,
                                      int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value,
                           int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     *         beginning of the <code>buffer</code>, or <code>-1</code> if
     *         not found.
     */
    protected int findSeparator() {
        // Last index at which a complete boundary still fits before tail.
        int maxpos = tail - boundaryLength;
        for (int first = head; first <= maxpos; first++) {
            // Jump to the next candidate: an occurrence of the first byte.
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * <p>The boundary is shown as text, one character per byte, without the
     * leading <code>CRLF--</code> prefix.
     *
     * @return The string representation of this object.
     */
    public String toString() {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        if (boundary == null) {
            sbTemp.append("null");
        } else {
            // String.valueOf(byte[]) would only print the array identity,
            // so widen each byte to a char instead.
            for (int i = BOUNDARY_PREFIX.length; i < boundary.length; i++) {
                sbTemp.append((char) (boundary[i] & 0xFF));
            }
        }
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }

    // -------------------------------------------------------- Private helpers

    /**
     * Consumes body data up to (not including) the next delimiter, copying
     * it to <code>sink</code> unless <code>sink</code> is <code>null</code>.
     * On normal return <code>head</code> points at the delimiter.
     *
     * @param sink Where to copy the data, or <code>null</code> to discard it.
     *
     * @return The number of bytes consumed.
     *
     * @throws MalformedStreamException if the stream ends before a delimiter
     *                                  is found.
     * @throws IOException              if an i/o error occurs.
     */
    private int consumeBodyData(OutputStream sink)
        throws MalformedStreamException,
               IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Hand over the data preceding the boundary.
                int count = pos - head;
                if (sink != null) {
                    sink.write(buffer, head, count);
                }
                total += count;
                head = pos;
                return total;
            }

            // Determine how much data should be kept in the buffer: the
            // tail may hold the beginning of a delimiter.
            int available = tail - head;
            int pad = Math.min(available, keepRegion);
            int count = available - pad;

            // Hand over the rest of the buffer.
            if (sink != null) {
                sink.write(buffer, head, count);
            }
            total += count;

            // Move the kept bytes to the beginning of the buffer. tail is
            // updated before reading so the state stays consistent even if
            // the read below throws.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
            head = 0;
            tail = pad;

            // Refill buffer with new data.
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad bytes were not a delimiter after all, so they
                // belong to the body. Hand them over, then report the
                // missing delimiter.
                if (sink != null) {
                    sink.write(buffer, 0, pad);
                    sink.flush();
                }
                total += pad;
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
    }

    /**
     * Builds the exception reported when reading from the underlying stream
     * fails, keeping the original failure as its cause.
     *
     * @param cause The i/o failure that interrupted parsing.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEnded(IOException cause) {
        MalformedStreamException ex =
                new MalformedStreamException("Stream ended unexpectedly");
        ex.initCause(cause);
        return ex;
    }

    // ---------------------------------------------------------- Inner classes

    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException
        extends IOException {
        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs a <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException
        extends IOException {
        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }
}