001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.OutputStream;
022import java.nio.charset.Charset;
023import java.nio.charset.StandardCharsets;
024import java.util.ArrayList;
025import java.util.HashMap;
026import java.util.List;
027import java.util.Locale;
028import java.util.Map;
029import java.util.Objects;
030
031import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
032import org.apache.commons.io.IOUtils;
033
034/**
035 * High level API for processing file uploads.
036 * <p>
037 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
038 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
039 * a given HTML widget.
040 * </p>
041 * <p>
042 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
043 * </p>
044 *
045 * @param <R> The request context type.
046 * @param <I> The FileItem type.
047 * @param <F> the FileItemFactory type.
048 */
049public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
050
051    /**
052     * Boundary parameter key.
053     */
054    private static final String BOUNDARY_KEY = "boundary";
055
056    /**
057     * Name parameter key.
058     */
059    private static final String NAME_KEY = "name";
060
061    /**
062     * File name parameter key.
063     */
064    private static final String FILENAME_KEY = "filename";
065
066    /**
067     * HTTP content type header name.
068     */
069    public static final String CONTENT_TYPE = "Content-type";
070
071    /**
072     * HTTP content disposition header name.
073     */
074    public static final String CONTENT_DISPOSITION = "Content-disposition";
075
076    /**
077     * HTTP content length header name.
078     */
079    public static final String CONTENT_LENGTH = "Content-length";
080
081    /**
082     * Content-disposition value for form data.
083     */
084    public static final String FORM_DATA = "form-data";
085
086    /**
087     * Content-disposition value for file attachment.
088     */
089    public static final String ATTACHMENT = "attachment";
090
091    /**
092     * Part of HTTP content type header.
093     */
094    public static final String MULTIPART = "multipart/";
095
096    /**
097     * HTTP content type header for multipart forms.
098     */
099    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
100
101    /**
102     * HTTP content type header for multiple uploads.
103     */
104    public static final String MULTIPART_MIXED = "multipart/mixed";
105
106    /**
107     * Utility method that determines whether the request contains multipart content.
108     * <p>
109     * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
110     * method is static, it is not possible to provide its replacement until this method is removed.
111     * </p>
112     *
113     * @param ctx The request context to be evaluated. Must be non-null.
114     * @return {@code true} if the request is multipart; {@code false} otherwise.
115     */
116    public static final boolean isMultipartContent(final RequestContext ctx) {
117        final String contentType = ctx.getContentType();
118        if (contentType == null) {
119            return false;
120        }
121        return contentType.toLowerCase(Locale.ENGLISH).startsWith(MULTIPART);
122    }
123
124    /**
125     * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
126     */
127    private long sizeMax = -1;
128
129    /**
130     * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
131     */
132    private long fileSizeMax = -1;
133
134    /**
135     * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
136     */
137    private long fileCountMax = -1;
138
139    /**
140     * The content encoding to use when reading part headers.
141     */
142    private Charset headerCharset;
143
144    /**
145     * The progress listener.
146     */
147    private ProgressListener progressListener = ProgressListener.NOP;
148
149    /**
150     * The factory to use to create new form items.
151     */
152    private F fileItemFactory;
153
154    /**
155     * Gets the boundary from the {@code Content-type} header.
156     *
157     * @param contentType The value of the content type header from which to extract the boundary value.
158     * @return The boundary, as a byte array.
159     */
160    public byte[] getBoundary(final String contentType) {
161        final ParameterParser parser = new ParameterParser();
162        parser.setLowerCaseNames(true);
163        // Parameter parser can handle null input
164        final Map<String, String> params = parser.parse(contentType, new char[] { ';', ',' });
165        final String boundaryStr = params.get(BOUNDARY_KEY);
166        return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
167    }
168
169    /**
170     * Gets the field name from the {@code Content-disposition} header.
171     *
172     * @param headers A {@code Map} containing the HTTP request headers.
173     * @return The field name for the current {@code encapsulation}.
174     */
175    public String getFieldName(final FileItemHeaders headers) {
176        return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
177    }
178
179    /**
180     * Gets the field name, which is given by the content-disposition header.
181     *
182     * @param contentDisposition The content-dispositions header value.
183     * @return The field name.
184     */
185    private String getFieldName(final String contentDisposition) {
186        String fieldName = null;
187        if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ENGLISH).startsWith(FORM_DATA)) {
188            final ParameterParser parser = new ParameterParser();
189            parser.setLowerCaseNames(true);
190            // Parameter parser can handle null input
191            final Map<String, String> params = parser.parse(contentDisposition, ';');
192            fieldName = params.get(NAME_KEY);
193            if (fieldName != null) {
194                fieldName = fieldName.trim();
195            }
196        }
197        return fieldName;
198    }
199
200    /**
201     * Gets the maximum number of files allowed in a single request.
202     *
203     * @return The maximum number of files allowed in a single request.
204     */
205    public long getFileCountMax() {
206        return fileCountMax;
207    }
208
209    /**
210     * Gets the factory class used when creating file items.
211     *
212     * @return The factory class for new file items.
213     */
214    public F getFileItemFactory() {
215        return fileItemFactory;
216    }
217
218    /**
219     * Gets the file name from the {@code Content-disposition} header.
220     *
221     * @param headers The HTTP headers object.
222     *
223     * @return The file name for the current {@code encapsulation}.
224     */
225    public String getFileName(final FileItemHeaders headers) {
226        return getFileName(headers.getHeader(CONTENT_DISPOSITION));
227    }
228
229    /**
230     * Gets the given content-disposition headers file name.
231     *
232     * @param contentDisposition The content-disposition headers value.
233     * @return The file name
234     */
235    private String getFileName(final String contentDisposition) {
236        String fileName = null;
237        if (contentDisposition != null) {
238            final String cdl = contentDisposition.toLowerCase(Locale.ENGLISH);
239            if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
240                final ParameterParser parser = new ParameterParser();
241                parser.setLowerCaseNames(true);
242                // Parameter parser can handle null input
243                final Map<String, String> params = parser.parse(contentDisposition, ';');
244                if (params.containsKey(FILENAME_KEY)) {
245                    fileName = params.get(FILENAME_KEY);
246                    if (fileName != null) {
247                        fileName = fileName.trim();
248                    } else {
249                        // Even if there is no value, the parameter is present,
250                        // so we return an empty file name rather than no file
251                        // name.
252                        fileName = "";
253                    }
254                }
255            }
256        }
257        return fileName;
258    }
259
260    /**
261     * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
262     *
263     * @see #setFileSizeMax(long)
264     * @return Maximum size of a single uploaded file.
265     */
266    public long getFileSizeMax() {
267        return fileSizeMax;
268    }
269
270    /**
271     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
272     * that is also not specified, or {@code null}, the platform default encoding is used.
273     *
274     * @return The encoding used to read part headers.
275     */
276    public Charset getHeaderCharset() {
277        return headerCharset;
278    }
279
280    /**
281     * Gets a file item iterator.
282     *
283     * @param request The servlet request to be parsed.
284     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
285     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
286     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
287     *                             uploaded content.
288     */
289    public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
290
291    /**
292     * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
293     *
294     * @param requestContext The context for the request to be parsed.
295     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
296     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
297     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
298     *                             uploaded content.
299     */
300    public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
301        return new FileItemInputIteratorImpl(this, requestContext);
302    }
303
304    /**
305     * Parses the {@code header-part} and returns as key/value pairs.
306     * <p>
307     * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
308     * </p>
309     *
310     * @param headerPart The {@code header-part} of the current {@code encapsulation}.
311     * @return A {@code Map} containing the parsed HTTP request headers.
312     */
313    public FileItemHeaders getParsedHeaders(final String headerPart) {
314        final int len = headerPart.length();
315        final FileItemHeaders headers = newFileItemHeaders();
316        int start = 0;
317        for (;;) {
318            int end = parseEndOfLine(headerPart, start);
319            if (start == end) {
320                break;
321            }
322            final StringBuilder header = new StringBuilder(headerPart.substring(start, end));
323            start = end + 2;
324            while (start < len) {
325                int nonWs = start;
326                while (nonWs < len) {
327                    final char c = headerPart.charAt(nonWs);
328                    if (c != ' ' && c != '\t') {
329                        break;
330                    }
331                    ++nonWs;
332                }
333                if (nonWs == start) {
334                    break;
335                }
336                // Continuation line found
337                end = parseEndOfLine(headerPart, nonWs);
338                header.append(' ').append(headerPart, nonWs, end);
339                start = end + 2;
340            }
341            parseHeaderLine(headers, header.toString());
342        }
343        return headers;
344    }
345
346    /**
347     * Gets the progress listener.
348     *
349     * @return The progress listener, if any, or null.
350     */
351    public ProgressListener getProgressListener() {
352        return progressListener;
353    }
354
355    /**
356     * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
357     *
358     * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
359     * @see #setSizeMax(long)
360     *
361     */
362    public long getSizeMax() {
363        return sizeMax;
364    }
365
366    /**
367     * Creates a new instance of {@link FileItemHeaders}.
368     *
369     * @return The new instance.
370     */
371    protected FileItemHeaders newFileItemHeaders() {
372        return AbstractFileItemBuilder.newFileItemHeaders();
373    }
374
375    /**
376     * Skips bytes until the end of the current line.
377     *
378     * @param headerPart The headers, which are being parsed.
379     * @param end        Index of the last byte, which has yet been processed.
380     * @return Index of the \r\n sequence, which indicates end of line.
381     */
382    private int parseEndOfLine(final String headerPart, final int end) {
383        int index = end;
384        for (;;) {
385            final int offset = headerPart.indexOf('\r', index);
386            if (offset == -1 || offset + 1 >= headerPart.length()) {
387                throw new IllegalStateException("Expected headers to be terminated by an empty line.");
388            }
389            if (headerPart.charAt(offset + 1) == '\n') {
390                return offset;
391            }
392            index = offset + 1;
393        }
394    }
395
396    /**
397     * Parses the next header line.
398     *
399     * @param headers String with all headers.
400     * @param header  Map where to store the current header.
401     */
402    private void parseHeaderLine(final FileItemHeaders headers, final String header) {
403        final int colonOffset = header.indexOf(':');
404        if (colonOffset == -1) {
405            // This header line is malformed, skip it.
406            return;
407        }
408        final String headerName = header.substring(0, colonOffset).trim();
409        final String headerValue = header.substring(colonOffset + 1).trim();
410        headers.addHeader(headerName, headerValue);
411    }
412
413    /**
414     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
415     *
416     * @param request The servlet request to be parsed.
417     * @return A map of {@code FileItem} instances parsed from the request.
418     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
419     */
420    public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
421
422    /**
423     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
424     *
425     * @param ctx The context for the request to be parsed.
426     * @return A map of {@code FileItem} instances parsed from the request.
427     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
428     */
429    public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
430        final List<I> items = parseRequest(ctx);
431        final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
432
433        for (final I fileItem : items) {
434            final String fieldName = fileItem.getFieldName();
435            final List<I> mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
436            mappedItems.add(fileItem);
437        }
438
439        return itemsMap;
440    }
441
442    /**
443     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
444     *
445     * @param request The servlet request to be parsed.
446     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
447     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
448     */
449    public abstract List<I> parseRequest(R request) throws FileUploadException;
450
451    /**
452     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
453     *
454     * @param requestContext The context for the request to be parsed.
455     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
456     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
457     */
458    public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
459        final List<I> itemList = new ArrayList<>();
460        boolean successful = false;
461        try {
462            final F fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
463            final byte[] buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
464            getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
465                if (itemList.size() == fileCountMax) {
466                    // The next item will exceed the limit.
467                    throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
468                }
469                // Don't use getName() here to prevent an InvalidFileNameException.
470                // @formatter:off
471                final I fileItem = fileItemFactory.fileItemBuilder()
472                    .setFieldName(fileItemInput.getFieldName())
473                    .setContentType(fileItemInput.getContentType())
474                    .setFormField(fileItemInput.isFormField())
475                    .setFileName(fileItemInput.getName())
476                    .setFileItemHeaders(fileItemInput.getHeaders())
477                    .get();
478                // @formatter:on
479                itemList.add(fileItem);
480                try (InputStream inputStream = fileItemInput.getInputStream();
481                        OutputStream outputStream = fileItem.getOutputStream()) {
482                    IOUtils.copyLarge(inputStream, outputStream, buffer);
483                } catch (final FileUploadException e) {
484                    throw e;
485                } catch (final IOException e) {
486                    throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
487                }
488            });
489            successful = true;
490            return itemList;
491        } catch (final FileUploadException e) {
492            throw e;
493        } catch (final IOException e) {
494            throw new FileUploadException(e.getMessage(), e);
495        } finally {
496            if (!successful) {
497                for (final I fileItem : itemList) {
498                    try {
499                        fileItem.delete();
500                    } catch (final Exception ignored) {
501                        // ignored TODO perhaps add to tracker delete failure list somehow?
502                    }
503                }
504            }
505        }
506    }
507
508    /**
509     * Sets the maximum number of files allowed per request.
510     *
511     * @param fileCountMax The new limit. {@code -1} means no limit.
512     */
513    public void setFileCountMax(final long fileCountMax) {
514        this.fileCountMax = fileCountMax;
515    }
516
517    /**
518     * Sets the factory class to use when creating file items.
519     *
520     * @param factory The factory class for new file items.
521     */
522    public void setFileItemFactory(final F factory) {
523        this.fileItemFactory = factory;
524    }
525
526    /**
527     * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
528     *
529     * @see #getFileSizeMax()
530     * @param fileSizeMax Maximum size of a single uploaded file.
531     */
532    public void setFileSizeMax(final long fileSizeMax) {
533        this.fileSizeMax = fileSizeMax;
534    }
535
536    /**
537     * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
538     * used. If that is also not specified, or {@code null}, the platform default encoding is used.
539     *
540     * @param headerCharset The encoding used to read part headers.
541     */
542    public void setHeaderCharset(final Charset headerCharset) {
543        this.headerCharset = headerCharset;
544    }
545
546    /**
547     * Sets the progress listener.
548     *
549     * @param progressListener The progress listener, if any. Defaults to null.
550     */
551    public void setProgressListener(final ProgressListener progressListener) {
552        this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
553    }
554
555    /**
556     * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
557     *
558     * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
559     * @see #getSizeMax()
560     */
561    public void setSizeMax(final long sizeMax) {
562        this.sizeMax = sizeMax;
563    }
564
565}