001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.OutputStream;
023import java.io.UncheckedIOException;
024import java.nio.charset.Charset;
025import java.nio.charset.StandardCharsets;
026import java.nio.file.CopyOption;
027import java.nio.file.Files;
028import java.nio.file.InvalidPathException;
029import java.nio.file.Path;
030import java.nio.file.Paths;
031import java.nio.file.StandardCopyOption;
032import java.util.Map;
033import java.util.UUID;
034import java.util.concurrent.atomic.AtomicInteger;
035
036import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
037import org.apache.commons.io.Charsets;
038import org.apache.commons.io.FileCleaningTracker;
039import org.apache.commons.io.build.AbstractOrigin;
040import org.apache.commons.io.file.PathUtils;
041import org.apache.commons.io.function.Uncheck;
042import org.apache.commons.io.output.DeferredFileOutputStream;
043
044/**
045 * The default implementation of the {@link FileItem FileItem} interface.
046 * <p>
047 * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see
048 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload
049 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
050 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy
051 * with large files.
052 * </p>
053 * <p>
054 * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
055 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
056 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
057 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
058 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
059 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.
060 * </p>
061 */
062public final class DiskFileItem implements FileItem<DiskFileItem> {
063
064    /**
065     * Builds a new {@link DiskFileItem} instance.
066     * <p>
067     * For example:
068     * </p>
069     *
070     * <pre>{@code
071     * final FileItem fileItem = fileItemFactory.fileItemBuilder()
072     *   .setFieldName("FieldName")
073     *   .setContentType("ContentType")
074     *   .setFormField(true)
075     *   .setFileName("FileName")
076     *   .setFileItemHeaders(...)
077     *   .get();
078     * }
079     * </pre>
080     */
081    public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> {
082
083        public Builder() {
084            setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD);
085            setPath(PathUtils.getTempDirectory());
086            setCharset(DEFAULT_CHARSET);
087            setCharsetDefault(DEFAULT_CHARSET);
088        }
089
090        /**
091         * Constructs a new instance.
092         * <p>
093         * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
094         * {@link UnsupportedOperationException}.
095         * </p>
096         *
097         * @return a new instance.
098         * @throws UnsupportedOperationException if the origin cannot provide a Path.
099         * @see AbstractOrigin#getReader(Charset)
100         */
101        @Override
102        public DiskFileItem get() {
103            final DiskFileItem diskFileItem = new DiskFileItem(getFieldName(), getContentType(), isFormField(), getFileName(), getBufferSize(), getPath(),
104                    getFileItemHeaders(), getCharset());
105            final FileCleaningTracker tracker = getFileCleaningTracker();
106            if (tracker != null) {
107                tracker.track(diskFileItem.getTempFile().toFile(), diskFileItem);
108            }
109            return diskFileItem;
110        }
111
112    }
113
114    /**
115     * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a
116     * default charset value of "ISO-8859-1" when received via HTTP.
117     */
118    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
119
120    /**
121     * UID used in unique file name generation.
122     */
123    private static final String UID = UUID.randomUUID().toString().replace('-', '_');
124
125    /**
126     * Counter used in unique identifier generation.
127     */
128    private static final AtomicInteger COUNTER = new AtomicInteger(0);
129
130    /**
131     * Constructs a new {@link Builder}.
132     *
133     * @return a new {@link Builder}.
134     */
135    public static Builder builder() {
136        return new Builder();
137    }
138
139    /**
140     * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any
141     * modifications. Otherwise, throw an {@link InvalidPathException}.
142     *
143     * @param fileName The file name to check
144     * @return Unmodified file name, if valid.
145     * @throws InvalidPathException The file name is invalid.
146     */
147    public static String checkFileName(final String fileName) {
148        if (fileName != null) {
149            // Specific NUL check to build a better exception message.
150            final int indexOf0 = fileName.indexOf(0);
151            if (indexOf0 != -1) {
152                final StringBuilder sb = new StringBuilder();
153                for (int i = 0; i < fileName.length(); i++) {
154                    final char c = fileName.charAt(i);
155                    switch (c) {
156                    case 0:
157                        sb.append("\\0");
158                        break;
159                    default:
160                        sb.append(c);
161                        break;
162                    }
163                }
164                throw new InvalidPathException(fileName, sb.toString(), indexOf0);
165            }
166            // Throws InvalidPathException on invalid file names
167            Paths.get(fileName);
168        }
169        return fileName;
170    }
171
172    /**
173     * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance.
174     *
175     * @return A String with the non-random looking instance identifier.
176     */
177    private static String getUniqueId() {
178        final int limit = 100_000_000;
179        final int current = COUNTER.getAndIncrement();
180        String id = Integer.toString(current);
181
182        // If you manage to get more than 100 million of ids, you'll
183        // start getting ids longer than 8 characters.
184        if (current < limit) {
185            id = ("00000000" + id).substring(id.length());
186        }
187        return id;
188    }
189
190    /**
191     * The name of the form field as provided by the browser.
192     */
193    private String fieldName;
194
195    /**
196     * The content type passed by the browser, or {@code null} if not defined.
197     */
198    private final String contentType;
199
200    /**
201     * Whether or not this item is a simple form field.
202     */
203    private boolean isFormField;
204
205    /**
206     * The original file name in the user's file system.
207     */
208    private final String fileName;
209
210    /**
211     * The size of the item, in bytes. This is used to cache the size when a file item is moved from its original location.
212     */
213    private long size = -1;
214
215    /**
216     * The threshold above which uploads will be stored on disk.
217     */
218    private final int threshold;
219
220    /**
221     * The directory in which uploaded files will be stored, if stored on disk.
222     */
223    private final Path repository;
224
225    /**
226     * Cached contents of the file.
227     */
228    private byte[] cachedContent;
229
230    /**
231     * Output stream for this item.
232     */
233    private DeferredFileOutputStream dfos;
234
235    /**
236     * The temporary file to use.
237     */
238    private final Path tempFile;
239
240    /**
241     * The file items headers.
242     */
243    private FileItemHeaders fileItemHeaders;
244
245    /**
246     * Default content Charset to be used when no explicit Charset parameter is provided by the sender.
247     */
248    private Charset charsetDefault = DEFAULT_CHARSET;
249
250    /**
251     * Constructs a new {@code DiskFileItem} instance.
252     *
253     * @param fieldName       The name of the form field.
254     * @param contentType     The content type passed by the browser or {@code null} if not specified.
255     * @param isFormField     Whether or not this item is a plain form field, as opposed to a file upload.
256     * @param fileName        The original file name in the user's file system, or {@code null} if not specified.
257     * @param threshold       The threshold, in bytes, below which items will be retained in memory and above which they will be stored as a file.
258     * @param repository      The data repository, which is the directory in which files will be created, should the item size exceed the threshold.
259     * @param fileItemHeaders The file item headers.
260     * @param defaultCharset  The default Charset.
261     */
262    private DiskFileItem(final String fieldName, final String contentType, final boolean isFormField, final String fileName, final int threshold,
263            final Path repository, final FileItemHeaders fileItemHeaders, final Charset defaultCharset) {
264        this.fieldName = fieldName;
265        this.contentType = contentType;
266        this.charsetDefault = defaultCharset;
267        this.isFormField = isFormField;
268        this.fileName = fileName;
269        this.fileItemHeaders = fileItemHeaders;
270        this.threshold = threshold;
271        this.repository = repository != null ? repository : PathUtils.getTempDirectory();
272        this.tempFile = this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId()));
273    }
274
275    /**
276     * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is
277     * done at an earlier time, thus preserving system resources.
278     *
279     * @throws IOException if an error occurs.
280     */
281    @Override
282    public DiskFileItem delete() throws IOException {
283        cachedContent = null;
284        final Path outputFile = getPath();
285        if (outputFile != null && !isInMemory() && Files.exists(outputFile)) {
286            Files.delete(outputFile);
287        }
288        return this;
289    }
290
291    /**
292     * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage
293     * and cached.
294     *
295     * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read.
296     * @throws UncheckedIOException if an I/O error occurs.
297     * @throws OutOfMemoryError     See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger
298     *                              that {@code 2GB}
299     */
300    @Override
301    public byte[] get() throws UncheckedIOException {
302        if (isInMemory()) {
303            if (cachedContent == null && dfos != null) {
304                cachedContent = dfos.getData();
305            }
306            return cachedContent != null ? cachedContent.clone() : new byte[0];
307        }
308        return Uncheck.get(() -> Files.readAllBytes(dfos.getFile().toPath()));
309    }
310
311    /**
312     * Gets the content charset passed by the agent or {@code null} if not defined.
313     *
314     * @return The content charset passed by the agent or {@code null} if not defined.
315     */
316    public Charset getCharset() {
317        final ParameterParser parser = new ParameterParser();
318        parser.setLowerCaseNames(true);
319        // Parameter parser can handle null input
320        final Map<String, String> params = parser.parse(getContentType(), ';');
321        return Charsets.toCharset(params.get("charset"), charsetDefault);
322    }
323
324    /**
325     * Gets the default charset for use when no explicit charset parameter is provided by the sender.
326     *
327     * @return the default charset
328     */
329    public Charset getCharsetDefault() {
330        return charsetDefault;
331    }
332
333    /**
334     * Gets the content type passed by the agent or {@code null} if not defined.
335     *
336     * @return The content type passed by the agent or {@code null} if not defined.
337     */
338    @Override
339    public String getContentType() {
340        return contentType;
341    }
342
343    /**
344     * Gets the name of the field in the multipart form corresponding to this file item.
345     *
346     * @return The name of the form field.
347     * @see #setFieldName(String)
348     */
349    @Override
350    public String getFieldName() {
351        return fieldName;
352    }
353
354    /**
355     * Gets the file item headers.
356     *
357     * @return The file items headers.
358     */
359    @Override
360    public FileItemHeaders getHeaders() {
361        return fileItemHeaders;
362    }
363
364    /**
365     * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
366     *
367     * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
368     * @throws IOException if an error occurs.
369     */
370    @Override
371    public InputStream getInputStream() throws IOException {
372        if (!isInMemory()) {
373            return Files.newInputStream(dfos.getFile().toPath());
374        }
375
376        if (cachedContent == null) {
377            cachedContent = dfos.getData();
378        }
379        return new ByteArrayInputStream(cachedContent);
380    }
381
382    /**
383     * Gets the original file name in the client's file system.
384     *
385     * @return The original file name in the client's file system.
386     * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name
387     *                              anyways, catch the exception and use {@link InvalidPathException#getInput()}.
388     */
389    @Override
390    public String getName() {
391        return DiskFileItem.checkFileName(fileName);
392    }
393
394    /**
395     * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
396     *
397     * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
398     */
399    @Override
400    public OutputStream getOutputStream() {
401        if (dfos == null) {
402            dfos = DeferredFileOutputStream.builder().setThreshold(threshold).setOutputFile(getTempFile().toFile()).get();
403        }
404        return dfos;
405    }
406
407    /**
408     * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in
409     * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to new
410     * location without copying the data, if the source and destination locations reside within the same logical volume.
411     *
412     * @return The data file, or {@code null} if the data is stored in memory.
413     */
414    public Path getPath() {
415        if (dfos == null) {
416            return null;
417        }
418        if (isInMemory()) {
419            return null;
420        }
421        return dfos.getFile().toPath();
422    }
423
424    /**
425     * Gets the size of the file.
426     *
427     * @return The size of the file, in bytes.
428     */
429    @Override
430    public long getSize() {
431        if (size >= 0) {
432            return size;
433        }
434        if (cachedContent != null) {
435            return cachedContent.length;
436        }
437        return dfos.getByteCount();
438    }
439
440    /**
441     * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file.
442     * <p>
443     * <b>TODO</b> Consider making this method throw UnsupportedEncodingException.
444     * </p>
445     *
446     * @return The contents of the file, as a string.
447     */
448    @Override
449    public String getString() {
450        return new String(get(), getCharset());
451    }
452
453    /**
454     * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file.
455     *
456     * @param charset The charset to use.
457     * @return The contents of the file, as a string.
458     */
459    @Override
460    public String getString(final Charset charset) throws IOException {
461        return new String(get(), Charsets.toCharset(charset, charsetDefault));
462    }
463
464    /**
465     * Creates and returns a {@link java.io.File File} representing a uniquely named temporary file in the configured repository path. The lifetime of the file
466     * is tied to the lifetime of the {@code FileItem} instance; the file will be deleted when the instance is garbage collected.
467     * <p>
468     * <b>Note: Subclasses that override this method must ensure that they return the same File each time.</b>
469     * </p>
470     *
471     * @return The {@link java.io.File File} to be used for temporary storage.
472     */
473    protected Path getTempFile() {
474        return tempFile;
475    }
476
477    /**
478     * Tests whether or not a {@code FileItem} instance represents a simple form field.
479     *
480     * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
481     * @see #setFormField(boolean)
482     */
483    @Override
484    public boolean isFormField() {
485        return isFormField;
486    }
487
488    /**
489     * Provides a hint as to whether or not the file contents will be read from memory.
490     *
491     * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
492     */
493    @Override
494    public boolean isInMemory() {
495        if (cachedContent != null) {
496            return true;
497        }
498        return dfos.isInMemory();
499    }
500
501    /**
502     * Sets the default charset for use when no explicit charset parameter is provided by the sender.
503     *
504     * @param charset the default charset
505     * @return this
506     */
507    public DiskFileItem setCharsetDefault(final Charset charset) {
508        charsetDefault = charset;
509        return this;
510    }
511
512    /**
513     * Sets the field name used to reference this file item.
514     *
515     * @param fieldName The name of the form field.
516     * @see #getFieldName()
517     */
518    @Override
519    public DiskFileItem setFieldName(final String fieldName) {
520        this.fieldName = fieldName;
521        return this;
522    }
523
524    /**
525     * Specifies whether or not a {@code FileItem} instance represents a simple form field.
526     *
527     * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
528     * @see #isFormField()
529     */
530    @Override
531    public DiskFileItem setFormField(final boolean state) {
532        isFormField = state;
533        return this;
534    }
535
536    /**
537     * Sets the file item headers.
538     *
539     * @param headers The file items headers.
540     */
541    @Override
542    public DiskFileItem setHeaders(final FileItemHeaders headers) {
543        this.fileItemHeaders = headers;
544        return this;
545    }
546
547    /**
548     * Returns a string representation of this object.
549     *
550     * @return a string representation of this object.
551     */
552    @Override
553    public String toString() {
554        return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(),
555                getFieldName());
556    }
557
558    /**
559     * Writes an uploaded item to disk.
560     * <p>
561     * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the
562     * uploaded item to a file.
563     * </p>
564     * <p>
565     * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise,
566     * the data will be copied to the specified file.
567     * </p>
568     * <p>
569     * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method
570     * renames a temporary file, that file will no longer be available to copy or rename again at a later time.
571     * </p>
572     *
573     * @param file The {@code File} into which the uploaded item should be stored.
574     * @throws IOException if an error occurs.
575     */
576    @Override
577    public DiskFileItem write(final Path file) throws IOException {
578        if (isInMemory()) {
579            try (OutputStream fout = Files.newOutputStream(file)) {
580                fout.write(get());
581            } catch (final IOException e) {
582                throw new IOException("Unexpected output data", e);
583            }
584        } else {
585            final Path outputFile = getPath();
586            if (outputFile == null) {
587                /*
588                 * For whatever reason we cannot write the file to disk.
589                 */
590                throw new FileUploadException("Cannot write uploaded file to disk.");
591            }
592            // Save the length of the file
593            size = Files.size(outputFile);
594            //
595            // The uploaded file is being stored on disk in a temporary location so move it to the desired file.
596            //
597            Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING);
598        }
599        return this;
600    }
601}