001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.build.AbstractStreamBuilder;
031
032/**
033 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
034 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
035 * configurable.
036 * <p>
037 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
038 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
039 * memory.
040 * </p>
041 * <p>
042 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
043 * use case, the use of buffering may still further improve performance. For example:
044 * </p>
045 * <pre>
046 * new BufferedInputStream(new GzipInputStream(new MemoryMappedFileInputStream(path)))
047 * </pre>
048 * <p>
049 * should outperform:
050 * </p>
051 * <pre>
052 * new GzipInputStream(new MemoryMappedFileInputStream(path))
053 * </pre>
054 *
055 * @since 2.12.0
056 */
057public final class MemoryMappedFileInputStream extends InputStream {
058
059    /**
060     * Builds a new {@link MemoryMappedFileInputStream} instance.
061     * <p>
062     * For example:
063     * </p>
064     * <pre>{@code
065     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
066     *   .setPath(path)
067     *   .setBufferSize(256 * 1024)
068     *   .get()}
069     * </pre>
070     * <p>
071     * @since 2.12.0
072     */
073    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
074
075        public Builder() {
076            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
077            setBufferSize(DEFAULT_BUFFER_SIZE);
078        }
079
080        /**
081         * Constructs a new instance.
082         *
083         * @throws UnsupportedOperationException if the origin cannot be converted to a Path.
084         */
085        @Override
086        public MemoryMappedFileInputStream get() throws IOException {
087            return new MemoryMappedFileInputStream(getOrigin().getPath(), getBufferSize());
088        }
089    }
090
091    /**
092     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
093     * Increasing the value beyond the default size will generally not provide any increase in throughput.
094     */
095    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
096
097    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
098
099    /**
100     * Constructs a new {@link Builder}.
101     *
102     * @return a new {@link Builder}.
103     * @since 2.12.0
104     */
105    public static Builder builder() {
106        return new Builder();
107    }
108
109    private final int bufferSize;
110    private final FileChannel channel;
111    private ByteBuffer buffer = EMPTY_BUFFER;
112    private boolean closed;
113
114    /**
115     * The starting position (within the file) of the next sliding buffer.
116     */
117    private long nextBufferPosition;
118
119    /**
120     * Constructs a new instance.
121     *
122     * @param file The path of the file to open.
123     * @param bufferSize Size of the sliding buffer.
124     * @throws IOException If an I/O error occurs.
125     */
126    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
127        this.bufferSize = bufferSize;
128        this.channel = FileChannel.open(file, StandardOpenOption.READ);
129    }
130
131    @Override
132    public int available() throws IOException {
133        return buffer.remaining();
134    }
135
136    private void cleanBuffer() {
137        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
138            ByteBufferCleaner.clean(buffer);
139        }
140    }
141
142    @Override
143    public void close() throws IOException {
144        if (!closed) {
145            cleanBuffer();
146            buffer = null;
147            channel.close();
148            closed = true;
149        }
150    }
151
152    private void ensureOpen() throws IOException {
153        if (closed) {
154            throw new IOException("Stream closed");
155        }
156    }
157
158    int getBufferSize() {
159        return bufferSize;
160    }
161
162    private void nextBuffer() throws IOException {
163        final long remainingInFile = channel.size() - nextBufferPosition;
164        if (remainingInFile > 0) {
165            final long amountToMap = Math.min(remainingInFile, bufferSize);
166            cleanBuffer();
167            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
168            nextBufferPosition += amountToMap;
169        } else {
170            buffer = EMPTY_BUFFER;
171        }
172    }
173
174    @Override
175    public int read() throws IOException {
176        ensureOpen();
177        if (!buffer.hasRemaining()) {
178            nextBuffer();
179            if (!buffer.hasRemaining()) {
180                return EOF;
181            }
182        }
183        return Short.toUnsignedInt(buffer.get());
184    }
185
186    @Override
187    public int read(final byte[] b, final int off, final int len) throws IOException {
188        ensureOpen();
189        if (!buffer.hasRemaining()) {
190            nextBuffer();
191            if (!buffer.hasRemaining()) {
192                return EOF;
193            }
194        }
195        final int numBytes = Math.min(buffer.remaining(), len);
196        buffer.get(b, off, numBytes);
197        return numBytes;
198    }
199
200    @Override
201    public long skip(final long n) throws IOException {
202        ensureOpen();
203        if (n <= 0) {
204            return 0;
205        }
206        if (n <= buffer.remaining()) {
207            buffer.position((int) (buffer.position() + n));
208            return n;
209        }
210        final long remainingInFile = channel.size() - nextBufferPosition;
211        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
212        nextBufferPosition += skipped - buffer.remaining();
213        nextBuffer();
214        return skipped;
215    }
216
217}