001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.io.Writer;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetDecoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028import java.nio.charset.StandardCharsets;
029
030import org.apache.commons.io.Charsets;
031import org.apache.commons.io.IOUtils;
032import org.apache.commons.io.build.AbstractStreamBuilder;
033import org.apache.commons.io.charset.CharsetDecoders;
034
035/**
036 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
037 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
038 * correctly.
039 * <p>
040 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
041 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
042 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can
043 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
044 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
045 * <p>
046 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2}
047 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
048 *
049 * <pre>
050 * OutputStream out = ...
051 * Charset cs = ...
052 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
053 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);
054 * </pre>
055 *
056 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes
057 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while
058 * {@link WriterOutputStream} pushes it to the underlying stream.
059 * <p>
060 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
061 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
062 * known to represent character data that must be decoded for further use.
063 * </p>
064 * <p>
065 * Instances of {@link WriterOutputStream} are not thread safe.
066 * </p>
067 *
068 * @see org.apache.commons.io.input.ReaderInputStream
069 * @since 2.0
070 */
071public class WriterOutputStream extends OutputStream {
072
073    /**
074     * Builds a new {@link WriterOutputStream} instance.
075     * <p>
076     * For example:
077     * </p>
078     * <pre>{@code
079     * WriterOutputStream s = WriterOutputStream.builder()
080     *   .setPath(path)
081     *   .setBufferSize(8192)
082     *   .setCharset(StandardCharsets.UTF_8)
083     *   .setWriteImmediately(false)
084     *   .get()}
085     * </pre>
086     * <p>
087     * @since 2.12.0
088     */
089    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
090
091        private CharsetDecoder charsetDecoder;
092        private boolean writeImmediately;
093
094        public Builder() {
095            this.charsetDecoder = getCharset().newDecoder();
096        }
097
098        /**
099         * Constructs a new instance.
100         *
101         * @throws UnsupportedOperationException if the origin cannot be converted to a Writer.
102         */
103        @SuppressWarnings("resource")
104        @Override
105        public WriterOutputStream get() throws IOException {
106            return new WriterOutputStream(getOrigin().getWriter(getCharset()), charsetDecoder, getBufferSize(), writeImmediately);
107        }
108
109        @Override
110        public Builder setCharset(final Charset charset) {
111            super.setCharset(charset);
112            this.charsetDecoder = getCharset().newDecoder();
113            return this;
114        }
115
116        @Override
117        public Builder setCharset(final String charset) {
118            super.setCharset(charset);
119            this.charsetDecoder = getCharset().newDecoder();
120            return this;
121        }
122
123        /**
124         * Sets the charset decoder.
125         *
126         * @param charsetDecoder the charset decoder.
127         * @return this
128         */
129        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
130            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
131            super.setCharset(this.charsetDecoder.charset());
132            return this;
133        }
134
135        /**
136         * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying
137         * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
138         * is called.
139         *
140         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
141         *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
142         *                         {@link #flush()} or {@link #close()} is called.
143         * @return this
144         */
145        public Builder setWriteImmediately(final boolean writeImmediately) {
146            this.writeImmediately = writeImmediately;
147            return this;
148        }
149
150    }
151
152    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
153
154    /**
155     * Constructs a new {@link Builder}.
156     *
157     * @return a new {@link Builder}.
158     * @since 2.12.0
159     */
160    public static Builder builder() {
161        return new Builder();
162    }
163
164    /**
165     * Checks if the JDK in use properly supports the given charset.
166     *
167     * @param charset the charset to check the support for
168     */
169    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
170        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
171            return;
172        }
173        final String TEST_STRING_2 = "v\u00e9s";
174        final byte[] bytes = TEST_STRING_2.getBytes(charset);
175
176        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
177        final ByteBuffer bb2 = ByteBuffer.allocate(16);
178        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
179        final int len = bytes.length;
180        for (int i = 0; i < len; i++) {
181            bb2.put(bytes[i]);
182            bb2.flip();
183            try {
184                charsetDecoder2.decode(bb2, cb2, i == len - 1);
185            } catch (final IllegalArgumentException e) {
186                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
187                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
188            }
189            bb2.compact();
190        }
191        cb2.rewind();
192        if (!TEST_STRING_2.equals(cb2.toString())) {
193            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
194                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
195        }
196
197    }
198
199    private final Writer writer;
200    private final CharsetDecoder decoder;
201
202    private final boolean writeImmediately;
203
204    /**
205     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
206     */
207    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
208
209    /**
210     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
211     */
212    private final CharBuffer decoderOut;
213
214    /**
215     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
216     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
217     *
218     * @param writer the target {@link Writer}
219     * @deprecated Use {@link #builder()} instead
220     */
221    @Deprecated
222    public WriterOutputStream(final Writer writer) {
223        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
224    }
225
226    /**
227     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
228     * when it overflows or when {@link #flush()} or {@link #close()} is called.
229     *
230     * @param writer  the target {@link Writer}
231     * @param charset the charset encoding
232     * @deprecated Use {@link #builder()} instead
233     */
234    @Deprecated
235    public WriterOutputStream(final Writer writer, final Charset charset) {
236        this(writer, charset, BUFFER_SIZE, false);
237    }
238
239    /**
240     * Constructs a new {@link WriterOutputStream}.
241     *
242     * @param writer           the target {@link Writer}
243     * @param charset          the charset encoding
244     * @param bufferSize       the size of the output buffer in number of characters
245     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
246     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
247     *                         {@link #flush()} or {@link #close()} is called.
248     * @deprecated Use {@link #builder()} instead
249     */
250    @Deprecated
251    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
252        // @formatter:off
253        this(writer,
254            Charsets.toCharset(charset).newDecoder()
255                    .onMalformedInput(CodingErrorAction.REPLACE)
256                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
257                    .replaceWith("?"),
258             bufferSize,
259             writeImmediately);
260        // @formatter:on
261    }
262
263    /**
264     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
265     * when it overflows or when {@link #flush()} or {@link #close()} is called.
266     *
267     * @param writer  the target {@link Writer}
268     * @param decoder the charset decoder
269     * @since 2.1
270     * @deprecated Use {@link #builder()} instead
271     */
272    @Deprecated
273    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
274        this(writer, decoder, BUFFER_SIZE, false);
275    }
276
277    /**
278     * Constructs a new {@link WriterOutputStream}.
279     *
280     * @param writer           the target {@link Writer}
281     * @param decoder          the charset decoder
282     * @param bufferSize       the size of the output buffer in number of characters
283     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
284     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
285     *                         {@link #flush()} or {@link #close()} is called.
286     * @since 2.1
287     * @deprecated Use {@link #builder()} instead
288     */
289    @Deprecated
290    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
291        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
292        this.writer = writer;
293        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
294        this.writeImmediately = writeImmediately;
295        this.decoderOut = CharBuffer.allocate(bufferSize);
296    }
297
298    /**
299     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
300     * when it overflows or when {@link #flush()} or {@link #close()} is called.
301     *
302     * @param writer      the target {@link Writer}
303     * @param charsetName the name of the charset encoding
304     * @deprecated Use {@link #builder()} instead
305     */
306    @Deprecated
307    public WriterOutputStream(final Writer writer, final String charsetName) {
308        this(writer, charsetName, BUFFER_SIZE, false);
309    }
310
311    /**
312     * Constructs a new {@link WriterOutputStream}.
313     *
314     * @param writer           the target {@link Writer}
315     * @param charsetName      the name of the charset encoding
316     * @param bufferSize       the size of the output buffer in number of characters
317     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
318     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
319     *                         {@link #flush()} or {@link #close()} is called.
320     * @deprecated Use {@link #builder()} instead
321     */
322    @Deprecated
323    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
324        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
325    }
326
327    /**
328     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
329     * {@link Writer#close()} will be called.
330     *
331     * @throws IOException if an I/O error occurs.
332     */
333    @Override
334    public void close() throws IOException {
335        processInput(true);
336        flushOutput();
337        writer.close();
338    }
339
340    /**
341     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
342     * {@link Writer#flush()} will be called.
343     *
344     * @throws IOException if an I/O error occurs.
345     */
346    @Override
347    public void flush() throws IOException {
348        flushOutput();
349        writer.flush();
350    }
351
352    /**
353     * Flush the output.
354     *
355     * @throws IOException if an I/O error occurs.
356     */
357    private void flushOutput() throws IOException {
358        if (decoderOut.position() > 0) {
359            writer.write(decoderOut.array(), 0, decoderOut.position());
360            decoderOut.rewind();
361        }
362    }
363
364    /**
365     * Decode the contents of the input ByteBuffer into a CharBuffer.
366     *
367     * @param endOfInput indicates end of input
368     * @throws IOException if an I/O error occurs.
369     */
370    private void processInput(final boolean endOfInput) throws IOException {
371        // Prepare decoderIn for reading
372        decoderIn.flip();
373        CoderResult coderResult;
374        while (true) {
375            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
376            if (coderResult.isOverflow()) {
377                flushOutput();
378            } else if (coderResult.isUnderflow()) {
379                break;
380            } else {
381                // The decoder is configured to replace malformed input and unmappable characters,
382                // so we should not get here.
383                throw new IOException("Unexpected coder result");
384            }
385        }
386        // Discard the bytes that have been read
387        decoderIn.compact();
388    }
389
390    /**
391     * Write bytes from the specified byte array to the stream.
392     *
393     * @param b the byte array containing the bytes to write
394     * @throws IOException if an I/O error occurs.
395     */
396    @Override
397    public void write(final byte[] b) throws IOException {
398        write(b, 0, b.length);
399    }
400
401    /**
402     * Write bytes from the specified byte array to the stream.
403     *
404     * @param b   the byte array containing the bytes to write
405     * @param off the start offset in the byte array
406     * @param len the number of bytes to write
407     * @throws IOException if an I/O error occurs.
408     */
409    @Override
410    public void write(final byte[] b, int off, int len) throws IOException {
411        while (len > 0) {
412            final int c = Math.min(len, decoderIn.remaining());
413            decoderIn.put(b, off, c);
414            processInput(false);
415            len -= c;
416            off += c;
417        }
418        if (writeImmediately) {
419            flushOutput();
420        }
421    }
422
423    /**
424     * Write a single byte to the stream.
425     *
426     * @param b the byte to write
427     * @throws IOException if an I/O error occurs.
428     */
429    @Override
430    public void write(final int b) throws IOException {
431        write(new byte[] { (byte) b }, 0, 1);
432    }
433}