001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.codec;
021    
022    import java.io.IOException;
023    import java.io.InputStream;
024    
025    import org.apache.james.mime4j.util.ByteArrayBuffer;
026    
027    /**
028     * Performs Quoted-Printable decoding on an underlying stream.
029     */
030    public class QuotedPrintableInputStream extends InputStream {
031    
032        private static final int DEFAULT_BUFFER_SIZE = 1024 * 2;
033    
034        private static final byte EQ = 0x3D;
035        private static final byte CR = 0x0D;
036        private static final byte LF = 0x0A;
037    
038        private final byte[] singleByte = new byte[1];
039    
040        private final InputStream in;
041        private final ByteArrayBuffer decodedBuf;
042        private final ByteArrayBuffer blanks;
043    
044        private final byte[] encoded;
045        private int pos = 0; // current index into encoded buffer
046        private int limit = 0; // current size of encoded buffer
047    
048        private boolean closed;
049    
050        private final DecodeMonitor monitor;
051    
052        public QuotedPrintableInputStream(final InputStream in, DecodeMonitor monitor) {
053            this(DEFAULT_BUFFER_SIZE, in, monitor);
054        }
055    
056        protected QuotedPrintableInputStream(final int bufsize, final InputStream in, DecodeMonitor monitor) {
057            super();
058            this.in = in;
059            this.encoded = new byte[bufsize];
060            this.decodedBuf = new ByteArrayBuffer(512);
061            this.blanks = new ByteArrayBuffer(512);
062            this.closed = false;
063            this.monitor = monitor;
064        }
065    
066        protected QuotedPrintableInputStream(final int bufsize, final InputStream in, boolean strict) {
067            this(bufsize, in, strict ? DecodeMonitor.STRICT : DecodeMonitor.SILENT);
068        }
069    
070        public QuotedPrintableInputStream(final InputStream in, boolean strict) {
071            this(DEFAULT_BUFFER_SIZE, in, strict);
072        }
073    
074        public QuotedPrintableInputStream(final InputStream in) {
075            this(in, false);
076        }
077    
078        /**
079         * Terminates Quoted-Printable coded content. This method does NOT close
080         * the underlying input stream.
081         *
082         * @throws IOException on I/O errors.
083         */
084        @Override
085        public void close() throws IOException {
086            closed = true;
087        }
088    
089        private int fillBuffer() throws IOException {
090            // Compact buffer if needed
091            if (pos < limit) {
092                System.arraycopy(encoded, pos, encoded, 0, limit - pos);
093                limit -= pos;
094                pos = 0;
095            } else {
096                limit = 0;
097                pos = 0;
098            }
099    
100            int capacity = encoded.length - limit;
101            if (capacity > 0) {
102                int bytesRead = in.read(encoded, limit, capacity);
103                if (bytesRead > 0) {
104                    limit += bytesRead;
105                }
106                return bytesRead;
107            } else {
108                return 0;
109            }
110        }
111    
112        private int getnext() {
113            if (pos < limit) {
114                byte b =  encoded[pos];
115                pos++;
116                return b & 0xFF;
117            } else {
118                return -1;
119            }
120        }
121    
122        private int peek(int i) {
123            if (pos + i < limit) {
124                return encoded[pos + i] & 0xFF;
125            } else {
126                return -1;
127            }
128        }
129    
130        private int transfer(
131                final int b, final byte[] buffer, final int from, final int to, boolean keepblanks) throws IOException {
132            int index = from;
133            if (keepblanks && blanks.length() > 0) {
134                int chunk = Math.min(blanks.length(), to - index);
135                System.arraycopy(blanks.buffer(), 0, buffer, index, chunk);
136                index += chunk;
137                int remaining = blanks.length() - chunk;
138                if (remaining > 0) {
139                    decodedBuf.append(blanks.buffer(), chunk, remaining);
140                }
141                blanks.clear();
142            } else if (blanks.length() > 0 && !keepblanks) {
143                StringBuilder sb = new StringBuilder(blanks.length() * 3);
144                for (int i = 0; i < blanks.length(); i++) sb.append(" "+blanks.byteAt(i));
145                if (monitor.warn("ignored blanks", sb.toString()))
146                    throw new IOException("ignored blanks");
147            }
148            if (b != -1) {
149                if (index < to) {
150                    buffer[index++] = (byte) b;
151                } else {
152                    decodedBuf.append(b);
153                }
154            }
155            return index;
156        }
157    
158        private int read0(final byte[] buffer, final int off, final int len) throws IOException {
159            boolean eof = false;
160            int from = off;
161            int to = off + len;
162            int index = off;
163    
164            // check if a previous invocation left decoded content
165            if (decodedBuf.length() > 0) {
166                int chunk = Math.min(decodedBuf.length(), to - index);
167                System.arraycopy(decodedBuf.buffer(), 0, buffer, index, chunk);
168                decodedBuf.remove(0, chunk);
169                index += chunk;
170            }
171    
172            while (index < to) {
173    
174                if (limit - pos < 3) {
175                    int bytesRead = fillBuffer();
176                    eof = bytesRead == -1;
177                }
178    
179                // end of stream?
180                if (limit - pos == 0 && eof) {
181                    return index == from ? -1 : index - from;
182                }
183    
184                boolean lastWasCR = false;
185                while (pos < limit && index < to) {
186                    int b = encoded[pos++] & 0xFF;
187    
188                    if (lastWasCR && b != LF) {
189                        if (monitor.warn("Found CR without LF", "Leaving it as is"))
190                            throw new IOException("Found CR without LF");
191                        index = transfer(CR, buffer, index, to, false);
192                    } else if (!lastWasCR && b == LF) {
193                        if (monitor.warn("Found LF without CR", "Translating to CRLF"))
194                            throw new IOException("Found LF without CR");
195                    }
196    
197                    if (b == CR) {
198                        lastWasCR = true;
199                        continue;
200                    } else {
201                        lastWasCR = false;
202                    }
203    
204                    if (b == LF) {
205                        // at end of line
206                        if (blanks.length() == 0) {
207                            index = transfer(CR, buffer, index, to, false);
208                            index = transfer(LF, buffer, index, to, false);
209                        } else {
210                            if (blanks.byteAt(0) != EQ) {
211                                // hard line break
212                                index = transfer(CR, buffer, index, to, false);
213                                index = transfer(LF, buffer, index, to, false);
214                            }
215                        }
216                        blanks.clear();
217                    } else if (b == EQ) {
218                        if (limit - pos < 2 && !eof) {
219                            // not enough buffered data
220                            pos--;
221                            break;
222                        }
223    
224                        // found special char '='
225                        int b2 = getnext();
226                        if (b2 == EQ) {
227                            index = transfer(b2, buffer, index, to, true);
228                            // deal with '==\r\n' brokenness
229                            int bb1 = peek(0);
230                            int bb2 = peek(1);
231                            if (bb1 == LF || (bb1 == CR && bb2 == LF)) {
232                                monitor.warn("Unexpected ==EOL encountered", "== 0x"+bb1+" 0x"+bb2);
233                                blanks.append(b2);
234                            } else {
235                                monitor.warn("Unexpected == encountered", "==");
236                            }
237                        } else if (Character.isWhitespace((char) b2)) {
238                            // soft line break
239                            index = transfer(-1, buffer, index, to, true);
240                            if (b2 != LF) {
241                                blanks.append(b);
242                                blanks.append(b2);
243                            }
244                        } else {
245                            int b3 = getnext();
246                            int upper = convert(b2);
247                            int lower = convert(b3);
248                            if (upper < 0 || lower < 0) {
249                                monitor.warn("Malformed encoded value encountered", "leaving "+((char) EQ)+((char) b2)+((char) b3)+" as is");
250                                // TODO see MIME4J-160
251                                index = transfer(EQ, buffer, index, to, true);
252                                index = transfer(b2, buffer, index, to, false);
253                                index = transfer(b3, buffer, index, to, false);
254                            } else {
255                                index = transfer((upper << 4) | lower, buffer, index, to, true);
256                            }
257                        }
258                    } else if (Character.isWhitespace(b)) {
259                        blanks.append(b);
260                    } else {
261                        index = transfer((int) b & 0xFF, buffer, index, to, true);
262                    }
263                }
264            }
265            return to - from;
266        }
267    
268        /**
269         * Converts '0' => 0, 'A' => 10, etc.
270         * @param c ASCII character value.
271         * @return Numeric value of hexadecimal character.
272         */
273        private int convert(int c) {
274            if (c >= '0' && c <= '9') {
275                return (c - '0');
276            } else if (c >= 'A' && c <= 'F') {
277                return (0xA + (c - 'A'));
278            } else if (c >= 'a' && c <= 'f') {
279                return (0xA + (c - 'a'));
280            } else {
281                return -1;
282            }
283        }
284    
285        @Override
286        public int read() throws IOException {
287            if (closed) {
288                throw new IOException("Stream has been closed");
289            }
290            for (;;) {
291                int bytes = read(singleByte, 0, 1);
292                if (bytes == -1) {
293                    return -1;
294                }
295                if (bytes == 1) {
296                    return singleByte[0] & 0xff;
297                }
298            }
299        }
300    
301        @Override
302        public int read(byte[] b, int off, int len) throws IOException {
303            if (closed) {
304                throw new IOException("Stream has been closed");
305            }
306            return read0(b, off, len);
307        }
308    
309    }