001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.codec;
021    
022    import java.io.ByteArrayInputStream;
023    import java.io.ByteArrayOutputStream;
024    import java.io.IOException;
025    import java.io.UnsupportedEncodingException;
026    import java.nio.charset.Charset;
027    import java.util.regex.Matcher;
028    import java.util.regex.Pattern;
029    
030    import org.apache.james.mime4j.util.CharsetUtil;
031    
032    /**
033     * Static methods for decoding strings, byte arrays and encoded words.
034     */
035    public class DecoderUtil {
036    
037        private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
038                "(.*?)=\\?(.+?)\\?(\\w)\\?(.+?)\\?=", Pattern.DOTALL);
039    
040        /**
041         * Decodes a string containing quoted-printable encoded data.
042         *
043         * @param s the string to decode.
044         * @return the decoded bytes.
045         */
046        private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) {
047            ByteArrayOutputStream baos = new ByteArrayOutputStream();
048    
049            try {
050                byte[] bytes = s.getBytes("US-ASCII");
051    
052                QuotedPrintableInputStream is = new QuotedPrintableInputStream(
053                                                   new ByteArrayInputStream(bytes), monitor);
054    
055                int b = 0;
056                while ((b = is.read()) != -1) {
057                    baos.write(b);
058                }
059            } catch (IOException e) {
060                // This should never happen!
061                throw new IllegalStateException(e);
062            }
063    
064            return baos.toByteArray();
065        }
066    
067        /**
068         * Decodes a string containing base64 encoded data.
069         *
070         * @param s the string to decode.
071         * @param monitor
072         * @return the decoded bytes.
073         */
074        private static byte[] decodeBase64(String s, DecodeMonitor monitor) {
075            ByteArrayOutputStream baos = new ByteArrayOutputStream();
076    
077            try {
078                byte[] bytes = s.getBytes("US-ASCII");
079    
080                Base64InputStream is = new Base64InputStream(
081                                            new ByteArrayInputStream(bytes), monitor);
082    
083                int b = 0;
084                while ((b = is.read()) != -1) {
085                    baos.write(b);
086                }
087            } catch (IOException e) {
088                // This should never happen!
089                throw new IllegalStateException(e);
090            }
091    
092            return baos.toByteArray();
093        }
094    
095        /**
096         * Decodes an encoded text encoded with the 'B' encoding (described in
097         * RFC 2047) found in a header field body.
098         *
099         * @param encodedText the encoded text to decode.
100         * @param charset the Java charset to use.
101         * @param monitor
102         * @return the decoded string.
103         * @throws UnsupportedEncodingException if the given Java charset isn't
104         *         supported.
105         */
106        static String decodeB(String encodedText, String charset, DecodeMonitor monitor)
107                throws UnsupportedEncodingException {
108            byte[] decodedBytes = decodeBase64(encodedText, monitor);
109            return new String(decodedBytes, charset);
110        }
111    
112        /**
113         * Decodes an encoded text encoded with the 'Q' encoding (described in
114         * RFC 2047) found in a header field body.
115         *
116         * @param encodedText the encoded text to decode.
117         * @param charset the Java charset to use.
118         * @return the decoded string.
119         * @throws UnsupportedEncodingException if the given Java charset isn't
120         *         supported.
121         */
122        static String decodeQ(String encodedText, String charset, DecodeMonitor monitor)
123                throws UnsupportedEncodingException {
124            encodedText = replaceUnderscores(encodedText);
125    
126            byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor);
127            return new String(decodedBytes, charset);
128        }
129    
130        static String decodeEncodedWords(String body)  {
131            return decodeEncodedWords(body, DecodeMonitor.SILENT);
132        }
133    
134        /**
135         * Decodes a string containing encoded words as defined by RFC 2047. Encoded
136         * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
137         * or 'q' for quoted-printable and 'B' or 'b' for base64.
138         *
139         * @param body the string to decode
140         * @param monitor the DecodeMonitor to be used.
141         * @return the decoded string.
142         * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
143         */
144        public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
145            int tailIndex = 0;
146            boolean lastMatchValid = false;
147    
148            StringBuilder sb = new StringBuilder();
149    
150            for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
151                String separator = matcher.group(1);
152                String mimeCharset = matcher.group(2);
153                String encoding = matcher.group(3);
154                String encodedText = matcher.group(4);
155    
156                String decoded = null;
157                decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
158                if (decoded == null) {
159                    sb.append(matcher.group(0));
160                } else {
161                    if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
162                        sb.append(separator);
163                    }
164                    sb.append(decoded);
165                }
166    
167                tailIndex = matcher.end();
168                lastMatchValid = decoded != null;
169            }
170    
171            if (tailIndex == 0) {
172                return body;
173            } else {
174                sb.append(body.substring(tailIndex));
175                return sb.toString();
176            }
177        }
178    
179        // return null on error
180        private static String tryDecodeEncodedWord(final String mimeCharset,
181                final String encoding, final String encodedText, final DecodeMonitor monitor) {
182            Charset charset = CharsetUtil.lookup(mimeCharset);
183            if (charset == null) {
184                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
185                        "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
186                return null;
187            }
188    
189            if (encodedText.length() == 0) {
190                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
191                        "Missing encoded text in encoded word");
192                return null;
193            }
194    
195            try {
196                if (encoding.equalsIgnoreCase("Q")) {
197                    return DecoderUtil.decodeQ(encodedText, charset.name(), monitor);
198                } else if (encoding.equalsIgnoreCase("B")) {
199                    return DecoderUtil.decodeB(encodedText, charset.name(), monitor);
200                } else {
201                    monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
202                            "Warning: Unknown encoding in encoded word");
203                    return null;
204                }
205            } catch (UnsupportedEncodingException e) {
206                // should not happen because of isDecodingSupported check above
207                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
208                        "Unsupported encoding (", e.getMessage(), ") in encoded word");
209                return null;
210            } catch (RuntimeException e) {
211                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
212                        "Could not decode (", e.getMessage(), ") encoded word");
213                return null;
214            }
215        }
216    
217        private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding,
218                String encodedText, String dropDesc, String... strings) throws IllegalArgumentException {
219            if (monitor.isListening()) {
220                String encodedWord = recombine(mimeCharset, encoding, encodedText);
221                StringBuilder text = new StringBuilder();
222                for (String str : strings) {
223                    text.append(str);
224                }
225                text.append(" (");
226                text.append(encodedWord);
227                text.append(")");
228                String exceptionDesc = text.toString();
229                if (monitor.warn(exceptionDesc, dropDesc))
230                    throw new IllegalArgumentException(text.toString());
231            }
232        }
233    
234        private static String recombine(final String mimeCharset,
235                final String encoding, final String encodedText) {
236            return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
237        }
238    
239        // Replace _ with =20
240        private static String replaceUnderscores(String str) {
241            // probably faster than String#replace(CharSequence, CharSequence)
242    
243            StringBuilder sb = new StringBuilder(128);
244    
245            for (int i = 0; i < str.length(); i++) {
246                char c = str.charAt(i);
247                if (c == '_') {
248                    sb.append("=20");
249                } else {
250                    sb.append(c);
251                }
252            }
253    
254            return sb.toString();
255        }
256    }