001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.field;
021    
022    import org.apache.james.mime4j.MimeException;
023    import org.apache.james.mime4j.codec.DecodeMonitor;
024    import org.apache.james.mime4j.dom.FieldParser;
025    import org.apache.james.mime4j.dom.field.AddressListField;
026    import org.apache.james.mime4j.dom.field.ContentDescriptionField;
027    import org.apache.james.mime4j.dom.field.ContentDispositionField;
028    import org.apache.james.mime4j.dom.field.ContentIdField;
029    import org.apache.james.mime4j.dom.field.ContentLanguageField;
030    import org.apache.james.mime4j.dom.field.ContentLengthField;
031    import org.apache.james.mime4j.dom.field.ContentLocationField;
032    import org.apache.james.mime4j.dom.field.ContentMD5Field;
033    import org.apache.james.mime4j.dom.field.ContentTransferEncodingField;
034    import org.apache.james.mime4j.dom.field.ContentTypeField;
035    import org.apache.james.mime4j.dom.field.DateTimeField;
036    import org.apache.james.mime4j.dom.field.FieldName;
037    import org.apache.james.mime4j.dom.field.MailboxField;
038    import org.apache.james.mime4j.dom.field.MailboxListField;
039    import org.apache.james.mime4j.dom.field.MimeVersionField;
040    import org.apache.james.mime4j.dom.field.ParsedField;
041    import org.apache.james.mime4j.dom.field.UnstructuredField;
042    import org.apache.james.mime4j.stream.Field;
043    import org.apache.james.mime4j.stream.RawField;
044    import org.apache.james.mime4j.stream.RawFieldParser;
045    import org.apache.james.mime4j.util.ByteSequence;
046    import org.apache.james.mime4j.util.ContentUtil;
047    
048    /**
049     * Lenient implementation of the {@link FieldParser} interface with a high degree of tolerance
050     * to non-severe MIME field format violations. 
051     */
052    public class LenientFieldParser extends DelegatingFieldParser {
053    
054        private static final FieldParser<ParsedField> PARSER = new LenientFieldParser();
055    
056        /**
057         * Gets the default instance of this class.
058         *
059         * @return the default instance
060         */
061        public static FieldParser<ParsedField> getParser() {
062            return PARSER;
063        }
064    
065        /**
066         * Parses the given byte sequence and returns an instance of the {@link ParsedField} class. 
067         * The type of the class returned depends on the field name; see {@link #parse(String)} for 
068         * a table of field names and their corresponding classes.
069         *
070         * @param raw the bytes to parse.
071         * @param monitor decoding monitor used while parsing/decoding.
072         * @return a parsed field.
073         * @throws MimeException if the raw string cannot be split into field name and body.
074         */
075        public static ParsedField parse(
076                final ByteSequence raw,
077                final DecodeMonitor monitor) throws MimeException {
078            Field rawField = RawFieldParser.DEFAULT.parseField(raw);
079            return PARSER.parse(rawField, monitor);
080        }
081    
082        /**
083         * Parses the given string and returns an instance of the <code>Field</code> class.
084         * The type of the class returned depends on the field name.
085         *
086         * @param rawStr the string to parse.
087         * @param monitor a DecodeMonitor object used while parsing/decoding.
088         * @return a <code>ParsedField</code> instance.
089         * @throws MimeException if the raw string cannot be split into field name and body.
090         */
091        public static ParsedField parse(
092                final String rawStr,
093                final DecodeMonitor monitor) throws MimeException {
094            ByteSequence raw = ContentUtil.encode(rawStr);
095            RawField rawField = RawFieldParser.DEFAULT.parseField(raw);
096            // Do not retain the original raw representation as the field
097            // may require folding
098            return PARSER.parse(rawField, monitor);
099        }
100    
101        /**
102         * Parses the given string and returns an instance of the {@link ParsedField} class. 
103         * The type of the class returned depends on the field name:
104         * <p>
105         * <table>
106         *   <tr><th>Class returned</th><th>Field names</th></tr>
107         *   <tr><td>{@link ContentTypeField}</td><td>Content-Type</td></tr>
108         *   <tr><td>{@link ContentLengthField}</td><td>Content-Length</td></tr>
109         *   <tr><td>{@link ContentTransferEncodingField}</td><td>Content-Transfer-Encoding</td></tr>
110         *   <tr><td>{@link ContentDispositionField}</td><td>Content-Disposition</td></tr>
111         *   <tr><td>{@link ContentDescriptionField}</td><td>Content-Description</td></tr>
112         *   <tr><td>{@link ContentIdField}</td><td>Content-ID</td></tr>
113         *   <tr><td>{@link ContentMD5Field}</td><td>Content-MD5</td></tr>
114         *   <tr><td>{@link ContentLanguageField}</td><td>Content-Language</td></tr>
115         *   <tr><td>{@link ContentLocationField}</td><td>Content-Location</td></tr>
116         *   <tr><td>{@link MimeVersionField}</td><td>MIME-Version</td></tr>
117         *   <tr><td>{@link DateTimeField}</td><td>Date, Resent-Date</td></tr>
118         *   <tr><td>{@link MailboxField}</td><td>Sender, Resent-Sender</td></tr>
119         *   <tr><td>{@link MailboxListField}</td><td>From, Resent-From</td></tr>
120         *   <tr><td>{@link AddressListField}</td><td>To, Cc, Bcc, Reply-To, Resent-To, Resent-Cc, Resent-Bcc</td></tr>
121         *   <tr><td>{@link UnstructuredField}</td><td>Subject and others</td></tr>
122         * </table>
123         *
124         * @param rawStr the string to parse.
125         * @return a parsed field.
126         * @throws MimeException if the raw string cannot be split into field name and body.
127         */
128        public static ParsedField parse(final String rawStr) throws MimeException {
129            return parse(rawStr, DecodeMonitor.SILENT);
130        }
131    
132        public LenientFieldParser() {
133            super(UnstructuredFieldImpl.PARSER);
134            setFieldParser(FieldName.CONTENT_TYPE,
135                    ContentTypeFieldLenientImpl.PARSER);        // lenient
136            setFieldParser(FieldName.CONTENT_LENGTH,
137                    ContentLengthFieldImpl.PARSER);             // default
138            setFieldParser(FieldName.CONTENT_TRANSFER_ENCODING,
139                    ContentTransferEncodingFieldImpl.PARSER);   // default
140            setFieldParser(FieldName.CONTENT_DISPOSITION,
141                    ContentDispositionFieldLenientImpl.PARSER); // lenient
142            setFieldParser(FieldName.CONTENT_ID,
143                    ContentIdFieldImpl.PARSER);                 // default
144            setFieldParser(FieldName.CONTENT_MD5,
145                    ContentMD5FieldImpl.PARSER);                // default
146            setFieldParser(FieldName.CONTENT_DESCRIPTION,
147                    ContentDescriptionFieldImpl.PARSER);        // default
148            setFieldParser(FieldName.CONTENT_LANGUAGE,
149                    ContentLanguageFieldLenientImpl.PARSER);    // lenient
150            setFieldParser(FieldName.CONTENT_LOCATION,
151                    ContentLocationFieldLenientImpl.PARSER);    // lenient
152            setFieldParser(FieldName.MIME_VERSION,
153                    MimeVersionFieldImpl.PARSER);               // lenient
154    
155            FieldParser<DateTimeField> dateTimeParser = DateTimeFieldLenientImpl.PARSER;
156            setFieldParser(FieldName.DATE, dateTimeParser);
157            setFieldParser(FieldName.RESENT_DATE, dateTimeParser);
158    
159            FieldParser<MailboxListField> mailboxListParser = MailboxListFieldLenientImpl.PARSER;
160            setFieldParser(FieldName.FROM, mailboxListParser);
161            setFieldParser(FieldName.RESENT_FROM, mailboxListParser);
162    
163            FieldParser<MailboxField> mailboxParser = MailboxFieldLenientImpl.PARSER;
164            setFieldParser(FieldName.SENDER, mailboxParser);
165            setFieldParser(FieldName.RESENT_SENDER, mailboxParser);
166    
167            FieldParser<AddressListField> addressListParser = AddressListFieldLenientImpl.PARSER;
168            setFieldParser(FieldName.TO, addressListParser);
169            setFieldParser(FieldName.RESENT_TO, addressListParser);
170            setFieldParser(FieldName.CC, addressListParser);
171            setFieldParser(FieldName.RESENT_CC, addressListParser);
172            setFieldParser(FieldName.BCC, addressListParser);
173            setFieldParser(FieldName.RESENT_BCC, addressListParser);
174            setFieldParser(FieldName.REPLY_TO, addressListParser);
175        }
176    
177    }