001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.parser;
021    
022    import java.io.IOException;
023    import java.io.InputStream;
024    
025    import org.apache.james.mime4j.MimeException;
026    import org.apache.james.mime4j.codec.DecodeMonitor;
027    import org.apache.james.mime4j.stream.BodyDescriptor;
028    import org.apache.james.mime4j.stream.BodyDescriptorBuilder;
029    import org.apache.james.mime4j.stream.EntityState;
030    import org.apache.james.mime4j.stream.Field;
031    import org.apache.james.mime4j.stream.MimeConfig;
032    import org.apache.james.mime4j.stream.MimeTokenStream;
033    import org.apache.james.mime4j.stream.RecursionMode;
034    
035    /**
036     * <p>
037     * Parses MIME (or RFC822) message streams of bytes or characters and reports
038     * parsing events to a {@link ContentHandler} instance.
039     * </p>
040     * <p>
041     * Typical usage:<br/>
042     * <pre>
043     *      ContentHandler handler = new MyHandler();
044     *      MimeConfig config = new MimeConfig();
045     *      MimeStreamParser parser = new MimeStreamParser(config);
046     *      parser.setContentHandler(handler);
047     *      InputStream instream = new FileInputStream("mime.msg");
048     *      try {
049     *          parser.parse(instream);
050     *      } finally {
051     *          instream.close();
052     *      }
053     * </pre>
054     */
055    public class MimeStreamParser {
056    
057        private ContentHandler handler = null;
058        private boolean contentDecoding;
059    
060        private final MimeTokenStream mimeTokenStream;
061    
062        public MimeStreamParser(MimeTokenStream tokenStream) {
063            super();
064            this.mimeTokenStream = tokenStream;
065            this.contentDecoding = false;
066        }
067    
068        public MimeStreamParser(
069                final MimeConfig config,
070                final DecodeMonitor monitor,
071                final BodyDescriptorBuilder bodyDescBuilder) {
072            this(new MimeTokenStream(config != null ? config.clone() : new MimeConfig(),
073                    monitor, bodyDescBuilder));
074        }
075    
076        public MimeStreamParser(final MimeConfig config) {
077            this(config, null, null);
078        }
079    
080        public MimeStreamParser() {
081            this(new MimeTokenStream(new MimeConfig(), null, null));
082        }
083    
084        /**
085         * Determines whether this parser automatically decodes body content
086         * based on the on the MIME fields with the standard defaults.
087         */
088        public boolean isContentDecoding() {
089            return contentDecoding;
090        }
091    
092        /**
093         * Defines whether parser should automatically decode body content
094         * based on the on the MIME fields with the standard defaults.
095         */
096        public void setContentDecoding(boolean b) {
097            this.contentDecoding = b;
098        }
099    
100        /**
101         * Parses a stream of bytes containing a MIME message. Please note that if the
102         * {@link MimeConfig} associated with the mime stream returns a not null Content-Type
103         * value from its {@link MimeConfig#getHeadlessParsing()} method, the message is
104         * assumed to have no head section and the headless parsing mode will be used.
105         *
106         * @param instream the stream to parse.
107         * @throws MimeException if the message can not be processed
108         * @throws IOException on I/O errors.
109         */
110        public void parse(InputStream instream) throws MimeException, IOException {
111            MimeConfig config = mimeTokenStream.getConfig();
112            if (config.getHeadlessParsing() != null) {
113                Field contentType = mimeTokenStream.parseHeadless(
114                        instream, config.getHeadlessParsing());
115                handler.startMessage();
116                handler.startHeader();
117                handler.field(contentType);
118                handler.endHeader();
119            } else {
120                mimeTokenStream.parse(instream);
121            }
122            OUTER: for (;;) {
123                EntityState state = mimeTokenStream.getState();
124                switch (state) {
125                    case T_BODY:
126                        BodyDescriptor desc = mimeTokenStream.getBodyDescriptor();
127                        InputStream bodyContent;
128                        if (contentDecoding) {
129                            bodyContent = mimeTokenStream.getDecodedInputStream();
130                        } else {
131                            bodyContent = mimeTokenStream.getInputStream();
132                        }
133                        handler.body(desc, bodyContent);
134                        break;
135                    case T_END_BODYPART:
136                        handler.endBodyPart();
137                        break;
138                    case T_END_HEADER:
139                        handler.endHeader();
140                        break;
141                    case T_END_MESSAGE:
142                        handler.endMessage();
143                        break;
144                    case T_END_MULTIPART:
145                        handler.endMultipart();
146                        break;
147                    case T_END_OF_STREAM:
148                        break OUTER;
149                    case T_EPILOGUE:
150                        handler.epilogue(mimeTokenStream.getInputStream());
151                        break;
152                    case T_FIELD:
153                        handler.field(mimeTokenStream.getField());
154                        break;
155                    case T_PREAMBLE:
156                        handler.preamble(mimeTokenStream.getInputStream());
157                        break;
158                    case T_RAW_ENTITY:
159                        handler.raw(mimeTokenStream.getInputStream());
160                        break;
161                    case T_START_BODYPART:
162                        handler.startBodyPart();
163                        break;
164                    case T_START_HEADER:
165                        handler.startHeader();
166                        break;
167                    case T_START_MESSAGE:
168                        handler.startMessage();
169                        break;
170                    case T_START_MULTIPART:
171                        handler.startMultipart(mimeTokenStream.getBodyDescriptor());
172                        break;
173                    default:
174                        throw new IllegalStateException("Invalid state: " + state);
175                }
176                state = mimeTokenStream.next();
177            }
178        }
179    
180        /**
181         * Determines if this parser is currently in raw mode.
182         *
183         * @return <code>true</code> if in raw mode, <code>false</code>
184         *         otherwise.
185         * @see #setRaw()
186         */
187        public boolean isRaw() {
188            return mimeTokenStream.isRaw();
189        }
190    
191        /**
192         * Enables raw mode. In raw mode all future entities (messages
193         * or body parts) in the stream will be reported to the
194         * {@link ContentHandler#raw(InputStream)} handler method only.
195         * The stream will contain the entire unparsed entity contents
196         * including header fields and whatever is in the body.
197         */
198        public void setRaw() {
199            mimeTokenStream.setRecursionMode(RecursionMode.M_RAW);
200        }
201    
202        /**
203         * Enables flat mode. In flat mode rfc822 parts are not recursively
204         * parsed and multipart content is handled as a single "simple" stream.
205         */
206        public void setFlat() {
207            mimeTokenStream.setRecursionMode(RecursionMode.M_FLAT);
208        }
209    
210        /**
211         * Enables recursive mode. In this mode rfc822 parts are recursively
212         * parsed.
213         */
214        public void setRecurse() {
215            mimeTokenStream.setRecursionMode(RecursionMode.M_RECURSE);
216        }
217    
218        /**
219         * Finishes the parsing and stops reading lines.
220         * NOTE: No more lines will be parsed but the parser
221         * will still call
222         * {@link ContentHandler#endMultipart()},
223         * {@link ContentHandler#endBodyPart()},
224         * {@link ContentHandler#endMessage()}, etc to match previous calls
225         * to
226         * {@link ContentHandler#startMultipart(BodyDescriptor)},
227         * {@link ContentHandler#startBodyPart()},
228         * {@link ContentHandler#startMessage()}, etc.
229         */
230        public void stop() {
231            mimeTokenStream.stop();
232        }
233    
234        /**
235         * Sets the <code>ContentHandler</code> to use when reporting
236         * parsing events.
237         *
238         * @param h the <code>ContentHandler</code>.
239         */
240        public void setContentHandler(ContentHandler h) {
241            this.handler = h;
242        }
243    
244    }