001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.field.address;
021    
022    import java.util.ArrayList;
023    import java.util.BitSet;
024    import java.util.Collections;
025    import java.util.List;
026    
027    import org.apache.james.mime4j.codec.DecodeMonitor;
028    import org.apache.james.mime4j.codec.DecoderUtil;
029    import org.apache.james.mime4j.dom.address.Address;
030    import org.apache.james.mime4j.dom.address.AddressList;
031    import org.apache.james.mime4j.dom.address.DomainList;
032    import org.apache.james.mime4j.dom.address.Group;
033    import org.apache.james.mime4j.dom.address.Mailbox;
034    import org.apache.james.mime4j.stream.ParserCursor;
035    import org.apache.james.mime4j.stream.RawFieldParser;
036    import org.apache.james.mime4j.util.ByteSequence;
037    import org.apache.james.mime4j.util.CharsetUtil;
038    import org.apache.james.mime4j.util.ContentUtil;
039    
040    /**
041     * Lenient (tolerant to non-critical format violations) builder for {@link Address}
042     * and its subclasses.
043     */
044    public class LenientAddressBuilder {
045    
046        private static final int AT                = '@';
047        private static final int OPENING_BRACKET   = '<';
048        private static final int CLOSING_BRACKET   = '>';
049        private static final int COMMA             = ',';
050        private static final int COLON             = ':';
051        private static final int SEMICOLON         = ';';
052    
053        private static final BitSet AT_AND_CLOSING_BRACKET = RawFieldParser.INIT_BITSET(AT, CLOSING_BRACKET);
054        private static final BitSet CLOSING_BRACKET_ONLY   = RawFieldParser.INIT_BITSET(CLOSING_BRACKET);
055        private static final BitSet COMMA_ONLY             = RawFieldParser.INIT_BITSET(COMMA);
056        private static final BitSet COLON_ONLY             = RawFieldParser.INIT_BITSET(COLON);
057        private static final BitSet SEMICOLON_ONLY         = RawFieldParser.INIT_BITSET(SEMICOLON);
058    
059        public static final LenientAddressBuilder DEFAULT = new LenientAddressBuilder(DecodeMonitor.SILENT);
060    
061        private final DecodeMonitor monitor;
062        private final RawFieldParser parser;
063    
064        protected LenientAddressBuilder(final DecodeMonitor monitor) {
065            super();
066            this.monitor = monitor;
067            this.parser = new RawFieldParser();
068        }
069    
070        String parseDomain(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
071            StringBuilder dst = new StringBuilder();
072            while (!cursor.atEnd()) {
073                char current = (char) (buf.byteAt(cursor.getPos()) & 0xff);
074                if (delimiters != null && delimiters.get(current)) {
075                    break;
076                } else if (CharsetUtil.isWhitespace(current)) {
077                    this.parser.skipWhiteSpace(buf, cursor);
078                } else if (current == '(') {
079                    this.parser.skipComment(buf, cursor);
080                } else {
081                    this.parser.copyContent(buf, cursor, delimiters, dst);
082                }
083            }
084            return dst.toString();
085        }
086    
087        DomainList parseRoute(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
088            BitSet bitset = RawFieldParser.INIT_BITSET(COMMA, COLON);
089            if (delimiters != null) {
090                bitset.or(delimiters);
091            }
092            List<String> domains = null;
093            for (;;) {
094                this.parser.skipAllWhiteSpace(buf, cursor);
095                if (cursor.atEnd()) {
096                    break;
097                }
098                int pos = cursor.getPos();
099                int current = (char) (buf.byteAt(pos) & 0xff);
100                if (current == AT) {
101                    cursor.updatePos(pos + 1);
102                } else {
103                    break;
104                }
105                String s = parseDomain(buf, cursor, bitset);
106                if (s != null && s.length() > 0) {
107                    if (domains == null) {
108                        domains = new ArrayList<String>();
109                    }
110                    domains.add(s);
111                }
112                if (cursor.atEnd()) {
113                    break;
114                }
115                pos = cursor.getPos();
116                current = (char) (buf.byteAt(pos) & 0xff);
117                if (current == COMMA) {
118                    cursor.updatePos(pos + 1);
119                    continue;
120                } else if (current == COLON) {
121                    cursor.updatePos(pos + 1);
122                    break;
123                } else {
124                    break;
125                }
126            }
127            return domains != null ? new DomainList(domains, true) : null;
128        }
129    
130        private Mailbox createMailbox(
131                final String name, final DomainList route, final String localPart, final String domain) {
132            return new Mailbox(
133                    name != null ? DecoderUtil.decodeEncodedWords(name, this.monitor) : null, 
134                            route, localPart, domain);
135        }
136        
137        Mailbox parseMailboxAddress(
138                final String openingText, final ByteSequence buf, final ParserCursor cursor) {
139            if (cursor.atEnd()) {
140                return createMailbox(null, null, openingText, null);
141            }
142            int pos = cursor.getPos();
143            char current = (char) (buf.byteAt(pos) & 0xff);
144            if (current == OPENING_BRACKET) {
145                cursor.updatePos(pos + 1);
146            } else {
147                return createMailbox(null, null, openingText, null);
148            }
149            DomainList domainList = parseRoute(buf, cursor, CLOSING_BRACKET_ONLY);
150            String localPart = this.parser.parseValue(buf, cursor, AT_AND_CLOSING_BRACKET);
151            if (cursor.atEnd()) {
152                return createMailbox(openingText, domainList, localPart, null);
153            }
154            pos = cursor.getPos();
155            current = (char) (buf.byteAt(pos) & 0xff);
156            if (current == AT) {
157                cursor.updatePos(pos + 1);
158            } else {
159                return createMailbox(openingText, domainList, localPart, null);
160            }
161            String domain = parseDomain(buf, cursor, CLOSING_BRACKET_ONLY);
162            if (cursor.atEnd()) {
163                return createMailbox(openingText, domainList, localPart, domain);
164            }
165            pos = cursor.getPos();
166            current = (char) (buf.byteAt(pos) & 0xff);
167            if (current == CLOSING_BRACKET) {
168                cursor.updatePos(pos + 1);
169            } else {
170                return createMailbox(openingText, domainList, localPart, domain);
171            }
172            while (!cursor.atEnd()) {
173                pos = cursor.getPos();
174                current = (char) (buf.byteAt(pos) & 0xff);
175                if (CharsetUtil.isWhitespace(current)) {
176                    this.parser.skipWhiteSpace(buf, cursor);
177                } else if (current == '(') {
178                    this.parser.skipComment(buf, cursor);
179                } else {
180                    break;
181                }
182            }
183            return createMailbox(openingText, domainList, localPart, domain);
184        }
185    
186        private Mailbox createMailbox(final String localPart) {
187            if (localPart != null && localPart.length() > 0) {
188                return new Mailbox(null, null, localPart, null);
189            } else {
190                return null;
191            }
192        }
193    
194        public Mailbox parseMailbox(
195                final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
196            BitSet bitset = RawFieldParser.INIT_BITSET(AT, OPENING_BRACKET);
197            if (delimiters != null) {
198                bitset.or(delimiters);
199            }
200            String openingText = this.parser.parseValue(buf, cursor, bitset);
201            if (cursor.atEnd()) {
202                return createMailbox(openingText);
203            }
204            int pos = cursor.getPos();
205            char current = (char) (buf.byteAt(pos) & 0xff);
206            if (current == OPENING_BRACKET) {
207                // name <localPart @ domain> form
208                return parseMailboxAddress(openingText, buf, cursor);
209            } else if (current == AT) {
210                // localPart @ domain form
211                cursor.updatePos(pos + 1);
212                String localPart = openingText;
213                String domain = parseDomain(buf, cursor, delimiters);
214                return new Mailbox(null, null, localPart, domain);
215            } else {
216                return createMailbox(openingText);
217            }
218        }
219    
220        public Mailbox parseMailbox(final String text) {
221            ByteSequence raw = ContentUtil.encode(text);
222            ParserCursor cursor = new ParserCursor(0, text.length());
223            return parseMailbox(raw, cursor, null);
224        }
225    
226        List<Mailbox> parseMailboxes(
227                final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
228            BitSet bitset = RawFieldParser.INIT_BITSET(COMMA);
229            if (delimiters != null) {
230                bitset.or(delimiters);
231            }
232            List<Mailbox> mboxes = new ArrayList<Mailbox>();
233            while (!cursor.atEnd()) {
234                int pos = cursor.getPos();
235                int current = (char) (buf.byteAt(pos) & 0xff);
236                if (delimiters != null && delimiters.get(current)) {
237                    break;
238                } else if (current == COMMA) {
239                    cursor.updatePos(pos + 1);
240                } else {
241                    Mailbox mbox = parseMailbox(buf, cursor, bitset);
242                    if (mbox != null) {
243                        mboxes.add(mbox);
244                    }
245                }
246            }
247            return mboxes;
248        }
249    
250        public Group parseGroup(final ByteSequence buf, final ParserCursor cursor) {
251            String name = this.parser.parseToken(buf, cursor, COLON_ONLY);
252            if (cursor.atEnd()) {
253                return new Group(name, Collections.<Mailbox>emptyList());
254            }
255            int pos = cursor.getPos();
256            int current = (char) (buf.byteAt(pos) & 0xff);
257            if (current == COLON) {
258                cursor.updatePos(pos + 1);
259            }
260            List<Mailbox> mboxes = parseMailboxes(buf, cursor, SEMICOLON_ONLY);
261            return new Group(name, mboxes);
262        }
263    
264        public Group parseGroup(final String text) {
265            ByteSequence raw = ContentUtil.encode(text);
266            ParserCursor cursor = new ParserCursor(0, text.length());
267            return parseGroup(raw, cursor);
268        }
269    
270        public Address parseAddress(
271                final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
272            BitSet bitset = RawFieldParser.INIT_BITSET(COLON, AT, OPENING_BRACKET);
273            if (delimiters != null) {
274                bitset.or(delimiters);
275            }
276            String openingText = this.parser.parseValue(buf, cursor, bitset);
277            if (cursor.atEnd()) {
278                return createMailbox(openingText);
279            }
280            int pos = cursor.getPos();
281            char current = (char) (buf.byteAt(pos) & 0xff);
282            if (current == OPENING_BRACKET) {
283                // name <localPart @ domain> form
284                return parseMailboxAddress(openingText, buf, cursor);
285            } else if (current == AT) {
286                // localPart @ domain form
287                cursor.updatePos(pos + 1);
288                String localPart = openingText;
289                String domain = parseDomain(buf, cursor, delimiters);
290                return new Mailbox(null, null, localPart, domain);
291            } else if (current == COLON) {
292                // group-name: localPart @ domain, name <localPart @ domain>; form
293                cursor.updatePos(pos + 1);
294                String name = openingText;
295                List<Mailbox> mboxes = parseMailboxes(buf, cursor, SEMICOLON_ONLY);
296                if (!cursor.atEnd()) {
297                    pos = cursor.getPos();
298                    current = (char) (buf.byteAt(pos) & 0xff);
299                    if (current == SEMICOLON) {
300                        cursor.updatePos(pos + 1);
301                    }
302                }
303                return new Group(name, mboxes);
304            } else {
305                return createMailbox(openingText);
306            }
307        }
308    
309        public Address parseAddress(final String text) {
310            ByteSequence raw = ContentUtil.encode(text);
311            ParserCursor cursor = new ParserCursor(0, text.length());
312            return parseAddress(raw, cursor, null);
313        }
314    
315        public AddressList parseAddressList(final ByteSequence buf, final ParserCursor cursor) {
316            List<Address> addresses = new ArrayList<Address>();
317            while (!cursor.atEnd()) {
318                int pos = cursor.getPos();
319                int current = (char) (buf.byteAt(pos) & 0xff);
320                if (current == COMMA) {
321                    cursor.updatePos(pos + 1);
322                } else {
323                    Address address = parseAddress(buf, cursor, COMMA_ONLY);
324                    if (address != null) {
325                        addresses.add(address);
326                    }
327                }
328            }
329            return new AddressList(addresses, false);
330        }
331    
332        public AddressList parseAddressList(final String text) {
333            ByteSequence raw = ContentUtil.encode(text);
334            ParserCursor cursor = new ParserCursor(0, text.length());
335            return parseAddressList(raw, cursor);
336        }
337    
338    }