001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    
021    package org.apache.james.jspf.parser;
022    
023    import org.apache.james.jspf.core.Logger;
024    import org.apache.james.jspf.core.SPF1Constants;
025    import org.apache.james.jspf.core.SPF1Record;
026    import org.apache.james.jspf.core.SPFRecordParser;
027    import org.apache.james.jspf.core.exceptions.NeutralException;
028    import org.apache.james.jspf.core.exceptions.NoneException;
029    import org.apache.james.jspf.core.exceptions.PermErrorException;
030    import org.apache.james.jspf.terms.Configuration;
031    import org.apache.james.jspf.terms.Directive;
032    import org.apache.james.jspf.terms.Mechanism;
033    import org.apache.james.jspf.terms.Modifier;
034    
035    import java.util.ArrayList;
036    import java.util.Collection;
037    import java.util.Collections;
038    import java.util.Iterator;
039    import java.util.List;
040    import java.util.regex.Matcher;
041    import java.util.regex.Pattern;
042    
043    /**
044     * This class is used to parse SPF1-Records from their textual form to an
045     * SPF1Record object that is composed by 2 collections: directives and
046     * modifiers.
047     * 
048     * The parsing is modular and get informations from Mechanism and Modifiers
049     * classes declared in the org/apache/james/jspf/parser/jspf.default.terms file.
050     * 
051     * Each term implementation provide its own REGEX in the REGEX static public
052     * field. This parser simply join all the regexp in a single "alternative"
053     * pattern and count the number of catch groups (brackets) assigned to each
054     * regex fragment.
055     * 
056     * SO it creates a big regex and an array where it store what term is associated
057     * to each catch group of the big regex.
058     * 
059     * If the regex matches the input vspf1 record then it start looking for the
060     * matched group (not null) and lookup the term that created that part of the
061     * regex.
062     * 
063     * With this informations it creates a new instance of the term and, if the term
064     * is ConfigurationEnabled it calls the config() method passing to it only the specific
065     * subset of the MatchResult (using the MatchResultSubset).
066     * 
067     * TODO doubts about the specification - redirect or exp with no domain-spec are
068     * evaluated as an unknown-modifiers according to the current spec (it does not
069     * make too much sense) - top-label is defined differently in various specs.
070     * We'll have to review the code. -
071     * http://data.iana.org/TLD/tlds-alpha-by-domain.txt (we should probably beeter
072     * use and alpha sequence being at least 2 chars - Somewhere is defined as "."
073     * TLD [ "." ] - Otherwise defined as ( *alphanum ALPHA *alphanum ) / (
074     * 1*alphanum "-" *( * alphanum / "-" ) alphanum )
075     * 
076     * @see org.apache.james.jspf.core.SPF1Record
077     * 
078     */
079    public class RFC4408SPF1Parser implements SPFRecordParser {
080    
081        /**
082         * Regex based on http://www.ietf.org/rfc/rfc4408.txt.
083         * This will be the next official SPF-Spec
084         */
085    
086        // Changed this because C, T and R MACRO_LETTERS are not available 
087        // in record parsing and must return a PermError.
088       
089        // private static final String MACRO_LETTER_PATTERN = "[lsodipvhcrtLSODIPVHCRT]";
090    
091        /**
092         * ABNF: qualifier = "+" / "-" / "?" / "~"
093         */
094        private static final String QUALIFIER_PATTERN = "[" + "\\"
095                + SPF1Constants.PASS + "\\" + SPF1Constants.FAIL + "\\"
096                + SPF1Constants.NEUTRAL + "\\" + SPF1Constants.SOFTFAIL + "]";
097    
098        private Pattern termsSeparatorPattern = null;
099    
100        private Pattern termPattern = null;
101    
102        private int TERM_STEP_REGEX_QUALIFIER_POS;
103    
104        private int TERM_STEP_REGEX_MECHANISM_POS;
105    
106        private int TERM_STEP_REGEX_MODIFIER_POS;
107    
108        private List<TermDefinition> matchResultPositions;
109    
110        private Logger log;
111    
112        private TermsFactory termsFactory;
113    
114        /**
115         * Constructor. Creates all the values needed to run the parsing
116         * 
117         * @param logger the logger to use
118         * @param termsFactory the TermsFactory implementation
119         */
120        public RFC4408SPF1Parser(Logger logger, TermsFactory termsFactory) {
121            this.log = logger;
122            this.termsFactory = termsFactory;
123            
124            /**
125             * ABNF: mechanism = ( all / include / A / MX / PTR / IP4 / IP6 / exists )
126             */
127            String MECHANISM_REGEX = createRegex(termsFactory.getMechanismsCollection());
128    
129            /**
130             * ABNF: modifier = redirect / explanation / unknown-modifier
131             */
132            String MODIFIER_REGEX = "(" + createRegex(termsFactory.getModifiersCollection()) + ")";
133    
134            /**
135             * ABNF: directive = [ qualifier ] mechanism
136             */
137            String DIRECTIVE_REGEX = "(" + QUALIFIER_PATTERN + "?)("
138                    + MECHANISM_REGEX + ")";
139    
140            /**
141             * ABNF: ( directive / modifier )
142             */
143            String TERM_REGEX = "(?:" + MODIFIER_REGEX + "|" + DIRECTIVE_REGEX
144                    + ")";
145    
146            /**
147             * ABNF: 1*SP
148             */
149            String TERMS_SEPARATOR_REGEX = "[ ]+";
150    
151            termsSeparatorPattern = Pattern.compile(TERMS_SEPARATOR_REGEX);
152            termPattern = Pattern.compile(TERM_REGEX);
153    
154            initializePositions();
155        }
156    
157        /**
158         * Fill in the matchResultPositions ArrayList. This array simply map each
159         * regex matchgroup to the Term class that originated that part of the
160         * regex.
161         */
162        private void initializePositions() {
163            ArrayList<TermDefinition> matchResultPositions = new ArrayList<TermDefinition>();
164    
165            // FULL MATCH
166            int posIndex = 0;
167            matchResultPositions.ensureCapacity(posIndex + 1);
168            matchResultPositions.add(posIndex, null);
169    
170            Iterator<TermDefinition> i;
171    
172            TERM_STEP_REGEX_MODIFIER_POS = ++posIndex;
173            matchResultPositions.ensureCapacity(posIndex + 1);
174            matchResultPositions.add(TERM_STEP_REGEX_MODIFIER_POS, null);
175            i = termsFactory.getModifiersCollection().iterator();
176            while (i.hasNext()) {
177                TermDefinition td = i.next();
178                int size = td.getMatchSize() + 1;
179                for (int k = 0; k < size; k++) {
180                    posIndex++;
181                    matchResultPositions.ensureCapacity(posIndex + 1);
182                    matchResultPositions.add(posIndex, td);
183                }
184            }
185    
186            TERM_STEP_REGEX_QUALIFIER_POS = ++posIndex;
187            matchResultPositions.ensureCapacity(posIndex + 1);
188            matchResultPositions.add(posIndex, null);
189    
190            TERM_STEP_REGEX_MECHANISM_POS = ++posIndex;
191            matchResultPositions.ensureCapacity(posIndex + 1);
192            matchResultPositions.add(TERM_STEP_REGEX_MECHANISM_POS, null);
193            i = termsFactory.getMechanismsCollection().iterator();
194            while (i.hasNext()) {
195                TermDefinition td = i.next();
196                int size = td.getMatchSize() + 1;
197                for (int k = 0; k < size; k++) {
198                    posIndex++;
199                    matchResultPositions.ensureCapacity(posIndex + 1);
200                    matchResultPositions.add(posIndex, td);
201                }
202            }
203    
204            if (log.isDebugEnabled()) {
205                log.debug("Parsing catch group positions: Modifiers["
206                        + TERM_STEP_REGEX_MODIFIER_POS + "] Qualifier["
207                        + TERM_STEP_REGEX_QUALIFIER_POS + "] Mechanism["
208                        + TERM_STEP_REGEX_MECHANISM_POS + "]");
209                for (int k = 0; k < matchResultPositions.size(); k++) {
210                    log
211                            .debug(k
212                                    + ") "
213                                    + (matchResultPositions.get(k) != null ? ((TermDefinition) matchResultPositions
214                                            .get(k)).getPattern().pattern()
215                                            : null));
216                }
217            }
218            
219            this.matchResultPositions = Collections.synchronizedList(matchResultPositions);
220        }
221    
222        /**
223         * Loop the classes searching for a String static field named
224         * staticFieldName and create an OR regeex like this:
225         * (?:FIELD1|FIELD2|FIELD3)
226         * 
227         * @param classes
228         *            classes to analyze
229         * @param staticFieldName
230         *            static field to concatenate
231         * @return regex The regex
232         */
233        private String createRegex(Collection<TermDefinition> commandMap) {
234            StringBuffer modifierRegex = new StringBuffer();
235            Iterator<TermDefinition> i = commandMap.iterator();
236            boolean first = true;
237            while (i.hasNext()) {
238                if (first) {
239                    modifierRegex.append("(?:(");
240                    first = false;
241                } else {
242                    modifierRegex.append(")|(");
243                }
244                Pattern pattern = i.next().getPattern();
245                modifierRegex.append(pattern.pattern());
246            }
247            modifierRegex.append("))");
248            return modifierRegex.toString();
249        }
250    
251        /**
252         * @see org.apache.james.jspf.core.SPFRecordParser#parse(java.lang.String)
253         */
254        public SPF1Record parse(String spfRecord) throws PermErrorException,
255                NoneException, NeutralException {
256    
257            log.debug("Start parsing SPF-Record: " + spfRecord);
258    
259            SPF1Record result = new SPF1Record();
260    
261            // check the version "header"
262            if (spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ") || spfRecord.equalsIgnoreCase(SPF1Constants.SPF_VERSION1)) {
263                if (!spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ")) throw new NeutralException("Empty SPF Record");
264            } else {
265                throw new NoneException("No valid SPF Record: " + spfRecord);
266            }
267    
268            // extract terms
269            String[] terms = termsSeparatorPattern.split(spfRecord.replaceFirst(
270                    SPF1Constants.SPF_VERSION1, ""));
271    
272            // cycle terms
273            for (int i = 0; i < terms.length; i++) {
274                if (terms[i].length() > 0) {
275                    Matcher termMatcher = termPattern.matcher(terms[i]);
276                    if (!termMatcher.matches()) {
277                        throw new PermErrorException("Term [" + terms[i]
278                                + "] is not syntactically valid: "
279                                + termPattern.pattern());
280                    }
281    
282                    // true if we matched a modifier, false if we matched a
283                    // directive
284                    String modifierString = termMatcher
285                            .group(TERM_STEP_REGEX_MODIFIER_POS);
286    
287                    if (modifierString != null) {
288                        // MODIFIER
289                        Modifier mod = (Modifier) lookupAndCreateTerm(termMatcher,
290                                TERM_STEP_REGEX_MODIFIER_POS);
291    
292                        if (mod.enforceSingleInstance()) {
293                            Iterator<Modifier> it = result.getModifiers().iterator();
294                            while (it.hasNext()) {
295                                if (it.next().getClass().equals(mod.getClass())) {
296                                    throw new PermErrorException("More than one "
297                                            + modifierString
298                                            + " found in SPF-Record");
299                                }
300                            }
301                        }
302    
303                        result.getModifiers().add(mod);
304    
305                    } else {
306                        // DIRECTIVE
307                        String qualifier = termMatcher
308                                .group(TERM_STEP_REGEX_QUALIFIER_POS);
309    
310                        Object mech = lookupAndCreateTerm(termMatcher,
311                                TERM_STEP_REGEX_MECHANISM_POS);
312    
313                        result.getDirectives().add(
314                                new Directive(qualifier, (Mechanism) mech, log.getChildLogger(qualifier+"directive")));
315    
316                    }
317    
318                }
319            }
320    
321            return result;
322        }
323    
324        /**
325         * @param res
326         *            the MatchResult
327         * @param start
328         *            the position where the terms starts
329         * @return
330         * @throws PermErrorException
331         */
332        private Object lookupAndCreateTerm(Matcher res, int start)
333                throws PermErrorException {
334            for (int k = start + 1; k < res.groupCount(); k++) {
335                if (res.group(k) != null && k != TERM_STEP_REGEX_QUALIFIER_POS) {
336                    TermDefinition c = (TermDefinition) matchResultPositions.get(k);
337                    Configuration subres = new MatcherBasedConfiguration(res, k, c
338                            .getMatchSize());
339                    try {
340                        return termsFactory.createTerm(c.getTermDef(), subres);
341                    } catch (InstantiationException e) {
342                        e.printStackTrace();
343                        // TODO is it ok to use a Runtime for this? Or should we use a PermError here?
344                        throw new IllegalStateException("Unexpected error creating term: " + e.getMessage());
345                    }
346    
347                }
348            }
349            return null;
350        }
351    
352    }