001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 021 package org.apache.james.jspf.parser; 022 023 import org.apache.james.jspf.core.Logger; 024 import org.apache.james.jspf.core.SPF1Constants; 025 import org.apache.james.jspf.core.SPF1Record; 026 import org.apache.james.jspf.core.SPFRecordParser; 027 import org.apache.james.jspf.core.exceptions.NeutralException; 028 import org.apache.james.jspf.core.exceptions.NoneException; 029 import org.apache.james.jspf.core.exceptions.PermErrorException; 030 import org.apache.james.jspf.terms.Configuration; 031 import org.apache.james.jspf.terms.Directive; 032 import org.apache.james.jspf.terms.Mechanism; 033 import org.apache.james.jspf.terms.Modifier; 034 035 import java.util.ArrayList; 036 import java.util.Collection; 037 import java.util.Collections; 038 import java.util.Iterator; 039 import java.util.List; 040 import java.util.regex.Matcher; 041 import java.util.regex.Pattern; 042 043 /** 044 * This class is used to parse SPF1-Records from their textual form to an 045 * SPF1Record object that is composed by 2 collections: directives and 046 * modifiers. 047 * 048 * The parsing is modular and get informations from Mechanism and Modifiers 049 * classes declared in the org/apache/james/jspf/parser/jspf.default.terms file. 050 * 051 * Each term implementation provide its own REGEX in the REGEX static public 052 * field. This parser simply join all the regexp in a single "alternative" 053 * pattern and count the number of catch groups (brackets) assigned to each 054 * regex fragment. 055 * 056 * SO it creates a big regex and an array where it store what term is associated 057 * to each catch group of the big regex. 058 * 059 * If the regex matches the input vspf1 record then it start looking for the 060 * matched group (not null) and lookup the term that created that part of the 061 * regex. 062 * 063 * With this informations it creates a new instance of the term and, if the term 064 * is ConfigurationEnabled it calls the config() method passing to it only the specific 065 * subset of the MatchResult (using the MatchResultSubset). 066 * 067 * TODO doubts about the specification - redirect or exp with no domain-spec are 068 * evaluated as an unknown-modifiers according to the current spec (it does not 069 * make too much sense) - top-label is defined differently in various specs. 070 * We'll have to review the code. - 071 * http://data.iana.org/TLD/tlds-alpha-by-domain.txt (we should probably beeter 072 * use and alpha sequence being at least 2 chars - Somewhere is defined as "." 073 * TLD [ "." ] - Otherwise defined as ( *alphanum ALPHA *alphanum ) / ( 074 * 1*alphanum "-" *( * alphanum / "-" ) alphanum ) 075 * 076 * @see org.apache.james.jspf.core.SPF1Record 077 * 078 */ 079 public class RFC4408SPF1Parser implements SPFRecordParser { 080 081 /** 082 * Regex based on http://www.ietf.org/rfc/rfc4408.txt. 083 * This will be the next official SPF-Spec 084 */ 085 086 // Changed this because C, T and R MACRO_LETTERS are not available 087 // in record parsing and must return a PermError. 088 089 // private static final String MACRO_LETTER_PATTERN = "[lsodipvhcrtLSODIPVHCRT]"; 090 091 /** 092 * ABNF: qualifier = "+" / "-" / "?" / "~" 093 */ 094 private static final String QUALIFIER_PATTERN = "[" + "\\" 095 + SPF1Constants.PASS + "\\" + SPF1Constants.FAIL + "\\" 096 + SPF1Constants.NEUTRAL + "\\" + SPF1Constants.SOFTFAIL + "]"; 097 098 private Pattern termsSeparatorPattern = null; 099 100 private Pattern termPattern = null; 101 102 private int TERM_STEP_REGEX_QUALIFIER_POS; 103 104 private int TERM_STEP_REGEX_MECHANISM_POS; 105 106 private int TERM_STEP_REGEX_MODIFIER_POS; 107 108 private List<TermDefinition> matchResultPositions; 109 110 private Logger log; 111 112 private TermsFactory termsFactory; 113 114 /** 115 * Constructor. Creates all the values needed to run the parsing 116 * 117 * @param logger the logger to use 118 * @param termsFactory the TermsFactory implementation 119 */ 120 public RFC4408SPF1Parser(Logger logger, TermsFactory termsFactory) { 121 this.log = logger; 122 this.termsFactory = termsFactory; 123 124 /** 125 * ABNF: mechanism = ( all / include / A / MX / PTR / IP4 / IP6 / exists ) 126 */ 127 String MECHANISM_REGEX = createRegex(termsFactory.getMechanismsCollection()); 128 129 /** 130 * ABNF: modifier = redirect / explanation / unknown-modifier 131 */ 132 String MODIFIER_REGEX = "(" + createRegex(termsFactory.getModifiersCollection()) + ")"; 133 134 /** 135 * ABNF: directive = [ qualifier ] mechanism 136 */ 137 String DIRECTIVE_REGEX = "(" + QUALIFIER_PATTERN + "?)(" 138 + MECHANISM_REGEX + ")"; 139 140 /** 141 * ABNF: ( directive / modifier ) 142 */ 143 String TERM_REGEX = "(?:" + MODIFIER_REGEX + "|" + DIRECTIVE_REGEX 144 + ")"; 145 146 /** 147 * ABNF: 1*SP 148 */ 149 String TERMS_SEPARATOR_REGEX = "[ ]+"; 150 151 termsSeparatorPattern = Pattern.compile(TERMS_SEPARATOR_REGEX); 152 termPattern = Pattern.compile(TERM_REGEX); 153 154 initializePositions(); 155 } 156 157 /** 158 * Fill in the matchResultPositions ArrayList. This array simply map each 159 * regex matchgroup to the Term class that originated that part of the 160 * regex. 161 */ 162 private void initializePositions() { 163 ArrayList<TermDefinition> matchResultPositions = new ArrayList<TermDefinition>(); 164 165 // FULL MATCH 166 int posIndex = 0; 167 matchResultPositions.ensureCapacity(posIndex + 1); 168 matchResultPositions.add(posIndex, null); 169 170 Iterator<TermDefinition> i; 171 172 TERM_STEP_REGEX_MODIFIER_POS = ++posIndex; 173 matchResultPositions.ensureCapacity(posIndex + 1); 174 matchResultPositions.add(TERM_STEP_REGEX_MODIFIER_POS, null); 175 i = termsFactory.getModifiersCollection().iterator(); 176 while (i.hasNext()) { 177 TermDefinition td = i.next(); 178 int size = td.getMatchSize() + 1; 179 for (int k = 0; k < size; k++) { 180 posIndex++; 181 matchResultPositions.ensureCapacity(posIndex + 1); 182 matchResultPositions.add(posIndex, td); 183 } 184 } 185 186 TERM_STEP_REGEX_QUALIFIER_POS = ++posIndex; 187 matchResultPositions.ensureCapacity(posIndex + 1); 188 matchResultPositions.add(posIndex, null); 189 190 TERM_STEP_REGEX_MECHANISM_POS = ++posIndex; 191 matchResultPositions.ensureCapacity(posIndex + 1); 192 matchResultPositions.add(TERM_STEP_REGEX_MECHANISM_POS, null); 193 i = termsFactory.getMechanismsCollection().iterator(); 194 while (i.hasNext()) { 195 TermDefinition td = i.next(); 196 int size = td.getMatchSize() + 1; 197 for (int k = 0; k < size; k++) { 198 posIndex++; 199 matchResultPositions.ensureCapacity(posIndex + 1); 200 matchResultPositions.add(posIndex, td); 201 } 202 } 203 204 if (log.isDebugEnabled()) { 205 log.debug("Parsing catch group positions: Modifiers[" 206 + TERM_STEP_REGEX_MODIFIER_POS + "] Qualifier[" 207 + TERM_STEP_REGEX_QUALIFIER_POS + "] Mechanism[" 208 + TERM_STEP_REGEX_MECHANISM_POS + "]"); 209 for (int k = 0; k < matchResultPositions.size(); k++) { 210 log 211 .debug(k 212 + ") " 213 + (matchResultPositions.get(k) != null ? ((TermDefinition) matchResultPositions 214 .get(k)).getPattern().pattern() 215 : null)); 216 } 217 } 218 219 this.matchResultPositions = Collections.synchronizedList(matchResultPositions); 220 } 221 222 /** 223 * Loop the classes searching for a String static field named 224 * staticFieldName and create an OR regeex like this: 225 * (?:FIELD1|FIELD2|FIELD3) 226 * 227 * @param classes 228 * classes to analyze 229 * @param staticFieldName 230 * static field to concatenate 231 * @return regex The regex 232 */ 233 private String createRegex(Collection<TermDefinition> commandMap) { 234 StringBuffer modifierRegex = new StringBuffer(); 235 Iterator<TermDefinition> i = commandMap.iterator(); 236 boolean first = true; 237 while (i.hasNext()) { 238 if (first) { 239 modifierRegex.append("(?:("); 240 first = false; 241 } else { 242 modifierRegex.append(")|("); 243 } 244 Pattern pattern = i.next().getPattern(); 245 modifierRegex.append(pattern.pattern()); 246 } 247 modifierRegex.append("))"); 248 return modifierRegex.toString(); 249 } 250 251 /** 252 * @see org.apache.james.jspf.core.SPFRecordParser#parse(java.lang.String) 253 */ 254 public SPF1Record parse(String spfRecord) throws PermErrorException, 255 NoneException, NeutralException { 256 257 log.debug("Start parsing SPF-Record: " + spfRecord); 258 259 SPF1Record result = new SPF1Record(); 260 261 // check the version "header" 262 if (spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ") || spfRecord.equalsIgnoreCase(SPF1Constants.SPF_VERSION1)) { 263 if (!spfRecord.toLowerCase().startsWith(SPF1Constants.SPF_VERSION1 + " ")) throw new NeutralException("Empty SPF Record"); 264 } else { 265 throw new NoneException("No valid SPF Record: " + spfRecord); 266 } 267 268 // extract terms 269 String[] terms = termsSeparatorPattern.split(spfRecord.replaceFirst( 270 SPF1Constants.SPF_VERSION1, "")); 271 272 // cycle terms 273 for (int i = 0; i < terms.length; i++) { 274 if (terms[i].length() > 0) { 275 Matcher termMatcher = termPattern.matcher(terms[i]); 276 if (!termMatcher.matches()) { 277 throw new PermErrorException("Term [" + terms[i] 278 + "] is not syntactically valid: " 279 + termPattern.pattern()); 280 } 281 282 // true if we matched a modifier, false if we matched a 283 // directive 284 String modifierString = termMatcher 285 .group(TERM_STEP_REGEX_MODIFIER_POS); 286 287 if (modifierString != null) { 288 // MODIFIER 289 Modifier mod = (Modifier) lookupAndCreateTerm(termMatcher, 290 TERM_STEP_REGEX_MODIFIER_POS); 291 292 if (mod.enforceSingleInstance()) { 293 Iterator<Modifier> it = result.getModifiers().iterator(); 294 while (it.hasNext()) { 295 if (it.next().getClass().equals(mod.getClass())) { 296 throw new PermErrorException("More than one " 297 + modifierString 298 + " found in SPF-Record"); 299 } 300 } 301 } 302 303 result.getModifiers().add(mod); 304 305 } else { 306 // DIRECTIVE 307 String qualifier = termMatcher 308 .group(TERM_STEP_REGEX_QUALIFIER_POS); 309 310 Object mech = lookupAndCreateTerm(termMatcher, 311 TERM_STEP_REGEX_MECHANISM_POS); 312 313 result.getDirectives().add( 314 new Directive(qualifier, (Mechanism) mech, log.getChildLogger(qualifier+"directive"))); 315 316 } 317 318 } 319 } 320 321 return result; 322 } 323 324 /** 325 * @param res 326 * the MatchResult 327 * @param start 328 * the position where the terms starts 329 * @return 330 * @throws PermErrorException 331 */ 332 private Object lookupAndCreateTerm(Matcher res, int start) 333 throws PermErrorException { 334 for (int k = start + 1; k < res.groupCount(); k++) { 335 if (res.group(k) != null && k != TERM_STEP_REGEX_QUALIFIER_POS) { 336 TermDefinition c = (TermDefinition) matchResultPositions.get(k); 337 Configuration subres = new MatcherBasedConfiguration(res, k, c 338 .getMatchSize()); 339 try { 340 return termsFactory.createTerm(c.getTermDef(), subres); 341 } catch (InstantiationException e) { 342 e.printStackTrace(); 343 // TODO is it ok to use a Runtime for this? Or should we use a PermError here? 344 throw new IllegalStateException("Unexpected error creating term: " + e.getMessage()); 345 } 346 347 } 348 } 349 return null; 350 } 351 352 }