1    
2    /* ====================================================================
3     * The Apache Software License, Version 1.1
4     *
5     * Copyright (c) 2002 The Apache Software Foundation.  All rights
6     * reserved.
7     *
8     * Redistribution and use in source and binary forms, with or without
9     * modification, are permitted provided that the following conditions
10    * are met:
11    *
12    * 1. Redistributions of source code must retain the above copyright
13    *    notice, this list of conditions and the following disclaimer.
14    *
15    * 2. Redistributions in binary form must reproduce the above copyright
16    *    notice, this list of conditions and the following disclaimer in
17    *    the documentation and/or other materials provided with the
18    *    distribution.
19    *
20    * 3. The end-user documentation included with the redistribution,
21    *    if any, must include the following acknowledgment:
22    *       "This product includes software developed by the
23    *        Apache Software Foundation (http://www.apache.org/)."
24    *    Alternately, this acknowledgment may appear in the software itself,
25    *    if and wherever such third-party acknowledgments normally appear.
26    *
27    * 4. The names "Apache" and "Apache Software Foundation" and
28    *    "Apache POI" must not be used to endorse or promote products
29    *    derived from this software without prior written permission. For
30    *    written permission, please contact apache@apache.org.
31    *
32    * 5. Products derived from this software may not be called "Apache",
33    *    "Apache POI", nor may "Apache" appear in their name, without
34    *    prior written permission of the Apache Software Foundation.
35    *
36    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47    * SUCH DAMAGE.
48    * ====================================================================
49    *
50    * This software consists of voluntary contributions made by many
51    * individuals on behalf of the Apache Software Foundation.  For more
52    * information on the Apache Software Foundation, please see
53    * <http://www.apache.org/>.
54    */
55   
56   
57   package org.apache.poi.hssf.record.formula;
58   
59   import java.util.List;
60   import java.util.ArrayList;
61   import java.util.Stack;
62   
63   import java.io.FileOutputStream;
64   import java.io.File;
65   
66   
67   /**
68    * This class parses a formula string into a List of tokens in RPN order.
69    * Inspired by 
70    *           Lets Build a Compiler, by Jack Crenshaw
71    * BNF for the formula expression is :
72    * <expression> ::= <term> [<addop> <term>]*
73    * <term> ::= <factor>  [ <mulop> <factor> ]*
74    * <factor> ::= <number> | (<expression>) | <cellRef> | <function>
75    * <function> ::= <functionName> ([expression [, expression]*])
76    *
77    *  @author Avik Sengupta <avik AT Avik Sengupta DOT com>
78    *  @author Andrew C. oliver (acoliver at apache dot org)
79    */
80   public class FormulaParser {
81       
82       public static int FORMULA_TYPE_CELL = 0;
83       public static int FORMULA_TYPE_SHARED = 1;
84       public static int FORMULA_TYPE_ARRAY =2;
85       public static int FORMULA_TYPE_CONDFOMRAT = 3;
86       public static int FORMULA_TYPE_NAMEDRANGE = 4;
87       
88       private String formulaString;
89       private int pointer=0;
90       
91       private List tokens = new java.util.Stack();
92       //private Stack tokens = new java.util.Stack();
93       private List result = new ArrayList();
94       private int numParen;
95       
96       private static char TAB = '\t';
97       private static char CR = '\n';
98       
99      private char Look;              // Lookahead Character 
100      
101      
102      /** create the parser with the string that is to be parsed
103       *    later call the parse() method to return ptg list in rpn order
104       *    then call the getRPNPtg() to retrive the parse results
105       *  This class is recommended only for single threaded use
106       *  The parse and getPRNPtg are internally synchronized for safety, thus
107       *  while it is safe to use in a multithreaded environment, you will get long lock waits.  
108       */
109      public FormulaParser(String formula){
110          formulaString = formula;
111          pointer=0;
112      }
113      
114  
115      /** Read New Character From Input Stream */
116      private void GetChar() {
117          Look=formulaString.charAt(pointer++);
118          //System.out.println("Got char: "+Look);
119      }
120      
121  
122      /** Report an Error */
123      private void Error(String s) {
124          System.out.println("Error: "+s);
125      }
126      
127      
128   
129      /** Report Error and Halt */
130      private void Abort(String s) {
131          Error(s);
132          //System.exit(1);  //throw exception??
133          throw new RuntimeException("Cannot Parse, sorry");
134      }
135      
136      
137  
138      /** Report What Was Expected */
139      private void Expected(String s) {
140          Abort(s + " Expected");
141      }
142      
143      
144   
145      /** Recognize an Alpha Character */
146      private boolean IsAlpha(char c) {
147          return Character.isLetter(c) || c == '$';
148      }
149      
150      
151   
152      /** Recognize a Decimal Digit */
153      private boolean IsDigit(char c) {
154          //System.out.println("Checking digit for"+c);
155          return Character.isDigit(c);
156      }
157      
158      
159  
160      /** Recognize an Alphanumeric */
161      private boolean  IsAlNum(char c) {
162          return  (IsAlpha(c) || IsDigit(c));
163      }
164      
165      
166  
167      /** Recognize an Addop */
168      private boolean IsAddop( char c) {
169          return (c =='+' || c =='-');
170      }
171      
172  
173      /** Recognize White Space */
174      private boolean IsWhite( char c) {
175          return  (c ==' ' || c== TAB);
176      }
177      
178      
179  
180      /** Skip Over Leading White Space */
181      private void SkipWhite() {
182          while (IsWhite(Look)) {
183              GetChar();
184          }
185      }
186      
187      
188  
189      /** Match a Specific Input Character */
190      private void Match(char x) {
191          if (Look != x) {
192              Expected("" + x + "");
193          }else {
194              GetChar();
195              SkipWhite();
196          }
197      }
198      
199      
200      /** Get an Identifier */
201      private String GetName() {
202          StringBuffer Token = new StringBuffer();
203          if (!IsAlpha(Look)) {
204              Expected("Name");
205          }
206          while (IsAlNum(Look)) {
207              Token = Token.append(Character.toUpperCase(Look));
208              GetChar();
209          }
210          SkipWhite();
211          return Token.toString();
212      }
213      
214      
215      /** Get a Number */
216      private String GetNum() {
217          String Value ="";
218          if  (!IsDigit(Look)) Expected("Integer");
219          while (IsDigit(Look)){
220              Value = Value + Look;
221              GetChar();
222          }
223          SkipWhite();
224          return Value;
225      }
226  
227      /** Output a String with Tab */
228      private void  Emit(String s){
229          System.out.print(TAB+s);
230      }
231  
232      /** Output a String with Tab and CRLF */
233      private void EmitLn(String s) {
234          Emit(s);
235          System.out.println();;
236      }
237      
238      /** Parse and Translate a Identifier */
239      private void Ident() {
240          String name;
241          name = GetName();
242          if (Look == '('){
243              //This is a function 
244              Match('(');
245              int numArgs = Arguments(); 
246              Match(')');
247              //this is the end of the function
248              tokens.add(function(name,(byte)numArgs));
249          } else if (Look == ':') { // this is a AreaReference
250              String first = name;
251              Match(':');
252              String second = GetName();
253              tokens.add(new AreaPtg(first+":"+second));
254          } else {
255              //this can be either a cell ref or a named range !!
256              boolean cellRef = true ; //we should probably do it with reg exp??
257              if (cellRef) {
258                  tokens.add(new ReferencePtg(name)); 
259              }else {
260                  //handle after named range is integrated!!
261              }
262          }
263      }
264      
265      private Ptg function(String name,byte numArgs) {
266          Ptg retval = null;
267          retval = new FuncVarPtg(name,numArgs);
268         /** if (numArgs == 1 && name.equals("SUM")) {
269              AttrPtg ptg = new AttrPtg();
270              ptg.setData((short)1); //sums don't care but this is what excel does.
271              ptg.setSum(true);
272              retval = ptg;
273          } else {
274              retval = new FuncVarPtg(name,numArgs);
275          }*/
276          
277          return retval; 
278      }
279      
280      /** get arguments to a function */
281      private int Arguments() {
282          int numArgs = 0;
283          if (Look != ')')  {
284              numArgs++; 
285              Expression();
286          }
287          while (Look == ','  || Look == ';') { //TODO handle EmptyArgs
288              if(Look == ',') {
289                Match(',');
290              }
291              else {
292                Match(';');
293              }
294              Expression();
295              numArgs++;
296          }
297          return numArgs;
298      }
299  
300     /** Parse and Translate a Math Factor  */
301      private void Factor() {
302          if (Look == '(' ) {
303              Match('(');
304              Expression();
305              Match(')');
306              tokens.add(new ParenthesisPtg());
307              return;
308          } else if (IsAlpha(Look)){
309              Ident();
310          } else if(Look == '"') {
311             StringLiteral();
312          } else {
313               
314              String number = GetNum();
315              if (Look=='.') { 
316                  Match('.');
317                  String decimalPart = null;
318                  if (IsDigit(Look)) number = number +"."+ GetNum(); //this also takes care of someone entering "1234."
319                  tokens.add(new NumberPtg(number));
320              } else {
321                  tokens.add(new IntPtg(number));  //TODO:what if the number is too big to be a short? ..add factory to return Int or Number!
322              }
323          }
324      }
325      
326      private void StringLiteral() {
327          Match('"');
328          String name= GetName();
329          Match('"');
330          tokens.add(new StringPtg(name));
331      }
332      
333      /** Recognize and Translate a Multiply */
334      private void Multiply(){
335          Match('*');
336          Factor();
337          tokens.add(new MultiplyPtg());
338    
339      }
340      
341      
342      /** Recognize and Translate a Divide */
343      private void Divide() {
344          Match('/');
345          Factor();
346          tokens.add(new DividePtg());
347  
348      }
349      
350      
351      /** Parse and Translate a Math Term */
352      private void  Term(){
353          Factor();
354          while (Look == '*' || Look == '/' || Look == '^' || Look == '&') {
355              ///TODO do we need to do anything here??
356              if (Look == '*') Multiply();
357              if (Look == '/') Divide();
358              if (Look == '^') Power();
359              if (Look == '&') Concat();
360          }
361      }
362      
363      /** Recognize and Translate an Add */
364      private void Add() {
365          Match('+');
366          Term();
367          tokens.add(new AddPtg());
368      }
369      
370      /** Recognize and Translate an Add */
371      private void Concat() {
372          Match('&');
373          Term();
374          tokens.add(new ConcatPtg());
375      }
376      
377      
378      
379      /** Recognize and Translate a Subtract */
380      private void Subtract() {
381          Match('-');
382          Term();
383          tokens.add(new SubtractPtg());
384      }
385      
386      private void Power() {
387          Match('^');
388          Term();
389          tokens.add(new PowerPtg());
390      }
391      
392      
393      /** Parse and Translate an Expression */
394      private void Expression() {
395          if (IsAddop(Look)) {
396              EmitLn("CLR D0");  //unaryAdd ptg???
397          } else {
398              Term();
399          }
400          while (IsAddop(Look)) {
401              if ( Look == '+' )  Add();
402              if (Look == '-') Subtract();
403              // if (Look == '*') Multiply();
404             // if (Look == '/') Divide();
405          }
406      }
407      
408      
409      //{--------------------------------------------------------------}
410      //{ Parse and Translate an Assignment Statement }
411      /**
412  procedure Assignment;
413  var Name: string[8];
414  begin
415     Name := GetName;
416     Match('=');
417     Expression;
418  
419  end;
420       **/
421      
422   
423      /** Initialize */
424      
425      private void  init() {
426          GetChar();
427          SkipWhite();
428      }
429      
430      /** API call to execute the parsing of the formula
431       *
432       */
433      public void parse() {
434          synchronized (tokens) {
435              init();
436              Expression();
437          }
438      }
439      
440      
441      /*********************************
442       * PARSER IMPLEMENTATION ENDS HERE
443       * EXCEL SPECIFIC METHODS BELOW
444       *******************************/
445      
446      /** API call to retrive the array of Ptgs created as 
447       * a result of the parsing
448       */
449      public Ptg[] getRPNPtg() {
450       return getRPNPtg(FORMULA_TYPE_CELL);
451      }
452      
453      public Ptg[] getRPNPtg(int formulaType) {
454          Node node = createTree();
455          setRootLevelRVA(node, formulaType);
456          setParameterRVA(node,formulaType);
457          return (Ptg[]) tokens.toArray(new Ptg[0]);
458      }
459      
460      private void setRootLevelRVA(Node n, int formulaType) {
461          //Pg 16, excelfileformat.pdf @ openoffice.org
462          Ptg p = (Ptg) n.getValue();
463              if (formulaType == this.FORMULA_TYPE_NAMEDRANGE) {
464                  if (p.getDefaultOperandClass() == Ptg.CLASS_REF) {
465                      setClass(n,Ptg.CLASS_REF);
466                  } else {
467                      setClass(n,Ptg.CLASS_ARRAY);
468                  }
469              } else {
470                  setClass(n,Ptg.CLASS_VALUE);
471              }
472          
473      }
474      
475      private void setParameterRVA(Node n, int formulaType) {
476          Ptg p = (Ptg) n.getValue();
477          if (p instanceof AbstractFunctionPtg) {
478              int numOperands = n.getNumChildren();
479              for (int i =0;i<n.getNumChildren();i++) {
480                  setParameterRVA(n.getChild(i),((AbstractFunctionPtg)p).getParameterClass(i),formulaType);
481                  if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
482                      setParameterRVA(n.getChild(i),formulaType);
483                  }
484              }  
485          } else {
486              for (int i =0;i<n.getNumChildren();i++) {
487                  setParameterRVA(n.getChild(i),formulaType);
488              }
489          } 
490      }
491      private void setParameterRVA(Node n, int expectedClass,int formulaType) {
492          Ptg p = (Ptg) n.getValue();
493          if (expectedClass == Ptg.CLASS_REF) { //pg 15, table 1 
494              if (p.getDefaultOperandClass() == Ptg.CLASS_REF ) {
495                  setClass(n, Ptg.CLASS_REF);
496              }
497              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE) {
498                  if (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED) {
499                      setClass(n,Ptg.CLASS_VALUE);
500                  } else {
501                      setClass(n,Ptg.CLASS_ARRAY);
502                  }
503              }
504              if (p.getDefaultOperandClass() == Ptg.CLASS_ARRAY ) {
505                  setClass(n, Ptg.CLASS_ARRAY);
506              }
507          } else if (expectedClass == Ptg.CLASS_VALUE) { //pg 15, table 2
508              if (formulaType == FORMULA_TYPE_NAMEDRANGE) {
509                  setClass(n,Ptg.CLASS_ARRAY) ;
510              } else {
511                  setClass(n,Ptg.CLASS_VALUE);
512              }
513          } else { //Array class, pg 16. 
514              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE &&
515                   (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED)) {
516                   setClass(n,Ptg.CLASS_VALUE);
517              } else {
518                  setClass(n,Ptg.CLASS_ARRAY);
519              }
520          }
521      }
522      
523       private void setClass(Node n, byte theClass) {
524          Ptg p = (Ptg) n.getValue();
525          if (p instanceof AbstractFunctionPtg || !(p instanceof OperationPtg)) {
526              p.setClass(theClass);
527          } else {
528              for (int i =0;i<n.getNumChildren();i++) {
529                  setClass(n.getChild(i),theClass);
530              }
531          }
532       }
533      /**
534       * Convience method which takes in a list then passes it to the other toFormulaString
535       * signature. 
536       * @param lptgs - list of ptgs, can be null
537       */
538      public static String toFormulaString(List lptgs) {
539          String retval = null;
540          if (lptgs == null || lptgs.size() == 0) return "#NAME";
541          Ptg[] ptgs = new Ptg[lptgs.size()];
542          ptgs = (Ptg[])lptgs.toArray(ptgs);
543          retval = toFormulaString(ptgs);
544          return retval;
545      }
546      
547      /** Static method to convert an array of Ptgs in RPN order 
548       *  to a human readable string format in infix mode
549       *  @param ptgs - array of ptgs, can be null or empty
550       */
551      public static String toFormulaString(Ptg[] ptgs) {
552          if (ptgs == null || ptgs.length == 0) return "#NAME";
553          java.util.Stack stack = new java.util.Stack();
554          int numPtgs = ptgs.length;
555          OperationPtg o;
556          int numOperands;
557          String[] operands;
558          for (int i=0;i<numPtgs;i++) {
559             // Excel allows to have AttrPtg at position 0 (such as Blanks) which
560             // do not have any operands. Skip them.
561              if (ptgs[i] instanceof OperationPtg && i>0) {
562                    o = (OperationPtg) ptgs[i];
563                    numOperands = o.getNumberOfOperands();
564                    operands = new String[numOperands];
565                    for (int j=0;j<numOperands;j++) {
566                        operands[numOperands-j-1] = (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
567                        
568                    }  
569                    String result = o.toFormulaString(operands);
570                    stack.push(result);
571              } else {
572                  stack.push(ptgs[i].toFormulaString());
573              }
574          }
575          return (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
576      }
577      
578      private Node createTree() {
579          java.util.Stack stack = new java.util.Stack();
580          int numPtgs = tokens.size();
581          OperationPtg o;
582          int numOperands;
583          Node[] operands;
584          for (int i=0;i<numPtgs;i++) {
585              if (tokens.get(i) instanceof OperationPtg) {
586                  
587                  o = (OperationPtg) tokens.get(i);
588                  numOperands = o.getNumberOfOperands();
589                  operands = new Node[numOperands];
590                  for (int j=0;j<numOperands;j++) {
591                      operands[numOperands-j-1] = (Node) stack.pop(); 
592                  }
593                  Node result = new Node(o);
594                  result.setChildren(operands);
595                  stack.push(result);
596              } else {
597                  stack.push(new Node((Ptg)tokens.get(i)));
598              }
599          }
600          return (Node) stack.pop();
601      }
602     
603      /** toString on the parser instance returns the RPN ordered list of tokens
604       *   Useful for testing
605       */
606      public String toString() {
607          StringBuffer buf = new StringBuffer();
608             for (int i=0;i<tokens.size();i++) {
609              buf.append( ( (Ptg)tokens.get(i)).toFormulaString());
610              buf.append(' ');
611          } 
612          return buf.toString();
613      }
614      
615  }    
616      class Node {
617          private Ptg value=null;
618          private Node[] children=new Node[0];
619          private int numChild=0;
620          public Node(Ptg val) {
621              value = val; 
622          }
623          public void setChildren(Node[] child) {children = child;numChild=child.length;}
624          public int getNumChildren() {return numChild;}
625          public Node getChild(int number) {return children[number];}
626          public Ptg getValue() {return value;}
627      }
628      
629