1    
2    /* ====================================================================
3     * The Apache Software License, Version 1.1
4     *
5     * Copyright (c) 2002 The Apache Software Foundation.  All rights
6     * reserved.
7     *
8     * Redistribution and use in source and binary forms, with or without
9     * modification, are permitted provided that the following conditions
10    * are met:
11    *
12    * 1. Redistributions of source code must retain the above copyright
13    *    notice, this list of conditions and the following disclaimer.
14    *
15    * 2. Redistributions in binary form must reproduce the above copyright
16    *    notice, this list of conditions and the following disclaimer in
17    *    the documentation and/or other materials provided with the
18    *    distribution.
19    *
20    * 3. The end-user documentation included with the redistribution,
21    *    if any, must include the following acknowledgment:
22    *       "This product includes software developed by the
23    *        Apache Software Foundation (http://www.apache.org/)."
24    *    Alternately, this acknowledgment may appear in the software itself,
25    *    if and wherever such third-party acknowledgments normally appear.
26    *
27    * 4. The names "Apache" and "Apache Software Foundation" and
28    *    "Apache POI" must not be used to endorse or promote products
29    *    derived from this software without prior written permission. For
30    *    written permission, please contact apache@apache.org.
31    *
32    * 5. Products derived from this software may not be called "Apache",
33    *    "Apache POI", nor may "Apache" appear in their name, without
34    *    prior written permission of the Apache Software Foundation.
35    *
36    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47    * SUCH DAMAGE.
48    * ====================================================================
49    *
50    * This software consists of voluntary contributions made by many
51    * individuals on behalf of the Apache Software Foundation.  For more
52    * information on the Apache Software Foundation, please see
53    * <http://www.apache.org/>.
54    */
55   
56   
57   package org.apache.poi.hssf.model;
58   
59   import java.util.List;
60   import java.util.ArrayList;
61   import java.util.Stack;
62   
63   import java.io.FileOutputStream;
64   import java.io.File;
65   
66   import org.apache.poi.hssf.util.SheetReferences;
67   import org.apache.poi.hssf.record.formula.*;
68   
69   
70   /**
71    * This class parses a formula string into a List of tokens in RPN order.
72    * Inspired by 
73    *           Lets Build a Compiler, by Jack Crenshaw
74    * BNF for the formula expression is :
75    * <expression> ::= <term> [<addop> <term>]*
76    * <term> ::= <factor>  [ <mulop> <factor> ]*
77    * <factor> ::= <number> | (<expression>) | <cellRef> | <function>
78    * <function> ::= <functionName> ([expression [, expression]*])
79    *
80    *  @author Avik Sengupta <avik AT Avik Sengupta DOT com>
81    *  @author Andrew C. oliver (acoliver at apache dot org)
82    *  @author Eric Ladner (eladner at goldinc dot com)
83    */
84   public class FormulaParser {
85       
86       public static int FORMULA_TYPE_CELL = 0;
87       public static int FORMULA_TYPE_SHARED = 1;
88       public static int FORMULA_TYPE_ARRAY =2;
89       public static int FORMULA_TYPE_CONDFOMRAT = 3;
90       public static int FORMULA_TYPE_NAMEDRANGE = 4;
91       
92       private String formulaString;
93       private int pointer=0;
94       private int formulaLength;
95       
96       private List tokens = new java.util.Stack();
97       //private Stack tokens = new java.util.Stack();
98       private List result = new ArrayList();
99       private int numParen;
100      
101      private static char TAB = '\t';
102      private static char CR = '\n';
103      
104     private char Look;              // Lookahead Character 
105     
106     private Workbook book;
107      
108      
109      /** create the parser with the string that is to be parsed
110       *    later call the parse() method to return ptg list in rpn order
111       *    then call the getRPNPtg() to retrive the parse results
112       *  This class is recommended only for single threaded use
113       */
114      public FormulaParser(String formula, Workbook book){
115          formulaString = formula;
116          pointer=0;
117          this.book = book;
118  	formulaLength = formulaString.length();
119      }
120      
121  
122      /** Read New Character From Input Stream */
123      private void GetChar() {
124          // Check to see if we've walked off the end of the string.
125  	// Just return if so and reset Look to smoething to keep 
126  	// SkipWhitespace from spinning
127          if (pointer == formulaLength) {
128              Look = (char)0;
129  	    return;
130  	}
131          Look=formulaString.charAt(pointer++);
132          //System.out.println("Got char: "+Look);
133      }
134      
135  
136      /** Report an Error */
137      private void Error(String s) {
138          System.out.println("Error: "+s);
139      }
140      
141      
142   
143      /** Report Error and Halt */
144      private void Abort(String s) {
145          Error(s);
146          //System.exit(1);  //throw exception??
147          throw new RuntimeException("Cannot Parse, sorry : "+s);
148      }
149      
150      
151  
152      /** Report What Was Expected */
153      private void Expected(String s) {
154          Abort(s + " Expected");
155      }
156      
157      
158   
159      /** Recognize an Alpha Character */
160      private boolean IsAlpha(char c) {
161          return Character.isLetter(c) || c == '$';
162      }
163      
164      
165   
166      /** Recognize a Decimal Digit */
167      private boolean IsDigit(char c) {
168          //System.out.println("Checking digit for"+c);
169          return Character.isDigit(c);
170      }
171      
172      
173  
174      /** Recognize an Alphanumeric */
175      private boolean  IsAlNum(char c) {
176          return  (IsAlpha(c) || IsDigit(c));
177      }
178      
179      
180  
181      /** Recognize an Addop */
182      private boolean IsAddop( char c) {
183          return (c =='+' || c =='-');
184      }
185      
186  
187      /** Recognize White Space */
188      private boolean IsWhite( char c) {
189          return  (c ==' ' || c== TAB);
190      }
191      
192      
193  
194      /** Skip Over Leading White Space */
195      private void SkipWhite() {
196          while (IsWhite(Look)) {
197              GetChar();
198          }
199      }
200      
201      
202  
203      /** Match a Specific Input Character */
204      private void Match(char x) {
205          if (Look != x) {
206              Expected("" + x + "");
207          }else {
208              GetChar();
209              SkipWhite();
210          }
211      }
212      
213      /** Get an Identifier */
214      private String GetName() {
215          StringBuffer Token = new StringBuffer();
216          if (!IsAlpha(Look)) {
217              Expected("Name");
218          }
219          while (IsAlNum(Look)) {
220              Token = Token.append(Character.toUpperCase(Look));
221              GetChar();
222          }
223          SkipWhite();
224          return Token.toString();
225      }
226      
227      /**Get an Identifier AS IS, without stripping white spaces or 
228         converting to uppercase; used for literals */
229      private String GetNameAsIs() {
230          StringBuffer Token = new StringBuffer();
231          if (!IsAlpha(Look)) {
232              Expected("Name");
233          }
234          while (IsAlNum(Look) || IsWhite(Look)) {
235              Token = Token.append(Look);
236              GetChar();
237          }
238          return Token.toString();
239      }
240      
241      
242      /** Get a Number */
243      private String GetNum() {
244          String Value ="";
245          if  (!IsDigit(Look)) Expected("Integer");
246          while (IsDigit(Look)){
247              Value = Value + Look;
248              GetChar();
249          }
250          SkipWhite();
251          return Value;
252      }
253  
254      /** Output a String with Tab */
255      private void  Emit(String s){
256          System.out.print(TAB+s);
257      }
258  
259      /** Output a String with Tab and CRLF */
260      private void EmitLn(String s) {
261          Emit(s);
262          System.out.println();;
263      }
264      
265      /** Parse and Translate a String Identifier */
266      private void Ident() {
267          String name;
268          name = GetName();
269          if (Look == '('){
270              //This is a function
271              function(name);
272          } else if (Look == ':') { // this is a AreaReference
273              String first = name;
274              Match(':');
275              String second = GetName();
276              tokens.add(new AreaPtg(first+":"+second));
277          } else if (Look == '!') {
278              Match('!');
279              String sheetName = name;
280              String first = GetName();
281              short externIdx = book.checkExternSheet(book.getSheetIndex(sheetName));
282              if (Look == ':') {
283                  Match(':');
284                  String second=GetName();
285                  
286                  tokens.add(new Area3DPtg(first+":"+second,externIdx));
287              } else {
288                  tokens.add(new Ref3DPtg(first,externIdx));
289              }
290          } else {
291              //this can be either a cell ref or a named range !!
292              boolean cellRef = true ; //we should probably do it with reg exp??
293              if (cellRef) {
294                  tokens.add(new ReferencePtg(name));
295              }else {
296                  //handle after named range is integrated!!
297              }
298          }
299      }
300      
301      private void function(String name) {
302          Match('(');
303          int numArgs = Arguments();
304          Match(')');
305          tokens.add(getFunction(name,(byte)numArgs));
306      }
307      
308      private Ptg getFunction(String name,byte numArgs) {
309          Ptg retval = null;
310          //retval = new FuncVarPtg(name,numArgs);
311         if (name.equals("IF")) {
312              AttrPtg ptg = new AttrPtg();
313              ptg.setData((short)6); //sums don't care but this is what excel does.
314              ptg.setOptimizedIf(true);
315              retval = ptg;
316          } else {
317              retval = new FuncVarPtg(name,numArgs);
318          }
319          
320          return retval; 
321      }
322      
323      /** get arguments to a function */
324      private int Arguments() {
325          int numArgs = 0;
326          if (Look != ')')  {
327              numArgs++; 
328              Expression();
329          }
330          while (Look == ','  || Look == ';') { //TODO handle EmptyArgs
331              if(Look == ',') {
332                Match(',');
333              }
334              else {
335                Match(';');
336              }
337              Expression();
338              numArgs++;
339          }
340          return numArgs;
341      }
342  
343     /** Parse and Translate a Math Factor  */
344      private void Factor() {
345          if (Look == '(' ) {
346              Match('(');
347              Expression();
348              Match(')');
349              tokens.add(new ParenthesisPtg());
350              return;
351          } else if (IsAlpha(Look)){
352              Ident();
353          } else if(Look == '"') {
354             StringLiteral();
355          } else {
356               
357              String number = GetNum();
358              if (Look=='.') { 
359                  Match('.');
360                  String decimalPart = null;
361                  if (IsDigit(Look)) number = number +"."+ GetNum(); //this also takes care of someone entering "1234."
362                  tokens.add(new NumberPtg(number));
363              } else {
364                  tokens.add(new IntPtg(number));  //TODO:what if the number is too big to be a short? ..add factory to return Int or Number!
365              }
366          }
367      }
368      
369      private void StringLiteral() {
370          Match('"');
371          String name= GetNameAsIs();
372          Match('"');
373          tokens.add(new StringPtg(name));
374      }
375      
376      /** Recognize and Translate a Multiply */
377      private void Multiply(){
378          Match('*');
379          Factor();
380          tokens.add(new MultiplyPtg());
381    
382      }
383      
384      
385      /** Recognize and Translate a Divide */
386      private void Divide() {
387          Match('/');
388          Factor();
389          tokens.add(new DividePtg());
390  
391      }
392      
393      
394      /** Parse and Translate a Math Term */
395      private void  Term(){
396          Factor();
397          while (Look == '*' || Look == '/' || Look == '^' || Look == '&' || Look == '=' ) {
398              ///TODO do we need to do anything here??
399              if (Look == '*') Multiply();
400              if (Look == '/') Divide();
401              if (Look == '^') Power();
402              if (Look == '&') Concat();
403              if (Look == '=') Equal();
404          }
405      }
406      
407      /** Recognize and Translate an Add */
408      private void Add() {
409          Match('+');
410          Term();
411          tokens.add(new AddPtg());
412      }
413      
414      /** Recognize and Translate a Concatination */
415      private void Concat() {
416          Match('&');
417          Term();
418          tokens.add(new ConcatPtg());
419      }
420      
421      /** Recognize and Translate a test for Equality  */
422      private void Equal() {
423          Match('=');
424          Term();
425          tokens.add(new EqualPtg());
426      }
427      
428      /** Recognize and Translate a Subtract */
429      private void Subtract() {
430          Match('-');
431          Term();
432          tokens.add(new SubtractPtg());
433      }
434      
435      private void Power() {
436          Match('^');
437          Term();
438          tokens.add(new PowerPtg());
439      }
440      
441      
442      /** Parse and Translate an Expression */
443      private void Expression() {
444          if (IsAddop(Look)) {
445              EmitLn("CLR D0");  //unaryAdd ptg???
446          } else {
447              Term();
448          }
449          while (IsAddop(Look)) {
450              if ( Look == '+' )  Add();
451              if (Look == '-') Subtract();
452              if (Look == '*') Multiply();
453              if (Look == '/') Divide();
454          }
455      }
456      
457      
458      //{--------------------------------------------------------------}
459      //{ Parse and Translate an Assignment Statement }
460      /**
461  procedure Assignment;
462  var Name: string[8];
463  begin
464     Name := GetName;
465     Match('=');
466     Expression;
467  
468  end;
469       **/
470      
471   
472      /** Initialize */
473      
474      private void  init() {
475          GetChar();
476          SkipWhite();
477      }
478      
479      /** API call to execute the parsing of the formula
480       *
481       */
482      public void parse() {
483          synchronized (tokens) {
484              init();
485              Expression();
486          }
487      }
488      
489      
490      /*********************************
491       * PARSER IMPLEMENTATION ENDS HERE
492       * EXCEL SPECIFIC METHODS BELOW
493       *******************************/
494      
495      /** API call to retrive the array of Ptgs created as 
496       * a result of the parsing
497       */
498      public Ptg[] getRPNPtg() {
499       return getRPNPtg(FORMULA_TYPE_CELL);
500      }
501      
502      public Ptg[] getRPNPtg(int formulaType) {
503          Node node = createTree();
504          setRootLevelRVA(node, formulaType);
505          setParameterRVA(node,formulaType);
506          return (Ptg[]) tokens.toArray(new Ptg[0]);
507      }
508      
509      private void setRootLevelRVA(Node n, int formulaType) {
510          //Pg 16, excelfileformat.pdf @ openoffice.org
511          Ptg p = (Ptg) n.getValue();
512              if (formulaType == this.FORMULA_TYPE_NAMEDRANGE) {
513                  if (p.getDefaultOperandClass() == Ptg.CLASS_REF) {
514                      setClass(n,Ptg.CLASS_REF);
515                  } else {
516                      setClass(n,Ptg.CLASS_ARRAY);
517                  }
518              } else {
519                  setClass(n,Ptg.CLASS_VALUE);
520              }
521          
522      }
523      
524      private void setParameterRVA(Node n, int formulaType) {
525          Ptg p = (Ptg) n.getValue();
526          if (p instanceof AbstractFunctionPtg) {
527              int numOperands = n.getNumChildren();
528              for (int i =0;i<n.getNumChildren();i++) {
529                  setParameterRVA(n.getChild(i),((AbstractFunctionPtg)p).getParameterClass(i),formulaType);
530                  if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
531                      setParameterRVA(n.getChild(i),formulaType);
532                  }
533              }  
534          } else {
535              for (int i =0;i<n.getNumChildren();i++) {
536                  setParameterRVA(n.getChild(i),formulaType);
537              }
538          } 
539      }
540      private void setParameterRVA(Node n, int expectedClass,int formulaType) {
541          Ptg p = (Ptg) n.getValue();
542          if (expectedClass == Ptg.CLASS_REF) { //pg 15, table 1 
543              if (p.getDefaultOperandClass() == Ptg.CLASS_REF ) {
544                  setClass(n, Ptg.CLASS_REF);
545              }
546              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE) {
547                  if (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED) {
548                      setClass(n,Ptg.CLASS_VALUE);
549                  } else {
550                      setClass(n,Ptg.CLASS_ARRAY);
551                  }
552              }
553              if (p.getDefaultOperandClass() == Ptg.CLASS_ARRAY ) {
554                  setClass(n, Ptg.CLASS_ARRAY);
555              }
556          } else if (expectedClass == Ptg.CLASS_VALUE) { //pg 15, table 2
557              if (formulaType == FORMULA_TYPE_NAMEDRANGE) {
558                  setClass(n,Ptg.CLASS_ARRAY) ;
559              } else {
560                  setClass(n,Ptg.CLASS_VALUE);
561              }
562          } else { //Array class, pg 16. 
563              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE &&
564                   (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED)) {
565                   setClass(n,Ptg.CLASS_VALUE);
566              } else {
567                  setClass(n,Ptg.CLASS_ARRAY);
568              }
569          }
570      }
571      
572       private void setClass(Node n, byte theClass) {
573          Ptg p = (Ptg) n.getValue();
574          if (p instanceof AbstractFunctionPtg || !(p instanceof OperationPtg)) {
575              p.setClass(theClass);
576          } else {
577              for (int i =0;i<n.getNumChildren();i++) {
578                  setClass(n.getChild(i),theClass);
579              }
580          }
581       }
582      /**
583       * Convience method which takes in a list then passes it to the other toFormulaString
584       * signature. 
585       * @param lptgs - list of ptgs, can be null
586       */
587      public static String toFormulaString(SheetReferences refs, List lptgs) {
588          String retval = null;
589          if (lptgs == null || lptgs.size() == 0) return "#NAME";
590          Ptg[] ptgs = new Ptg[lptgs.size()];
591          ptgs = (Ptg[])lptgs.toArray(ptgs);
592          retval = toFormulaString(refs, ptgs);
593          return retval;
594      }
595      
596      /** Static method to convert an array of Ptgs in RPN order 
597       *  to a human readable string format in infix mode
598       *  @param ptgs - array of ptgs, can be null or empty
599       */
600      public static String toFormulaString(SheetReferences refs, Ptg[] ptgs) {
601          if (ptgs == null || ptgs.length == 0) return "#NAME";
602          java.util.Stack stack = new java.util.Stack();
603          int numPtgs = ptgs.length;
604          OperationPtg o;
605          int numOperands;
606          String result=null;
607          String[] operands;
608          AttrPtg ifptg = null;
609          for (int i=0;i<numPtgs;i++) {
610             // Excel allows to have AttrPtg at position 0 (such as Blanks) which
611             // do not have any operands. Skip them.
612              if (ptgs[i] instanceof OperationPtg && i>0) {
613                    o = (OperationPtg) ptgs[i];
614                    
615                    if (o instanceof AttrPtg && ((AttrPtg)o).isOptimizedIf()) {
616                          ifptg=(AttrPtg)o;
617                    } else {
618                        
619                        numOperands = o.getNumberOfOperands();
620                        operands = new String[numOperands];
621                        
622                        for (int j=0;j<numOperands;j++) {
623                            operands[numOperands-j-1] = (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
624                        }  
625  
626                        if ( (o instanceof AbstractFunctionPtg) && 
627                              ((AbstractFunctionPtg)o).getName().equals("specialflag") &&
628                              ifptg != null
629                              ) {
630                               // this special case will be way different.
631                               result = ifptg.toFormulaString(
632                                    new String[] {(o.toFormulaString(operands))}
633                                                             );
634                               ifptg = null;
635                        } else {                      
636                          result = o.toFormulaString(operands);                                              
637                        }
638                        stack.push(result);                                        
639                    }
640                        
641                    
642              } else {
643                  stack.push(ptgs[i].toFormulaString(refs));
644              }
645          }
646          return (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
647      }
648      
649      private Node createTree() {
650          java.util.Stack stack = new java.util.Stack();
651          int numPtgs = tokens.size();
652          OperationPtg o;
653          int numOperands;
654          Node[] operands;
655          for (int i=0;i<numPtgs;i++) {
656              if (tokens.get(i) instanceof OperationPtg) {
657                  
658                  o = (OperationPtg) tokens.get(i);
659                  numOperands = o.getNumberOfOperands();
660                  operands = new Node[numOperands];
661                  for (int j=0;j<numOperands;j++) {
662                      operands[numOperands-j-1] = (Node) stack.pop(); 
663                  }
664                  Node result = new Node(o);
665                  result.setChildren(operands);
666                  stack.push(result);
667              } else {
668                  stack.push(new Node((Ptg)tokens.get(i)));
669              }
670          }
671          return (Node) stack.pop();
672      }
673     
674      /** toString on the parser instance returns the RPN ordered list of tokens
675       *   Useful for testing
676       */
677      public String toString() {
678          SheetReferences refs = book.getSheetReferences();
679          StringBuffer buf = new StringBuffer();
680             for (int i=0;i<tokens.size();i++) {
681              buf.append( ( (Ptg)tokens.get(i)).toFormulaString(refs));
682              buf.append(' ');
683          } 
684          return buf.toString();
685      }
686      
687  }    
688      class Node {
689          private Ptg value=null;
690          private Node[] children=new Node[0];
691          private int numChild=0;
692          public Node(Ptg val) {
693              value = val; 
694          }
695          public void setChildren(Node[] child) {children = child;numChild=child.length;}
696          public int getNumChildren() {return numChild;}
697          public Node getChild(int number) {return children[number];}
698          public Ptg getValue() {return value;}
699      }
700      
701