1    /* ====================================================================
2     * The Apache Software License, Version 1.1
3     *
4     * Copyright (c) 2002 The Apache Software Foundation.  All rights
5     * reserved.
6     *
7     * Redistribution and use in source and binary forms, with or without
8     * modification, are permitted provided that the following conditions
9     * are met:
10    *
11    * 1. Redistributions of source code must retain the above copyright
12    *    notice, this list of conditions and the following disclaimer.
13    *
14    * 2. Redistributions in binary form must reproduce the above copyright
15    *    notice, this list of conditions and the following disclaimer in
16    *    the documentation and/or other materials provided with the
17    *    distribution.
18    *
19    * 3. The end-user documentation included with the redistribution,
20    *    if any, must include the following acknowledgment:
21    *       "This product includes software developed by the
22    *        Apache Software Foundation (http://www.apache.org/)."
23    *    Alternately, this acknowledgment may appear in the software itself,
24    *    if and wherever such third-party acknowledgments normally appear.
25    *
26    * 4. The names "Apache" and "Apache Software Foundation" and
27    *    "Apache POI" must not be used to endorse or promote products
28    *    derived from this software without prior written permission. For
29    *    written permission, please contact apache@apache.org.
30    *
31    * 5. Products derived from this software may not be called "Apache",
32    *    "Apache POI", nor may "Apache" appear in their name, without
33    *    prior written permission of the Apache Software Foundation.
34    *
35    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46    * SUCH DAMAGE.
47    * ====================================================================
48    *
49    * This software consists of voluntary contributions made by many
50    * individuals on behalf of the Apache Software Foundation.  For more
51    * information on the Apache Software Foundation, please see
52    * <http://www.apache.org/>.
53    */
54   
55   package org.apache.poi.hssf.record;
56   
57   import org.apache.poi.util.BinaryTree;
58   import org.apache.poi.util.LittleEndian;
59   import org.apache.poi.util.LittleEndianConsts;
60   
61   import java.util.Iterator;
62   import java.util.List;
63   
64   /**
65    * Title:        Static String Table Record
66    * <P>
67    * Description:  This holds all the strings for LabelSSTRecords.
68    * <P>
69    * REFERENCE:    PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
70    *               1-57231-498-2)
71    * <P>
72    * @author Andrew C. Oliver (acoliver at apache dot org)
73    * @author Marc Johnson (mjohnson at apache dot org)
74    * @author Glen Stampoultzis (glens at apache.org)
75    * @version 2.0-pre
76    * @see org.apache.poi.hssf.record.LabelSSTRecord
77    * @see org.apache.poi.hssf.record.ContinueRecord
78    */
79   
80   public class SSTRecord
81           extends Record
82   {
83   
84       /** how big can an SST record be? As big as any record can be: 8228 bytes */
85       static final int MAX_RECORD_SIZE = 8228;
86   
87       /** standard record overhead: two shorts (record id plus data space size)*/
88       static final int STD_RECORD_OVERHEAD =
89               2 * LittleEndianConsts.SHORT_SIZE;
90   
91       /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
92       static final int SST_RECORD_OVERHEAD =
93               ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
94   
95       /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
96       static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
97   
98       /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
99       static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
100  
101      public static final short sid = 0xfc;
102  
103      /** union of strings in the SST and EXTSST */
104      private int field_1_num_strings;
105  
106      /** according to docs ONLY SST */
107      private int field_2_num_unique_strings;
108      private BinaryTree field_3_strings;
109  
110      /** Record lengths for initial SST record and all continue records */
111      private List _record_lengths = null;
112      private SSTDeserializer deserializer;
113  
114      /**
115       * default constructor
116       */
117  
118      public SSTRecord()
119      {
120          field_1_num_strings = 0;
121          field_2_num_unique_strings = 0;
122          field_3_strings = new BinaryTree();
123          deserializer = new SSTDeserializer(field_3_strings);
124      }
125  
126      /**
127       * Constructs an SST record and sets its fields appropriately.
128       *
129       * @param id must be 0xfc or an exception will be throw upon
130       *           validation
131       * @param size the size of the data area of the record
132       * @param data of the record (should not contain sid/len)
133       */
134  
135      public SSTRecord( final short id, final short size, final byte[] data )
136      {
137          super( id, size, data );
138      }
139  
140      /**
141       * Constructs an SST record and sets its fields appropriately.
142       *
143       * @param id must be 0xfc or an exception will be throw upon
144       *           validation
145       * @param size the size of the data area of the record
146       * @param data of the record (should not contain sid/len)
147       * @param offset of the record
148       */
149  
150      public SSTRecord( final short id, final short size, final byte[] data,
151                        int offset )
152      {
153          super( id, size, data, offset );
154      }
155  
156      /**
157       * Add a string. Determines whether 8-bit encoding can be used, or
158       * whether 16-bit encoding must be used.
159       * <p>
160       * THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE
161       * OTHER ,code>addString</code> METHOD AND FORCE 8-BIT ENCODING ON
162       * A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE
163       * STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU
164       * ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT.
165       *
166       * @param string string to be added
167       *
168       * @return the index of that string in the table
169       */
170  
171      public int addString( final String string )
172      {
173          int rval;
174  
175          if ( string == null )
176          {
177              rval = addString( "", false );
178          }
179          else
180          {
181  
182              // scan for characters greater than 255 ... if any are
183              // present, we have to use 16-bit encoding. Otherwise, we
184              // can use 8-bit encoding
185              boolean useUTF16 = false;
186              int strlen = string.length();
187  
188              for ( int j = 0; j < strlen; j++ )
189              {
190                  if ( string.charAt( j ) > 255 )
191                  {
192                      useUTF16 = true;
193                      break;
194                  }
195              }
196              rval = addString( string, useUTF16 );
197          }
198          return rval;
199      }
200  
201      /**
202       * Add a string and assert the encoding (8-bit or 16-bit) to be
203       * used.
204       * <P>
205       * USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
206       * YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
207       * IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
208       * WRITTEN OUT.
209       *
210       * @param string string to be added
211       * @param useUTF16 if true, forces 16-bit encoding. If false,
212       *                 forces 8-bit encoding
213       *
214       * @return the index of that string in the table
215       */
216  
217      public int addString( final String string, final boolean useUTF16 )
218      {
219          field_1_num_strings++;
220          String str = ( string == null ) ? ""
221                  : string;
222          int rval = -1;
223          UnicodeString ucs = new UnicodeString();
224  
225          ucs.setString( str );
226          ucs.setCharCount( (short) str.length() );
227          ucs.setOptionFlags( (byte) ( useUTF16 ? 1
228                  : 0 ) );
229          Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
230  
231          if ( integer != null )
232          {
233              rval = integer.intValue();
234          }
235          else
236          {
237  
238              // This is a new string -- we didn't see it among the
239              // strings we've already collected
240              rval = field_3_strings.size();
241              field_2_num_unique_strings++;
242              integer = new Integer( rval );
243              SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
244  //            field_3_strings.put( integer, ucs );
245          }
246          return rval;
247      }
248  
249      /**
250       * @return number of strings
251       */
252  
253      public int getNumStrings()
254      {
255          return field_1_num_strings;
256      }
257  
258      /**
259       * @return number of unique strings
260       */
261  
262      public int getNumUniqueStrings()
263      {
264          return field_2_num_unique_strings;
265      }
266  
267      /**
268       * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
269       * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
270       * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
271       * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
272       * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
273       *
274       * @param count  number of strings
275       *
276       */
277  
278      public void setNumStrings( final int count )
279      {
280          field_1_num_strings = count;
281      }
282  
283      /**
284       * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
285       * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
286       * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
287       * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
288       * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
289       * THE RECORD
290       *
291       * @param count  number of strings
292       */
293  
294      public void getNumUniqueStrings( final int count )
295      {
296          field_2_num_unique_strings = count;
297      }
298  
299      /**
300       * Get a particular string by its index
301       *
302       * @param id index into the array of strings
303       *
304       * @return the desired string
305       */
306  
307      public String getString( final int id )
308      {
309          return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
310      }
311  
312      public boolean isString16bit( final int id )
313      {
314          UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
315          return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
316      }
317  
318      /**
319       * Return a debugging string representation
320       *
321       * @return string representation
322       */
323  
324      public String toString()
325      {
326          StringBuffer buffer = new StringBuffer();
327  
328          buffer.append( "[SST]\n" );
329          buffer.append( "    .numstrings     = " )
330                  .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
331          buffer.append( "    .uniquestrings  = " )
332                  .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
333          for ( int k = 0; k < field_3_strings.size(); k++ )
334          {
335              buffer.append( "    .string_" + k + "      = " )
336                      .getend( ( (UnicodeString) field_3_strings
337                      .get( new Integer( k ) ) ).toString() ).append( "\n" );
338          }
339          buffer.append( "[/SST]\n" );
340          return buffer.toString();
341      }
342  
343      /**
344       * @return sid
345       */
346      public short getSid()
347      {
348          return sid;
349      }
350  
351      /**
352       * @return hashcode
353       */
354      public int hashCode()
355      {
356          return field_2_num_unique_strings;
357      }
358  
359      public boolean equals( Object o )
360      {
361          if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
362          {
363              return false;
364          }
365          SSTRecord other = (SSTRecord) o;
366  
367          return ( field_1_num_stringsfield_1_num_strings         .field_1_num_strings ) && ( field_2_num_unique_strings == other
368                  .field_2_num_unique_strings ) && field_3_strings
369                  .equals( other.field_3_strings ) );
370      }
371  
372      /**
373       * validate SID
374       *
375       * @param id the alleged SID
376       *
377       * @exception RecordFormatException if validation fails
378       */
379  
380      protected void validateSid( final short id )
381              throws RecordFormatException
382      {
383          if ( id != sid )
384          {
385              throw new RecordFormatException( "NOT An SST RECORD" );
386          }
387      }
388  
389      /**
390       * Fill the fields from the data
391       * <P>
392       * The data consists of sets of string data. This string data is
393       * arranged as follows:
394       * <P>
395       * <CODE>
396       * short  string_length;   // length of string data
397       * byte   string_flag;     // flag specifying special string
398       *                         // handling
399       * short  run_count;       // optional count of formatting runs
400       * int    extend_length;   // optional extension length
401       * char[] string_data;     // string data, can be byte[] or
402       *                         // short[] (length of array is
403       *                         // string_length)
404       * int[]  formatting_runs; // optional formatting runs (length of
405       *                         // array is run_count)
406       * byte[] extension;       // optional extension (length of array
407       *                         // is extend_length)
408       * </CODE>
409       * <P>
410       * The string_flag is bit mapped as follows:
411       * <P>
412       * <TABLE>
413       *   <TR>
414       *      <TH>Bit number</TH>
415       *      <TH>Meaning if 0</TH>
416       *      <TH>Meaning if 1</TH>
417       *   <TR>
418       *   <TR>
419       *      <TD>0</TD>
420       *      <TD>string_data is byte[]</TD>
421       *      <TD>string_data is short[]</TH>
422       *   <TR>
423       *   <TR>
424       *      <TD>1</TD>
425       *      <TD>Should always be 0</TD>
426       *      <TD>string_flag is defective</TH>
427       *   <TR>
428       *   <TR>
429       *      <TD>2</TD>
430       *      <TD>extension is not included</TD>
431       *      <TD>extension is included</TH>
432       *   <TR>
433       *   <TR>
434       *      <TD>3</TD>
435       *      <TD>formatting run data is not included</TD>
436       *      <TD>formatting run data is included</TH>
437       *   <TR>
438       *   <TR>
439       *      <TD>4</TD>
440       *      <TD>Should always be 0</TD>
441       *      <TD>string_flag is defective</TH>
442       *   <TR>
443       *   <TR>
444       *      <TD>5</TD>
445       *      <TD>Should always be 0</TD>
446       *      <TD>string_flag is defective</TH>
447       *   <TR>
448       *   <TR>
449       *      <TD>6</TD>
450       *      <TD>Should always be 0</TD>
451       *      <TD>string_flag is defective</TH>
452       *   <TR>
453       *   <TR>
454       *      <TD>7</TD>
455       *      <TD>Should always be 0</TD>
456       *      <TD>string_flag is defective</TH>
457       *   <TR>
458       * </TABLE>
459       * <P>
460       * We can handle eating the overhead associated with bits 2 or 3
461       * (or both) being set, but we have no idea what to do with the
462       * associated data. The UnicodeString class can handle the byte[]
463       * vs short[] nature of the actual string data
464       *
465       * @param data raw data
466       * @param size size of the raw data
467       */
468  
469      protected void fillFields( final byte[] data, final short size,
470                                 int offset )
471      {
472  
473          // this method is ALWAYS called after construction -- using
474          // the nontrivial constructor, of course -- so this is where
475          // we initialize our fields
476          field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
477          field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
478          field_3_strings = new BinaryTree();
479          deserializer = new SSTDeserializer(field_3_strings);
480          deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
481      }
482  
483  
484      /**
485       * @return an iterator of the strings we hold. All instances are
486       *         UnicodeStrings
487       */
488  
489      Iterator getStrings()
490      {
491          return field_3_strings.values().iterator();
492      }
493  
494      /**
495       * @return count of the strings we hold.
496       */
497  
498      int countStrings()
499      {
500          return field_3_strings.size();
501      }
502  
503      /**
504       * called by the class that is responsible for writing this sucker.
505       * Subclasses should implement this so that their data is passed back in a
506       * byte array.
507       *
508       * @return byte array containing instance data
509       */
510  
511      public int serialize( int offset, byte[] data )
512      {
513          SSTSerializer serializer = new SSTSerializer(
514                  _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
515          return serializer.serialize( offset, data );
516      }
517  
518  
519      // we can probably simplify this later...this calculates the size
520      // w/o serializing but still is a bit slow
521      public int getRecordSize()
522      {
523          SSTSerializer serializer = new SSTSerializer(
524                  _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
525  
526          return serializer.getRecordSize();
527      }
528  
529      SSTDeserializer getDeserializer()
530      {
531          return deserializer;
532      }
533  
534      /**
535       * Strange to handle continue records this way.  Is it a smell?
536       */
537      public void processContinueRecord( byte[] record )
538      {
539          deserializer.processContinueRecord( record );
540      }
541  }
542  
543  
544  ???????????????????????????????????????????other??????????????????field_2_num_unique_strings?????????????????????????????????????????????field_2_num_unique_strings???????????????????????????????????????????????????????????????????????????other??????????????????????????????????????????????????field_3_strings??????????????????????????other????????????????????????????????field_3_strings???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????validateSid????????????????????RecordFormatException??????????????id????????????????????sid?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????fillFields??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????field_1_num_strings???????????????????????????????LittleEndian????????????????????????????????????????????getInt????????????????????????????????????????????????????data??????????????????????????????????????????????????????????????offset?????????field_2_num_unique_strings??????????????????????????????????????LittleEndian???????????????????????????????????????????????????getInt???????????????????????????????????????????????????????????data?????????????????????????????????????????????????????????????????????offset?????????field_3_strings???????????????????????????????BinaryTree?????????deserializer????????????????????????????????????????????field_3_strings?????????deserializer??????????????????????manufactureStrings??????????????????????????????????????????data????????????????????????????????????????????????????offset????????????????????????????????????????????????????????????????????size?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????getStrings????????????????field_3_strings????????????????????????????????values??????????????????????????????????????????????????????????????????????countStrings????????????????field_3_strings????????????????????????????????size????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????serialize?????????SSTSerializer?????????????????_record_lengths??????????????????????????????????field_3_strings???????????????????????????????????????????????????getNumStrings????????????????????????????????????????????????????????????????????getNumUniqueStrings????????????????serializer???????????????????????????serialize??????????????????????????????????????offset??????????????????????????????????????????????data?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????getRecordSize?????????SSTSerializer?????????????????_record_lengths??????????????????????????????????field_3_strings???????????????????????????????????????????????????getNumStrings????????????????????????????????????????????????????????????????????getNumUniqueStrings????????????????serializer???????????????????????????getRecordSize?????SSTDeserializer?????????????????????getDeserializer????????????????deserializer?????????????????????????????????????????????????????????????????????????????????????????????????????processContinueRecord?????????deserializer??????????????????????processContinueRecord?????????????????????????????????????????????record