1
54
55 package org.apache.poi.hssf.record;
56
57 import org.apache.poi.util.BinaryTree;
58 import org.apache.poi.util.LittleEndian;
59 import org.apache.poi.util.LittleEndianConsts;
60
61 import java.util.Iterator;
62 import java.util.List;
63
64
79
80 public class SSTRecord
81 extends Record
82 {
83
84
85 static final int MAX_RECORD_SIZE = 8228;
86
87
88 static final int STD_RECORD_OVERHEAD =
89 2 * LittleEndianConsts.SHORT_SIZE;
90
91
92 static final int SST_RECORD_OVERHEAD =
93 ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
94
95
96 static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
97
98
99 static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
100
101 public static final short sid = 0xfc;
102
103
104 private int field_1_num_strings;
105
106
107 private int field_2_num_unique_strings;
108 private BinaryTree field_3_strings;
109
110
111 private List _record_lengths = null;
112 private SSTDeserializer deserializer;
113
114
117
118 public SSTRecord()
119 {
120 field_1_num_strings = 0;
121 field_2_num_unique_strings = 0;
122 field_3_strings = new BinaryTree();
123 deserializer = new SSTDeserializer(field_3_strings);
124 }
125
126
134
135 public SSTRecord( final short id, final short size, final byte[] data )
136 {
137 super( id, size, data );
138 }
139
140
149
150 public SSTRecord( final short id, final short size, final byte[] data,
151 int offset )
152 {
153 super( id, size, data, offset );
154 }
155
156
170
171 public int addString( final String string )
172 {
173 int rval;
174
175 if ( string == null )
176 {
177 rval = addString( "", false );
178 }
179 else
180 {
181
182
183
184
185 boolean useUTF16 = false;
186 int strlen = string.length();
187
188 for ( int j = 0; j < strlen; j++ )
189 {
190 if ( string.charAt( j ) > 255 )
191 {
192 useUTF16 = true;
193 break;
194 }
195 }
196 rval = addString( string, useUTF16 );
197 }
198 return rval;
199 }
200
201
216
217 public int addString( final String string, final boolean useUTF16 )
218 {
219 field_1_num_strings++;
220 String str = ( string == null ) ? ""
221 : string;
222 int rval = -1;
223 UnicodeString ucs = new UnicodeString();
224
225 ucs.setString( str );
226 ucs.setCharCount( (short) str.length() );
227 ucs.setOptionFlags( (byte) ( useUTF16 ? 1
228 : 0 ) );
229 Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
230
231 if ( integer != null )
232 {
233 rval = integer.intValue();
234 }
235 else
236 {
237
238
239
240 rval = field_3_strings.size();
241 field_2_num_unique_strings++;
242 integer = new Integer( rval );
243 SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
244
245 }
246 return rval;
247 }
248
249
252
253 public int getNumStrings()
254 {
255 return field_1_num_strings;
256 }
257
258
261
262 public int getNumUniqueStrings()
263 {
264 return field_2_num_unique_strings;
265 }
266
267
277
278 public void setNumStrings( final int count )
279 {
280 field_1_num_strings = count;
281 }
282
283
293
294 public void getNumUniqueStrings( final int count )
295 {
296 field_2_num_unique_strings = count;
297 }
298
299
306
307 public String getString( final int id )
308 {
309 return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
310 }
311
312 public boolean isString16bit( final int id )
313 {
314 UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
315 return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
316 }
317
318
323
324 public String toString()
325 {
326 StringBuffer buffer = new StringBuffer();
327
328 buffer.append( "[SST]\n" );
329 buffer.append( " .numstrings = " )
330 .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
331 buffer.append( " .uniquestrings = " )
332 .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
333 for ( int k = 0; k < field_3_strings.size(); k++ )
334 {
335 buffer.append( " .string_" + k + " = " )
336 .getend( ( (UnicodeString) field_3_strings
337 .get( new Integer( k ) ) ).toString() ).append( "\n" );
338 }
339 buffer.append( "[/SST]\n" );
340 return buffer.toString();
341 }
342
343
346 public short getSid()
347 {
348 return sid;
349 }
350
351
354 public int hashCode()
355 {
356 return field_2_num_unique_strings;
357 }
358
359 public boolean equals( Object o )
360 {
361 if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
362 {
363 return false;
364 }
365 SSTRecord other = (SSTRecord) o;
366
367 return ( field_1_num_stringsfield_1_num_strings .field_1_num_strings ) && ( field_2_num_unique_strings == other
368 .field_2_num_unique_strings ) && field_3_strings
369 .equals( other.field_3_strings ) );
370 }
371
372 /**
373 * validate SID
374 *
375 * @param id the alleged SID
376 *
377 * @exception RecordFormatException if validation fails
378 */
379
380 protected void validateSid( final short id )
381 throws RecordFormatException
382 {
383 if ( id != sid )
384 {
385 throw new RecordFormatException( "NOT An SST RECORD" );
386 }
387 }
388
389 /**
390 * Fill the fields from the data
391 * <P>
392 * The data consists of sets of string data. This string data is
393 * arranged as follows:
394 * <P>
395 * <CODE>
396 * short string_length; // length of string data
397 * byte string_flag; // flag specifying special string
398 * // handling
399 * short run_count; // optional count of formatting runs
400 * int extend_length; // optional extension length
401 * char[] string_data; // string data, can be byte[] or
402 * // short[] (length of array is
403 * // string_length)
404 * int[] formatting_runs; // optional formatting runs (length of
405 * // array is run_count)
406 * byte[] extension; // optional extension (length of array
407 * // is extend_length)
408 * </CODE>
409 * <P>
410 * The string_flag is bit mapped as follows:
411 * <P>
412 * <TABLE>
413 * <TR>
414 * <TH>Bit number</TH>
415 * <TH>Meaning if 0</TH>
416 * <TH>Meaning if 1</TH>
417 * <TR>
418 * <TR>
419 * <TD>0</TD>
420 * <TD>string_data is byte[]</TD>
421 * <TD>string_data is short[]</TH>
422 * <TR>
423 * <TR>
424 * <TD>1</TD>
425 * <TD>Should always be 0</TD>
426 * <TD>string_flag is defective</TH>
427 * <TR>
428 * <TR>
429 * <TD>2</TD>
430 * <TD>extension is not included</TD>
431 * <TD>extension is included</TH>
432 * <TR>
433 * <TR>
434 * <TD>3</TD>
435 * <TD>formatting run data is not included</TD>
436 * <TD>formatting run data is included</TH>
437 * <TR>
438 * <TR>
439 * <TD>4</TD>
440 * <TD>Should always be 0</TD>
441 * <TD>string_flag is defective</TH>
442 * <TR>
443 * <TR>
444 * <TD>5</TD>
445 * <TD>Should always be 0</TD>
446 * <TD>string_flag is defective</TH>
447 * <TR>
448 * <TR>
449 * <TD>6</TD>
450 * <TD>Should always be 0</TD>
451 * <TD>string_flag is defective</TH>
452 * <TR>
453 * <TR>
454 * <TD>7</TD>
455 * <TD>Should always be 0</TD>
456 * <TD>string_flag is defective</TH>
457 * <TR>
458 * </TABLE>
459 * <P>
460 * We can handle eating the overhead associated with bits 2 or 3
461 * (or both) being set, but we have no idea what to do with the
462 * associated data. The UnicodeString class can handle the byte[]
463 * vs short[] nature of the actual string data
464 *
465 * @param data raw data
466 * @param size size of the raw data
467 */
468
469 protected void fillFields( final byte[] data, final short size,
470 int offset )
471 {
472
473 // this method is ALWAYS called after construction -- using
474 // the nontrivial constructor, of course -- so this is where
475 // we initialize our fields
476 field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
477 field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
478 field_3_strings = new BinaryTree();
479 deserializer = new SSTDeserializer(field_3_strings);
480 deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
481 }
482
483
484 /**
485 * @return an iterator of the strings we hold. All instances are
486 * UnicodeStrings
487 */
488
489 Iterator getStrings()
490 {
491 return field_3_strings.values().iterator();
492 }
493
494 /**
495 * @return count of the strings we hold.
496 */
497
498 int countStrings()
499 {
500 return field_3_strings.size();
501 }
502
503 /**
504 * called by the class that is responsible for writing this sucker.
505 * Subclasses should implement this so that their data is passed back in a
506 * byte array.
507 *
508 * @return byte array containing instance data
509 */
510
511 public int serialize( int offset, byte[] data )
512 {
513 SSTSerializer serializer = new SSTSerializer(
514 _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
515 return serializer.serialize( offset, data );
516 }
517
518
519 // we can probably simplify this later...this calculates the size
520 // w/o serializing but still is a bit slow
521 public int getRecordSize()
522 {
523 SSTSerializer serializer = new SSTSerializer(
524 _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
525
526 return serializer.getRecordSize();
527 }
528
529 SSTDeserializer getDeserializer()
530 {
531 return deserializer;
532 }
533
534 /**
535 * Strange to handle continue records this way. Is it a smell?
536 */
537 public void processContinueRecord( byte[] record )
538 {
539 deserializer.processContinueRecord( record );
540 }
541 }
542
543
544 ???????????????????????????????????????????other??????????????????field_2_num_unique_strings?????????????????????????????????????????????field_2_num_unique_strings???????????????????????????????????????????????????????????????????????????other??????????????????????????????????????????????????field_3_strings??????????????????????????other????????????????????????????????field_3_strings?????????????????????????validateSid????????????????????RecordFormatException??????????????id????????????????????sid?????????????????????????fillFields????????????????????????????????????field_1_num_strings???????????????????????????????LittleEndian????????????????????????????????????????????getInt????????????????????????????????????????????????????data??????????????????????????????????????????????????????????????offset?????????field_2_num_unique_strings??????????????????????????????????????LittleEndian???????????????????????????????????????????????????getInt???????????????????????????????????????????????????????????data?????????????????????????????????????????????????????????????????????offset?????????field_3_strings???????????????????????????????BinaryTree?????????deserializer????????????????????????????????????????????field_3_strings?????????deserializer??????????????????????manufactureStrings??????????????????????????????????????????data????????????????????????????????????????????????????offset????????????????????????????????????????????????????????????????????size???????????????????getStrings????????????????field_3_strings????????????????????????????????values??????????????countStrings????????????????field_3_strings????????????????????????????????size?????????????????????serialize?????????SSTSerializer?????????????????_record_lengths??????????????????????????????????field_3_strings???????????????????????????????????????????????????getNumStrings????????????????????????????????????????????????????????????????????getNumUniqueStrings????????????????serializer???????????????????????????serialize??????????????????????????????????????offset??????????????????????????????????????????????data??????????????????????????getRecordSize?????????SSTSerializer?????????????????_record_lengths??????????????????????????????????field_3_strings???????????????????????????????????????????????????getNumStrings????????????????????????????????????????????????????????????????????getNumUniqueStrings????????????????serializer???????????????????????????getRecordSize?????SSTDeserializer?????????????????????getDeserializer????????????????deserializer??????????????????????processContinueRecord?????????deserializer??????????????????????processContinueRecord?????????????????????????????????????????????record