1 /* ==================================================================== 2 * The Apache Software License, Version 1.1 3 * 4 * Copyright (c) 2000 The Apache Software Foundation. All rights 5 * reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 19 * 3. The end-user documentation included with the redistribution, 20 * if any, must include the following acknowledgment: 21 * "This product includes software developed by the 22 * Apache Software Foundation (http://www.apache.org/)." 23 * Alternately, this acknowledgment may appear in the software itself, 24 * if and wherever such third-party acknowledgments normally appear. 25 * 26 * 4. The names "Apache" and "Apache Software Foundation" must 27 * not be used to endorse or promote products derived from this 28 * software without prior written permission. For written 29 * permission, please contact apache@apache.org. 30 * 31 * 5. Products derived from this software may not be called "Apache", 32 * nor may "Apache" appear in their name, without prior written 33 * permission of the Apache Software Foundation. 34 * 35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 * ==================================================================== 48 * 49 * This software consists of voluntary contributions made by many 50 * individuals on behalf of the Apache Software Foundation. For more 51 * information on the Apache Software Foundation, please see 52 * <http://www.apache.org/>. 53 */ 54 55 package org.apache.poi.hpsf; 56 57 import java.io.*; 58 import java.util.*; 59 import org.apache.poi.hpsf.littleendian.*; 60 import org.apache.poi.hpsf.wellknown.*; 61 import org.apache.poi.poifs.filesystem.*; 62 63 /** 64 * <p>Represents a property set in the Horrible Property Set Format 65 * (HPSF). These are usually metadata of a Microsoft Office 66 * document.</p> 67 * 68 * <p>An application that wants to access these metadata should create 69 * an instance of this class or one of its subclasses by calling the 70 * factory method {@link PropertySetFactory#create} and then retrieve 71 * the information its needs by calling appropriate methods.</p> 72 * 73 * <p>{@link PropertySetFactory#create} does its work by calling one 74 * of the constructors {@link PropertySet#PropertySet(InputStream)} or 75 * {@link PropertySet#PropertySet(byte[])}. If the constructor's 76 * argument is not in the Horrible Property Set Format, i.e. not a 77 * property set stream, or if any other error occurs, an appropriate 78 * exception is thrown.</p> 79 * 80 * <p>A {@link PropertySet} has a list of {@link Section}s, and each 81 * {@link Section} has a {@link Property} array. Use {@link 82 * #getSections} to retrieve the {@link Section}s, then call {@link 83 * Section#getProperties} for each {@link Section} to get hold of the 84 * {@link Property} arrays.</p> 85 * 86 * Since the vast majority of {@link PropertySet}s contains only a 87 * single {@link Section}, the convenience method {@link 88 * #getProperties} returns the properties of a {@link PropertySet}'s 89 * {@link Section} (throwing a {@link NoSingleSectionException} if the 90 * {@link PropertySet} contains more (or less) than exactly one {@link 91 * Section}). 92 * 93 * @author Rainer Klute (klute@rainer-klute.de) 94 * @author Drew Varner (Drew.Varner hanginIn sc.edu) 95 * 96 * @version $Id: PropertySet.java,v 1.4 2002/05/03 07:29:09 klute Exp $ 97 * @since 2002-02-09 98 */ 99 public class PropertySet 100 { 101 static final byte[] BYTE_ORDER_ASSERTION = 102 new byte[] {(byte) 0xFF, (byte) 0xFE}; 103 static final byte[] FORMAT_ASSERTION = 104 new byte[] {(byte) 0x00, (byte) 0x00}; 105 106 107 108 private Word byteOrder; // Must equal BYTE_ORDER_ASSERTION 109 110 /** 111 * <p>Returns the property set stream's low-level "byte order" 112 * field. It is always <tt>0xFFFE</tt>.</p> 113 */ 114 public Word getByteOrder() 115 { 116 return byteOrder; 117 } 118 119 120 121 private Word format; // Must equal FORMAT_ASSERTION 122 123 /** 124 * <p>Returns the property set stream's low-level "format" 125 * field. It is always <tt>0x0000</tt>.</p> 126 */ 127 public Word getFormat() 128 { 129 return format; 130 } 131 132 133 134 private DWord osVersion; 135 136 /** 137 * <p>Returns the property set stream's low-level "OS version" 138 * field.</p> 139 */ 140 public DWord getOSVersion() 141 { 142 return osVersion; 143 } 144 145 146 147 private ClassID classID; 148 149 /** 150 * <p>Returns the property set stream's low-level "class ID" 151 * field.</p> 152 */ 153 public ClassID getClassID() 154 { 155 return classID; 156 } 157 158 159 160 private int sectionCount; 161 162 /** 163 * <p>Returns the number of {@link Section}s in the property 164 * set.</p> 165 */ 166 public int getSectionCount() 167 { 168 return sectionCount; 169 } 170 171 172 173 private List sections; 174 175 /** 176 * <p>Returns the {@link Section}s in the property set.</p> 177 */ 178 public List getSections() 179 { 180 return sections; 181 } 182 183 184 185 /** 186 * <p>Creates an empty (uninitialized) {@link PropertySet}.</p> 187 * 188 * <p><strong>Please note:</strong> For the time being this 189 * constructor is protected since it is used for internal purposes 190 * only, but expect it to become public once the property set 191 * writing functionality is implemented.</p> 192 */ 193 protected PropertySet() 194 {} 195 196 197 198 /** 199 * <p>Creates a {@link PropertySet} instance from an {@link 200 * InputStream} in the Horrible Property Set Format.</p> 201 * 202 * <p>The constructor reads the first few bytes from the stream 203 * and determines whether it is really a property set stream. If 204 * it is, it parses the rest of the stream. If it is not, it 205 * resets the stream to its beginning in order to let other 206 * components mess around with the data and throws an 207 * exception.</p> 208 * 209 * @throws NoPropertySetStreamException if the stream is not a 210 * property set stream. 211 * 212 * @throws MarkUnsupportedException if the stream does not support 213 * the {@link InputStream#markSupported} method. 214 * 215 * @throws IOException if the {@link InputStream} cannot not be 216 * accessed as needed. 217 */ 218 public PropertySet(final InputStream stream) 219 throws NoPropertySetStreamException, MarkUnsupportedException, 220 IOException 221 { 222 if (isPropertySetStream(stream)) 223 { 224 final int avail = stream.available(); 225 final byte[] buffer = new byte[avail]; 226 stream.read(buffer, 0, buffer.length); 227 init(buffer, 0, buffer.length); 228 } 229 else 230 throw new NoPropertySetStreamException(); 231 } 232 233 234 235 /** 236 * <p>Creates a {@link PropertySet} instance from a byte array 237 * that represents a stream in the Horrible Property Set 238 * Format.</p> 239 * 240 * @param stream The byte array holding the stream data. 241 * 242 * @param offset The offset in <var>stream</var> where the stream 243 * data begin. If the stream data begin with the first byte in the 244 * array, the <var>offset</var> is 0. 245 * 246 * @param length The length of the stream data. 247 * 248 * @throws NoPropertySetStreamException if the byte array is not a 249 * property set stream. 250 */ 251 public PropertySet(final byte[] stream, final int offset, final int length) 252 throws NoPropertySetStreamException 253 { 254 if (isPropertySetStream(stream, offset, length)) 255 init(stream, offset, length); 256 else 257 throw new NoPropertySetStreamException(); 258 } 259 260 261 262 /** 263 * <p>Creates a {@link PropertySet} instance from a byte array 264 * that represents a stream in the Horrible Property Set 265 * Format.</p> 266 * 267 * @param stream The byte array holding the stream data. The 268 * complete byte array contents is the stream data. 269 * 270 * @throws NoPropertySetStreamException if the byte array is not a 271 * property set stream. 272 */ 273 public PropertySet(final byte[] stream) 274 throws NoPropertySetStreamException 275 { 276 this(stream, 0, stream.length); 277 } 278 279 280 281 /** 282 * <p>Checks whether an {@link InputStream} is in the Horrible 283 * Property Set Format.</p> 284 * 285 * @param stream The {@link InputStream} to check. In order to 286 * perform the check, the method reads the first bytes from the 287 * stream. After reading, the stream is reset to the position it 288 * had before reading. The {@link InputStream} must support the 289 * {@link InputStream#mark} method. 290 * 291 * @return <code>true</code> if the stream is a property set 292 * stream, else <code>false</code>. 293 * 294 * @throws MarkUnsupportedException if the {@link InputStream} 295 * does not support the {@link InputStream#mark} method. 296 */ 297 public static boolean isPropertySetStream(final InputStream stream) 298 throws MarkUnsupportedException, IOException 299 { 300 /* Read at most this many bytes. */ 301 final int BUFFER_SIZE = 50; 302 303 /* Mark the current position in the stream so that we can 304 * reset to this position if the stream does not contain a 305 * property set. */ 306 if (!stream.markSupported()) 307 throw new MarkUnsupportedException(stream.getClass().getName()); 308 stream.mark(BUFFER_SIZE); 309 310 /* Read a couple of bytes from the stream. */ 311 final byte[] buffer = new byte[BUFFER_SIZE]; 312 final int bytes = 313 stream.read(buffer, 0, 314 Math.min(buffer.length, stream.available())); 315 final boolean isPropertySetStream = 316 isPropertySetStream(buffer, 0, bytes); 317 stream.reset(); 318 return isPropertySetStream; 319 } 320 321 322 323 /** 324 * <p>Checks whether a byte array is in the Horrible Property Set 325 * Format.</p> 326 * 327 * @param src The byte array to check. 328 * 329 * @param offset The offset in the byte array. 330 * 331 * @param length The significant number of bytes in the byte 332 * array. Only this number of bytes will be checked. 333 * 334 * @return <code>true</code> if the byte array is a property set 335 * stream, <code>false</code> if not. 336 */ 337 public static boolean isPropertySetStream(final byte[] src, int offset, 338 final int length) 339 { 340 /* Read the header fields of the stream. They must always be 341 * there. */ 342 final Word byteOrder = new Word(src, offset); 343 offset += Word.LENGTH; 344 if (!Util.equal(byteOrder.getBytes(), BYTE_ORDER_ASSERTION)) 345 return false; 346 final Word format = new Word(src, offset); 347 offset += Word.LENGTH; 348 if (!Util.equal(format.getBytes(), FORMAT_ASSERTION)) 349 return false; 350 final DWord osVersion = new DWord(src, offset); 351 offset += DWord.LENGTH; 352 final ClassID classID = new ClassID(src, offset); 353 offset += ClassID.LENGTH; 354 final DWord sectionCount = new DWord(src, offset); 355 offset += DWord.LENGTH; 356 if (sectionCount.intValue() < 1) 357 return false; 358 return true; 359 } 360 361 362 363 /** 364 * <p>Initializes this {@link PropertySet} instance from a byte 365 * array. The method assumes that it has been checked already that 366 * the byte array indeed represents a property set stream. It does 367 * no more checks on its own.</p> 368 */ 369 private void init(final byte[] src, int offset, final int length) 370 { 371 /* Read the stream's header fields. */ 372 byteOrder = new Word(src, offset); 373 offset += Word.LENGTH; 374 format = new Word(src, offset); 375 offset += Word.LENGTH; 376 osVersion = new DWord(src, offset); 377 offset += DWord.LENGTH; 378 classID = new ClassID(src, offset); 379 offset += ClassID.LENGTH; 380 sectionCount = new DWord(src, offset).intValue(); 381 offset += DWord.LENGTH; 382 383 /* Read the sections, which are following the header. They 384 * start with an array of section descriptions. Each one 385 * consists of a format ID telling what the section contains 386 * and an offset telling how many bytes from the start of the 387 * stream the section begins. */ 388 /* Most property sets have only one section. The Document 389 * Summary Information stream has 2. Everything else is a rare 390 * exception and is no longer fostered by Microsoft. */ 391 sections = new ArrayList(2); 392 393 /* Loop over the section descriptor array. Each descriptor 394 * consists of a ClassID and a DWord, and we have to increment 395 * "offset" accordingly. */ 396 for (int i = 0; i < sectionCount; i++) 397 { 398 final Section s = new Section(src, offset); 399 offset += ClassID.LENGTH + DWord.LENGTH; 400 sections.add(s); 401 } 402 } 403 404 405 406 /** 407 * <p>Checks whether this {@link PropertySet} represents a Summary 408 * Information.</p> 409 */ 410 public boolean isSummaryInformation() 411 { 412 return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(), 413 SectionIDMap.SUMMARY_INFORMATION_ID); 414 } 415 416 417 418 /** 419 * <p>Checks whether this {@link PropertySet} is a Document 420 * Summary Information.</p> 421 */ 422 public boolean isDocumentSummaryInformation() 423 { 424 return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(), 425 SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID); 426 } 427 428 429 430 /** 431 * <p>Convenience method returning the {@link Property} array 432 * contained in this property set. It is a shortcut for getting 433 * the {@link PropertySet}'s {@link Section}s list and then 434 * getting the {@link Property} array from the first {@link 435 * Section}. However, it can only be used if the {@link 436 * PropertySet} contains exactly one {@link Section}, so check 437 * {@link #getSectionCount} first!</p> 438 * 439 * @return The properties of the only {@link Section} of this 440 * {@link PropertySet}. 441 * 442 * @throws NoSingleSectionException if the {@link PropertySet} has 443 * more or less than one {@link Section}. 444 */ 445 public Property[] getProperties() 446 throws NoSingleSectionException 447 { 448 return getSingleSection().getProperties(); 449 } 450 451 452 453 /** 454 * <p>Convenience method returning the value of the property with 455 * the specified ID. If the property is not available, 456 * <code>null</code> is returned and a subsequent call to {@link 457 * #wasNull} will return <code>true</code>.</p> 458 * 459 * @throws NoSingleSectionException if the {@link PropertySet} has 460 * more or less than one {@link Section}. 461 */ 462 protected Object getProperty(final int id) 463 throws NoSingleSectionException 464 { 465 return getSingleSection().getProperty(id); 466 } 467 468 469 470 /** 471 * <p>Convenience method returning the value of a boolean property 472 * with the specified ID. If the property is not available, 473 * <code>false</code> is returned. A subsequent call to {@link 474 * #wasNull} will return <code>true</code> to let the caller 475 * distinguish that case from a real property value of 476 * <code>false</code>.</p> 477 * 478 * @throws NoSingleSectionException if the {@link PropertySet} has 479 * more or less than one {@link Section}. 480 */ 481 protected boolean getPropertyBooleanValue(final int id) 482 throws NoSingleSectionException 483 { 484 return getSingleSection().getPropertyBooleanValue(id); 485 } 486 487 488 489 /** 490 * <p>Convenience method returning the value of the numeric 491 * property with the specified ID. If the property is not 492 * available, 0 is returned. A subsequent call to {@link #wasNull} 493 * will return <code>true</code> to let the caller distinguish 494 * that case from a real property value of 0.</p> 495 * 496 * @throws NoSingleSectionException if the {@link PropertySet} has 497 * more or less than one {@link Section}. 498 */ 499 protected int getPropertyIntValue(final int id) 500 throws NoSingleSectionException 501 { 502 return getSingleSection().getPropertyIntValue(id); 503 } 504 505 506 507 /** 508 * <p>Checks whether the property which the last call to {@link 509 * #getPropertyIntValue} or {@link #getProperty} tried to access 510 * was available or not. This information might be important for 511 * callers of {@link #getPropertyIntValue} since the latter 512 * returns 0 if the property does not exist. Using {@link 513 * #wasNull}, the caller can distiguish this case from a 514 * property's real value of 0.</p> 515 * 516 * @return <code>true</code> if the last call to {@link 517 * #getPropertyIntValue} or {@link #getProperty} tried to access a 518 * property that was not available, else <code>false</code>. 519 * 520 * @throws NoSingleSectionException if the {@link PropertySet} has 521 * more than one {@link Section}. 522 */ 523 public boolean wasNull() throws NoSingleSectionException 524 { 525 return getSingleSection().wasNull(); 526 } 527 528 529 530 /** 531 * <p>If the {@link PropertySet} has only a single section this 532 * method returns it.</p> 533 * 534 * @throws NoSingleSectionException if the {@link PropertySet} has 535 * more or less than exactly one {@link Section}. 536 */ 537 public Section getSingleSection() 538 { 539 if (sectionCount != 1) 540 throw new NoSingleSectionException 541 ("Property set contains " + sectionCount + " sections."); 542 return ((Section) sections.get(0)); 543 } 544 545 } 546