1 /* ==================================================================== 2 * The Apache Software License, Version 1.1 3 * 4 * Copyright (c) 2000 The Apache Software Foundation. All rights 5 * reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 19 * 3. The end-user documentation included with the redistribution, 20 * if any, must include the following acknowledgment: 21 * "This product includes software developed by the 22 * Apache Software Foundation (http://www.apache.org/)." 23 * Alternately, this acknowledgment may appear in the software itself, 24 * if and wherever such third-party acknowledgments normally appear. 25 * 26 * 4. The names "Apache" and "Apache Software Foundation" must 27 * not be used to endorse or promote products derived from this 28 * software without prior written permission. For written 29 * permission, please contact apache@apache.org. 30 * 31 * 5. Products derived from this software may not be called "Apache", 32 * nor may "Apache" appear in their name, without prior written 33 * permission of the Apache Software Foundation. 34 * 35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 * ==================================================================== 48 * 49 * This software consists of voluntary contributions made by many 50 * individuals on behalf of the Apache Software Foundation. For more 51 * information on the Apache Software Foundation, please see 52 * <http://www.apache.org/>. 53 * 54 * Portions of this software are based upon public domain software 55 * originally written at the National Center for Supercomputing Applications, 56 * University of Illinois, Urbana-Champaign. 57 */ 58 59 package org.apache.poi.hpsf; 60 61 import java.io.*; 62 import java.util.*; 63 import org.apache.poi.hpsf.littleendian.*; 64 import org.apache.poi.hpsf.wellknown.*; 65 import org.apache.poi.poifs.filesystem.*; 66 67 /** 68 * <p>Represents a property set in the Horrible Property Set Format 69 * (HPSF). These are usually metadata of a Microsoft Office 70 * document.</p> 71 * 72 * <p>An application that wants to access these metadata should create 73 * an instance of this class or one of its subclasses by calling the 74 * factory method {@link PropertySetFactory#create} and then retrieve 75 * the information its needs by calling appropriate methods.</p> 76 * 77 * <p>{@link PropertySetFactory#create} does its work by calling one 78 * of the constructors {@link PropertySet#PropertySet(InputStream)} or 79 * {@link PropertySet#PropertySet(byte[])}. If the constructor's 80 * argument is not in the Horrible Property Set Format, i.e. not a 81 * property set stream, or if any other error occurs, an appropriate 82 * exception is thrown.</p> 83 * 84 * <p>A {@link PropertySet} has a list of {@link Section}s, and each 85 * {@link Section} has a {@link Property} array. Use {@link 86 * #getSections} to retrieve the {@link Section}s, then call {@link 87 * Section#getProperties} for each {@link Section} to get hold of the 88 * {@link Property} arrays.</p> 89 * 90 * Since the vast majority of {@link PropertySet}s contains only a 91 * single {@link Section}, the convenience method {@link 92 * #getProperties} returns the properties of a {@link PropertySet}'s 93 * {@link Section} (throwing a {@link NoSingleSectionException} if the 94 * {@link PropertySet} contains more (or less) than exactly one {@link 95 * Section}). 96 * 97 * @author Rainer Klute (klute@rainer-klute.de) 98 * @version $Id: PropertySet.java,v 1.1 2002/02/14 04:00:59 mjohnson Exp $ 99 * @since 2002-02-09 100 */ 101 public class PropertySet 102 { 103 static final byte[] BYTE_ORDER_ASSERTION = 104 new byte[] {(byte) 0xFF, (byte) 0xFE}; 105 static final byte[] FORMAT_ASSERTION = 106 new byte[] {(byte) 0x00, (byte) 0x00}; 107 108 109 110 private Word byteOrder; // Must equal BYTE_ORDER_ASSERTION 111 112 /** 113 * <p>Returns the property set stream's low-level "byte order" 114 * field. It is always <tt>0xFFFE</tt>.</p> 115 */ 116 public Word getByteOrder() 117 { 118 return byteOrder; 119 } 120 121 122 123 private Word format; // Must equal FORMAT_ASSERTION 124 125 /** 126 * <p>Returns the property set stream's low-level "format" 127 * field. It is always <tt>0x0000</tt>.</p> 128 */ 129 public Word getFormat() 130 { 131 return format; 132 } 133 134 135 136 private DWord osVersion; 137 138 /** 139 * <p>Returns the property set stream's low-level "OS version" 140 * field.</p> 141 */ 142 public DWord getOSVersion() 143 { 144 return osVersion; 145 } 146 147 148 149 private ClassID classID; 150 151 /** 152 * <p>Returns the property set stream's low-level "class ID" 153 * field.</p> 154 */ 155 public ClassID getClassID() 156 { 157 return classID; 158 } 159 160 161 162 private int sectionCount; 163 164 /** 165 * <p>Returns the number of {@link Section}s in the property 166 * set.</p> 167 */ 168 public int getSectionCount() 169 { 170 return sectionCount; 171 } 172 173 174 175 private List sections; 176 177 /** 178 * <p>Returns the {@link Section}s in the property set.</p> 179 */ 180 public List getSections() 181 { 182 return sections; 183 } 184 185 186 187 /** 188 * <p>Creates an empty (uninitialized) {@link PropertySet}.</p> 189 * 190 * <p><strong>Please note:</strong> For the time being this 191 * constructor is protected since it is used for internal purposes 192 * only, but expect it to become public once the property set 193 * writing functionality is implemented.</p> 194 */ 195 protected PropertySet() 196 {} 197 198 199 200 /** 201 * <p>Creates a {@link PropertySet} instance from an {@link 202 * InputStream} in the Horrible Property Set Format.</p> 203 * 204 * <p>The constructor reads the first few bytes from the stream 205 * and determines whether it is really a property set stream. If 206 * it is, it parses the rest of the stream. If it is not, it 207 * resets the stream to its beginning in order to let other 208 * components mess around with the data and throws an 209 * exception.</p> 210 * 211 * @throws NoPropertySetStreamException if the stream is not a 212 * property set stream. 213 * 214 * @throws MarkUnsupportedException if the stream does not support 215 * the {@link InputStream#markSupported} method. 216 * 217 * @throws IOException if the {@link InputStream} cannot not be 218 * accessed as needed. 219 */ 220 public PropertySet(final InputStream stream) 221 throws NoPropertySetStreamException, MarkUnsupportedException, 222 IOException 223 { 224 if (isPropertySetStream(stream)) 225 { 226 final int avail = stream.available(); 227 final byte[] buffer = new byte[avail]; 228 stream.read(buffer, 0, buffer.length); 229 init(buffer, 0, buffer.length); 230 } 231 else 232 throw new NoPropertySetStreamException(); 233 } 234 235 236 237 /** 238 * <p>Creates a {@link PropertySet} instance from a byte array 239 * that represents a stream in the Horrible Property Set 240 * Format.</p> 241 * 242 * @param stream The byte array holding the stream data. 243 * 244 * @param offset The offset in <var>stream</var> where the stream 245 * data begin. If the stream data begin with the first byte in the 246 * array, the <var>offset</var> is 0. 247 * 248 * @param length The length of the stream data. 249 * 250 * @throws NoPropertySetStreamException if the byte array is not a 251 * property set stream. 252 */ 253 public PropertySet(final byte[] stream, final int offset, final int length) 254 throws NoPropertySetStreamException 255 { 256 if (isPropertySetStream(stream, offset, length)) 257 init(stream, offset, length); 258 else 259 throw new NoPropertySetStreamException(); 260 } 261 262 263 264 /** 265 * <p>Creates a {@link PropertySet} instance from a byte array 266 * that represents a stream in the Horrible Property Set 267 * Format.</p> 268 * 269 * @param stream The byte array holding the stream data. The 270 * complete byte array contents is the stream data. 271 * 272 * @throws NoPropertySetStreamException if the byte array is not a 273 * property set stream. 274 */ 275 public PropertySet(final byte[] stream) 276 throws NoPropertySetStreamException 277 { 278 this(stream, 0, stream.length); 279 } 280 281 282 283 /** 284 * <p>Checks whether an {@link InputStream} is in the Horrible 285 * Property Set Format.</p> 286 * 287 * @param stream The {@link InputStream} to check. In order to 288 * perform the check, the method reads the first bytes from the 289 * stream. After reading, the stream is reset to the position it 290 * had before reading. The {@link InputStream} must support the 291 * {@link InputStream#mark} method. 292 * 293 * @return <code>true</code> if the stream is a property set 294 * stream, else <code>false</code>. 295 * 296 * @throws MarkUnsupportedException if the {@link InputStream} 297 * does not support the {@link InputStream#mark} method. 298 */ 299 public static boolean isPropertySetStream(final InputStream stream) 300 throws MarkUnsupportedException, IOException 301 { 302 /* Read at most this many bytes. */ 303 final int BUFFER_SIZE = 50; 304 305 /* Mark the current position in the stream so that we can 306 * reset to this position if the stream does not contain a 307 * property set. */ 308 if (!stream.markSupported()) 309 throw new MarkUnsupportedException(stream.getClass().getName()); 310 stream.mark(BUFFER_SIZE); 311 312 /* Read a couple of bytes from the stream. */ 313 final byte[] buffer = new byte[BUFFER_SIZE]; 314 final int bytes = 315 stream.read(buffer, 0, 316 Math.min(buffer.length, stream.available())); 317 final boolean isPropertySetStream = 318 isPropertySetStream(buffer, 0, bytes); 319 stream.reset(); 320 return isPropertySetStream; 321 } 322 323 324 325 /** 326 * <p>Checks whether a byte array is in the Horrible Property Set 327 * Format.</p> 328 * 329 * @param src The byte array to check. 330 * 331 * @param offset The offset in the byte array. 332 * 333 * @param length The significant number of bytes in the byte 334 * array. Only this number of bytes will be checked. 335 * 336 * @return <code>true</code> if the byte array is a property set 337 * stream, <code>false</code> if not. 338 */ 339 public static boolean isPropertySetStream(final byte[] src, int offset, 340 final int length) 341 { 342 /* Read the header fields of the stream. They must always be 343 * there. */ 344 final Word byteOrder = new Word(src, offset); 345 offset += Word.LENGTH; 346 if (!Util.equal(byteOrder.getBytes(), BYTE_ORDER_ASSERTION)) 347 return false; 348 final Word format = new Word(src, offset); 349 offset += Word.LENGTH; 350 if (!Util.equal(format.getBytes(), FORMAT_ASSERTION)) 351 return false; 352 final DWord osVersion = new DWord(src, offset); 353 offset += DWord.LENGTH; 354 final ClassID classID = new ClassID(src, offset); 355 offset += ClassID.LENGTH; 356 final DWord sectionCount = new DWord(src, offset); 357 offset += DWord.LENGTH; 358 if (sectionCount.intValue() < 1) 359 return false; 360 return true; 361 } 362 363 364 365 /** 366 * <p>Initializes this {@link PropertySet} instance from a byte 367 * array. The method assumes that it has been checked already that 368 * the byte array indeed represents a property set stream. It does 369 * no more checks on its own.</p> 370 */ 371 private void init(final byte[] src, int offset, final int length) 372 { 373 /* Read the stream's header fields. */ 374 byteOrder = new Word(src, offset); 375 offset += Word.LENGTH; 376 format = new Word(src, offset); 377 offset += Word.LENGTH; 378 osVersion = new DWord(src, offset); 379 offset += DWord.LENGTH; 380 classID = new ClassID(src, offset); 381 offset += ClassID.LENGTH; 382 sectionCount = new DWord(src, offset).intValue(); 383 offset += DWord.LENGTH; 384 385 /* Read the sections, which are following the header. They 386 * start with an array of section descriptions. Each one 387 * consists of a format ID telling what the section contains 388 * and an offset telling how many bytes from the start of the 389 * stream the section begins. */ 390 /* Most property sets have only one section. The Document 391 * Summary Information stream has 2. Everything else is a rare 392 * exception and is no longer fostered by Microsoft. */ 393 sections = new ArrayList(2); 394 395 /* Loop over the section descriptor array. Each descriptor 396 * consists of a ClassID and a DWord, and we have to increment 397 * "offset" accordingly. */ 398 for (int i = 0; i < sectionCount; i++) 399 { 400 final Section s = new Section(src, offset); 401 offset += ClassID.LENGTH + DWord.LENGTH; 402 sections.add(s); 403 } 404 } 405 406 407 408 /** 409 * <p>Checks whether this {@link PropertySet} represents a Summary 410 * Information.</p> 411 */ 412 public boolean isSummaryInformation() 413 { 414 return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(), 415 SectionIDMap.SUMMARY_INFORMATION_ID); 416 } 417 418 419 420 /** 421 * <p>Checks whether this {@link PropertySet} is a Document 422 * Summary Information.</p> 423 */ 424 public boolean isDocumentSummaryInformation() 425 { 426 return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(), 427 SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID); 428 } 429 430 431 432 /** 433 * <p>Convenience method returning the {@link Property} array 434 * contained in this property set. It is a shortcut for getting 435 * the {@link PropertySet}'s {@link Section}s list and then 436 * getting the {@link Property} array from the first {@link 437 * Section}. However, it can only be used if the {@link 438 * PropertySet} contains exactly one {@link Section}, so check 439 * {@link #getSectionCount} first!</p> 440 * 441 * @return The properties of the only {@link Section} of this 442 * {@link PropertySet}. 443 * 444 * @throws NoSingleSectionException if the {@link PropertySet} has 445 * more or less than one {@link Section}. 446 */ 447 public Property[] getProperties() 448 throws NoSingleSectionException 449 { 450 return getSingleSection().getProperties(); 451 } 452 453 454 455 /** 456 * <p>Convenience method returning the value of the property with 457 * the specified ID. If the property is not available, 458 * <code>null</code> is returned and a subsequent call to {@link 459 * #wasNull} will return <code>true</code>.</p> 460 * 461 * @throws NoSingleSectionException if the {@link PropertySet} has 462 * more or less than one {@link Section}. 463 */ 464 protected Object getProperty(final int id) 465 throws NoSingleSectionException 466 { 467 return getSingleSection().getProperty(id); 468 } 469 470 471 472 /** 473 * <p>Convenience method returning the value of the numeric 474 * property with the specified ID. If the property is not 475 * available, 0 is returned. A subsequent call to {@link #wasNull} 476 * will return <code>true</code> to let the caller distinguish 477 * that case from a real property value of 0.</p> 478 * 479 * @throws NoSingleSectionException if the {@link PropertySet} has 480 * more or less than one {@link Section}. 481 */ 482 protected int getPropertyIntValue(final int id) 483 throws NoSingleSectionException 484 { 485 return getSingleSection().getPropertyIntValue(id); 486 } 487 488 489 490 /** 491 * <p>Checks whether the property which the last call to {@link 492 * #getPropertyIntValue} or {@link #getProperty} tried to access 493 * was available or not. This information might be important for 494 * callers of {@link #getPropertyIntValue} since the latter 495 * returns 0 if the property does not exist. Using {@link 496 * #wasNull}, the caller can distiguish this case from a 497 * property's real value of 0.</p> 498 * 499 * @return <code>true</code> if the last call to {@link 500 * #getPropertyIntValue} or {@link #getProperty} tried to access a 501 * property that was not available, else <code>false</code>. 502 * 503 * @throws NoSingleSectionException if the {@link PropertySet} has 504 * more than one {@link Section}. 505 */ 506 public boolean wasNull() throws NoSingleSectionException 507 { 508 return getSingleSection().wasNull(); 509 } 510 511 512 513 /** 514 * <p>If the {@link PropertySet} has only a single section this 515 * method returns it.</p> 516 * 517 * @throws NoSingleSectionException if the {@link PropertySet} has 518 * more or less than exactly one {@link Section}. 519 */ 520 public Section getSingleSection() 521 { 522 if (sectionCount != 1) 523 throw new NoSingleSectionException 524 ("Property set contains " + sectionCount + " sections."); 525 return ((Section) sections.get(0)); 526 } 527 528 } 529