1    /* ====================================================================
2     * The Apache Software License, Version 1.1
3     *
4     * Copyright (c) 2000 The Apache Software Foundation.  All rights
5     * reserved.
6     *
7     * Redistribution and use in source and binary forms, with or without
8     * modification, are permitted provided that the following conditions
9     * are met:
10    *
11    * 1. Redistributions of source code must retain the above copyright
12    *    notice, this list of conditions and the following disclaimer.
13    *
14    * 2. Redistributions in binary form must reproduce the above copyright
15    *    notice, this list of conditions and the following disclaimer in
16    *    the documentation and/or other materials provided with the
17    *    distribution.
18    *
19    * 3. The end-user documentation included with the redistribution,
20    *    if any, must include the following acknowledgment:
21    *       "This product includes software developed by the
22    *        Apache Software Foundation (http://www.apache.org/)."
23    *    Alternately, this acknowledgment may appear in the software itself,
24    *    if and wherever such third-party acknowledgments normally appear.
25    *
26    * 4. The names "Apache" and "Apache Software Foundation" must
27    *    not be used to endorse or promote products derived from this
28    *    software without prior written permission. For written
29    *    permission, please contact apache@apache.org.
30    *
31    * 5. Products derived from this software may not be called "Apache",
32    *    nor may "Apache" appear in their name, without prior written
33    *    permission of the Apache Software Foundation.
34    *
35    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46    * SUCH DAMAGE.
47    * ====================================================================
48    *
49    * This software consists of voluntary contributions made by many
50    * individuals on behalf of the Apache Software Foundation.  For more
51    * information on the Apache Software Foundation, please see
52    * <http://www.apache.org/>.
53    */
54   
55   package org.apache.poi.hpsf;
56   
57   import java.io.*;
58   import java.util.*;
59   import org.apache.poi.hpsf.littleendian.*;
60   import org.apache.poi.hpsf.wellknown.*;
61   import org.apache.poi.poifs.filesystem.*;
62   
63   /**
64    * <p>Represents a property set in the Horrible Property Set Format
65    * (HPSF). These are usually metadata of a Microsoft Office
66    * document.</p>
67    *
68    * <p>An application that wants to access these metadata should create
69    * an instance of this class or one of its subclasses by calling the
70    * factory method {@link PropertySetFactory#create} and then retrieve
71    * the information its needs by calling appropriate methods.</p>
72    *
73    * <p>{@link PropertySetFactory#create} does its work by calling one
74    * of the constructors {@link PropertySet#PropertySet(InputStream)} or
75    * {@link PropertySet#PropertySet(byte[])}. If the constructor's
76    * argument is not in the Horrible Property Set Format, i.e. not a
77    * property set stream, or if any other error occurs, an appropriate
78    * exception is thrown.</p>
79    *
80    * <p>A {@link PropertySet} has a list of {@link Section}s, and each
81    * {@link Section} has a {@link Property} array. Use {@link
82    * #getSections} to retrieve the {@link Section}s, then call {@link
83    * Section#getProperties} for each {@link Section} to get hold of the
84    * {@link Property} arrays.</p>
85    *
86    * Since the vast majority of {@link PropertySet}s contains only a
87    * single {@link Section}, the convenience method {@link
88    * #getProperties} returns the properties of a {@link PropertySet}'s
89    * {@link Section} (throwing a {@link NoSingleSectionException} if the
90    * {@link PropertySet} contains more (or less) than exactly one {@link
91    * Section}).
92    *
93    * @author Rainer Klute (klute@rainer-klute.de)
94    * @author Drew Varner (Drew.Varner hanginIn sc.edu)
95    *
96    * @version $Id: PropertySet.java,v 1.4 2002/05/03 07:29:09 klute Exp $
97    * @since 2002-02-09
98    */
99   public class PropertySet
100  {
101      static final byte[] BYTE_ORDER_ASSERTION =
102          new byte[] {(byte) 0xFF, (byte) 0xFE};
103      static final byte[] FORMAT_ASSERTION =
104          new byte[] {(byte) 0x00, (byte) 0x00};
105  
106  
107  
108      private Word byteOrder; // Must equal BYTE_ORDER_ASSERTION
109  
110      /**
111       * <p>Returns the property set stream's low-level "byte order"
112       * field. It is always <tt>0xFFFE</tt>.</p>
113       */
114      public Word getByteOrder()
115      {
116          return byteOrder;
117      }
118  
119  
120  
121      private Word format;    // Must equal FORMAT_ASSERTION
122  
123      /**
124       * <p>Returns the property set stream's low-level "format"
125       * field. It is always <tt>0x0000</tt>.</p>
126       */
127      public Word getFormat()
128      {
129          return format;
130      }
131  
132  
133  
134      private DWord osVersion;
135  
136      /**
137       * <p>Returns the property set stream's low-level "OS version"
138       * field.</p>
139       */
140      public DWord getOSVersion()
141      {
142          return osVersion;
143      }
144  
145  
146  
147      private ClassID classID;
148  
149      /**
150       * <p>Returns the property set stream's low-level "class ID"
151       * field.</p>
152       */
153      public ClassID getClassID()
154      {
155          return classID;
156      }
157  
158  
159  
160      private int sectionCount;
161  
162      /**
163       * <p>Returns the number of {@link Section}s in the property
164       * set.</p>
165       */
166      public int getSectionCount()
167      {
168          return sectionCount;
169      }
170  
171  
172  
173      private List sections;
174  
175      /**
176       * <p>Returns the {@link Section}s in the property set.</p>
177       */
178      public List getSections()
179      {
180          return sections;
181      }
182  
183  
184  
185      /**
186       * <p>Creates an empty (uninitialized) {@link PropertySet}.</p>
187       *
188       * <p><strong>Please note:</strong> For the time being this
189       * constructor is protected since it is used for internal purposes
190       * only, but expect it to become public once the property set
191       * writing functionality is implemented.</p>
192       */
193      protected PropertySet()
194      {}
195  
196  
197  
198      /**
199       * <p>Creates a {@link PropertySet} instance from an {@link
200       *  InputStream} in the Horrible Property Set Format.</p>
201       *
202       * <p>The constructor reads the first few bytes from the stream
203       * and determines whether it is really a property set stream. If
204       * it is, it parses the rest of the stream. If it is not, it
205       * resets the stream to its beginning in order to let other
206       * components mess around with the data and throws an
207       * exception.</p>
208       *
209       * @throws NoPropertySetStreamException if the stream is not a
210       * property set stream.
211       *
212       * @throws MarkUnsupportedException if the stream does not support
213       * the {@link InputStream#markSupported} method.
214       *
215       * @throws IOException if the {@link InputStream} cannot not be
216       * accessed as needed.
217       */
218      public PropertySet(final InputStream stream)
219          throws NoPropertySetStreamException, MarkUnsupportedException,
220                 IOException
221      {
222          if (isPropertySetStream(stream))
223          {
224              final int avail = stream.available();
225              final byte[] buffer = new byte[avail];
226              stream.read(buffer, 0, buffer.length);
227              init(buffer, 0, buffer.length);
228          }
229          else
230              throw new NoPropertySetStreamException();
231      }
232  
233  
234  
235      /**
236       * <p>Creates a {@link PropertySet} instance from a byte array
237       * that represents a stream in the Horrible Property Set
238       * Format.</p>
239       *
240       * @param stream The byte array holding the stream data.
241       *
242       * @param offset The offset in <var>stream</var> where the stream
243       * data begin. If the stream data begin with the first byte in the
244       * array, the <var>offset</var> is 0.
245       *
246       * @param length The length of the stream data.
247       *
248       * @throws NoPropertySetStreamException if the byte array is not a
249       * property set stream.
250       */
251      public PropertySet(final byte[] stream, final int offset, final int length)
252          throws NoPropertySetStreamException
253      {
254          if (isPropertySetStream(stream, offset, length))
255              init(stream, offset, length);
256          else
257              throw new NoPropertySetStreamException();
258      }
259  
260  
261  
262      /**
263       * <p>Creates a {@link PropertySet} instance from a byte array
264       * that represents a stream in the Horrible Property Set
265       * Format.</p>
266       *
267       * @param stream The byte array holding the stream data. The
268       * complete byte array contents is the stream data.
269       *
270       * @throws NoPropertySetStreamException if the byte array is not a
271       * property set stream.
272       */
273      public PropertySet(final byte[] stream)
274          throws NoPropertySetStreamException
275      {
276          this(stream, 0, stream.length);
277      }
278  
279  
280  
281      /**
282       * <p>Checks whether an {@link InputStream} is in the Horrible
283       * Property Set Format.</p>
284       *
285       * @param stream The {@link InputStream} to check. In order to
286       * perform the check, the method reads the first bytes from the
287       * stream. After reading, the stream is reset to the position it
288       * had before reading. The {@link InputStream} must support the
289       * {@link InputStream#mark} method.
290       *
291       * @return <code>true</code> if the stream is a property set
292       * stream, else <code>false</code>.
293       *
294       * @throws MarkUnsupportedException if the {@link InputStream}
295       * does not support the {@link InputStream#mark} method.
296       */
297      public static boolean isPropertySetStream(final InputStream stream)
298          throws MarkUnsupportedException, IOException
299      {
300          /* Read at most this many bytes. */
301          final int BUFFER_SIZE = 50;
302  
303          /* Mark the current position in the stream so that we can
304           * reset to this position if the stream does not contain a
305           * property set. */
306          if (!stream.markSupported())
307              throw new MarkUnsupportedException(stream.getClass().getName());
308          stream.mark(BUFFER_SIZE);
309  
310          /* Read a couple of bytes from the stream. */
311          final byte[] buffer = new byte[BUFFER_SIZE];
312          final int bytes =
313              stream.read(buffer, 0,
314                          Math.min(buffer.length, stream.available()));
315          final boolean isPropertySetStream =
316              isPropertySetStream(buffer, 0, bytes);
317          stream.reset();
318          return isPropertySetStream;
319      }
320  
321  
322  
323      /**
324       * <p>Checks whether a byte array is in the Horrible Property Set
325       * Format.</p>
326       *
327       * @param src The byte array to check.
328       *
329       * @param offset The offset in the byte array.
330       *
331       * @param length The significant number of bytes in the byte
332       * array. Only this number of bytes will be checked.
333       *
334       * @return <code>true</code> if the byte array is a property set
335       * stream, <code>false</code> if not.
336       */
337      public static boolean isPropertySetStream(final byte[] src, int offset,
338                                                final int length)
339      {
340          /* Read the header fields of the stream. They must always be
341           * there. */
342          final Word byteOrder = new Word(src, offset);
343          offset += Word.LENGTH;
344          if (!Util.equal(byteOrder.getBytes(), BYTE_ORDER_ASSERTION))
345              return false;
346          final Word format = new Word(src, offset);
347          offset += Word.LENGTH;
348          if (!Util.equal(format.getBytes(), FORMAT_ASSERTION))
349              return false;
350          final DWord osVersion = new DWord(src, offset);
351          offset += DWord.LENGTH;
352          final ClassID classID = new ClassID(src, offset);
353          offset += ClassID.LENGTH;
354          final DWord sectionCount = new DWord(src, offset);
355          offset += DWord.LENGTH;
356          if (sectionCount.intValue() < 1)
357              return false;
358          return true;
359      }
360  
361  
362  
363      /**
364       * <p>Initializes this {@link PropertySet} instance from a byte
365       * array. The method assumes that it has been checked already that
366       * the byte array indeed represents a property set stream. It does
367       * no more checks on its own.</p>
368       */
369      private void init(final byte[] src, int offset, final int length)
370      {
371          /* Read the stream's header fields. */
372          byteOrder = new Word(src, offset);
373          offset += Word.LENGTH;
374          format = new Word(src, offset);
375          offset += Word.LENGTH;
376          osVersion = new DWord(src, offset);
377          offset += DWord.LENGTH;
378          classID = new ClassID(src, offset);
379          offset += ClassID.LENGTH;
380          sectionCount = new DWord(src, offset).intValue();
381          offset += DWord.LENGTH;
382  
383          /* Read the sections, which are following the header. They
384           * start with an array of section descriptions. Each one
385           * consists of a format ID telling what the section contains
386           * and an offset telling how many bytes from the start of the
387           * stream the section begins. */
388          /* Most property sets have only one section. The Document
389           * Summary Information stream has 2. Everything else is a rare
390           * exception and is no longer fostered by Microsoft. */
391          sections = new ArrayList(2);
392  
393          /* Loop over the section descriptor array. Each descriptor
394           * consists of a ClassID and a DWord, and we have to increment
395           * "offset" accordingly. */
396          for (int i = 0; i < sectionCount; i++)
397          {
398              final Section s = new Section(src, offset);
399              offset += ClassID.LENGTH + DWord.LENGTH;
400              sections.add(s);
401          }
402      }
403  
404  
405  
406      /**
407       * <p>Checks whether this {@link PropertySet} represents a Summary
408       * Information.</p>
409       */
410      public boolean isSummaryInformation()
411      {
412          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
413                            SectionIDMap.SUMMARY_INFORMATION_ID);
414      }
415  
416  
417  
418      /**
419       * <p>Checks whether this {@link PropertySet} is a Document
420       * Summary Information.</p>
421       */
422      public boolean isDocumentSummaryInformation()
423      {
424          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
425                            SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID);
426      }
427  
428  
429  
430      /**
431       * <p>Convenience method returning the {@link Property} array
432       * contained in this property set. It is a shortcut for getting
433       * the {@link PropertySet}'s {@link Section}s list and then
434       * getting the {@link Property} array from the first {@link
435       * Section}. However, it can only be used if the {@link
436       * PropertySet} contains exactly one {@link Section}, so check
437       * {@link #getSectionCount} first!</p>
438       *
439       * @return The properties of the only {@link Section} of this
440       * {@link PropertySet}.
441       *
442       * @throws NoSingleSectionException if the {@link PropertySet} has
443       * more or less than one {@link Section}.
444       */
445      public Property[] getProperties()
446          throws NoSingleSectionException
447      {
448          return getSingleSection().getProperties();
449      }
450  
451  
452  
453      /**
454       * <p>Convenience method returning the value of the property with
455       * the specified ID. If the property is not available,
456       * <code>null</code> is returned and a subsequent call to {@link
457       * #wasNull} will return <code>true</code>.</p>
458       *
459       * @throws NoSingleSectionException if the {@link PropertySet} has
460       * more or less than one {@link Section}.
461       */
462      protected Object getProperty(final int id)
463          throws NoSingleSectionException
464      {
465          return getSingleSection().getProperty(id);
466      }
467  
468  
469  
470      /**
471       * <p>Convenience method returning the value of a boolean property
472       * with the specified ID. If the property is not available,
473       * <code>false</code> is returned. A subsequent call to {@link
474       * #wasNull} will return <code>true</code> to let the caller
475       * distinguish that case from a real property value of
476       * <code>false</code>.</p>
477       *
478       * @throws NoSingleSectionException if the {@link PropertySet} has
479       * more or less than one {@link Section}.
480       */
481      protected boolean getPropertyBooleanValue(final int id)
482          throws NoSingleSectionException
483      {
484          return getSingleSection().getPropertyBooleanValue(id);
485      }
486  
487  
488  
489      /**
490       * <p>Convenience method returning the value of the numeric
491       * property with the specified ID. If the property is not
492       * available, 0 is returned. A subsequent call to {@link #wasNull}
493       * will return <code>true</code> to let the caller distinguish
494       * that case from a real property value of 0.</p>
495       *
496       * @throws NoSingleSectionException if the {@link PropertySet} has
497       * more or less than one {@link Section}.
498       */
499      protected int getPropertyIntValue(final int id)
500          throws NoSingleSectionException
501      {
502          return getSingleSection().getPropertyIntValue(id);
503      }
504  
505  
506  
507      /**
508       * <p>Checks whether the property which the last call to {@link
509       * #getPropertyIntValue} or {@link #getProperty} tried to access
510       * was available or not. This information might be important for
511       * callers of {@link #getPropertyIntValue} since the latter
512       * returns 0 if the property does not exist. Using {@link
513       * #wasNull}, the caller can distiguish this case from a
514       * property's real value of 0.</p>
515       *
516       * @return <code>true</code> if the last call to {@link
517       * #getPropertyIntValue} or {@link #getProperty} tried to access a
518       * property that was not available, else <code>false</code>.
519       *
520       * @throws NoSingleSectionException if the {@link PropertySet} has
521       * more than one {@link Section}.
522       */
523      public boolean wasNull() throws NoSingleSectionException
524      {
525          return getSingleSection().wasNull();
526      }
527  
528  
529  
530      /**
531       * <p>If the {@link PropertySet} has only a single section this
532       * method returns it.</p>
533       *
534       * @throws NoSingleSectionException if the {@link PropertySet} has
535       * more or less than exactly one {@link Section}.
536       */
537      public Section getSingleSection()
538      {
539          if (sectionCount != 1)
540              throw new NoSingleSectionException
541                  ("Property set contains " + sectionCount + " sections.");
542          return ((Section) sections.get(0));
543      }
544  
545  }
546