1    /* ====================================================================
2     * The Apache Software License, Version 1.1
3     *
4     * Copyright (c) 2000 The Apache Software Foundation.  All rights
5     * reserved.
6     *
7     * Redistribution and use in source and binary forms, with or without
8     * modification, are permitted provided that the following conditions
9     * are met:
10    *
11    * 1. Redistributions of source code must retain the above copyright
12    *    notice, this list of conditions and the following disclaimer.
13    *
14    * 2. Redistributions in binary form must reproduce the above copyright
15    *    notice, this list of conditions and the following disclaimer in
16    *    the documentation and/or other materials provided with the
17    *    distribution.
18    *
19    * 3. The end-user documentation included with the redistribution,
20    *    if any, must include the following acknowledgment:
21    *       "This product includes software developed by the
22    *        Apache Software Foundation (http://www.apache.org/)."
23    *    Alternately, this acknowledgment may appear in the software itself,
24    *    if and wherever such third-party acknowledgments normally appear.
25    *
26    * 4. The names "Apache" and "Apache Software Foundation" must
27    *    not be used to endorse or promote products derived from this
28    *    software without prior written permission. For written
29    *    permission, please contact apache@apache.org.
30    *
31    * 5. Products derived from this software may not be called "Apache",
32    *    nor may "Apache" appear in their name, without prior written
33    *    permission of the Apache Software Foundation.
34    *
35    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46    * SUCH DAMAGE.
47    * ====================================================================
48    *
49    * This software consists of voluntary contributions made by many
50    * individuals on behalf of the Apache Software Foundation.  For more
51    * information on the Apache Software Foundation, please see
52    * <http://www.apache.org/>.
53    *
54    * Portions of this software are based upon public domain software
55    * originally written at the National Center for Supercomputing Applications,
56    * University of Illinois, Urbana-Champaign.
57    */
58   
59   package org.apache.poi.hpsf;
60   
61   import java.io.*;
62   import java.util.*;
63   import org.apache.poi.hpsf.littleendian.*;
64   import org.apache.poi.hpsf.wellknown.*;
65   import org.apache.poi.poifs.filesystem.*;
66   
67   /**
68    * <p>Represents a property set in the Horrible Property Set Format
69    * (HPSF). These are usually metadata of a Microsoft Office
70    * document.</p>
71    *
72    * <p>An application that wants to access these metadata should create
73    * an instance of this class or one of its subclasses by calling the
74    * factory method {@link PropertySetFactory#create} and then retrieve
75    * the information its needs by calling appropriate methods.</p>
76    *
77    * <p>{@link PropertySetFactory#create} does its work by calling one
78    * of the constructors {@link PropertySet#PropertySet(InputStream)} or
79    * {@link PropertySet#PropertySet(byte[])}. If the constructor's
80    * argument is not in the Horrible Property Set Format, i.e. not a
81    * property set stream, or if any other error occurs, an appropriate
82    * exception is thrown.</p>
83    *
84    * <p>A {@link PropertySet} has a list of {@link Section}s, and each
85    * {@link Section} has a {@link Property} array. Use {@link
86    * #getSections} to retrieve the {@link Section}s, then call {@link
87    * Section#getProperties} for each {@link Section} to get hold of the
88    * {@link Property} arrays.</p>
89    *
90    * Since the vast majority of {@link PropertySet}s contains only a
91    * single {@link Section}, the convenience method {@link
92    * #getProperties} returns the properties of a {@link PropertySet}'s
93    * {@link Section} (throwing a {@link NoSingleSectionException} if the
94    * {@link PropertySet} contains more (or less) than exactly one {@link
95    * Section}).
96    *
97    * @author Rainer Klute (klute@rainer-klute.de)
98    * @version $Id: PropertySet.java,v 1.1 2002/02/14 04:00:59 mjohnson Exp $
99    * @since 2002-02-09
100   */
101  public class PropertySet
102  {
103      static final byte[] BYTE_ORDER_ASSERTION =
104          new byte[] {(byte) 0xFF, (byte) 0xFE};
105      static final byte[] FORMAT_ASSERTION =
106          new byte[] {(byte) 0x00, (byte) 0x00};
107  
108  
109  
110      private Word byteOrder; // Must equal BYTE_ORDER_ASSERTION
111  
112      /**
113       * <p>Returns the property set stream's low-level "byte order"
114       * field. It is always <tt>0xFFFE</tt>.</p>
115       */
116      public Word getByteOrder()
117      {
118          return byteOrder;
119      }
120  
121  
122  
123      private Word format;    // Must equal FORMAT_ASSERTION
124  
125      /**
126       * <p>Returns the property set stream's low-level "format"
127       * field. It is always <tt>0x0000</tt>.</p>
128       */
129      public Word getFormat()
130      {
131          return format;
132      }
133  
134  
135  
136      private DWord osVersion;
137  
138      /**
139       * <p>Returns the property set stream's low-level "OS version"
140       * field.</p>
141       */
142      public DWord getOSVersion()
143      {
144          return osVersion;
145      }
146  
147  
148  
149      private ClassID classID;
150  
151      /**
152       * <p>Returns the property set stream's low-level "class ID"
153       * field.</p>
154       */
155      public ClassID getClassID()
156      {
157          return classID;
158      }
159  
160  
161  
162      private int sectionCount;
163  
164      /**
165       * <p>Returns the number of {@link Section}s in the property
166       * set.</p>
167       */
168      public int getSectionCount()
169      {
170          return sectionCount;
171      }
172  
173  
174  
175      private List sections;
176  
177      /**
178       * <p>Returns the {@link Section}s in the property set.</p>
179       */
180      public List getSections()
181      {
182          return sections;
183      }
184  
185  
186  
187      /**
188       * <p>Creates an empty (uninitialized) {@link PropertySet}.</p>
189       *
190       * <p><strong>Please note:</strong> For the time being this
191       * constructor is protected since it is used for internal purposes
192       * only, but expect it to become public once the property set
193       * writing functionality is implemented.</p>
194       */
195      protected PropertySet()
196      {}
197  
198  
199  
200      /**
201       * <p>Creates a {@link PropertySet} instance from an {@link
202       *  InputStream} in the Horrible Property Set Format.</p>
203       *
204       * <p>The constructor reads the first few bytes from the stream
205       * and determines whether it is really a property set stream. If
206       * it is, it parses the rest of the stream. If it is not, it
207       * resets the stream to its beginning in order to let other
208       * components mess around with the data and throws an
209       * exception.</p>
210       *
211       * @throws NoPropertySetStreamException if the stream is not a
212       * property set stream.
213       *
214       * @throws MarkUnsupportedException if the stream does not support
215       * the {@link InputStream#markSupported} method.
216       *
217       * @throws IOException if the {@link InputStream} cannot not be
218       * accessed as needed.
219       */
220      public PropertySet(final InputStream stream)
221          throws NoPropertySetStreamException, MarkUnsupportedException,
222                 IOException
223      {
224          if (isPropertySetStream(stream))
225          {
226              final int avail = stream.available();
227              final byte[] buffer = new byte[avail];
228              stream.read(buffer, 0, buffer.length);
229              init(buffer, 0, buffer.length);
230          }
231          else
232              throw new NoPropertySetStreamException();
233      }
234  
235  
236  
237      /**
238       * <p>Creates a {@link PropertySet} instance from a byte array
239       * that represents a stream in the Horrible Property Set
240       * Format.</p>
241       *
242       * @param stream The byte array holding the stream data.
243       *
244       * @param offset The offset in <var>stream</var> where the stream
245       * data begin. If the stream data begin with the first byte in the
246       * array, the <var>offset</var> is 0.
247       *
248       * @param length The length of the stream data.
249       *
250       * @throws NoPropertySetStreamException if the byte array is not a
251       * property set stream.
252       */
253      public PropertySet(final byte[] stream, final int offset, final int length)
254          throws NoPropertySetStreamException
255      {
256          if (isPropertySetStream(stream, offset, length))
257              init(stream, offset, length);
258          else
259              throw new NoPropertySetStreamException();
260      }
261  
262  
263  
264      /**
265       * <p>Creates a {@link PropertySet} instance from a byte array
266       * that represents a stream in the Horrible Property Set
267       * Format.</p>
268       *
269       * @param stream The byte array holding the stream data. The
270       * complete byte array contents is the stream data.
271       *
272       * @throws NoPropertySetStreamException if the byte array is not a
273       * property set stream.
274       */
275      public PropertySet(final byte[] stream)
276          throws NoPropertySetStreamException
277      {
278          this(stream, 0, stream.length);
279      }
280  
281  
282  
283      /**
284       * <p>Checks whether an {@link InputStream} is in the Horrible
285       * Property Set Format.</p>
286       *
287       * @param stream The {@link InputStream} to check. In order to
288       * perform the check, the method reads the first bytes from the
289       * stream. After reading, the stream is reset to the position it
290       * had before reading. The {@link InputStream} must support the
291       * {@link InputStream#mark} method.
292       *
293       * @return <code>true</code> if the stream is a property set
294       * stream, else <code>false</code>.
295       *
296       * @throws MarkUnsupportedException if the {@link InputStream}
297       * does not support the {@link InputStream#mark} method.
298       */
299      public static boolean isPropertySetStream(final InputStream stream)
300          throws MarkUnsupportedException, IOException
301      {
302          /* Read at most this many bytes. */
303          final int BUFFER_SIZE = 50;
304  
305          /* Mark the current position in the stream so that we can
306           * reset to this position if the stream does not contain a
307           * property set. */
308          if (!stream.markSupported())
309              throw new MarkUnsupportedException(stream.getClass().getName());
310          stream.mark(BUFFER_SIZE);
311  
312          /* Read a couple of bytes from the stream. */
313          final byte[] buffer = new byte[BUFFER_SIZE];
314          final int bytes =
315              stream.read(buffer, 0,
316                          Math.min(buffer.length, stream.available()));
317          final boolean isPropertySetStream =
318              isPropertySetStream(buffer, 0, bytes);
319          stream.reset();
320          return isPropertySetStream;
321      }
322  
323  
324  
325      /**
326       * <p>Checks whether a byte array is in the Horrible Property Set
327       * Format.</p>
328       *
329       * @param src The byte array to check.
330       *
331       * @param offset The offset in the byte array.
332       *
333       * @param length The significant number of bytes in the byte
334       * array. Only this number of bytes will be checked.
335       *
336       * @return <code>true</code> if the byte array is a property set
337       * stream, <code>false</code> if not.
338       */
339      public static boolean isPropertySetStream(final byte[] src, int offset,
340                                                final int length)
341      {
342          /* Read the header fields of the stream. They must always be
343           * there. */
344          final Word byteOrder = new Word(src, offset);
345          offset += Word.LENGTH;
346          if (!Util.equal(byteOrder.getBytes(), BYTE_ORDER_ASSERTION))
347              return false;
348          final Word format = new Word(src, offset);
349          offset += Word.LENGTH;
350          if (!Util.equal(format.getBytes(), FORMAT_ASSERTION))
351              return false;
352          final DWord osVersion = new DWord(src, offset);
353          offset += DWord.LENGTH;
354          final ClassID classID = new ClassID(src, offset);
355          offset += ClassID.LENGTH;
356          final DWord sectionCount = new DWord(src, offset);
357          offset += DWord.LENGTH;
358          if (sectionCount.intValue() < 1)
359              return false;
360          return true;
361      }
362  
363  
364  
365      /**
366       * <p>Initializes this {@link PropertySet} instance from a byte
367       * array. The method assumes that it has been checked already that
368       * the byte array indeed represents a property set stream. It does
369       * no more checks on its own.</p>
370       */
371      private void init(final byte[] src, int offset, final int length)
372      {
373          /* Read the stream's header fields. */
374          byteOrder = new Word(src, offset);
375          offset += Word.LENGTH;
376          format = new Word(src, offset);
377          offset += Word.LENGTH;
378          osVersion = new DWord(src, offset);
379          offset += DWord.LENGTH;
380          classID = new ClassID(src, offset);
381          offset += ClassID.LENGTH;
382          sectionCount = new DWord(src, offset).intValue();
383          offset += DWord.LENGTH;
384          
385          /* Read the sections, which are following the header. They
386           * start with an array of section descriptions. Each one
387           * consists of a format ID telling what the section contains
388           * and an offset telling how many bytes from the start of the
389           * stream the section begins. */
390          /* Most property sets have only one section. The Document
391           * Summary Information stream has 2. Everything else is a rare
392           * exception and is no longer fostered by Microsoft. */
393          sections = new ArrayList(2);
394  
395          /* Loop over the section descriptor array. Each descriptor
396           * consists of a ClassID and a DWord, and we have to increment
397           * "offset" accordingly. */
398          for (int i = 0; i < sectionCount; i++)
399          {
400              final Section s = new Section(src, offset);
401              offset += ClassID.LENGTH + DWord.LENGTH;
402              sections.add(s);
403          }
404      }
405  
406  
407  
408      /**
409       * <p>Checks whether this {@link PropertySet} represents a Summary
410       * Information.</p>
411       */
412      public boolean isSummaryInformation()
413      {
414          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
415                            SectionIDMap.SUMMARY_INFORMATION_ID);
416      }
417  
418  
419  
420      /**
421       * <p>Checks whether this {@link PropertySet} is a Document
422       * Summary Information.</p>
423       */
424      public boolean isDocumentSummaryInformation()
425      {
426          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
427                            SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID);
428      }
429  
430  
431  
432      /**
433       * <p>Convenience method returning the {@link Property} array
434       * contained in this property set. It is a shortcut for getting
435       * the {@link PropertySet}'s {@link Section}s list and then
436       * getting the {@link Property} array from the first {@link
437       * Section}. However, it can only be used if the {@link
438       * PropertySet} contains exactly one {@link Section}, so check
439       * {@link #getSectionCount} first!</p>
440       *
441       * @return The properties of the only {@link Section} of this
442       * {@link PropertySet}.
443       *
444       * @throws NoSingleSectionException if the {@link PropertySet} has
445       * more or less than one {@link Section}.
446       */
447      public Property[] getProperties()
448          throws NoSingleSectionException
449      {
450          return getSingleSection().getProperties();
451      }
452  
453  
454  
455      /**
456       * <p>Convenience method returning the value of the property with
457       * the specified ID. If the property is not available,
458       * <code>null</code> is returned and a subsequent call to {@link
459       * #wasNull} will return <code>true</code>.</p>
460       *
461       * @throws NoSingleSectionException if the {@link PropertySet} has
462       * more or less than one {@link Section}.
463       */
464      protected Object getProperty(final int id)
465          throws NoSingleSectionException
466      {
467          return getSingleSection().getProperty(id);
468      }
469  
470  
471  
472      /**
473       * <p>Convenience method returning the value of the numeric
474       * property with the specified ID. If the property is not
475       * available, 0 is returned. A subsequent call to {@link #wasNull}
476       * will return <code>true</code> to let the caller distinguish
477       * that case from a real property value of 0.</p>
478       *
479       * @throws NoSingleSectionException if the {@link PropertySet} has
480       * more or less than one {@link Section}.
481       */
482      protected int getPropertyIntValue(final int id)
483          throws NoSingleSectionException
484      {
485          return getSingleSection().getPropertyIntValue(id);
486      }
487  
488  
489  
490      /**
491       * <p>Checks whether the property which the last call to {@link
492       * #getPropertyIntValue} or {@link #getProperty} tried to access
493       * was available or not. This information might be important for
494       * callers of {@link #getPropertyIntValue} since the latter
495       * returns 0 if the property does not exist. Using {@link
496       * #wasNull}, the caller can distiguish this case from a
497       * property's real value of 0.</p>
498       *
499       * @return <code>true</code> if the last call to {@link
500       * #getPropertyIntValue} or {@link #getProperty} tried to access a
501       * property that was not available, else <code>false</code>.
502       *
503       * @throws NoSingleSectionException if the {@link PropertySet} has
504       * more than one {@link Section}.
505       */
506      public boolean wasNull() throws NoSingleSectionException
507      {
508          return getSingleSection().wasNull();
509      }
510  
511  
512  
513      /**
514       * <p>If the {@link PropertySet} has only a single section this
515       * method returns it.</p>
516       *
517       * @throws NoSingleSectionException if the {@link PropertySet} has
518       * more or less than exactly one {@link Section}.
519       */
520      public Section getSingleSection()
521      {
522          if (sectionCount != 1)
523              throw new NoSingleSectionException
524                  ("Property set contains " + sectionCount + " sections.");
525          return ((Section) sections.get(0));
526      }
527  
528  }
529