1    /*
2     *  ====================================================================
3     *  The Apache Software License, Version 1.1
4     *
5     *  Copyright (c) 2000 The Apache Software Foundation.  All rights
6     *  reserved.
7     *
8     *  Redistribution and use in source and binary forms, with or without
9     *  modification, are permitted provided that the following conditions
10    *  are met:
11    *
12    *  1. Redistributions of source code must retain the above copyright
13    *  notice, this list of conditions and the following disclaimer.
14    *
15    *  2. Redistributions in binary form must reproduce the above copyright
16    *  notice, this list of conditions and the following disclaimer in
17    *  the documentation and/or other materials provided with the
18    *  distribution.
19    *
20    *  3. The end-user documentation included with the redistribution,
21    *  if any, must include the following acknowledgment:
22    *  "This product includes software developed by the
23    *  Apache Software Foundation (http://www.apache.org/)."
24    *  Alternately, this acknowledgment may appear in the software itself,
25    *  if and wherever such third-party acknowledgments normally appear.
26    *
27    *  4. The names "Apache" and "Apache Software Foundation" must
28    *  not be used to endorse or promote products derived from this
29    *  software without prior written permission. For written
30    *  permission, please contact apache@apache.org.
31    *
32    *  5. Products derived from this software may not be called "Apache",
33    *  nor may "Apache" appear in their name, without prior written
34    *  permission of the Apache Software Foundation.
35    *
36    *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37    *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38    *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39    *  DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40    *  ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41    *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42    *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43    *  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44    *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45    *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46    *  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47    *  SUCH DAMAGE.
48    *  ====================================================================
49    *
50    *  This software consists of voluntary contributions made by many
51    *  individuals on behalf of the Apache Software Foundation.  For more
52    *  information on the Apache Software Foundation, please see
53    *  <http://www.apache.org/>.
54    */
55   package org.apache.poi.hpsf;
56   
57   import java.io.*;
58   import java.util.*;
59   import org.apache.poi.util.LittleEndian;
60   import org.apache.poi.hpsf.wellknown.*;
61   import org.apache.poi.poifs.filesystem.*;
62   
63   /**
64    *  <p>
65    *
66    *  Represents a property set in the Horrible Property Set Format (HPSF). These
67    *  are usually metadata of a Microsoft Office document.</p> <p>
68    *
69    *  An application that wants to access these metadata should create an instance
70    *  of this class or one of its subclasses by calling the factory method {@link
71    *  PropertySetFactory#create} and then retrieve the information it needs by
72    *  calling appropriate methods.</p> <p>
73    *
74    *  {@link PropertySetFactory#create} does its work by calling one of the
75    *  constructors {@link PropertySet#PropertySet(InputStream)} or {@link
76    *  PropertySet#PropertySet(byte[])}. If the constructor's argument is not in
77    *  the Horrible Property Set Format, i.e. not a property set stream, or if any
78    *  other error occurs, an appropriate exception is thrown.</p> <p>
79    *
80    *  A {@link PropertySet} has a list of {@link Section}s, and each {@link
81    *  Section} has a {@link Property} array. Use {@link #getSections} to retrieve
82    *  the {@link Section}s, then call {@link Section#getProperties} for each
83    *  {@link Section} to get hold of the {@link Property} arrays.</p> <p>Since the
84    *  vast majority of {@link PropertySet}s contain only a single {@link
85    *  Section}, the convenience method {@link #getProperties} returns the
86    *  properties of a {@link PropertySet}'s only {@link Section} (throwing a {@link
87    *  NoSingleSectionException} if the {@link PropertySet} contains more or fewer
88    *  than exactly one {@link Section}).</p>
89    *
90    *@author     Rainer Klute (klute@rainer-klute.de)
91    *@author     Drew Varner (Drew.Varner hanginIn sc.edu)
92    *@created    May 10, 2002
93    *@version    $Id: PropertySet.java,v 1.6 2002/05/19 18:09:26 acoliver Exp $
94    *@since      2002-02-09
95    */
96   public class PropertySet {
97       final static byte[] BYTE_ORDER_ASSERTION =
98               new byte[]{(byte) 0xFF, (byte) 0xFE};
99       final static byte[] FORMAT_ASSERTION =
100              new byte[]{(byte) 0x00, (byte) 0x00};
101  
102      private int byteOrder;
103  
104  
105      // Must equal BYTE_ORDER_ASSERTION
106  
107      /**
108       *  <p>
109       *
110       *  Returns the property set stream's low-level "byte order" field. It is
111       *  always <tt>0xFFFE</tt> .</p>
112       *
113       *@return    The byteOrder value
114       */
115      public int getByteOrder() {
116          return byteOrder;
117      }
118  
119  
120  
121      private int format;
122  
123  
124      // Must equal FORMAT_ASSERTION
125  
126      /**
127       *  <p>
128       *
129       *  Returns the property set stream's low-level "format" field. It is always
130       *  <tt>0x0000</tt> .</p>
131       *
132       *@return    The format value
133       */
134      public int getFormat() {
135          return format;
136      }
137  
138  
139  
140      private long osVersion;
141  
142  
143      /**
144       *  <p>
145       *
146       *  Returns the property set stream's low-level "OS version" field.</p>
147       *
148       *@return    The oSVersion value
149       */
150      public long getOSVersion() {
151          return osVersion;
152      }
153  
154  
155  
156      private ClassID classID;
157  
158  
159      /**
160       *  <p>
161       *
162       *  Returns the property set stream's low-level "class ID" field.</p>
163       *
164       *@return    The classID value
165       */
166      public ClassID getClassID() {
167          return classID;
168      }
169  
170  
171  
172      private long sectionCount;
173  
174  
175      /**
176       *  <p>
177       *
178       *  Returns the number of {@link Section}s in the property set.</p>
179       *
180       *@return    The sectionCount value
181       */
182      public long getSectionCount() {
183          return sectionCount;
184      }
185  
186  
187  
188      private List sections;
189  
190  
191      /**
192       *  <p>
193       *
194       *  Returns the {@link Section}s in the property set.</p>
195       *
196       *@return    The sections value
197       */
198      public List getSections() {
199          return sections;
200      }
201  
202  
203  
204      /**
205       *  <p>
206       *
207       *  Creates an empty (uninitialized) {@link PropertySet}.</p> <p>
208       *
209       *  <strong>Please note:</strong> For the time being this constructor is
210       *  protected since it is used for internal purposes only, but expect it to
211       *  become public once the property set writing functionality is
212       *  implemented.</p>
213       */
214      protected PropertySet() { }
215  
216  
217  
218      /**
219       *  <p>
220       *
221       *  Creates a {@link PropertySet} instance from an {@link InputStream} in
222       *  the Horrible Property Set Format.</p> <p>
223       *
224       *  The constructor reads the first few bytes from the stream and determines
225       *  whether it is really a property set stream. If it is, it parses the rest
226       *  of the stream. If it is not, it resets the stream to its beginning in
227       *  order to let other components mess around with the data and throws an
228       *  exception.</p>
229       *
230       *@param  stream                            Description of the Parameter
231       *@exception  NoPropertySetStreamException  Description of the Exception
232       *@exception  MarkUnsupportedException      Description of the Exception
233       *@exception  IOException                   Description of the Exception
234       *@throws  NoPropertySetStreamException     if the stream is not a property
235       *      set stream.
236       *@throws  MarkUnsupportedException         if the stream does not support
237       *      the {@link InputStream#markSupported} method.
238       *@throws  IOException                      if the {@link InputStream}
239       *      cannot not be accessed as needed.
240       */
241      public PropertySet(final InputStream stream)
242               throws NoPropertySetStreamException, MarkUnsupportedException,
243              IOException {
244          if (isPropertySetStream(stream)) {
245              final int avail = stream.available();
246              final byte[] buffer = new byte[avail];
247              stream.read(buffer, 0, buffer.length);
248              init(buffer, 0, buffer.length);
249          } else {
250              throw new NoPropertySetStreamException();
251          }
252      }
253  
254  
255  
256      /**
257       *  <p>
258       *
259       *  Creates a {@link PropertySet} instance from a byte array that represents
260       *  a stream in the Horrible Property Set Format.</p>
261       *
262       *@param  stream                            The byte array holding the
263       *      stream data.
264       *@param  offset                            The offset in <var>stream</var>
265       *      where the stream data begin. If the stream data begin with the first
266       *      byte in the array, the <var>offset</var> is 0.
267       *@param  length                            The length of the stream data.
268       *@exception  NoPropertySetStreamException  Description of the Exception
269       *@throws  NoPropertySetStreamException     if the byte array is not a
270       *      property set stream.
271       */
272      public PropertySet(final byte[] stream, final int offset, final int length)
273               throws NoPropertySetStreamException {
274          if (isPropertySetStream(stream, offset, length)) {
275              init(stream, offset, length);
276          } else {
277              throw new NoPropertySetStreamException();
278          }
279      }
280  
281  
282  
283      /**
284       *  <p>
285       *
286       *  Creates a {@link PropertySet} instance from a byte array that represents
287       *  a stream in the Horrible Property Set Format.</p>
288       *
289       *@param  stream                            The byte array holding the
290       *      stream data. The complete byte array contents is the stream data.
291       *@exception  NoPropertySetStreamException  Description of the Exception
292       *@throws  NoPropertySetStreamException     if the byte array is not a
293       *      property set stream.
294       */
295      public PropertySet(final byte[] stream)
296               throws NoPropertySetStreamException {
297          this(stream, 0, stream.length);
298      }
299  
300  
301  
302      /**
303       *  <p>
304       *
305       *  Checks whether an {@link InputStream} is in the Horrible Property Set
306       *  Format.</p>
307       *
308       *@param  stream                     The {@link InputStream} to check. In
309       *      order to perform the check, the method reads the first bytes from
310       *      the stream. After reading, the stream is reset to the position it
311       *      had before reading. The {@link InputStream} must support the {@link
312       *      InputStream#mark} method.
313       *@return                            <code>true</code> if the stream is a
314       *      property set stream, else <code>false</code>.
315       *@exception  IOException            Description of the Exception
316       *@throws  MarkUnsupportedException  if the {@link InputStream} does not
317       *      support the {@link InputStream#mark} method.
318       */
319      public static boolean isPropertySetStream(final InputStream stream)
320               throws MarkUnsupportedException, IOException {
321          /*
322           *  Read at most this many bytes.
323           */
324          final int BUFFER_SIZE = 50;
325  
326          /*
327           *  Mark the current position in the stream so that we can
328           *  reset to this position if the stream does not contain a
329           *  property set.
330           */
331          if (!stream.markSupported()) {
332              throw new MarkUnsupportedException(stream.getClass().getName());
333          }
334          stream.mark(BUFFER_SIZE);
335  
336          /*
337           *  Read a couple of bytes from the stream.
338           */
339          final byte[] buffer = new byte[BUFFER_SIZE];
340          final int bytes =
341                  stream.read(buffer, 0,
342                  Math.min(buffer.length, stream.available()));
343          final boolean isPropertySetStream =
344                  isPropertySetStream(buffer, 0, bytes);
345          stream.reset();
346          return isPropertySetStream;
347      }
348  
349  
350  
351      /**
352       *  <p>
353       *
354       *  Checks whether a byte array is in the Horrible Property Set Format.</p>
355       *
356       *@param  src     The byte array to check.
357       *@param  offset  The offset in the byte array.
358       *@param  length  The significant number of bytes in the byte array. Only
359       *      this number of bytes will be checked.
360       *@return         <code>true</code> if the byte array is a property set
361       *      stream, <code>false</code> if not.
362       */
363      public static boolean isPropertySetStream(final byte[] src, int offset,
364              final int length) {
365          /*
366           *  Read the header fields of the stream. They must always be
367           *  there.
368           */
369          final int byteOrder = LittleEndian.getUShort(src, offset);
370          offset += LittleEndian.SHORT_SIZE;
371          byte[] temp = new byte[LittleEndian.SHORT_SIZE];
372          LittleEndian.putShort(temp,(short)byteOrder);
373          if (!Util.equal(temp, BYTE_ORDER_ASSERTION)) {
374              return false;
375          }
376          final int format = LittleEndian.getUShort(src, offset);
377          offset += LittleEndian.SHORT_SIZE;
378          temp = new byte[LittleEndian.SHORT_SIZE];
379          LittleEndian.putShort(temp,(short)format);
380          if (!Util.equal(temp, FORMAT_ASSERTION)) {
381              return false;
382          }
383          final long osVersion = LittleEndian.getUInt(src, offset);
384          offset += LittleEndian.INT_SIZE;
385          final ClassID classID = new ClassID(src, offset);
386          offset += ClassID.LENGTH;
387          final long sectionCount = LittleEndian.getUInt(src, offset);
388          offset += LittleEndian.INT_SIZE;
389          if (sectionCount < 1) {
390              return false;
391          }
392          return true;
393      }
394  
395  
396  
397      /**
398       *  <p>
399       *
400       *  Initializes this {@link PropertySet} instance from a byte array. The
401       *  method assumes that it has been checked already that the byte array
402       *  indeed represents a property set stream. It does no more checks on its
403       *  own.</p>
404       *
405       *@param  src     Description of the Parameter
406       *@param  offset  Description of the Parameter
407       *@param  length  Description of the Parameter
408       */
409      private void init(final byte[] src, int offset, final int length) {
410          /*
411           *  Read the stream's header fields.
412           */
413          byteOrder = LittleEndian.getUShort(src, offset);
414          offset += LittleEndian.SHORT_SIZE;
415          format = LittleEndian.getUShort(src, offset);
416          offset += LittleEndian.SHORT_SIZE;
417          osVersion = LittleEndian.getUInt(src, offset);
418          offset += LittleEndian.INT_SIZE;
419          classID = new ClassID(src, offset);
420          offset += ClassID.LENGTH;
421          sectionCount = LittleEndian.getUInt(src, offset);
422          offset += LittleEndian.INT_SIZE;
423  
424          /*
425           *  Read the sections, which are following the header. They
426           *  start with an array of section descriptions. Each one
427           *  consists of a format ID telling what the section contains
428           *  and an offset telling how many bytes from the start of the
429           *  stream the section begins.
430           */
431          /*
432           *  Most property sets have only one section. The Document
433           *  Summary Information stream has 2. Everything else is a rare
434           *  exception and is no longer fostered by Microsoft.
435           */
436          sections = new ArrayList(2);
437  
438          /*
439           *  Loop over the section descriptor array. Each descriptor
440           *  consists of a ClassID and a DWord, and we have to increment
441           *  "offset" accordingly.
442           */
443          for (int i = 0; i < sectionCount; i++) {
444              final Section s = new Section(src, offset);
445              offset += ClassID.LENGTH + LittleEndian.INT_SIZE;
446              sections.add(s);
447          }
448      }
449  
450  
451  
452      /**
453       *  <p>
454       *
455       *  Checks whether this {@link PropertySet} represents a Summary
456       *  Information.</p>
457       *
458       *@return    The summaryInformation value
459       */
460      public boolean isSummaryInformation() {
461          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
462                  SectionIDMap.SUMMARY_INFORMATION_ID);
463      }
464  
465  
466  
467      /**
468       *  <p>
469       *
470       *  Checks whether this {@link PropertySet} is a Document Summary
471       *  Information.</p>
472       *
473       *@return    The documentSummaryInformation value
474       */
475      public boolean isDocumentSummaryInformation() {
476          return Util.equal(((Section) sections.get(0)).getFormatID().getBytes(),
477                  SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID);
478      }
479  
480  
481  
482      /**
483       *  <p>
484       *
485       *  Convenience method returning the {@link Property} array contained in
486       *  this property set. It is a shortcut for getting the {@link
487       *  PropertySet}'s {@link Section}s list and then getting the {@link
488       *  Property} array from the first {@link Section}. However, it can only be
489       *  used if the {@link PropertySet} contains exactly one {@link Section}, so
490       *  check {@link #getSectionCount} first!</p>
491       *
492       *@return                            The properties of the only {@link
493       *      Section} of this {@link PropertySet}.
494       *@throws  NoSingleSectionException  if the {@link PropertySet} has more or
495       *      less than one {@link Section}.
496       */
497      public Property[] getProperties()
498               throws NoSingleSectionException {
499          return getSingleSection().getProperties();
500      }
501  
502  
503  
504      /**
505       *  <p>
506       *
507       *  Convenience method returning the value of the property with the
508       *  specified ID. If the property is not available, <code>null</code> is
509       *  returned and a subsequent call to {@link #wasNull} will return <code>true</code>
510       *  .</p>
511       *
512       *@param  id                         Description of the Parameter
513       *@return                            The property value
514       *@throws  NoSingleSectionException  if the {@link PropertySet} has more or
515       *      less than one {@link Section}.
516       */
517      protected Object getProperty(final int id)
518               throws NoSingleSectionException {
519          return getSingleSection().getProperty(id);
520      }
521  
522  
523  
524      /**
525       *  <p>
526       *
527       *  Convenience method returning the value of a boolean property with the
528       *  specified ID. If the property is not available, <code>false</code> is
529       *  returned. A subsequent call to {@link #wasNull} will return <code>true</code>
530       *  to let the caller distinguish that case from a real property value of
531       *  <code>false</code>.</p>
532       *
533       *@param  id                         Description of the Parameter
534       *@return                            The propertyBooleanValue value
535       *@throws  NoSingleSectionException  if the {@link PropertySet} has more or
536       *      less than one {@link Section}.
537       */
538      protected boolean getPropertyBooleanValue(final int id)
539               throws NoSingleSectionException {
540          return getSingleSection().getPropertyBooleanValue(id);
541      }
542  
543  
544  
545      /**
546       *  <p>
547       *
548       *  Convenience method returning the value of the numeric property with the
549       *  specified ID. If the property is not available, 0 is returned. A
550       *  subsequent call to {@link #wasNull} will return <code>true</code> to let
551       *  the caller distinguish that case from a real property value of 0.</p>
552       *
553       *@param  id                         Description of the Parameter
554       *@return                            The propertyIntValue value
555       *@throws  NoSingleSectionException  if the {@link PropertySet} has more or
556       *      less than one {@link Section}.
557       */
558      protected int getPropertyIntValue(final int id)
559               throws NoSingleSectionException {
560          return getSingleSection().getPropertyIntValue(id);
561      }
562  
563  
564  
565      /**
566       *  <p>
567       *
568       *  Checks whether the property which the last call to {@link
569       *  #getPropertyIntValue} or {@link #getProperty} tried to access was
570       *  available or not. This information might be important for callers of
571       *  {@link #getPropertyIntValue} since the latter returns 0 if the property
572       *  does not exist. Using {@link #wasNull}, the caller can distiguish this
573       *  case from a property's real value of 0.</p>
574       *
575       *@return                            <code>true</code> if the last call to
576       *      {@link #getPropertyIntValue} or {@link #getProperty} tried to access
577       *      a property that was not available, else <code>false</code>.
578       *@throws  NoSingleSectionException  if the {@link PropertySet} has more
579       *      than one {@link Section}.
580       */
581      public boolean wasNull() throws NoSingleSectionException {
582          return getSingleSection().wasNull();
583      }
584  
585  
586  
587      /**
588       *  <p>
589       *
590       *  If the {@link PropertySet} has only a single section this method returns
591       *  it.</p>
592       *
593       *@return                            The singleSection value
594       *@throws  NoSingleSectionException  if the {@link PropertySet} has more or
595       *      less than exactly one {@link Section}.
596       */
597      public Section getSingleSection() {
598          if (sectionCount != 1) {
599              throw new NoSingleSectionException
600                      ("Property set contains " + sectionCount + " sections.");
601          }
602          return ((Section) sections.get(0));
603      }
604  
605  }
606