View Javadoc

1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.Collections;
26  import java.util.HashMap;
27  import java.util.Map;
28  
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.hbase.io.hfile.Compression;
31  import org.apache.hadoop.hbase.io.hfile.HFile;
32  import org.apache.hadoop.hbase.regionserver.StoreFile;
33  import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.io.Text;
36  import org.apache.hadoop.io.WritableComparable;
37  
38  /**
39   * An HColumnDescriptor contains information about a column family such as the
40   * number of versions, compression settings, etc.
41   *
42   * It is used as input when creating a table or adding a column. Once set, the
43   * parameters that specify a column cannot be changed without deleting the
44   * column and recreating it. If there is data stored in the column, it will be
45   * deleted when the column is deleted.
46   */
47  public class HColumnDescriptor implements WritableComparable<HColumnDescriptor> {
  // For future backward compatibility

  // Serialization version written by write(DataOutput) and dispatched on in
  // readFields(DataInput).  History:
  // Version 3 was when column names become byte arrays and when we picked up
  // Time-to-live feature.  Version 4 was when we moved to byte arrays, HBASE-82.
  // Version 5 was when bloom filter descriptors were removed.
  // Version 6 adds metadata as a map where keys and values are byte[].
  // Version 7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217)
  // Version 8 -- reintroduction of bloom filters, changed from boolean to enum
  private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)8;
57  
  /**
   * The type of compression.
   * NOTE: retained only for backward compatibility; the active compression
   * setting is stored under {@link #COMPRESSION} as a
   * {@code Compression.Algorithm} name.  Do not reorder these constants.
   * @see org.apache.hadoop.io.SequenceFile.Writer
   * @deprecated Compression now means which compression library
   * rather than 'what' to compress.
   */
  @Deprecated
  public static enum CompressionType {
    /** Do not compress records. */
    NONE,
    /** Compress values only, each separately. */
    RECORD,
    /** Compress sequences of records together in blocks. */
    BLOCK
  }
73  
  // Keys under which settings are stored in the values map.
  public static final String COMPRESSION = "COMPRESSION";
  // Compression to use for major compactions; falls back to COMPRESSION.
  public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
  public static final String BLOCKCACHE = "BLOCKCACHE";

  /**
   * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
   * Use smaller block sizes for faster random-access at expense of larger
   * indices (more memory consumption).
   */
  public static final String BLOCKSIZE = "BLOCKSIZE";

  public static final String LENGTH = "LENGTH";
  public static final String TTL = "TTL";
  public static final String BLOOMFILTER = "BLOOMFILTER";
  public static final String FOREVER = "FOREVER";
  public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE";
90  
  /**
   * Default compression type.
   */
  public static final String DEFAULT_COMPRESSION =
    Compression.Algorithm.NONE.getName();

  /**
   * Default number of versions of a record to keep.
   */
  public static final int DEFAULT_VERSIONS = 3;

  /*
   * Cached, parsed form of the BLOCKSIZE value.  Null until first use;
   * setBlocksize() resets it to null so the next getBlocksize() re-parses.
   * Safe to cache: a changed descriptor is a new HCD instance.
   */
  private volatile Integer blocksize = null;

  /**
   * Default setting for whether to serve from memory or not.
   */
  public static final boolean DEFAULT_IN_MEMORY = false;

  /**
   * Default setting for whether to use a block cache or not.
   */
  public static final boolean DEFAULT_BLOCKCACHE = true;

  /**
   * Default size of blocks in files stored to the filesytem (hfiles).
   */
  public static final int DEFAULT_BLOCKSIZE = HFile.DEFAULT_BLOCKSIZE;

  /**
   * Default setting for whether or not to use bloomfilters.
   */
  public static final String DEFAULT_BLOOMFILTER = StoreFile.BloomType.NONE.toString();

  /**
   * Default time to live of cell contents.
   */
  public static final int DEFAULT_TTL = HConstants.FOREVER;

  /**
   * Default scope.
   */
  public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;

  // Column family name; null only for a freshly-constructed instance that
  // has not yet been populated by readFields().
  private byte [] name;

  // Column metadata: all settings are stored as byte[] key/value pairs here.
  protected Map<ImmutableBytesWritable,ImmutableBytesWritable> values =
    new HashMap<ImmutableBytesWritable,ImmutableBytesWritable>();

  /*
   * Cache the max versions rather than calculate it every time.
   */
  private int cachedMaxVersions = -1;
149 
  /**
   * Default constructor. Must be present for Writable.
   */
  public HColumnDescriptor() {
    // Name is filled in later by readFields(DataInput).
    this.name = null;
  }
156 
157   /**
158    * Construct a column descriptor specifying only the family name
159    * The other attributes are defaulted.
160    *
161    * @param familyName Column family name. Must be 'printable' -- digit or
162    * letter -- and may not contain a <code>:<code>
163    */
164   public HColumnDescriptor(final String familyName) {
165     this(Bytes.toBytes(familyName));
166   }
167 
168   /**
169    * Construct a column descriptor specifying only the family name
170    * The other attributes are defaulted.
171    *
172    * @param familyName Column family name. Must be 'printable' -- digit or
173    * letter -- and may not contain a <code>:<code>
174    */
175   public HColumnDescriptor(final byte [] familyName) {
176     this (familyName == null || familyName.length <= 0?
177       HConstants.EMPTY_BYTE_ARRAY: familyName, DEFAULT_VERSIONS,
178       DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, DEFAULT_BLOCKCACHE,
179       DEFAULT_TTL, DEFAULT_BLOOMFILTER);
180   }
181 
182   /**
183    * Constructor.
184    * Makes a deep copy of the supplied descriptor.
185    * Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor.
186    * @param desc The descriptor.
187    */
188   public HColumnDescriptor(HColumnDescriptor desc) {
189     super();
190     this.name = desc.name.clone();
191     for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
192         desc.values.entrySet()) {
193       this.values.put(e.getKey(), e.getValue());
194     }
195     setMaxVersions(desc.getMaxVersions());
196   }
197 
198   /**
199    * Constructor
200    * @param familyName Column family name. Must be 'printable' -- digit or
201    * letter -- and may not contain a <code>:<code>
202    * @param maxVersions Maximum number of versions to keep
203    * @param compression Compression type
204    * @param inMemory If true, column data should be kept in an HRegionServer's
205    * cache
206    * @param blockCacheEnabled If true, MapFile blocks should be cached
207    * @param timeToLive Time-to-live of cell contents, in seconds
208    * (use HConstants.FOREVER for unlimited TTL)
209    * @param bloomFilter Bloom filter type for this column
210    *
211    * @throws IllegalArgumentException if passed a family name that is made of
212    * other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
213    * a <code>:</code>
214    * @throws IllegalArgumentException if the number of versions is &lt;= 0
215    */
216   public HColumnDescriptor(final byte [] familyName, final int maxVersions,
217       final String compression, final boolean inMemory,
218       final boolean blockCacheEnabled,
219       final int timeToLive, final String bloomFilter) {
220     this(familyName, maxVersions, compression, inMemory, blockCacheEnabled,
221       DEFAULT_BLOCKSIZE, timeToLive, bloomFilter, DEFAULT_REPLICATION_SCOPE);
222   }
223 
224   /**
225    * Constructor
226    * @param familyName Column family name. Must be 'printable' -- digit or
227    * letter -- and may not contain a <code>:<code>
228    * @param maxVersions Maximum number of versions to keep
229    * @param compression Compression type
230    * @param inMemory If true, column data should be kept in an HRegionServer's
231    * cache
232    * @param blockCacheEnabled If true, MapFile blocks should be cached
233    * @param blocksize Block size to use when writing out storefiles.  Use
234    * smaller blocksizes for faster random-access at expense of larger indices
235    * (more memory consumption).  Default is usually 64k.
236    * @param timeToLive Time-to-live of cell contents, in seconds
237    * (use HConstants.FOREVER for unlimited TTL)
238    * @param bloomFilter Bloom filter type for this column
239    * @param scope The scope tag for this column
240    *
241    * @throws IllegalArgumentException if passed a family name that is made of
242    * other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
243    * a <code>:</code>
244    * @throws IllegalArgumentException if the number of versions is &lt;= 0
245    */
246   public HColumnDescriptor(final byte [] familyName, final int maxVersions,
247       final String compression, final boolean inMemory,
248       final boolean blockCacheEnabled, final int blocksize,
249       final int timeToLive, final String bloomFilter, final int scope) {
250     isLegalFamilyName(familyName);
251     this.name = familyName;
252 
253     if (maxVersions <= 0) {
254       // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
255       // Until there is support, consider 0 or < 0 -- a configuration error.
256       throw new IllegalArgumentException("Maximum versions must be positive");
257     }
258     setMaxVersions(maxVersions);
259     setInMemory(inMemory);
260     setBlockCacheEnabled(blockCacheEnabled);
261     setTimeToLive(timeToLive);
262     setCompressionType(Compression.Algorithm.
263       valueOf(compression.toUpperCase()));
264     setBloomFilterType(StoreFile.BloomType.
265       valueOf(bloomFilter.toUpperCase()));
266     setBlocksize(blocksize);
267     setScope(scope);
268   }
269 
270   /**
271    * @param b Family name.
272    * @return <code>b</code>
273    * @throws IllegalArgumentException If not null and not a legitimate family
274    * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because
275    * <code>b</code> can be null when deserializing).  Cannot start with a '.'
276    * either.
277    */
278   public static byte [] isLegalFamilyName(final byte [] b) {
279     if (b == null) {
280       return b;
281     }
282     if (b[0] == '.') {
283       throw new IllegalArgumentException("Family names cannot start with a " +
284         "period: " + Bytes.toString(b));
285     }
286     for (int i = 0; i < b.length; i++) {
287       if (Character.isISOControl(b[i]) || b[i] == ':') {
288         throw new IllegalArgumentException("Illegal character <" + b[i] +
289           ">. Family names cannot contain control characters or colons: " +
290           Bytes.toString(b));
291       }
292     }
293     return b;
294   }
295 
296   /**
297    * @return Name of this column family
298    */
299   public byte [] getName() {
300     return name;
301   }
302 
303   /**
304    * @return Name of this column family
305    */
306   public String getNameAsString() {
307     return Bytes.toString(this.name);
308   }
309 
310   /**
311    * @param key The key.
312    * @return The value.
313    */
314   public byte[] getValue(byte[] key) {
315     ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key));
316     if (ibw == null)
317       return null;
318     return ibw.get();
319   }
320 
321   /**
322    * @param key The key.
323    * @return The value as a string.
324    */
325   public String getValue(String key) {
326     byte[] value = getValue(Bytes.toBytes(key));
327     if (value == null)
328       return null;
329     return Bytes.toString(value);
330   }
331 
332   /**
333    * @return All values.
334    */
335   public Map<ImmutableBytesWritable,ImmutableBytesWritable> getValues() {
336     return Collections.unmodifiableMap(values);
337   }
338 
339   /**
340    * @param key The key.
341    * @param value The value.
342    */
343   public void setValue(byte[] key, byte[] value) {
344     values.put(new ImmutableBytesWritable(key),
345       new ImmutableBytesWritable(value));
346   }
347 
348   /**
349    * @param key Key whose key and value we're to remove from HCD parameters.
350    */
351   public void remove(final byte [] key) {
352     values.remove(new ImmutableBytesWritable(key));
353   }
354 
355   /**
356    * @param key The key.
357    * @param value The value.
358    */
359   public void setValue(String key, String value) {
360     setValue(Bytes.toBytes(key), Bytes.toBytes(value));
361   }
362 
363   /** @return compression type being used for the column family */
364   public Compression.Algorithm getCompression() {
365     String n = getValue(COMPRESSION);
366     if (n == null) {
367       return Compression.Algorithm.NONE;
368     }
369     return Compression.Algorithm.valueOf(n.toUpperCase());
370   }
371 
372   /** @return compression type being used for the column family for major 
373       compression */
374   public Compression.Algorithm getCompactionCompression() {
375     String n = getValue(COMPRESSION_COMPACT);
376     if (n == null) {
377       return getCompression();
378     }
379     return Compression.Algorithm.valueOf(n.toUpperCase());
380   }
381 
382   /** @return maximum number of versions */
383   public int getMaxVersions() {
384     return this.cachedMaxVersions;
385   }
386 
387   /**
388    * @param maxVersions maximum number of versions
389    */
390   public void setMaxVersions(int maxVersions) {
391     setValue(HConstants.VERSIONS, Integer.toString(maxVersions));
392     cachedMaxVersions = maxVersions;
393   }
394 
395   /**
396    * @return The storefile/hfile blocksize for this column family.
397    */
398   public synchronized int getBlocksize() {
399     if (this.blocksize == null) {
400       String value = getValue(BLOCKSIZE);
401       this.blocksize = (value != null)?
402         Integer.decode(value): Integer.valueOf(DEFAULT_BLOCKSIZE);
403     }
404     return this.blocksize.intValue();
405   }
406 
407   /**
408    * @param s Blocksize to use when writing out storefiles/hfiles on this
409    * column family.
410    */
411   public void setBlocksize(int s) {
412     setValue(BLOCKSIZE, Integer.toString(s));
413     this.blocksize = null;
414   }
415 
416   /**
417    * @return Compression type setting.
418    */
419   public Compression.Algorithm getCompressionType() {
420     return getCompression();
421   }
422 
423   /**
424    * Compression types supported in hbase.
425    * LZO is not bundled as part of the hbase distribution.
426    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
427    * for how to enable it.
428    * @param type Compression type setting.
429    */
430   public void setCompressionType(Compression.Algorithm type) {
431     String compressionType;
432     switch (type) {
433       case LZO: compressionType = "LZO"; break;
434       case GZ: compressionType = "GZ"; break;
435       default: compressionType = "NONE"; break;
436     }
437     setValue(COMPRESSION, compressionType);
438   }
439 
440   /**
441    * @return Compression type setting.
442    */
443   public Compression.Algorithm getCompactionCompressionType() {
444     return getCompactionCompression();
445   }
446 
447   /**
448    * Compression types supported in hbase.
449    * LZO is not bundled as part of the hbase distribution.
450    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
451    * for how to enable it.
452    * @param type Compression type setting.
453    */
454   public void setCompactionCompressionType(Compression.Algorithm type) {
455     String compressionType;
456     switch (type) {
457       case LZO: compressionType = "LZO"; break;
458       case GZ: compressionType = "GZ"; break;
459       default: compressionType = "NONE"; break;
460     }
461     setValue(COMPRESSION_COMPACT, compressionType);
462   }
463 
464   /**
465    * @return True if we are to keep all in use HRegionServer cache.
466    */
467   public boolean isInMemory() {
468     String value = getValue(HConstants.IN_MEMORY);
469     if (value != null)
470       return Boolean.valueOf(value).booleanValue();
471     return DEFAULT_IN_MEMORY;
472   }
473 
474   /**
475    * @param inMemory True if we are to keep all values in the HRegionServer
476    * cache
477    */
478   public void setInMemory(boolean inMemory) {
479     setValue(HConstants.IN_MEMORY, Boolean.toString(inMemory));
480   }
481 
482   /**
483    * @return Time-to-live of cell contents, in seconds.
484    */
485   public int getTimeToLive() {
486     String value = getValue(TTL);
487     return (value != null)? Integer.valueOf(value).intValue(): DEFAULT_TTL;
488   }
489 
490   /**
491    * @param timeToLive Time-to-live of cell contents, in seconds.
492    */
493   public void setTimeToLive(int timeToLive) {
494     setValue(TTL, Integer.toString(timeToLive));
495   }
496 
497   /**
498    * @return True if MapFile blocks should be cached.
499    */
500   public boolean isBlockCacheEnabled() {
501     String value = getValue(BLOCKCACHE);
502     if (value != null)
503       return Boolean.valueOf(value).booleanValue();
504     return DEFAULT_BLOCKCACHE;
505   }
506 
507   /**
508    * @param blockCacheEnabled True if MapFile blocks should be cached.
509    */
510   public void setBlockCacheEnabled(boolean blockCacheEnabled) {
511     setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled));
512   }
513 
514   /**
515    * @return bloom filter type used for new StoreFiles in ColumnFamily
516    */
517   public StoreFile.BloomType getBloomFilterType() {
518     String n = getValue(BLOOMFILTER);
519     if (n == null) {
520       n = DEFAULT_BLOOMFILTER;
521     }
522     return StoreFile.BloomType.valueOf(n.toUpperCase());
523   }
524 
525   /**
526    * @param bt bloom filter type
527    */
528   public void setBloomFilterType(final StoreFile.BloomType bt) {
529     setValue(BLOOMFILTER, bt.toString());
530   }
531 
532    /**
533     * @return the scope tag
534     */
535   public int getScope() {
536     String value = getValue(REPLICATION_SCOPE);
537     if (value != null) {
538       return Integer.valueOf(value).intValue();
539     }
540     return DEFAULT_REPLICATION_SCOPE;
541   }
542 
543  /**
544   * @param scope the scope tag
545   */
546   public void setScope(int scope) {
547     setValue(REPLICATION_SCOPE, Integer.toString(scope));
548   }
549 
550   /**
551    * @see java.lang.Object#toString()
552    */
553   @Override
554   public String toString() {
555     StringBuilder s = new StringBuilder();
556     s.append('{');
557     s.append(HConstants.NAME);
558     s.append(" => '");
559     s.append(Bytes.toString(name));
560     s.append("'");
561     for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
562         values.entrySet()) {
563       String key = Bytes.toString(e.getKey().get());
564       String value = Bytes.toString(e.getValue().get());
565       s.append(", ");
566       s.append(key);
567       s.append(" => '");
568       s.append(value);
569       s.append("'");
570     }
571     s.append('}');
572     return s.toString();
573   }
574 
575   /**
576    * @see java.lang.Object#equals(java.lang.Object)
577    */
578   @Override
579   public boolean equals(Object obj) {
580     if (this == obj) {
581       return true;
582     }
583     if (obj == null) {
584       return false;
585     }
586     if (!(obj instanceof HColumnDescriptor)) {
587       return false;
588     }
589     return compareTo((HColumnDescriptor)obj) == 0;
590   }
591 
592   /**
593    * @see java.lang.Object#hashCode()
594    */
595   @Override
596   public int hashCode() {
597     int result = Bytes.hashCode(this.name);
598     result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode();
599     result ^= values.hashCode();
600     return result;
601   }
602 
603   // Writable
604 
605   public void readFields(DataInput in) throws IOException {
606     int version = in.readByte();
607     if (version < 6) {
608       if (version <= 2) {
609         Text t = new Text();
610         t.readFields(in);
611         this.name = t.getBytes();
612 //        if(KeyValue.getFamilyDelimiterIndex(this.name, 0, this.name.length)
613 //            > 0) {
614 //          this.name = stripColon(this.name);
615 //        }
616       } else {
617         this.name = Bytes.readByteArray(in);
618       }
619       this.values.clear();
620       setMaxVersions(in.readInt());
621       int ordinal = in.readInt();
622       setCompressionType(Compression.Algorithm.values()[ordinal]);
623       setInMemory(in.readBoolean());
624       setBloomFilterType(in.readBoolean() ? BloomType.ROW : BloomType.NONE);
625       if (getBloomFilterType() != BloomType.NONE && version < 5) {
626         // If a bloomFilter is enabled and the column descriptor is less than
627         // version 5, we need to skip over it to read the rest of the column
628         // descriptor. There are no BloomFilterDescriptors written to disk for
629         // column descriptors with a version number >= 5
630         throw new UnsupportedClassVersionError(this.getClass().getName() +
631             " does not support backward compatibility with versions older " +
632             "than version 5");
633       }
634       if (version > 1) {
635         setBlockCacheEnabled(in.readBoolean());
636       }
637       if (version > 2) {
638        setTimeToLive(in.readInt());
639       }
640     } else {
641       // version 6+
642       this.name = Bytes.readByteArray(in);
643       this.values.clear();
644       int numValues = in.readInt();
645       for (int i = 0; i < numValues; i++) {
646         ImmutableBytesWritable key = new ImmutableBytesWritable();
647         ImmutableBytesWritable value = new ImmutableBytesWritable();
648         key.readFields(in);
649         value.readFields(in);
650 
651         // in version 8, the BloomFilter setting changed from bool to enum
652         if (version < 8 && Bytes.toString(key.get()).equals(BLOOMFILTER)) {
653           value.set(Bytes.toBytes(
654               Boolean.getBoolean(Bytes.toString(value.get()))
655                 ? BloomType.ROW.toString()
656                 : BloomType.NONE.toString()));
657         }
658 
659         values.put(key, value);
660       }
661       if (version == 6) {
662         // Convert old values.
663         setValue(COMPRESSION, Compression.Algorithm.NONE.getName());
664       }
665       String value = getValue(HConstants.VERSIONS);
666       this.cachedMaxVersions = (value != null)?
667           Integer.valueOf(value).intValue(): DEFAULT_VERSIONS;
668     }
669   }
670 
671   public void write(DataOutput out) throws IOException {
672     out.writeByte(COLUMN_DESCRIPTOR_VERSION);
673     Bytes.writeByteArray(out, this.name);
674     out.writeInt(values.size());
675     for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
676         values.entrySet()) {
677       e.getKey().write(out);
678       e.getValue().write(out);
679     }
680   }
681 
682   // Comparable
683 
684   public int compareTo(HColumnDescriptor o) {
685     int result = Bytes.compareTo(this.name, o.getName());
686     if (result == 0) {
687       // punt on comparison for ordering, just calculate difference
688       result = this.values.hashCode() - o.values.hashCode();
689       if (result < 0)
690         result = -1;
691       else if (result > 0)
692         result = 1;
693     }
694     return result;
695   }
696 }