View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.client;
22  
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  import org.apache.hadoop.io.Writable;
31  import org.apache.hadoop.io.WritableFactories;
32  
33  import java.io.DataInput;
34  import java.io.DataOutput;
35  import java.io.IOException;
36  import java.util.Map;
37  import java.util.NavigableSet;
38  import java.util.TreeMap;
39  import java.util.TreeSet;
40  
41  /**
42   * Used to perform Scan operations.
43   * <p>
44   * All operations are identical to {@link Get} with the exception of
45   * instantiation.  Rather than specifying a single row, an optional startRow
46   * and stopRow may be defined.  If rows are not specified, the Scanner will
47   * iterate over all rows.
48   * <p>
49   * To scan everything for each row, instantiate a Scan object.
50   * <p>
51   * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
52   * If caching is NOT set, we will use the caching value of the hosting
53   * {@link HTable}.  See {@link HTable#setScannerCaching(int)}.
54   * <p>
55   * To further define the scope of what to get when scanning, perform additional
56   * methods as outlined below.
57   * <p>
58   * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
59   * for each family to retrieve.
60   * <p>
61   * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
62   * for each column to retrieve.
63   * <p>
64   * To only retrieve columns within a specific range of version timestamps,
65   * execute {@link #setTimeRange(long, long) setTimeRange}.
66   * <p>
67   * To only retrieve columns with a specific timestamp, execute
68   * {@link #setTimeStamp(long) setTimestamp}.
69   * <p>
70   * To limit the number of versions of each column to be returned, execute
71   * {@link #setMaxVersions(int) setMaxVersions}.
72   * <p>
73   * To limit the maximum number of values returned for each call to next(),
74   * execute {@link #setBatch(int) setBatch}.
75   * <p>
76   * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
77   * <p>
78   * Expert: To explicitly disable server-side block caching for this scan,
79   * execute {@link #setCacheBlocks(boolean)}.
80   */
81  public class Scan implements Writable {
82    private static final byte SCAN_VERSION = (byte)1;
83    private byte [] startRow = HConstants.EMPTY_START_ROW;
84    private byte [] stopRow  = HConstants.EMPTY_END_ROW;
85    private int maxVersions = 1;
86    private int batch = -1;
87    /*
88     * -1 means no caching
89     */
90    private int caching = -1;
91    private boolean cacheBlocks = true;
92    private Filter filter = null;
93    private TimeRange tr = new TimeRange();
94    private Map<byte [], NavigableSet<byte []>> familyMap =
95      new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
96  
97    /**
98     * Create a Scan operation across all rows.
99     */
100   public Scan() {}
101 
102   public Scan(byte [] startRow, Filter filter) {
103     this(startRow);
104     this.filter = filter;
105   }
106 
107   /**
108    * Create a Scan operation starting at the specified row.
109    * <p>
110    * If the specified row does not exist, the Scanner will start from the
111    * next closest row after the specified row.
112    * @param startRow row to start scanner at or after
113    */
114   public Scan(byte [] startRow) {
115     this.startRow = startRow;
116   }
117 
118   /**
119    * Create a Scan operation for the range of rows specified.
120    * @param startRow row to start scanner at or after (inclusive)
121    * @param stopRow row to stop scanner before (exclusive)
122    */
123   public Scan(byte [] startRow, byte [] stopRow) {
124     this.startRow = startRow;
125     this.stopRow = stopRow;
126   }
127 
128   /**
129    * Creates a new instance of this class while copying all values.
130    *
131    * @param scan  The scan instance to copy from.
132    * @throws IOException When copying the values fails.
133    */
134   public Scan(Scan scan) throws IOException {
135     startRow = scan.getStartRow();
136     stopRow  = scan.getStopRow();
137     maxVersions = scan.getMaxVersions();
138     batch = scan.getBatch();
139     caching = scan.getCaching();
140     cacheBlocks = scan.getCacheBlocks();
141     filter = scan.getFilter(); // clone?
142     TimeRange ctr = scan.getTimeRange();
143     tr = new TimeRange(ctr.getMin(), ctr.getMax());
144     Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
145     for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
146       byte [] fam = entry.getKey();
147       NavigableSet<byte[]> cols = entry.getValue();
148       if (cols != null && cols.size() > 0) {
149         for (byte[] col : cols) {
150           addColumn(fam, col);
151         }
152       } else {
153         addFamily(fam);
154       }
155     }
156   }
157 
158   /**
159    * Builds a scan object with the same specs as get.
160    * @param get get to model scan after
161    */
162   public Scan(Get get) {
163     this.startRow = get.getRow();
164     this.stopRow = get.getRow();
165     this.filter = get.getFilter();
166     this.cacheBlocks = get.getCacheBlocks();
167     this.maxVersions = get.getMaxVersions();
168     this.tr = get.getTimeRange();
169     this.familyMap = get.getFamilyMap();
170   }
171 
172   public boolean isGetScan() {
173     return this.startRow != null && this.startRow.length > 0 &&
174       Bytes.equals(this.startRow, this.stopRow);
175   }
176 
177   /**
178    * Get all columns from the specified family.
179    * <p>
180    * Overrides previous calls to addColumn for this family.
181    * @param family family name
182    * @return this
183    */
184   public Scan addFamily(byte [] family) {
185     familyMap.remove(family);
186     familyMap.put(family, null);
187     return this;
188   }
189 
190   /**
191    * Get the column from the specified family with the specified qualifier.
192    * <p>
193    * Overrides previous calls to addFamily for this family.
194    * @param family family name
195    * @param qualifier column qualifier
196    * @return this
197    */
198   public Scan addColumn(byte [] family, byte [] qualifier) {
199     NavigableSet<byte []> set = familyMap.get(family);
200     if(set == null) {
201       set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
202     }
203     set.add(qualifier);
204     familyMap.put(family, set);
205 
206     return this;
207   }
208 
209   /**
210    * Get versions of columns only within the specified timestamp range,
211    * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
212    * your time range spans more than one version and you want all versions
213    * returned, up the number of versions beyond the defaut.
214    * @param minStamp minimum timestamp value, inclusive
215    * @param maxStamp maximum timestamp value, exclusive
216    * @throws IOException if invalid time range
217    * @see #setMaxVersions()
218    * @see #setMaxVersions(int)
219    * @return this
220    */
221   public Scan setTimeRange(long minStamp, long maxStamp)
222   throws IOException {
223     tr = new TimeRange(minStamp, maxStamp);
224     return this;
225   }
226 
227   /**
228    * Get versions of columns with the specified timestamp. Note, default maximum
229    * versions to return is 1.  If your time range spans more than one version
230    * and you want all versions returned, up the number of versions beyond the
231    * defaut.
232    * @param timestamp version timestamp
233    * @see #setMaxVersions()
234    * @see #setMaxVersions(int)
235    * @return this
236    */
237   public Scan setTimeStamp(long timestamp) {
238     try {
239       tr = new TimeRange(timestamp, timestamp+1);
240     } catch(IOException e) {
241       // Will never happen
242     }
243     return this;
244   }
245 
246   /**
247    * Set the start row of the scan.
248    * @param startRow row to start scan on, inclusive
249    * @return this
250    */
251   public Scan setStartRow(byte [] startRow) {
252     this.startRow = startRow;
253     return this;
254   }
255 
256   /**
257    * Set the stop row.
258    * @param stopRow row to end at (exclusive)
259    * @return this
260    */
261   public Scan setStopRow(byte [] stopRow) {
262     this.stopRow = stopRow;
263     return this;
264   }
265 
266   /**
267    * Get all available versions.
268    * @return this
269    */
270   public Scan setMaxVersions() {
271     this.maxVersions = Integer.MAX_VALUE;
272     return this;
273   }
274 
275   /**
276    * Get up to the specified number of versions of each column.
277    * @param maxVersions maximum versions for each column
278    * @return this
279    */
280   public Scan setMaxVersions(int maxVersions) {
281     this.maxVersions = maxVersions;
282     return this;
283   }
284 
285   /**
286    * Set the maximum number of values to return for each call to next()
287    * @param batch the maximum number of values
288    */
289   public void setBatch(int batch) {
290 	if(this.hasFilter() && this.filter.hasFilterRow()) {
291 	  throw new IncompatibleFilterException(
292         "Cannot set batch on a scan using a filter" +
293         " that returns true for filter.hasFilterRow");
294 	}
295     this.batch = batch;
296   }
297 
298   /**
299    * Set the number of rows for caching that will be passed to scanners.
300    * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
301    * Higher caching values will enable faster scanners but will use more memory.
302    * @param caching the number of rows for caching
303    */
304   public void setCaching(int caching) {
305     this.caching = caching;
306   }
307 
308   /**
309    * Apply the specified server-side filter when performing the Scan.
310    * @param filter filter to run on the server
311    * @return this
312    */
313   public Scan setFilter(Filter filter) {
314     this.filter = filter;
315     return this;
316   }
317 
318   /**
319    * Setting the familyMap
320    * @param familyMap map of family to qualifier
321    * @return this
322    */
323   public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
324     this.familyMap = familyMap;
325     return this;
326   }
327 
328   /**
329    * Getting the familyMap
330    * @return familyMap
331    */
332   public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
333     return this.familyMap;
334   }
335 
336   /**
337    * @return the number of families in familyMap
338    */
339   public int numFamilies() {
340     if(hasFamilies()) {
341       return this.familyMap.size();
342     }
343     return 0;
344   }
345 
346   /**
347    * @return true if familyMap is non empty, false otherwise
348    */
349   public boolean hasFamilies() {
350     return !this.familyMap.isEmpty();
351   }
352 
353   /**
354    * @return the keys of the familyMap
355    */
356   public byte[][] getFamilies() {
357     if(hasFamilies()) {
358       return this.familyMap.keySet().toArray(new byte[0][0]);
359     }
360     return null;
361   }
362 
363   /**
364    * @return the startrow
365    */
366   public byte [] getStartRow() {
367     return this.startRow;
368   }
369 
370   /**
371    * @return the stoprow
372    */
373   public byte [] getStopRow() {
374     return this.stopRow;
375   }
376 
377   /**
378    * @return the max number of versions to fetch
379    */
380   public int getMaxVersions() {
381     return this.maxVersions;
382   }
383 
384   /**
385    * @return maximum number of values to return for a single call to next()
386    */
387   public int getBatch() {
388     return this.batch;
389   }
390 
391   /**
392    * @return caching the number of rows fetched when calling next on a scanner
393    */
394   public int getCaching() {
395     return this.caching;
396   }
397 
398   /**
399    * @return TimeRange
400    */
401   public TimeRange getTimeRange() {
402     return this.tr;
403   }
404 
405   /**
406    * @return RowFilter
407    */
408   public Filter getFilter() {
409     return filter;
410   }
411 
412   /**
413    * @return true is a filter has been specified, false if not
414    */
415   public boolean hasFilter() {
416     return filter != null;
417   }
418 
419   /**
420    * Set whether blocks should be cached for this Scan.
421    * <p>
422    * This is true by default.  When true, default settings of the table and
423    * family are used (this will never override caching blocks if the block
424    * cache is disabled for that family or entirely).
425    *
426    * @param cacheBlocks if false, default settings are overridden and blocks
427    * will not be cached
428    */
429   public void setCacheBlocks(boolean cacheBlocks) {
430     this.cacheBlocks = cacheBlocks;
431   }
432 
433   /**
434    * Get whether blocks should be cached for this Scan.
435    * @return true if default caching should be used, false if blocks should not
436    * be cached
437    */
438   public boolean getCacheBlocks() {
439     return cacheBlocks;
440   }
441 
442   /**
443    * @return String
444    */
445   @Override
446   public String toString() {
447     StringBuilder sb = new StringBuilder();
448     sb.append("startRow=");
449     sb.append(Bytes.toStringBinary(this.startRow));
450     sb.append(", stopRow=");
451     sb.append(Bytes.toStringBinary(this.stopRow));
452     sb.append(", maxVersions=");
453     sb.append(this.maxVersions);
454     sb.append(", batch=");
455     sb.append(this.batch);
456     sb.append(", caching=");
457     sb.append(this.caching);
458     sb.append(", cacheBlocks=");
459     sb.append(this.cacheBlocks);
460     sb.append(", timeRange=");
461     sb.append("[").append(this.tr.getMin()).append(",");
462     sb.append(this.tr.getMax()).append(")");
463     sb.append(", families=");
464     if(this.familyMap.size() == 0) {
465       sb.append("ALL");
466       return sb.toString();
467     }
468     boolean moreThanOne = false;
469     for(Map.Entry<byte [], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
470       if(moreThanOne) {
471         sb.append("), ");
472       } else {
473         moreThanOne = true;
474         sb.append("{");
475       }
476       sb.append("(family=");
477       sb.append(Bytes.toStringBinary(entry.getKey()));
478       sb.append(", columns=");
479       if(entry.getValue() == null) {
480         sb.append("ALL");
481       } else {
482         sb.append("{");
483         boolean moreThanOneB = false;
484         for(byte [] column : entry.getValue()) {
485           if(moreThanOneB) {
486             sb.append(", ");
487           } else {
488             moreThanOneB = true;
489           }
490           sb.append(Bytes.toStringBinary(column));
491         }
492         sb.append("}");
493       }
494     }
495     sb.append("}");
496     return sb.toString();
497   }
498 
499   @SuppressWarnings("unchecked")
500   private Writable createForName(String className) {
501     try {
502       Class<? extends Writable> clazz =
503         (Class<? extends Writable>) Class.forName(className);
504       return WritableFactories.newInstance(clazz, new Configuration());
505     } catch (ClassNotFoundException e) {
506       throw new RuntimeException("Can't find class " + className);
507     }
508   }
509 
510   //Writable
511   public void readFields(final DataInput in)
512   throws IOException {
513     int version = in.readByte();
514     if (version > (int)SCAN_VERSION) {
515       throw new IOException("version not supported");
516     }
517     this.startRow = Bytes.readByteArray(in);
518     this.stopRow = Bytes.readByteArray(in);
519     this.maxVersions = in.readInt();
520     this.batch = in.readInt();
521     this.caching = in.readInt();
522     this.cacheBlocks = in.readBoolean();
523     if(in.readBoolean()) {
524       this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
525       this.filter.readFields(in);
526     }
527     this.tr = new TimeRange();
528     tr.readFields(in);
529     int numFamilies = in.readInt();
530     this.familyMap =
531       new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
532     for(int i=0; i<numFamilies; i++) {
533       byte [] family = Bytes.readByteArray(in);
534       int numColumns = in.readInt();
535       TreeSet<byte []> set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
536       for(int j=0; j<numColumns; j++) {
537         byte [] qualifier = Bytes.readByteArray(in);
538         set.add(qualifier);
539       }
540       this.familyMap.put(family, set);
541     }
542   }
543 
544   public void write(final DataOutput out)
545   throws IOException {
546     out.writeByte(SCAN_VERSION);
547     Bytes.writeByteArray(out, this.startRow);
548     Bytes.writeByteArray(out, this.stopRow);
549     out.writeInt(this.maxVersions);
550     out.writeInt(this.batch);
551     out.writeInt(this.caching);
552     out.writeBoolean(this.cacheBlocks);
553     if(this.filter == null) {
554       out.writeBoolean(false);
555     } else {
556       out.writeBoolean(true);
557       Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
558       filter.write(out);
559     }
560     tr.write(out);
561     out.writeInt(familyMap.size());
562     for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
563       Bytes.writeByteArray(out, entry.getKey());
564       NavigableSet<byte []> columnSet = entry.getValue();
565       if(columnSet != null){
566         out.writeInt(columnSet.size());
567         for(byte [] qualifier : columnSet) {
568           Bytes.writeByteArray(out, qualifier);
569         }
570       } else {
571         out.writeInt(0);
572       }
573     }
574   }
575 
576    /**
577    * Parses a combined family and qualifier and adds either both or just the
578    * family in case there is not qualifier. This assumes the older colon
579    * divided notation, e.g. "data:contents" or "meta:".
580    * <p>
581    * Note: It will through an error when the colon is missing.
582    *
583    * @param familyAndQualifier family and qualifier
584    * @return A reference to this instance.
585    * @throws IllegalArgumentException When the colon is missing.
586    * @deprecated use {@link #addColumn(byte[], byte[])} instead
587    */
588   public Scan addColumn(byte[] familyAndQualifier) {
589     byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
590     if (fq.length > 1 && fq[1] != null && fq[1].length > 0) {
591       addColumn(fq[0], fq[1]);
592     } else {
593       addFamily(fq[0]);
594     }
595     return this;
596   }
597 
598   /**
599    * Adds an array of columns specified using old format, family:qualifier.
600    * <p>
601    * Overrides previous calls to addFamily for any families in the input.
602    *
603    * @param columns array of columns, formatted as <pre>family:qualifier</pre>
604    * @deprecated issue multiple {@link #addColumn(byte[], byte[])} instead
605    * @return this
606    */
607   public Scan addColumns(byte [][] columns) {
608     for (byte[] column : columns) {
609       addColumn(column);
610     }
611     return this;
612   }
613 
614   /**
615    * Convenience method to help parse old style (or rather user entry on the
616    * command line) column definitions, e.g. "data:contents mime:". The columns
617    * must be space delimited and always have a colon (":") to denote family
618    * and qualifier.
619    *
620    * @param columns  The columns to parse.
621    * @return A reference to this instance.
622    * @deprecated use {@link #addColumn(byte[], byte[])} instead
623    */
624   public Scan addColumns(String columns) {
625     String[] cols = columns.split(" ");
626     for (String col : cols) {
627       addColumn(Bytes.toBytes(col));
628     }
629     return this;
630   }
631 
632   /**
633    * Helps to convert the binary column families and qualifiers to a text
634    * representation, e.g. "data:mimetype data:contents meta:". Binary values
635    * are properly encoded using {@link Bytes#toBytesBinary(String)}.
636    *
637    * @return The columns in an old style string format.
638    * @deprecated
639    */
640   public String getInputColumns() {
641     StringBuilder cols = new StringBuilder("");
642     for (Map.Entry<byte[], NavigableSet<byte[]>> e :
643       familyMap.entrySet()) {
644       byte[] fam = e.getKey();
645       if (cols.length() > 0) cols.append(" ");
646       NavigableSet<byte[]> quals = e.getValue();
647       // check if this family has qualifiers
648       if (quals != null && quals.size() > 0) {
649         StringBuilder cs = new StringBuilder("");
650         for (byte[] qual : quals) {
651           if (cs.length() > 0) cs.append(" ");
652           // encode values to make parsing easier later
653           cs.append(Bytes.toStringBinary(fam)).append(":").append(Bytes.toStringBinary(qual));
654         }
655         cols.append(cs);
656       } else {
657         // only add the family but with old style delimiter
658         cols.append(Bytes.toStringBinary(fam)).append(":");
659       }
660     }
661     return cols.toString();
662   }
663 }