View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.client;
22  
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.filter.Filter;
26  import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
27  import org.apache.hadoop.hbase.io.TimeRange;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.io.Writable;
30  import org.apache.hadoop.io.WritableFactories;
31  
32  import java.io.DataInput;
33  import java.io.DataOutput;
34  import java.io.IOException;
35  import java.util.ArrayList;
36  import java.util.HashMap;
37  import java.util.List;
38  import java.util.Map;
39  import java.util.NavigableSet;
40  import java.util.TreeMap;
41  import java.util.TreeSet;
42  
43  /**
44   * Used to perform Scan operations.
45   * <p>
46   * All operations are identical to {@link Get} with the exception of
47   * instantiation.  Rather than specifying a single row, an optional startRow
48   * and stopRow may be defined.  If rows are not specified, the Scanner will
49   * iterate over all rows.
50   * <p>
51   * To scan everything for each row, instantiate a Scan object.
52   * <p>
53   * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
54   * If caching is NOT set, we will use the caching value of the hosting
55   * {@link HTable}.  See {@link HTable#setScannerCaching(int)}.
56   * <p>
57   * To further define the scope of what to get when scanning, perform additional
58   * methods as outlined below.
59   * <p>
60   * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
61   * for each family to retrieve.
62   * <p>
63   * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
64   * for each column to retrieve.
65   * <p>
66   * To only retrieve columns within a specific range of version timestamps,
67   * execute {@link #setTimeRange(long, long) setTimeRange}.
68   * <p>
69   * To only retrieve columns with a specific timestamp, execute
70   * {@link #setTimeStamp(long) setTimestamp}.
71   * <p>
72   * To limit the number of versions of each column to be returned, execute
73   * {@link #setMaxVersions(int) setMaxVersions}.
74   * <p>
75   * To limit the maximum number of values returned for each call to next(),
76   * execute {@link #setBatch(int) setBatch}.
77   * <p>
78   * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
79   * <p>
80   * Expert: To explicitly disable server-side block caching for this scan,
81   * execute {@link #setCacheBlocks(boolean)}.
82   */
83  public class Scan extends OperationWithAttributes implements Writable {
84    private static final String RAW_ATTR = "_raw_";
85    private static final String ONDEMAND_ATTR = "_ondemand_";
86    private static final String ISOLATION_LEVEL = "_isolationlevel_";
87  
88    private static final byte SCAN_VERSION = (byte)2;
89    private byte [] startRow = HConstants.EMPTY_START_ROW;
90    private byte [] stopRow  = HConstants.EMPTY_END_ROW;
91    private int maxVersions = 1;
92    private int batch = -1;
93    // If application wants to collect scan metrics, it needs to
94    // call scan.setAttribute(SCAN_ATTRIBUTES_ENABLE, Bytes.toBytes(Boolean.TRUE))
95    static public String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";
96    static public String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";
97    
98    // If an application wants to use multiple scans over different tables each scan must
99    // define this attribute with the appropriate table name by calling
100   // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
101   static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";
102 
103   /*
104    * -1 means no caching
105    */
106   private int caching = -1;
107   private boolean cacheBlocks = true;
108   private Filter filter = null;
109   private TimeRange tr = new TimeRange();
110   private Map<byte [], NavigableSet<byte []>> familyMap =
111     new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
112 
113   /**
114    * Create a Scan operation across all rows.
115    */
116   public Scan() {}
117 
118   public Scan(byte [] startRow, Filter filter) {
119     this(startRow);
120     this.filter = filter;
121   }
122 
123   /**
124    * Create a Scan operation starting at the specified row.
125    * <p>
126    * If the specified row does not exist, the Scanner will start from the
127    * next closest row after the specified row.
128    * @param startRow row to start scanner at or after
129    */
130   public Scan(byte [] startRow) {
131     this.startRow = startRow;
132   }
133 
134   /**
135    * Create a Scan operation for the range of rows specified.
136    * @param startRow row to start scanner at or after (inclusive)
137    * @param stopRow row to stop scanner before (exclusive)
138    */
139   public Scan(byte [] startRow, byte [] stopRow) {
140     this.startRow = startRow;
141     this.stopRow = stopRow;
142   }
143 
144   /**
145    * Creates a new instance of this class while copying all values.
146    *
147    * @param scan  The scan instance to copy from.
148    * @throws IOException When copying the values fails.
149    */
150   public Scan(Scan scan) throws IOException {
151     startRow = scan.getStartRow();
152     stopRow  = scan.getStopRow();
153     maxVersions = scan.getMaxVersions();
154     batch = scan.getBatch();
155     caching = scan.getCaching();
156     cacheBlocks = scan.getCacheBlocks();
157     filter = scan.getFilter(); // clone?
158     TimeRange ctr = scan.getTimeRange();
159     tr = new TimeRange(ctr.getMin(), ctr.getMax());
160     Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
161     for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
162       byte [] fam = entry.getKey();
163       NavigableSet<byte[]> cols = entry.getValue();
164       if (cols != null && cols.size() > 0) {
165         for (byte[] col : cols) {
166           addColumn(fam, col);
167         }
168       } else {
169         addFamily(fam);
170       }
171     }
172     for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
173       setAttribute(attr.getKey(), attr.getValue());
174     }
175   }
176 
177   /**
178    * Builds a scan object with the same specs as get.
179    * @param get get to model scan after
180    */
181   public Scan(Get get) {
182     this.startRow = get.getRow();
183     this.stopRow = get.getRow();
184     this.filter = get.getFilter();
185     this.cacheBlocks = get.getCacheBlocks();
186     this.maxVersions = get.getMaxVersions();
187     this.tr = get.getTimeRange();
188     this.familyMap = get.getFamilyMap();
189   }
190 
191   public boolean isGetScan() {
192     return this.startRow != null && this.startRow.length > 0 &&
193       Bytes.equals(this.startRow, this.stopRow);
194   }
195 
196   /**
197    * Get all columns from the specified family.
198    * <p>
199    * Overrides previous calls to addColumn for this family.
200    * @param family family name
201    * @return this
202    */
203   public Scan addFamily(byte [] family) {
204     familyMap.remove(family);
205     familyMap.put(family, null);
206     return this;
207   }
208 
209   /**
210    * Get the column from the specified family with the specified qualifier.
211    * <p>
212    * Overrides previous calls to addFamily for this family.
213    * @param family family name
214    * @param qualifier column qualifier
215    * @return this
216    */
217   public Scan addColumn(byte [] family, byte [] qualifier) {
218     NavigableSet<byte []> set = familyMap.get(family);
219     if(set == null) {
220       set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
221     }
222     if (qualifier == null) {
223       qualifier = HConstants.EMPTY_BYTE_ARRAY;
224     }
225     set.add(qualifier);
226     familyMap.put(family, set);
227 
228     return this;
229   }
230 
231   /**
232    * Get versions of columns only within the specified timestamp range,
233    * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
234    * your time range spans more than one version and you want all versions
235    * returned, up the number of versions beyond the defaut.
236    * @param minStamp minimum timestamp value, inclusive
237    * @param maxStamp maximum timestamp value, exclusive
238    * @throws IOException if invalid time range
239    * @see #setMaxVersions()
240    * @see #setMaxVersions(int)
241    * @return this
242    */
243   public Scan setTimeRange(long minStamp, long maxStamp)
244   throws IOException {
245     tr = new TimeRange(minStamp, maxStamp);
246     return this;
247   }
248 
249   /**
250    * Get versions of columns with the specified timestamp. Note, default maximum
251    * versions to return is 1.  If your time range spans more than one version
252    * and you want all versions returned, up the number of versions beyond the
253    * defaut.
254    * @param timestamp version timestamp
255    * @see #setMaxVersions()
256    * @see #setMaxVersions(int)
257    * @return this
258    */
259   public Scan setTimeStamp(long timestamp) {
260     try {
261       tr = new TimeRange(timestamp, timestamp+1);
262     } catch(IOException e) {
263       // Will never happen
264     }
265     return this;
266   }
267 
268   /**
269    * Set the start row of the scan.
270    * @param startRow row to start scan on (inclusive)
271    * Note: In order to make startRow exclusive add a trailing 0 byte
272    * @return this
273    */
274   public Scan setStartRow(byte [] startRow) {
275     this.startRow = startRow;
276     return this;
277   }
278 
279   /**
280    * Set the stop row.
281    * @param stopRow row to end at (exclusive)
282    * Note: In order to make stopRow inclusive add a trailing 0 byte
283    * @return this
284    */
285   public Scan setStopRow(byte [] stopRow) {
286     this.stopRow = stopRow;
287     return this;
288   }
289 
290   /**
291    * Get all available versions.
292    * @return this
293    */
294   public Scan setMaxVersions() {
295     this.maxVersions = Integer.MAX_VALUE;
296     return this;
297   }
298 
299   /**
300    * Get up to the specified number of versions of each column.
301    * @param maxVersions maximum versions for each column
302    * @return this
303    */
304   public Scan setMaxVersions(int maxVersions) {
305     this.maxVersions = maxVersions;
306     return this;
307   }
308 
309   /**
310    * Set the maximum number of values to return for each call to next()
311    * @param batch the maximum number of values
312    */
313   public void setBatch(int batch) {
314     if (this.hasFilter() && this.filter.hasFilterRow()) {
315       throw new IncompatibleFilterException(
316         "Cannot set batch on a scan using a filter" +
317         " that returns true for filter.hasFilterRow");
318     }
319     this.batch = batch;
320   }
321 
322   /**
323    * Set the number of rows for caching that will be passed to scanners.
324    * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
325    * Higher caching values will enable faster scanners but will use more memory.
326    * @param caching the number of rows for caching
327    */
328   public void setCaching(int caching) {
329     this.caching = caching;
330   }
331 
332   /**
333    * Apply the specified server-side filter when performing the Scan.
334    * @param filter filter to run on the server
335    * @return this
336    */
337   public Scan setFilter(Filter filter) {
338     this.filter = filter;
339     return this;
340   }
341 
342   /**
343    * Setting the familyMap
344    * @param familyMap map of family to qualifier
345    * @return this
346    */
347   public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
348     this.familyMap = familyMap;
349     return this;
350   }
351 
352   /**
353    * Getting the familyMap
354    * @return familyMap
355    */
356   public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
357     return this.familyMap;
358   }
359 
360   /**
361    * @return the number of families in familyMap
362    */
363   public int numFamilies() {
364     if(hasFamilies()) {
365       return this.familyMap.size();
366     }
367     return 0;
368   }
369 
370   /**
371    * @return true if familyMap is non empty, false otherwise
372    */
373   public boolean hasFamilies() {
374     return !this.familyMap.isEmpty();
375   }
376 
377   /**
378    * @return the keys of the familyMap
379    */
380   public byte[][] getFamilies() {
381     if(hasFamilies()) {
382       return this.familyMap.keySet().toArray(new byte[0][0]);
383     }
384     return null;
385   }
386 
387   /**
388    * @return the startrow
389    */
390   public byte [] getStartRow() {
391     return this.startRow;
392   }
393 
394   /**
395    * @return the stoprow
396    */
397   public byte [] getStopRow() {
398     return this.stopRow;
399   }
400 
401   /**
402    * @return the max number of versions to fetch
403    */
404   public int getMaxVersions() {
405     return this.maxVersions;
406   }
407 
408   /**
409    * @return maximum number of values to return for a single call to next()
410    */
411   public int getBatch() {
412     return this.batch;
413   }
414 
415   /**
416    * @return caching the number of rows fetched when calling next on a scanner
417    */
418   public int getCaching() {
419     return this.caching;
420   }
421 
422   /**
423    * @return TimeRange
424    */
425   public TimeRange getTimeRange() {
426     return this.tr;
427   }
428 
429   /**
430    * @return RowFilter
431    */
432   public Filter getFilter() {
433     return filter;
434   }
435 
436   /**
437    * @return true is a filter has been specified, false if not
438    */
439   public boolean hasFilter() {
440     return filter != null;
441   }
442 
443   /**
444    * Set whether blocks should be cached for this Scan.
445    * <p>
446    * This is true by default.  When true, default settings of the table and
447    * family are used (this will never override caching blocks if the block
448    * cache is disabled for that family or entirely).
449    *
450    * @param cacheBlocks if false, default settings are overridden and blocks
451    * will not be cached
452    */
453   public void setCacheBlocks(boolean cacheBlocks) {
454     this.cacheBlocks = cacheBlocks;
455   }
456 
457   /**
458    * Get whether blocks should be cached for this Scan.
459    * @return true if default caching should be used, false if blocks should not
460    * be cached
461    */
462   public boolean getCacheBlocks() {
463     return cacheBlocks;
464   }
465 
466   /**
467    * Set the value indicating whether loading CFs on demand should be allowed (cluster
468    * default is false). On-demand CF loading doesn't load column families until necessary, e.g.
469    * if you filter on one column, the other column family data will be loaded only for the rows
470    * that are included in result, not all rows like in normal case.
471    * With column-specific filters, like SingleColumnValueFilter w/filterIfMissing == true,
472    * this can deliver huge perf gains when there's a cf with lots of data; however, it can
473    * also lead to some inconsistent results, as follows:
474    * - if someone does a concurrent update to both column families in question you may get a row
475    *   that never existed, e.g. for { rowKey = 5, { cat_videos => 1 }, { video => "my cat" } }
476    *   someone puts rowKey 5 with { cat_videos => 0 }, { video => "my dog" }, concurrent scan
477    *   filtering on "cat_videos == 1" can get { rowKey = 5, { cat_videos => 1 },
478    *   { video => "my dog" } }.
479    * - if there's a concurrent split and you have more than 2 column families, some rows may be
480    *   missing some column families.
481    */
482   public void setLoadColumnFamiliesOnDemand(boolean value) {
483     setAttribute(ONDEMAND_ATTR, Bytes.toBytes(value));
484   }
485 
486   /**
487    * Get the logical value indicating whether on-demand CF loading should be allowed.
488    */
489   public boolean doLoadColumnFamiliesOnDemand() {
490     byte[] attr = getAttribute(ONDEMAND_ATTR);
491     return attr == null ? false : Bytes.toBoolean(attr);
492   }
493 
494   /**
495    * Compile the table and column family (i.e. schema) information
496    * into a String. Useful for parsing and aggregation by debugging,
497    * logging, and administration tools.
498    * @return Map
499    */
500   @Override
501   public Map<String, Object> getFingerprint() {
502     Map<String, Object> map = new HashMap<String, Object>();
503     List<String> families = new ArrayList<String>();
504     if(this.familyMap.size() == 0) {
505       map.put("families", "ALL");
506       return map;
507     } else {
508       map.put("families", families);
509     }
510     for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
511         this.familyMap.entrySet()) {
512       families.add(Bytes.toStringBinary(entry.getKey()));
513     }
514     return map;
515   }
516 
517   /**
518    * Compile the details beyond the scope of getFingerprint (row, columns,
519    * timestamps, etc.) into a Map along with the fingerprinted information.
520    * Useful for debugging, logging, and administration tools.
521    * @param maxCols a limit on the number of columns output prior to truncation
522    * @return Map
523    */
524   @Override
525   public Map<String, Object> toMap(int maxCols) {
526     // start with the fingerpring map and build on top of it
527     Map<String, Object> map = getFingerprint();
528     // map from families to column list replaces fingerprint's list of families
529     Map<String, List<String>> familyColumns =
530       new HashMap<String, List<String>>();
531     map.put("families", familyColumns);
532     // add scalar information first
533     map.put("startRow", Bytes.toStringBinary(this.startRow));
534     map.put("stopRow", Bytes.toStringBinary(this.stopRow));
535     map.put("maxVersions", this.maxVersions);
536     map.put("batch", this.batch);
537     map.put("caching", this.caching);
538     map.put("cacheBlocks", this.cacheBlocks);
539     List<Long> timeRange = new ArrayList<Long>();
540     timeRange.add(this.tr.getMin());
541     timeRange.add(this.tr.getMax());
542     map.put("timeRange", timeRange);
543     int colCount = 0;
544     // iterate through affected families and list out up to maxCols columns
545     for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
546       this.familyMap.entrySet()) {
547       List<String> columns = new ArrayList<String>();
548       familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
549       if(entry.getValue() == null) {
550         colCount++;
551         --maxCols;
552         columns.add("ALL");
553       } else {
554         colCount += entry.getValue().size();
555         if (maxCols <= 0) {
556           continue;
557         } 
558         for (byte [] column : entry.getValue()) {
559           if (--maxCols <= 0) {
560             continue;
561           }
562           columns.add(Bytes.toStringBinary(column));
563         }
564       } 
565     }       
566     map.put("totalColumns", colCount);
567     if (this.filter != null) {
568       map.put("filter", this.filter.toString());
569     }
570     // add the id if set
571     if (getId() != null) {
572       map.put("id", getId());
573     }
574     return map;
575   }
576 
577   @SuppressWarnings("unchecked")
578   private Writable createForName(String className) {
579     try {
580       Class<? extends Writable> clazz =
581         (Class<? extends Writable>) Class.forName(className);
582       return WritableFactories.newInstance(clazz, new Configuration());
583     } catch (ClassNotFoundException e) {
584       throw new RuntimeException("Can't find class " + className);
585     }
586   }
587 
588   //Writable
589   public void readFields(final DataInput in)
590   throws IOException {
591     int version = in.readByte();
592     if (version > (int)SCAN_VERSION) {
593       throw new IOException("version not supported");
594     }
595     this.startRow = Bytes.readByteArray(in);
596     this.stopRow = Bytes.readByteArray(in);
597     this.maxVersions = in.readInt();
598     this.batch = in.readInt();
599     this.caching = in.readInt();
600     this.cacheBlocks = in.readBoolean();
601     if(in.readBoolean()) {
602       this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
603       this.filter.readFields(in);
604     }
605     this.tr = new TimeRange();
606     tr.readFields(in);
607     int numFamilies = in.readInt();
608     this.familyMap =
609       new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
610     for(int i=0; i<numFamilies; i++) {
611       byte [] family = Bytes.readByteArray(in);
612       int numColumns = in.readInt();
613       TreeSet<byte []> set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
614       for(int j=0; j<numColumns; j++) {
615         byte [] qualifier = Bytes.readByteArray(in);
616         set.add(qualifier);
617       }
618       this.familyMap.put(family, set);
619     }
620 
621     if (version > 1) {
622       readAttributes(in);
623     }
624   }
625 
626   public void write(final DataOutput out)
627   throws IOException {
628     out.writeByte(SCAN_VERSION);
629     Bytes.writeByteArray(out, this.startRow);
630     Bytes.writeByteArray(out, this.stopRow);
631     out.writeInt(this.maxVersions);
632     out.writeInt(this.batch);
633     out.writeInt(this.caching);
634     out.writeBoolean(this.cacheBlocks);
635     if(this.filter == null) {
636       out.writeBoolean(false);
637     } else {
638       out.writeBoolean(true);
639       Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
640       filter.write(out);
641     }
642     tr.write(out);
643     out.writeInt(familyMap.size());
644     for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
645       Bytes.writeByteArray(out, entry.getKey());
646       NavigableSet<byte []> columnSet = entry.getValue();
647       if(columnSet != null){
648         out.writeInt(columnSet.size());
649         for(byte [] qualifier : columnSet) {
650           Bytes.writeByteArray(out, qualifier);
651         }
652       } else {
653         out.writeInt(0);
654       }
655     }
656     writeAttributes(out);
657   }
658 
659   /**
660    * Enable/disable "raw" mode for this scan.
661    * If "raw" is enabled the scan will return all
662    * delete marker and deleted rows that have not
663    * been collected, yet.
664    * This is mostly useful for Scan on column families
665    * that have KEEP_DELETED_ROWS enabled.
666    * It is an error to specify any column when "raw" is set.
667    * @param raw True/False to enable/disable "raw" mode.
668    */
669   public void setRaw(boolean raw) {
670     setAttribute(RAW_ATTR, Bytes.toBytes(raw));
671   }
672 
673   /**
674    * @return True if this Scan is in "raw" mode.
675    */
676   public boolean isRaw() {
677     byte[] attr = getAttribute(RAW_ATTR);
678     return attr == null ? false : Bytes.toBoolean(attr);
679   }
680 
681   /*
682    * Set the isolation level for this scan. If the
683    * isolation level is set to READ_UNCOMMITTED, then
684    * this scan will return data from committed and
685    * uncommitted transactions. If the isolation level 
686    * is set to READ_COMMITTED, then this scan will return 
687    * data from committed transactions only. If a isolation
688    * level is not explicitly set on a Scan, then it 
689    * is assumed to be READ_COMMITTED.
690    * @param level IsolationLevel for this scan
691    */
692   public void setIsolationLevel(IsolationLevel level) {
693     setAttribute(ISOLATION_LEVEL, level.toBytes());
694   }
695   /*
696    * @return The isolation level of this scan.
697    * If no isolation level was set for this scan object, 
698    * then it returns READ_COMMITTED.
699    * @return The IsolationLevel for this scan
700    */
701   public IsolationLevel getIsolationLevel() {
702     byte[] attr = getAttribute(ISOLATION_LEVEL);
703     return attr == null ? IsolationLevel.READ_COMMITTED :
704                           IsolationLevel.fromBytes(attr);
705   }
706 }