/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.security.access.Permission;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Used to perform Scan operations.
 * <p>
 * All operations are identical to {@link Get} with the exception of
 * instantiation.  Rather than specifying a single row, an optional startRow
 * and stopRow may be defined.  If rows are not specified, the Scanner will
 * iterate over all rows.
 * <p>
 * To scan everything for each row, instantiate a Scan object.
 * <p>
 * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
 * If caching is NOT set, we will use the caching value of the hosting {@link Table}.
 * In addition to row caching, it is possible to specify a
 * maximum result size, using {@link #setMaxResultSize(long)}. When both are used,
 * single server requests are limited by either number of rows or maximum result size, whichever
 * limit comes first.
 * <p>
 * To further define the scope of what to get when scanning, invoke additional
 * methods as outlined below.
 * <p>
 * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
 * for each family to retrieve.
 * <p>
 * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
 * for each column to retrieve.
 * <p>
 * To only retrieve columns within a specific range of version timestamps,
 * execute {@link #setTimeRange(long, long) setTimeRange}.
 * <p>
 * To only retrieve columns with a specific timestamp, execute
 * {@link #setTimeStamp(long) setTimeStamp}.
 * <p>
 * To limit the number of versions of each column to be returned, execute
 * {@link #setMaxVersions(int) setMaxVersions}.
 * <p>
 * To limit the maximum number of values returned for each call to next(),
 * execute {@link #setBatch(int) setBatch}.
 * <p>
 * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
 * <p>
 * Expert: To explicitly disable server-side block caching for this scan,
 * execute {@link #setCacheBlocks(boolean)}.
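 * <p>
 * A minimal usage sketch (the table, family, qualifier, and row names below are
 * illustrative; {@code table} is assumed to be an open {@link Table}):
 * <pre>{@code
 * Scan scan = new Scan(Bytes.toBytes("startRow"), Bytes.toBytes("stopRow"));
 * scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("qual"));
 * scan.setMaxVersions(3);
 * scan.setCaching(100);
 * try (ResultScanner scanner = table.getScanner(scan)) {
 *   for (Result result : scanner) {
 *     // process each matching row
 *   }
 * }
 * }</pre>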
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Scan extends Query {
  private static final Log LOG = LogFactory.getLog(Scan.class);

  private static final String RAW_ATTR = "_raw_";

  /**
   * EXPERT ONLY.
   * An integer (not long) indicating to the scanner logic how many times we attempt to retrieve the
   * next KV before we schedule a reseek.
   * The right value depends on the size of the average KV. A reseek is more efficient when
   * it can skip 5-10 KVs or 512B-1KB, or when the next KV is likely found in another HFile block.
   * Setting this only has an effect when columns were added with
   * {@link #addColumn(byte[], byte[])}:
   * <pre>{@code
   * Scan s = new Scan(...);
   * s.addColumn(...);
   * s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
   * }</pre>
   * Default is 0 (always reseek).
   * @deprecated without replacement
   *             This is now a no-op; SEEKs and SKIPs are optimized automatically.
   */
  @Deprecated
  public static final String HINT_LOOKAHEAD = "_look_ahead_";

  private byte [] startRow = HConstants.EMPTY_START_ROW;
  private byte [] stopRow  = HConstants.EMPTY_END_ROW;
  private int maxVersions = 1;
  private int batch = -1;

  private int storeLimit = -1;
  private int storeOffset = 0;
  private boolean getScan;

  /**
   * @deprecated since 1.0.0. Use {@link #setScanMetricsEnabled(boolean)}
   */
  // Make private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";

  /**
   * @deprecated Use {@link #getScanMetrics()}
   */
  // Make this private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";

  // If an application wants to use multiple scans over different tables each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";

  /*
   * -1 means no caching
   */
  private int caching = -1;
  private long maxResultSize = -1;
  private boolean cacheBlocks = true;
  private boolean reversed = false;
  private TimeRange tr = new TimeRange();
  private Map<byte [], NavigableSet<byte []>> familyMap =
    new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
  private Boolean loadColumnFamiliesOnDemand = null;

  /**
   * Set it to true for a small scan to get better performance.
   *
   * A small scan should use pread, while a big scan can use seek + read.
   *
   * Seek + read is fast but can cause two problems: (1) resource contention and
   * (2) too much network I/O.
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if set to true, we do openScanner, next, and closeScanner
   * in one RPC call, which gives better performance for small scans. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block (64KB), it can be
   * considered a small scan.
   */
  private boolean small = false;

  /**
   * Create a Scan operation across all rows.
   */
  public Scan() {}

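  /**
   * Create a Scan operation starting at the specified row, using the given
   * filter.
   * @param startRow row to start scanner at or after
   * @param filter filter to apply to this scan
   */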
  public Scan(byte [] startRow, Filter filter) {
    this(startRow);
    this.filter = filter;
  }

  /**
   * Create a Scan operation starting at the specified row.
   * <p>
   * If the specified row does not exist, the Scanner will start from the
   * next closest row after the specified row.
   * @param startRow row to start scanner at or after
   */
  public Scan(byte [] startRow) {
    this.startRow = startRow;
  }

  /**
   * Create a Scan operation for the range of rows specified.
   * @param startRow row to start scanner at or after (inclusive)
   * @param stopRow row to stop scanner before (exclusive)
   */
  public Scan(byte [] startRow, byte [] stopRow) {
    this.startRow = startRow;
    this.stopRow = stopRow;
    // Treat this as a Get only when startRow is non-empty and equals stopRow;
    // if both rows are empty, it is not a Get.
    this.getScan = isStartRowAndEqualsStopRow();
  }

  /**
   * Creates a new instance of this class while copying all values.
   *
   * @param scan  The scan instance to copy from.
   * @throws IOException When copying the values fails.
   */
  public Scan(Scan scan) throws IOException {
    startRow = scan.getStartRow();
    stopRow  = scan.getStopRow();
    maxVersions = scan.getMaxVersions();
    batch = scan.getBatch();
    storeLimit = scan.getMaxResultsPerColumnFamily();
    storeOffset = scan.getRowOffsetPerColumnFamily();
    caching = scan.getCaching();
    maxResultSize = scan.getMaxResultSize();
    cacheBlocks = scan.getCacheBlocks();
    getScan = scan.isGetScan();
    filter = scan.getFilter(); // clone?
    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
    consistency = scan.getConsistency();
    reversed = scan.isReversed();
    small = scan.isSmall();
    TimeRange ctr = scan.getTimeRange();
    tr = new TimeRange(ctr.getMin(), ctr.getMax());
    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
    for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
      byte [] fam = entry.getKey();
      NavigableSet<byte[]> cols = entry.getValue();
      if (cols != null && cols.size() > 0) {
        for (byte[] col : cols) {
          addColumn(fam, col);
        }
      } else {
        addFamily(fam);
      }
    }
    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  /**
   * Builds a scan object with the same specs as get.
   * @param get get to model scan after
   */
  public Scan(Get get) {
    this.startRow = get.getRow();
    this.stopRow = get.getRow();
    this.filter = get.getFilter();
    this.cacheBlocks = get.getCacheBlocks();
    this.maxVersions = get.getMaxVersions();
    this.storeLimit = get.getMaxResultsPerColumnFamily();
    this.storeOffset = get.getRowOffsetPerColumnFamily();
    this.tr = get.getTimeRange();
    this.familyMap = get.getFamilyMap();
    this.getScan = true;
    this.consistency = get.getConsistency();
    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
  }

  public boolean isGetScan() {
    return this.getScan || isStartRowAndEqualsStopRow();
  }

  private boolean isStartRowAndEqualsStopRow() {
    return this.startRow != null && this.startRow.length > 0 &&
        Bytes.equals(this.startRow, this.stopRow);
  }

  /**
   * Get all columns from the specified family.
   * <p>
   * Overrides previous calls to addColumn for this family.
   * @param family family name
   * @return this
   */
  public Scan addFamily(byte [] family) {
    familyMap.remove(family);
    familyMap.put(family, null);
    return this;
  }

  /**
   * Get the column from the specified family with the specified qualifier.
   * <p>
   * Overrides previous calls to addFamily for this family.
   * @param family family name
   * @param qualifier column qualifier
   * @return this
   */
  public Scan addColumn(byte [] family, byte [] qualifier) {
    NavigableSet<byte []> set = familyMap.get(family);
    if(set == null) {
      set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
    }
    if (qualifier == null) {
      qualifier = HConstants.EMPTY_BYTE_ARRAY;
    }
    set.add(qualifier);
    familyMap.put(family, set);
    return this;
  }

  /**
   * Get versions of columns only within the specified timestamp range,
   * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
   * your time range spans more than one version and you want all versions
   * returned, up the number of versions beyond the default.
   * @param minStamp minimum timestamp value, inclusive
   * @param maxStamp maximum timestamp value, exclusive
   * @throws IOException if invalid time range
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeRange(long minStamp, long maxStamp)
  throws IOException {
    tr = new TimeRange(minStamp, maxStamp);
    return this;
  }

  /**
   * Get versions of columns with the specified timestamp. Note, default maximum
   * versions to return is 1.  If your time range spans more than one version
   * and you want all versions returned, up the number of versions beyond the
   * default.
   * @param timestamp version timestamp
   * @see #setMaxVersions()
   * @see #setMaxVersions(int)
   * @return this
   */
  public Scan setTimeStamp(long timestamp)
  throws IOException {
    try {
      tr = new TimeRange(timestamp, timestamp+1);
    } catch(IOException e) {
      // This should never happen, unless integer overflow or something extremely wrong...
      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
      throw e;
    }
    return this;
  }

  /**
   * Set the start row of the scan.
   * <p><b>Note:</b> In order to make startRow exclusive add a trailing 0 byte</p>
   * @param startRow row to start scan on (inclusive)
   * @return this
   */
  public Scan setStartRow(byte [] startRow) {
    this.startRow = startRow;
    return this;
  }

  /**
   * Set the stop row.
   * @param stopRow row to end at (exclusive)
   * <p><b>Note:</b> In order to make stopRow inclusive add a trailing 0 byte</p>
   * <p><b>Note:</b> When doing a filter for a rowKey <u>Prefix</u>
   * use {@link #setRowPrefixFilter(byte[])}.
   * The 'trailing 0' will not yield the desired result.</p>
   * @return this
   */
  public Scan setStopRow(byte [] stopRow) {
    this.stopRow = stopRow;
    return this;
  }

  /**
   * <p>Set a filter (using stopRow and startRow) so the result set only contains rows where the
   * rowKey starts with the specified prefix.</p>
   * <p>This is a utility method that converts the desired rowPrefix into the appropriate values
   * for the startRow and stopRow to achieve the desired result.</p>
   * <p>This can safely be used in combination with setFilter.</p>
   * <p><b>NOTE: Doing a {@link #setStartRow(byte[])} and/or {@link #setStopRow(byte[])}
   * after this method will yield undefined results.</b></p>
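   * <p>For example, a sketch of scanning all rows whose key starts with an
   * illustrative prefix:</p>
   * <pre>{@code
   * Scan scan = new Scan();
   * scan.setRowPrefixFilter(Bytes.toBytes("row-"));
   * // startRow becomes "row-" and stopRow becomes the prefix with its last
   * // byte incremented, so only keys beginning with "row-" are returned.
   * }</pre>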
   * @param rowPrefix the prefix all rows must start with. (Set <i>null</i> to remove the filter.)
   * @return this
   */
  public Scan setRowPrefixFilter(byte[] rowPrefix) {
    if (rowPrefix == null) {
      setStartRow(HConstants.EMPTY_START_ROW);
      setStopRow(HConstants.EMPTY_END_ROW);
    } else {
      this.setStartRow(rowPrefix);
      this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
    }
    return this;
  }

  /**
   * <p>When scanning for a prefix the scan should stop immediately after the last row that
   * has the specified prefix. This method calculates the closest next rowKey immediately following
   * the given rowKeyPrefix.</p>
   * <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
   * <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
   * simply increment the last byte of the array.
   * But if your application uses real binary rowids you may run into the scenario that your
   * prefix is something like:</p>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
   * Then this stopRow needs to be fed into the actual scan<br/>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
   * This method calculates the correct stop row value for this usecase.
   *
   * @param rowKeyPrefix the rowKey<u>Prefix</u>.
   * @return the closest next rowKey immediately following the given rowKeyPrefix.
   */
  private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
    // Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
    // Search for the place where the trailing 0xFFs start
    int offset = rowKeyPrefix.length;
    while (offset > 0) {
      if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
        break;
      }
      offset--;
    }

    if (offset == 0) {
      // We got an 0xFFFF... (only FFs) stopRow value which is
      // the last possible prefix before the end of the table.
      // So set it to stop at the 'end of the table'
      return HConstants.EMPTY_END_ROW;
    }

    // Copy the right length of the original
    byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
    // And increment the last one
    newStopRow[newStopRow.length - 1]++;
    return newStopRow;
  }

  /**
   * Get all available versions.
   * @return this
   */
  public Scan setMaxVersions() {
    this.maxVersions = Integer.MAX_VALUE;
    return this;
  }

  /**
   * Get up to the specified number of versions of each column.
   * @param maxVersions maximum versions for each column
   * @return this
   */
  public Scan setMaxVersions(int maxVersions) {
    this.maxVersions = maxVersions;
    return this;
  }

  /**
   * Set the maximum number of values to return for each call to next().
   * @param batch the maximum number of values
   * @return this
   */
  public Scan setBatch(int batch) {
    if (this.hasFilter() && this.filter.hasFilterRow()) {
      throw new IncompatibleFilterException(
        "Cannot set batch on a scan using a filter" +
        " that returns true for filter.hasFilterRow");
    }
    this.batch = batch;
    return this;
  }

  /**
   * Set the maximum number of values to return per row per Column Family.
   * @param limit the maximum number of values returned / row / CF
   * @return this
   */
  public Scan setMaxResultsPerColumnFamily(int limit) {
    this.storeLimit = limit;
    return this;
  }

  /**
   * Set offset for the row per Column Family.
   * @param offset is the number of kvs that will be skipped.
   * @return this
   */
  public Scan setRowOffsetPerColumnFamily(int offset) {
    this.storeOffset = offset;
    return this;
  }

  /**
   * Set the number of rows for caching that will be passed to scanners.
   * If not set, the Configuration setting {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} will
   * apply.
   * Higher caching values will enable faster scanners but will use more memory.
   * @param caching the number of rows for caching
   * @return this
   */
  public Scan setCaching(int caching) {
    this.caching = caching;
    return this;
  }

  /**
   * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
   */
  public long getMaxResultSize() {
    return maxResultSize;
  }

  /**
   * Set the maximum result size. The default is -1; this means that no specific
   * maximum result size will be set for this scan, and the global configured
   * value will be used instead. (Defaults to unlimited).
   *
   * @param maxResultSize The maximum result size in bytes.
   * @return this
   */
  public Scan setMaxResultSize(long maxResultSize) {
    this.maxResultSize = maxResultSize;
    return this;
  }

  @Override
  public Scan setFilter(Filter filter) {
    super.setFilter(filter);
    return this;
  }

  /**
   * Set the familyMap.
   * @param familyMap map of family to qualifier
   * @return this
   */
  public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
    this.familyMap = familyMap;
    return this;
  }

  /**
   * Get the familyMap.
   * @return familyMap
   */
  public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
    return this.familyMap;
  }

  /**
   * @return the number of families in familyMap
   */
  public int numFamilies() {
    if(hasFamilies()) {
      return this.familyMap.size();
    }
    return 0;
  }

  /**
   * @return true if familyMap is non-empty, false otherwise
   */
  public boolean hasFamilies() {
    return !this.familyMap.isEmpty();
  }

  /**
   * @return the keys of the familyMap
   */
  public byte[][] getFamilies() {
    if(hasFamilies()) {
      return this.familyMap.keySet().toArray(new byte[0][0]);
    }
    return null;
  }

  /**
   * @return the startrow
   */
  public byte [] getStartRow() {
    return this.startRow;
  }

  /**
   * @return the stoprow
   */
  public byte [] getStopRow() {
    return this.stopRow;
  }

  /**
   * @return the max number of versions to fetch
   */
  public int getMaxVersions() {
    return this.maxVersions;
  }

  /**
   * @return maximum number of values to return for a single call to next()
   */
  public int getBatch() {
    return this.batch;
  }

  /**
   * @return maximum number of values to return per row per CF
   */
  public int getMaxResultsPerColumnFamily() {
    return this.storeLimit;
  }

  /**
   * Method for retrieving the scan's offset per row per column
   * family (#kvs to be skipped)
   * @return row offset
   */
  public int getRowOffsetPerColumnFamily() {
    return this.storeOffset;
  }

  /**
   * @return the number of rows for caching that will be passed to scanners
   */
  public int getCaching() {
    return this.caching;
  }

  /**
   * @return TimeRange
   */
  public TimeRange getTimeRange() {
    return this.tr;
  }

  /**
   * @return RowFilter
   */
  @Override
  public Filter getFilter() {
    return filter;
  }

  /**
   * @return true if a filter has been specified, false if not
   */
  public boolean hasFilter() {
    return filter != null;
  }

  /**
   * Set whether blocks should be cached for this Scan.
   * <p>
   * This is true by default.  When true, default settings of the table and
   * family are used (this will never override caching blocks if the block
   * cache is disabled for that family or entirely).
   *
   * @param cacheBlocks if false, default settings are overridden and blocks
   * will not be cached
   * @return this
   */
  public Scan setCacheBlocks(boolean cacheBlocks) {
    this.cacheBlocks = cacheBlocks;
    return this;
  }

  /**
   * Get whether blocks should be cached for this Scan.
   * @return true if default caching should be used, false if blocks should not
   * be cached
   */
  public boolean getCacheBlocks() {
    return cacheBlocks;
  }

  /**
   * Set whether this scan is a reversed one.
   * <p>
   * This is false by default, which means a forward (normal) scan.
   *
   * @param reversed if true, scan in reverse (descending row key) order
   * @return this
   */
  public Scan setReversed(boolean reversed) {
    this.reversed = reversed;
    return this;
  }

  /**
   * Get whether this scan is a reversed one.
   * @return true if backward scan, false if forward(default) scan
   */
  public boolean isReversed() {
    return reversed;
  }

  /**
   * Set the value indicating whether loading CFs on demand should be allowed (cluster
   * default is false). On-demand CF loading doesn't load column families until necessary, e.g.
   * if you filter on one column, the other column family data will be loaded only for the rows
   * that are included in the result, not for all rows as in the normal case.
   * With column-specific filters, like SingleColumnValueFilter w/filterIfMissing == true,
   * this can deliver huge perf gains when there's a cf with lots of data; however, it can
   * also lead to some inconsistent results, as follows:
   * - if someone does a concurrent update to both column families in question you may get a row
   *   that never existed, e.g. for { rowKey = 5, { cat_videos => 1 }, { video => "my cat" } }
   *   someone puts rowKey 5 with { cat_videos => 0 }, { video => "my dog" }, concurrent scan
   *   filtering on "cat_videos == 1" can get { rowKey = 5, { cat_videos => 1 },
   *   { video => "my dog" } }.
   * - if there's a concurrent split and you have more than 2 column families, some rows may be
   *   missing some column families.
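   * <p>A sketch of the intended usage with a column-specific filter (family,
   * qualifier, and value below are illustrative):</p>
   * <pre>{@code
   * Scan scan = new Scan();
   * SingleColumnValueFilter f = new SingleColumnValueFilter(
   *     Bytes.toBytes("meta"), Bytes.toBytes("flag"),
   *     CompareFilter.CompareOp.EQUAL, Bytes.toBytes(1));
   * f.setFilterIfMissing(true);
   * scan.setFilter(f);
   * scan.setLoadColumnFamiliesOnDemand(true); // heavy families load only for matching rows
   * }</pre>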
   */
  public Scan setLoadColumnFamiliesOnDemand(boolean value) {
    this.loadColumnFamiliesOnDemand = value;
    return this;
  }

  /**
   * Get the raw loadColumnFamiliesOnDemand setting; may be null if it has not been set.
   */
  public Boolean getLoadColumnFamiliesOnDemandValue() {
    return this.loadColumnFamiliesOnDemand;
  }

  /**
   * Get the logical value indicating whether on-demand CF loading should be allowed.
   */
  public boolean doLoadColumnFamiliesOnDemand() {
    return (this.loadColumnFamiliesOnDemand != null)
      && this.loadColumnFamiliesOnDemand.booleanValue();
  }

  /**
   * Compile the table and column family (i.e. schema) information
   * into a String. Useful for parsing and aggregation by debugging,
   * logging, and administration tools.
   * @return Map
   */
  @Override
  public Map<String, Object> getFingerprint() {
    Map<String, Object> map = new HashMap<String, Object>();
    List<String> families = new ArrayList<String>();
    if(this.familyMap.size() == 0) {
      map.put("families", "ALL");
      return map;
    } else {
      map.put("families", families);
    }
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
        this.familyMap.entrySet()) {
      families.add(Bytes.toStringBinary(entry.getKey()));
    }
    return map;
  }

  /**
   * Compile the details beyond the scope of getFingerprint (row, columns,
   * timestamps, etc.) into a Map along with the fingerprinted information.
   * Useful for debugging, logging, and administration tools.
   * @param maxCols a limit on the number of columns output prior to truncation
   * @return Map
   */
  @Override
  public Map<String, Object> toMap(int maxCols) {
    // start with the fingerprint map and build on top of it
    Map<String, Object> map = getFingerprint();
    // map from families to column list replaces fingerprint's list of families
    Map<String, List<String>> familyColumns =
      new HashMap<String, List<String>>();
    map.put("families", familyColumns);
    // add scalar information first
    map.put("startRow", Bytes.toStringBinary(this.startRow));
    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
    map.put("maxVersions", this.maxVersions);
    map.put("batch", this.batch);
    map.put("caching", this.caching);
    map.put("maxResultSize", this.maxResultSize);
    map.put("cacheBlocks", this.cacheBlocks);
    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
    List<Long> timeRange = new ArrayList<Long>();
    timeRange.add(this.tr.getMin());
    timeRange.add(this.tr.getMax());
    map.put("timeRange", timeRange);
    int colCount = 0;
    // iterate through affected families and list out up to maxCols columns
    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
      this.familyMap.entrySet()) {
      List<String> columns = new ArrayList<String>();
      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
      if(entry.getValue() == null) {
        colCount++;
        --maxCols;
        columns.add("ALL");
      } else {
        colCount += entry.getValue().size();
        if (maxCols <= 0) {
          continue;
        }
        for (byte [] column : entry.getValue()) {
          if (--maxCols <= 0) {
            continue;
          }
          columns.add(Bytes.toStringBinary(column));
        }
      }
    }
    map.put("totalColumns", colCount);
    if (this.filter != null) {
      map.put("filter", this.filter.toString());
    }
    // add the id if set
    if (getId() != null) {
      map.put("id", getId());
    }
    return map;
  }

  /**
   * Enable/disable "raw" mode for this scan.
   * If "raw" is enabled the scan will return all
   * delete markers and deleted rows that have not
   * been collected yet.
   * This is mostly useful for Scan on column families
   * that have KEEP_DELETED_CELLS enabled.
   * It is an error to specify any column when "raw" is set.
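   * <p>For example (the call to {@link #setMaxVersions()} is a common companion,
   * so that older deleted cells are visible too):</p>
   * <pre>{@code
   * Scan scan = new Scan();
   * scan.setRaw(true);
   * scan.setMaxVersions();
   * }</pre>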
   * @param raw True/False to enable/disable "raw" mode.
   * @return this
   */
  public Scan setRaw(boolean raw) {
    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
    return this;
  }

  /**
   * @return True if this Scan is in "raw" mode.
   */
  public boolean isRaw() {
    byte[] attr = getAttribute(RAW_ATTR);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  /**
   * Set whether this scan is a small scan.
   * <p>
   * A small scan should use pread, while a big scan can use seek + read.
   *
   * Seek + read is fast but can cause two problems: (1) resource contention and
   * (2) too much network I/O.
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if set to true, we do openScanner, next, and closeScanner
   * in one RPC call, which gives better performance for small scans. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block (64KB), it can be
   * considered a small scan.
   *
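   * <p>For example, a point-lookup style scan expected to touch a single block
   * (the row key is illustrative):</p>
   * <pre>{@code
   * Scan scan = new Scan(Bytes.toBytes("row"));
   * scan.setSmall(true);
   * scan.setCaching(1);
   * }</pre>
   *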
   * @param small true to mark this as a small scan
   * @return this
   */
  public Scan setSmall(boolean small) {
    this.small = small;
    return this;
  }

  /**
   * Get whether this scan is a small scan.
   * @return true if small scan
   */
  public boolean isSmall() {
    return small;
  }

  @Override
  public Scan setAttribute(String name, byte[] value) {
    return (Scan) super.setAttribute(name, value);
  }

  @Override
  public Scan setId(String id) {
    return (Scan) super.setId(id);
  }

  @Override
  public Scan setAuthorizations(Authorizations authorizations) {
    return (Scan) super.setAuthorizations(authorizations);
  }

  @Override
  public Scan setACL(Map<String, Permission> perms) {
    return (Scan) super.setACL(perms);
  }

  @Override
  public Scan setACL(String user, Permission perms) {
    return (Scan) super.setACL(user, perms);
  }

  @Override
  public Scan setConsistency(Consistency consistency) {
    return (Scan) super.setConsistency(consistency);
  }

  @Override
  public Scan setReplicaId(int Id) {
    return (Scan) super.setReplicaId(Id);
  }

  @Override
  public Scan setIsolationLevel(IsolationLevel level) {
    return (Scan) super.setIsolationLevel(level);
  }

  /**
   * Utility that creates a Scan that will do a small scan in reverse from passed row
   * looking for next closest row.
   * @param row the row to start the reverse scan from
   * @return An instance of Scan primed with passed <code>row</code> to
   * scan in reverse for one row only.
   */
  static Scan createGetClosestRowOrBeforeReverseScan(byte[] row) {
    // Below does not work if you add in family; need to add the family qualifier that is highest
    // possible family qualifier.  Do we have such a notion?  Would have to be magic.
    Scan scan = new Scan(row);
    scan.setSmall(true);
    scan.setReversed(true);
    scan.setCaching(1);
    return scan;
  }

  /**
   * Enable collection of {@link ScanMetrics}. For advanced users.
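   * <p>A sketch of the intended usage (metrics become available once the scan
   * has been executed by a scanner):</p>
   * <pre>{@code
   * Scan scan = new Scan();
   * scan.setScanMetricsEnabled(true);
   * // ... run the scan with a ResultScanner, consume the results ...
   * ScanMetrics metrics = scan.getScanMetrics();
   * }</pre>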
   * @param enabled Set to true to enable accumulating scan metrics
   * @return this
   */
  public Scan setScanMetricsEnabled(final boolean enabled) {
    setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.valueOf(enabled)));
    return this;
  }

  /**
   * @return True if collection of scan metrics is enabled. For advanced users.
   */
  public boolean isScanMetricsEnabled() {
    byte[] attr = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  /**
   * @return Metrics on this Scan, if metrics were enabled.
   * @see #setScanMetricsEnabled(boolean)
   */
  public ScanMetrics getScanMetrics() {
    byte [] bytes = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA);
    if (bytes == null) return null;
    return ProtobufUtil.toScanMetrics(bytes);
  }
}