View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.client;
22  
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  import org.apache.hadoop.io.Writable;
31  import org.apache.hadoop.io.WritableFactories;
32  
33  import java.io.DataInput;
34  import java.io.DataOutput;
35  import java.io.IOException;
36  import java.util.Map;
37  import java.util.NavigableSet;
38  import java.util.TreeMap;
39  import java.util.TreeSet;
40  
41  /**
42   * Used to perform Scan operations.
43   * <p>
44   * All operations are identical to {@link Get} with the exception of
45   * instantiation.  Rather than specifying a single row, an optional startRow
46   * and stopRow may be defined.  If rows are not specified, the Scanner will
47   * iterate over all rows.
48   * <p>
49   * To scan everything for each row, instantiate a Scan object.
50   * <p>
51   * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
52   * <p>
53   * To further define the scope of what to get when scanning, perform additional
54   * methods as outlined below.
55   * <p>
56   * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
57   * for each family to retrieve.
58   * <p>
59   * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
60   * for each column to retrieve.
61   * <p>
62   * To only retrieve columns within a specific range of version timestamps,
63   * execute {@link #setTimeRange(long, long) setTimeRange}.
64   * <p>
65   * To only retrieve columns with a specific timestamp, execute
66   * {@link #setTimeStamp(long) setTimestamp}.
67   * <p>
68   * To limit the number of versions of each column to be returned, execute
69   * {@link #setMaxVersions(int) setMaxVersions}.
70   * <p>
71   * To limit the maximum number of values returned for each call to next(),
72   * execute {@link #setBatch(int) setBatch}.
73   * <p>
74   * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
75   * <p>
76   * Expert: To explicitly disable server-side block caching for this scan,
77   * execute {@link #setCacheBlocks(boolean)}.
78   */
79  public class Scan implements Writable {
80    private static final byte SCAN_VERSION = (byte)1;
81    private byte [] startRow = HConstants.EMPTY_START_ROW;
82    private byte [] stopRow  = HConstants.EMPTY_END_ROW;
83    private int maxVersions = 1;
84    private int batch = -1;
85    private int caching = -1;
86    private boolean cacheBlocks = true;
87    private Filter filter = null;
88    private TimeRange tr = new TimeRange();
89    private Map<byte [], NavigableSet<byte []>> familyMap =
90      new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
91  
92    /**
93     * Create a Scan operation across all rows.
94     */
95    public Scan() {}
96  
97    public Scan(byte [] startRow, Filter filter) {
98      this(startRow);
99      this.filter = filter;
100   }
101 
102   /**
103    * Create a Scan operation starting at the specified row.
104    * <p>
105    * If the specified row does not exist, the Scanner will start from the
106    * next closest row after the specified row.
107    * @param startRow row to start scanner at or after
108    */
109   public Scan(byte [] startRow) {
110     this.startRow = startRow;
111   }
112 
113   /**
114    * Create a Scan operation for the range of rows specified.
115    * @param startRow row to start scanner at or after (inclusive)
116    * @param stopRow row to stop scanner before (exclusive)
117    */
118   public Scan(byte [] startRow, byte [] stopRow) {
119     this.startRow = startRow;
120     this.stopRow = stopRow;
121   }
122 
123   /**
124    * Creates a new instance of this class while copying all values.
125    *
126    * @param scan  The scan instance to copy from.
127    * @throws IOException When copying the values fails.
128    */
129   public Scan(Scan scan) throws IOException {
130     startRow = scan.getStartRow();
131     stopRow  = scan.getStopRow();
132     maxVersions = scan.getMaxVersions();
133     batch = scan.getBatch();
134     caching = scan.getCaching();
135     cacheBlocks = scan.getCacheBlocks();
136     filter = scan.getFilter(); // clone?
137     TimeRange ctr = scan.getTimeRange();
138     tr = new TimeRange(ctr.getMin(), ctr.getMax());
139     Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
140     for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
141       byte [] fam = entry.getKey();
142       NavigableSet<byte[]> cols = entry.getValue();
143       if (cols != null && cols.size() > 0) {
144         for (byte[] col : cols) {
145           addColumn(fam, col);
146         }
147       } else {
148         addFamily(fam);
149       }
150     }
151   }
152 
153   /**
154    * Builds a scan object with the same specs as get.
155    * @param get get to model scan after
156    */
157   public Scan(Get get) {
158     this.startRow = get.getRow();
159     this.stopRow = get.getRow();
160     this.filter = get.getFilter();
161     this.maxVersions = get.getMaxVersions();
162     this.tr = get.getTimeRange();
163     this.familyMap = get.getFamilyMap();
164   }
165 
166   public boolean isGetScan() {
167     return this.startRow != null && this.startRow.length > 0 &&
168       Bytes.equals(this.startRow, this.stopRow);
169   }
170 
171   /**
172    * Get all columns from the specified family.
173    * <p>
174    * Overrides previous calls to addColumn for this family.
175    * @param family family name
176    * @return this
177    */
178   public Scan addFamily(byte [] family) {
179     familyMap.remove(family);
180     familyMap.put(family, null);
181     return this;
182   }
183 
184   /**
185    * Get the column from the specified family with the specified qualifier.
186    * <p>
187    * Overrides previous calls to addFamily for this family.
188    * @param family family name
189    * @param qualifier column qualifier
190    * @return this
191    */
192   public Scan addColumn(byte [] family, byte [] qualifier) {
193     NavigableSet<byte []> set = familyMap.get(family);
194     if(set == null) {
195       set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
196     }
197     set.add(qualifier);
198     familyMap.put(family, set);
199 
200     return this;
201   }
202 
203   /**
204    * Get versions of columns only within the specified timestamp range,
205    * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
206    * your time range spans more than one version and you want all versions
207    * returned, up the number of versions beyond the defaut.
208    * @param minStamp minimum timestamp value, inclusive
209    * @param maxStamp maximum timestamp value, exclusive
210    * @throws IOException if invalid time range
211    * @see #setMaxVersions()
212    * @see #setMaxVersions(int)
213    * @return this
214    */
215   public Scan setTimeRange(long minStamp, long maxStamp)
216   throws IOException {
217     tr = new TimeRange(minStamp, maxStamp);
218     return this;
219   }
220 
221   /**
222    * Get versions of columns with the specified timestamp. Note, default maximum
223    * versions to return is 1.  If your time range spans more than one version
224    * and you want all versions returned, up the number of versions beyond the
225    * defaut.
226    * @param timestamp version timestamp
227    * @see #setMaxVersions()
228    * @see #setMaxVersions(int)
229    * @return this
230    */
231   public Scan setTimeStamp(long timestamp) {
232     try {
233       tr = new TimeRange(timestamp, timestamp+1);
234     } catch(IOException e) {
235       // Will never happen
236     }
237     return this;
238   }
239 
240   /**
241    * Set the start row of the scan.
242    * @param startRow row to start scan on, inclusive
243    * @return this
244    */
245   public Scan setStartRow(byte [] startRow) {
246     this.startRow = startRow;
247     return this;
248   }
249 
250   /**
251    * Set the stop row.
252    * @param stopRow row to end at (exclusive)
253    * @return this
254    */
255   public Scan setStopRow(byte [] stopRow) {
256     this.stopRow = stopRow;
257     return this;
258   }
259 
260   /**
261    * Get all available versions.
262    * @return this
263    */
264   public Scan setMaxVersions() {
265     this.maxVersions = Integer.MAX_VALUE;
266     return this;
267   }
268 
269   /**
270    * Get up to the specified number of versions of each column.
271    * @param maxVersions maximum versions for each column
272    * @return this
273    */
274   public Scan setMaxVersions(int maxVersions) {
275     this.maxVersions = maxVersions;
276     return this;
277   }
278 
279   /**
280    * Set the maximum number of values to return for each call to next()
281    * @param batch the maximum number of values
282    */
283   public void setBatch(int batch) {
284 	if(this.hasFilter() && this.filter.hasFilterRow()) {
285 	  throw new IncompatibleFilterException(
286         "Cannot set batch on a scan using a filter" +
287         " that returns true for filter.hasFilterRow");
288 	}
289     this.batch = batch;
290   }
291 
292   /**
293    * Set the number of rows for caching that will be passed to scanners.
294    * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
295    * Higher caching values will enable faster scanners but will use more memory.
296    * @param caching the number of rows for caching
297    */
298   public void setCaching(int caching) {
299     this.caching = caching;
300   }
301 
302   /**
303    * Apply the specified server-side filter when performing the Scan.
304    * @param filter filter to run on the server
305    * @return this
306    */
307   public Scan setFilter(Filter filter) {
308     this.filter = filter;
309     return this;
310   }
311 
312   /**
313    * Setting the familyMap
314    * @param familyMap map of family to qualifier
315    * @return this
316    */
317   public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
318     this.familyMap = familyMap;
319     return this;
320   }
321 
322   /**
323    * Getting the familyMap
324    * @return familyMap
325    */
326   public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
327     return this.familyMap;
328   }
329 
330   /**
331    * @return the number of families in familyMap
332    */
333   public int numFamilies() {
334     if(hasFamilies()) {
335       return this.familyMap.size();
336     }
337     return 0;
338   }
339 
340   /**
341    * @return true if familyMap is non empty, false otherwise
342    */
343   public boolean hasFamilies() {
344     return !this.familyMap.isEmpty();
345   }
346 
347   /**
348    * @return the keys of the familyMap
349    */
350   public byte[][] getFamilies() {
351     if(hasFamilies()) {
352       return this.familyMap.keySet().toArray(new byte[0][0]);
353     }
354     return null;
355   }
356 
357   /**
358    * @return the startrow
359    */
360   public byte [] getStartRow() {
361     return this.startRow;
362   }
363 
364   /**
365    * @return the stoprow
366    */
367   public byte [] getStopRow() {
368     return this.stopRow;
369   }
370 
371   /**
372    * @return the max number of versions to fetch
373    */
374   public int getMaxVersions() {
375     return this.maxVersions;
376   }
377 
378   /**
379    * @return maximum number of values to return for a single call to next()
380    */
381   public int getBatch() {
382     return this.batch;
383   }
384 
385   /**
386    * @return caching the number of rows fetched when calling next on a scanner
387    */
388   public int getCaching() {
389     return this.caching;
390   }
391 
392   /**
393    * @return TimeRange
394    */
395   public TimeRange getTimeRange() {
396     return this.tr;
397   }
398 
399   /**
400    * @return RowFilter
401    */
402   public Filter getFilter() {
403     return filter;
404   }
405 
406   /**
407    * @return true is a filter has been specified, false if not
408    */
409   public boolean hasFilter() {
410     return filter != null;
411   }
412 
413   /**
414    * Set whether blocks should be cached for this Scan.
415    * <p>
416    * This is true by default.  When true, default settings of the table and
417    * family are used (this will never override caching blocks if the block
418    * cache is disabled for that family or entirely).
419    *
420    * @param cacheBlocks if false, default settings are overridden and blocks
421    * will not be cached
422    */
423   public void setCacheBlocks(boolean cacheBlocks) {
424     this.cacheBlocks = cacheBlocks;
425   }
426 
427   /**
428    * Get whether blocks should be cached for this Scan.
429    * @return true if default caching should be used, false if blocks should not
430    * be cached
431    */
432   public boolean getCacheBlocks() {
433     return cacheBlocks;
434   }
435 
436   /**
437    * @return String
438    */
439   @Override
440   public String toString() {
441     StringBuilder sb = new StringBuilder();
442     sb.append("startRow=");
443     sb.append(Bytes.toString(this.startRow));
444     sb.append(", stopRow=");
445     sb.append(Bytes.toString(this.stopRow));
446     sb.append(", maxVersions=");
447     sb.append(this.maxVersions);
448     sb.append(", batch=");
449     sb.append(this.batch);
450     sb.append(", caching=");
451     sb.append(this.caching);
452     sb.append(", cacheBlocks=");
453     sb.append(this.cacheBlocks);
454     sb.append(", timeRange=");
455     sb.append("[").append(this.tr.getMin()).append(",");
456     sb.append(this.tr.getMax()).append(")");
457     sb.append(", families=");
458     if(this.familyMap.size() == 0) {
459       sb.append("ALL");
460       return sb.toString();
461     }
462     boolean moreThanOne = false;
463     for(Map.Entry<byte [], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
464       if(moreThanOne) {
465         sb.append("), ");
466       } else {
467         moreThanOne = true;
468         sb.append("{");
469       }
470       sb.append("(family=");
471       sb.append(Bytes.toString(entry.getKey()));
472       sb.append(", columns=");
473       if(entry.getValue() == null) {
474         sb.append("ALL");
475       } else {
476         sb.append("{");
477         boolean moreThanOneB = false;
478         for(byte [] column : entry.getValue()) {
479           if(moreThanOneB) {
480             sb.append(", ");
481           } else {
482             moreThanOneB = true;
483           }
484           sb.append(Bytes.toString(column));
485         }
486         sb.append("}");
487       }
488     }
489     sb.append("}");
490     return sb.toString();
491   }
492 
493   @SuppressWarnings("unchecked")
494   private Writable createForName(String className) {
495     try {
496       Class<? extends Writable> clazz =
497         (Class<? extends Writable>) Class.forName(className);
498       return WritableFactories.newInstance(clazz, new Configuration());
499     } catch (ClassNotFoundException e) {
500       throw new RuntimeException("Can't find class " + className);
501     }
502   }
503 
504   //Writable
505   public void readFields(final DataInput in)
506   throws IOException {
507     int version = in.readByte();
508     if (version > (int)SCAN_VERSION) {
509       throw new IOException("version not supported");
510     }
511     this.startRow = Bytes.readByteArray(in);
512     this.stopRow = Bytes.readByteArray(in);
513     this.maxVersions = in.readInt();
514     this.batch = in.readInt();
515     this.caching = in.readInt();
516     this.cacheBlocks = in.readBoolean();
517     if(in.readBoolean()) {
518       this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
519       this.filter.readFields(in);
520     }
521     this.tr = new TimeRange();
522     tr.readFields(in);
523     int numFamilies = in.readInt();
524     this.familyMap =
525       new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
526     for(int i=0; i<numFamilies; i++) {
527       byte [] family = Bytes.readByteArray(in);
528       int numColumns = in.readInt();
529       TreeSet<byte []> set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
530       for(int j=0; j<numColumns; j++) {
531         byte [] qualifier = Bytes.readByteArray(in);
532         set.add(qualifier);
533       }
534       this.familyMap.put(family, set);
535     }
536   }
537 
538   public void write(final DataOutput out)
539   throws IOException {
540     out.writeByte(SCAN_VERSION);
541     Bytes.writeByteArray(out, this.startRow);
542     Bytes.writeByteArray(out, this.stopRow);
543     out.writeInt(this.maxVersions);
544     out.writeInt(this.batch);
545     out.writeInt(this.caching);
546     out.writeBoolean(this.cacheBlocks);
547     if(this.filter == null) {
548       out.writeBoolean(false);
549     } else {
550       out.writeBoolean(true);
551       Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
552       filter.write(out);
553     }
554     tr.write(out);
555     out.writeInt(familyMap.size());
556     for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
557       Bytes.writeByteArray(out, entry.getKey());
558       NavigableSet<byte []> columnSet = entry.getValue();
559       if(columnSet != null){
560         out.writeInt(columnSet.size());
561         for(byte [] qualifier : columnSet) {
562           Bytes.writeByteArray(out, qualifier);
563         }
564       } else {
565         out.writeInt(0);
566       }
567     }
568   }
569 
570    /**
571    * Parses a combined family and qualifier and adds either both or just the
572    * family in case there is not qualifier. This assumes the older colon
573    * divided notation, e.g. "data:contents" or "meta:".
574    * <p>
575    * Note: It will through an error when the colon is missing.
576    *
577    * @param familyAndQualifier family and qualifier
578    * @return A reference to this instance.
579    * @throws IllegalArgumentException When the colon is missing.
580    * @deprecated use {@link #addColumn(byte[], byte[])} instead
581    */
582   public Scan addColumn(byte[] familyAndQualifier) {
583     byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
584     if (fq.length > 1 && fq[1] != null && fq[1].length > 0) {
585       addColumn(fq[0], fq[1]);
586     } else {
587       addFamily(fq[0]);
588     }
589     return this;
590   }
591 
592   /**
593    * Adds an array of columns specified using old format, family:qualifier.
594    * <p>
595    * Overrides previous calls to addFamily for any families in the input.
596    *
597    * @param columns array of columns, formatted as <pre>family:qualifier</pre>
598    * @deprecated issue multiple {@link #addColumn(byte[], byte[])} instead
599    * @return this
600    */
601   public Scan addColumns(byte [][] columns) {
602     for (byte[] column : columns) {
603       addColumn(column);
604     }
605     return this;
606   }
607 
608   /**
609    * Convenience method to help parse old style (or rather user entry on the
610    * command line) column definitions, e.g. "data:contents mime:". The columns
611    * must be space delimited and always have a colon (":") to denote family
612    * and qualifier.
613    *
614    * @param columns  The columns to parse.
615    * @return A reference to this instance.
616    * @deprecated use {@link #addColumn(byte[], byte[])} instead
617    */
618   public Scan addColumns(String columns) {
619     String[] cols = columns.split(" ");
620     for (String col : cols) {
621       addColumn(Bytes.toBytes(col));
622     }
623     return this;
624   }
625 
626   /**
627    * Helps to convert the binary column families and qualifiers to a text
628    * representation, e.g. "data:mimetype data:contents meta:". Binary values
629    * are properly encoded using {@link Bytes#toBytesBinary(String)}.
630    *
631    * @return The columns in an old style string format.
632    * @deprecated
633    */
634   public String getInputColumns() {
635     StringBuilder cols = new StringBuilder("");
636     for (Map.Entry<byte[], NavigableSet<byte[]>> e :
637       familyMap.entrySet()) {
638       byte[] fam = e.getKey();
639       if (cols.length() > 0) cols.append(" ");
640       NavigableSet<byte[]> quals = e.getValue();
641       // check if this family has qualifiers
642       if (quals != null && quals.size() > 0) {
643         StringBuilder cs = new StringBuilder("");
644         for (byte[] qual : quals) {
645           if (cs.length() > 0) cs.append(" ");
646           // encode values to make parsing easier later
647           cs.append(Bytes.toStringBinary(fam)).append(":").append(Bytes.toStringBinary(qual));
648         }
649         cols.append(cs);
650       } else {
651         // only add the family but with old style delimiter
652         cols.append(Bytes.toStringBinary(fam)).append(":");
653       }
654     }
655     return cols.toString();
656   }
657 }