1 /**
2 * Copyright 2009 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.IOException;
23 import java.nio.ByteBuffer;
24
25 import org.apache.hadoop.hbase.KeyValue;
26
27 /**
28 * A scanner allows you to position yourself within a HFile and
29 * scan through it. It allows you to reposition yourself as well.
30 *
31 * <p>A scanner doesn't always have a key/value that it is pointing to
32 * when it is first created and before
33 * {@link #seekTo()}/{@link #seekTo(byte[])} are called.
34 * In this case, {@link #getKey()}/{@link #getValue()} returns null. At most
35 * other times, a key and value will be available. The general pattern is that
36 * you position the Scanner using the seekTo variants and then getKey and
37 * getValue.
38 */
39 public interface HFileScanner {
40 /**
41 * SeekTo or just before the passed <code>key</code>. Examine the return
42 * code to figure whether we found the key or not.
43 * Consider the key stream of all the keys in the file,
44 * <code>k[0] .. k[n]</code>, where there are n keys in the file.
45 * @param key Key to find.
46 * @return -1, if key < k[0], no position;
47 * 0, such that k[i] = key and scanner is left in position i; and
48 * 1, such that k[i] < key, and scanner is left in position i.
49 * The scanner will position itself between k[i] and k[i+1] where
50 * k[i] < key <= k[i+1].
51 * If there is no key k[i+1] greater than or equal to the input key, then the
52 * scanner will position itself at the end of the file and next() will return
53 * false when it is called.
54 * @throws IOException
55 */
56 public int seekTo(byte[] key) throws IOException;
57 public int seekTo(byte[] key, int offset, int length) throws IOException;
58 /**
59 * Reseek to or just before the passed <code>key</code>. Similar to seekTo
60 * except that this can be called even if the scanner is not at the beginning
61 * of a file.
62 * This can be used to seek only to keys which come after the current position
63 * of the scanner.
64 * Consider the key stream of all the keys in the file,
65 * <code>k[0] .. k[n]</code>, where there are n keys in the file after
66 * current position of HFileScanner.
67 * The scanner will position itself between k[i] and k[i+1] where
68 * k[i] < key <= k[i+1].
69 * If there is no key k[i+1] greater than or equal to the input key, then the
70 * scanner will position itself at the end of the file and next() will return
71 * false when it is called.
72 * @param key Key to find (should be non-null)
73 * @return -1, if key < k[0], no position;
74 * 0, such that k[i] = key and scanner is left in position i; and
75 * 1, such that k[i] < key, and scanner is left in position i.
76 * @throws IOException
77 */
78 public int reseekTo(byte[] key) throws IOException;
79 public int reseekTo(byte[] key, int offset, int length) throws IOException;
80 /**
81 * Consider the key stream of all the keys in the file,
82 * <code>k[0] .. k[n]</code>, where there are n keys in the file.
83 * @param key Key to find
84 * @return false if key <= k[0] or true with scanner in position 'i' such
85 * that: k[i] < key. Furthermore: there may be a k[i+1], such that
86 * k[i] < key <= k[i+1] but there may also NOT be a k[i+1], and next() will
87 * return false (EOF).
88 * @throws IOException
89 */
90 public boolean seekBefore(byte [] key) throws IOException;
91 public boolean seekBefore(byte []key, int offset, int length) throws IOException;
92 /**
93 * Positions this scanner at the start of the file.
94 * @return False if empty file; i.e. a call to next would return false and
95 * the current key and value are undefined.
96 * @throws IOException
97 */
98 public boolean seekTo() throws IOException;
99 /**
100 * Scans to the next entry in the file.
101 * @return Returns false if you are at the end otherwise true if more in file.
102 * @throws IOException
103 */
104 public boolean next() throws IOException;
105 /**
106 * Gets a buffer view to the current key. You must call
107 * {@link #seekTo(byte[])} before this method.
108 * @return byte buffer for the key. The limit is set to the key size, and the
109 * position is 0, the start of the buffer view.
110 */
111 public ByteBuffer getKey();
112 /**
113 * Gets a buffer view to the current value. You must call
114 * {@link #seekTo(byte[])} before this method.
115 *
116 * @return byte buffer for the value. The limit is set to the value size, and
117 * the position is 0, the start of the buffer view.
118 */
119 public ByteBuffer getValue();
120 /**
121 * @return Instance of {@link KeyValue}.
122 */
123 public KeyValue getKeyValue();
124 /**
125 * Convenience method to get a copy of the key as a string - interpreting the
126 * bytes as UTF8. You must call {@link #seekTo(byte[])} before this method.
127 * @return key as a string
128 */
129 public String getKeyString();
130 /**
131 * Convenience method to get a copy of the value as a string - interpreting
132 * the bytes as UTF8. You must call {@link #seekTo(byte[])} before this method.
133 * @return value as a string
134 */
135 public String getValueString();
136 /**
137 * @return Reader that underlies this Scanner instance.
138 */
139 public HFile.Reader getReader();
140 /**
141 * @return True is scanner has had one of the seek calls invoked; i.e.
142 * {@link #seekBefore(byte[])} or {@link #seekTo()} or {@link #seekTo(byte[])}.
143 * Otherwise returns false.
144 */
145 public boolean isSeeked();
146 }