1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import static org.junit.Assert.*;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.HashMap;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.hbase.HBaseTestingUtility;
36  import org.apache.hadoop.hbase.HColumnDescriptor;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.HTableDescriptor;
39  import org.apache.hadoop.hbase.KeyValue;
40  import org.apache.hadoop.hbase.KeyValueTestUtil;
41  import org.apache.hadoop.hbase.client.Put;
42  import org.apache.hadoop.hbase.client.Scan;
43  import org.apache.hadoop.hbase.regionserver.HRegion;
44  import org.apache.hadoop.hbase.regionserver.InternalScanner;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.junit.Test;
47  
48  public class TestColumnSeeking {
49  
50    private final static HBaseTestingUtility TEST_UTIL =
51        new HBaseTestingUtility();
52  
53    static final Log LOG = LogFactory.getLog(TestColumnSeeking.class);
54  
55    @SuppressWarnings("unchecked")
56    @Test
57    public void testDuplicateVersions() throws IOException {
58      String family = "Family";
59      byte[] familyBytes = Bytes.toBytes("Family");
60      String table = "TestDuplicateVersions";
61  
62      HColumnDescriptor hcd =
63          new HColumnDescriptor(familyBytes, 1000,
64              HColumnDescriptor.DEFAULT_COMPRESSION,
65              HColumnDescriptor.DEFAULT_IN_MEMORY,
66              HColumnDescriptor.DEFAULT_BLOCKCACHE,
67              HColumnDescriptor.DEFAULT_TTL,
68              HColumnDescriptor.DEFAULT_BLOOMFILTER);
69      HTableDescriptor htd = new HTableDescriptor(table);
70      htd.addFamily(hcd);
71      HRegionInfo info = new HRegionInfo(htd, null, null, false);
72      HRegion region =
73          HRegion.createHRegion(info, HBaseTestingUtility.getTestDir(), TEST_UTIL
74              .getConfiguration());
75  
76      List<String> rows = generateRandomWords(10, "row");
77      List<String> allColumns = generateRandomWords(10, "column");
78      List<String> values = generateRandomWords(100, "value");
79  
80      long maxTimestamp = 2;
81      double selectPercent = 0.5;
82      int numberOfTests = 5;
83      double flushPercentage = 0.2;
84      double minorPercentage = 0.2;
85      double majorPercentage = 0.2;
86      double putPercentage = 0.2;
87  
88      HashMap<String, KeyValue> allKVMap = new HashMap<String, KeyValue>();
89  
90      HashMap<String, KeyValue>[] kvMaps = new HashMap[numberOfTests];
91      ArrayList<String>[] columnLists = new ArrayList[numberOfTests];
92  
93      for (int i = 0; i < numberOfTests; i++) {
94        kvMaps[i] = new HashMap<String, KeyValue>();
95        columnLists[i] = new ArrayList<String>();
96        for (String column : allColumns) {
97          if (Math.random() < selectPercent) {
98            columnLists[i].add(column);
99          }
100       }
101     }
102 
103     for (String value : values) {
104       for (String row : rows) {
105         Put p = new Put(Bytes.toBytes(row));
106         for (String column : allColumns) {
107           for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
108             KeyValue kv =
109                 KeyValueTestUtil.create(row, family, column, timestamp, value);
110             if (Math.random() < putPercentage) {
111               p.add(kv);
112               allKVMap.put(kv.getKeyString(), kv);
113               for (int i = 0; i < numberOfTests; i++) {
114                 if (columnLists[i].contains(column)) {
115                   kvMaps[i].put(kv.getKeyString(), kv);
116                 }
117               }
118             }
119           }
120         }
121         region.put(p);
122         if (Math.random() < flushPercentage) {
123           LOG.info("Flushing... ");
124           region.flushcache();
125         }
126 
127         if (Math.random() < minorPercentage) {
128           LOG.info("Minor compacting... ");
129           region.compactStores(false);
130         }
131 
132         if (Math.random() < majorPercentage) {
133           LOG.info("Major compacting... ");
134           region.compactStores(true);
135         }
136       }
137     }
138 
139     for (int i = 0; i < numberOfTests + 1; i++) {
140       Collection<KeyValue> kvSet;
141       Scan scan = new Scan();
142       scan.setMaxVersions();
143       if (i < numberOfTests) {
144         kvSet = kvMaps[i].values();
145         for (String column : columnLists[i]) {
146           scan.addColumn(familyBytes, Bytes.toBytes(column));
147         }
148         LOG.info("ExplicitColumns scanner");
149         LOG.info("Columns: " + columnLists[i].size() + "  Keys: "
150             + kvSet.size());
151       } else {
152         kvSet = allKVMap.values();
153         LOG.info("Wildcard scanner");
154         LOG.info("Columns: " + allColumns.size() + "  Keys: " + kvSet.size());
155 
156       }
157       InternalScanner scanner = region.getScanner(scan);
158       List<KeyValue> results = new ArrayList<KeyValue>();
159       while (scanner.next(results))
160         ;
161       assertEquals(kvSet.size(), results.size());
162       assertTrue(results.containsAll(kvSet));
163     }
164   }
165 
166   @SuppressWarnings("unchecked")
167   @Test
168   public void testReseeking() throws IOException {
169     String family = "Family";
170     byte[] familyBytes = Bytes.toBytes("Family");
171     String table = "TestSingleVersions";
172 
173     HTableDescriptor htd = new HTableDescriptor(table);
174     htd.addFamily(new HColumnDescriptor(family));
175     HRegionInfo info = new HRegionInfo(htd, null, null, false);
176     HRegion region =
177         HRegion.createHRegion(info, HBaseTestingUtility.getTestDir(), TEST_UTIL
178             .getConfiguration());
179 
180     List<String> rows = generateRandomWords(10, "row");
181     List<String> allColumns = generateRandomWords(100, "column");
182 
183     long maxTimestamp = 2;
184     double selectPercent = 0.5;
185     int numberOfTests = 5;
186     double flushPercentage = 0.2;
187     double minorPercentage = 0.2;
188     double majorPercentage = 0.2;
189     double putPercentage = 0.2;
190 
191     HashMap<String, KeyValue> allKVMap = new HashMap<String, KeyValue>();
192 
193     HashMap<String, KeyValue>[] kvMaps = new HashMap[numberOfTests];
194     ArrayList<String>[] columnLists = new ArrayList[numberOfTests];
195     String valueString = "Value";
196 
197     for (int i = 0; i < numberOfTests; i++) {
198       kvMaps[i] = new HashMap<String, KeyValue>();
199       columnLists[i] = new ArrayList<String>();
200       for (String column : allColumns) {
201         if (Math.random() < selectPercent) {
202           columnLists[i].add(column);
203         }
204       }
205     }
206 
207     for (String row : rows) {
208       Put p = new Put(Bytes.toBytes(row));
209       for (String column : allColumns) {
210         for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
211           KeyValue kv =
212               KeyValueTestUtil.create(row, family, column, timestamp,
213                   valueString);
214           if (Math.random() < putPercentage) {
215             p.add(kv);
216             allKVMap.put(kv.getKeyString(), kv);
217             for (int i = 0; i < numberOfTests; i++) {
218               if (columnLists[i].contains(column)) {
219                 kvMaps[i].put(kv.getKeyString(), kv);
220               }
221             }
222           }
223 
224         }
225       }
226       region.put(p);
227       if (Math.random() < flushPercentage) {
228         LOG.info("Flushing... ");
229         region.flushcache();
230       }
231 
232       if (Math.random() < minorPercentage) {
233         LOG.info("Minor compacting... ");
234         region.compactStores(false);
235       }
236 
237       if (Math.random() < majorPercentage) {
238         LOG.info("Major compacting... ");
239         region.compactStores(true);
240       }
241     }
242 
243     for (int i = 0; i < numberOfTests + 1; i++) {
244       Collection<KeyValue> kvSet;
245       Scan scan = new Scan();
246       scan.setMaxVersions();
247       if (i < numberOfTests) {
248         kvSet = kvMaps[i].values();
249         for (String column : columnLists[i]) {
250           scan.addColumn(familyBytes, Bytes.toBytes(column));
251         }
252         LOG.info("ExplicitColumns scanner");
253         LOG.info("Columns: " + columnLists[i].size() + "  Keys: "
254             + kvSet.size());
255       } else {
256         kvSet = allKVMap.values();
257         LOG.info("Wildcard scanner");
258         LOG.info("Columns: " + allColumns.size() + "  Keys: " + kvSet.size());
259 
260       }
261       InternalScanner scanner = region.getScanner(scan);
262       List<KeyValue> results = new ArrayList<KeyValue>();
263       while (scanner.next(results))
264         ;
265       assertEquals(kvSet.size(), results.size());
266       assertTrue(results.containsAll(kvSet));
267     }
268   }
269 
270   List<String> generateRandomWords(int numberOfWords, String suffix) {
271     Set<String> wordSet = new HashSet<String>();
272     for (int i = 0; i < numberOfWords; i++) {
273       int lengthOfWords = (int) (Math.random() * 5) + 1;
274       char[] wordChar = new char[lengthOfWords];
275       for (int j = 0; j < wordChar.length; j++) {
276         wordChar[j] = (char) (Math.random() * 26 + 97);
277       }
278       String word;
279       if (suffix == null) {
280         word = new String(wordChar);
281       } else {
282         word = new String(wordChar) + suffix;
283       }
284       wordSet.add(word);
285     }
286     List<String> wordList = new ArrayList<String>(wordSet);
287     return wordList;
288   }
289 }