View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HColumnDescriptor;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.LargeTests;
35  import org.apache.hadoop.hbase.MiniHBaseCluster;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.client.HTable;
38  import org.apache.hadoop.hbase.client.Put;
39  import org.apache.hadoop.hbase.client.Result;
40  import org.apache.hadoop.hbase.client.ResultScanner;
41  import org.apache.hadoop.hbase.client.Scan;
42  import org.apache.hadoop.hbase.filter.CompareFilter;
43  import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.junit.Test;
46  import org.junit.experimental.categories.Category;
47  
48  
49  
50  /**
51   * Test performance improvement of joined scanners optimization:
52   * https://issues.apache.org/jira/browse/HBASE-5416
53   */
54  @Category(LargeTests.class)
55  public class TestJoinedScanners {
56    static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);
57  
58    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
59    private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();
60  
61    private static final byte[] cf_essential = Bytes.toBytes("essential");
62    private static final byte[] cf_joined = Bytes.toBytes("joined");
63    private static final byte[] col_name = Bytes.toBytes("a");
64    private static final byte[] flag_yes = Bytes.toBytes("Y");
65    private static final byte[] flag_no  = Bytes.toBytes("N");
66  
67    @Test
68    public void testJoinedScanners() throws Exception {
69      String dataNodeHosts[] = new String[] { "host1", "host2", "host3" };
70      int regionServersCount = 3;
71  
72      HBaseTestingUtility htu = new HBaseTestingUtility();
73  
74      final int DEFAULT_BLOCK_SIZE = 1024*1024;
75      htu.getConfiguration().setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
76      htu.getConfiguration().setInt("dfs.replication", 1);
77      htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
78      MiniHBaseCluster cluster = null;
79  
80      try {
81        cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
82        byte [][] families = {cf_essential, cf_joined};
83  
84        HTable ht = htu.createTable(
85          Bytes.toBytes(this.getClass().getSimpleName()), families);
86  
87        long rows_to_insert = 1000;
88        int insert_batch = 20;
89        int flag_percent = 1;
90        int large_bytes = 128 * 1024;
91        long time = System.nanoTime();
92  
93        LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
94          + Float.toString(rows_to_insert * large_bytes / 1024 / 1024) + " MB");
95  
96        byte [] val_large = new byte[large_bytes];
97  
98        List<Put> puts = new ArrayList<Put>();
99  
100       for (long i = 0; i < rows_to_insert; i++) {
101         Put put = new Put(Bytes.toBytes(Long.toString (i)));
102         if (i % 100 <= flag_percent) {
103           put.add(cf_essential, col_name, flag_yes);
104         }
105         else {
106           put.add(cf_essential, col_name, flag_no);
107         }
108         put.add(cf_joined, col_name, val_large);
109         puts.add(put);
110         if (puts.size() >= insert_batch) {
111           ht.put(puts);
112           puts.clear();
113         }
114       }
115       if (puts.size() >= 0) {
116         ht.put(puts);
117         puts.clear();
118       }
119 
120       LOG.info("Data generated in "
121         + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");
122 
123       boolean slow = true;
124       for (int i = 0; i < 20; ++i) {
125         runScanner(ht, slow);
126         slow = !slow;
127       }
128 
129       ht.close();
130     } finally {
131       if (cluster != null) {
132         htu.shutdownMiniCluster();
133       }
134     }
135   }
136 
137   private void runScanner(HTable table, boolean slow) throws Exception {
138     long time = System.nanoTime();
139     Scan scan = new Scan();
140     scan.addColumn(cf_essential, col_name);
141     scan.addColumn(cf_joined, col_name);
142 
143     SingleColumnValueFilter filter = new SingleColumnValueFilter(
144         cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
145     filter.setFilterIfMissing(true);
146     scan.setFilter(filter);
147     scan.setLoadColumnFamiliesOnDemand(!slow);
148 
149     ResultScanner result_scanner = table.getScanner(scan);
150     Result res;
151     long rows_count = 0;
152     while ((res = result_scanner.next()) != null) {
153       rows_count++;
154     }
155 
156     double timeSec = (System.nanoTime() - time) / 1000000000.0;
157     result_scanner.close();
158     LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
159       + " seconds, got " + Long.toString(rows_count/2) + " rows");
160   }
161 
162   private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey,
163       String callingMethod, Configuration conf, byte[]... families)
164       throws IOException {
165     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
166     for(byte [] family : families) {
167       htd.addFamily(new HColumnDescriptor(family));
168     }
169     HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
170     Path path = new Path(DIR + callingMethod);
171     FileSystem fs = FileSystem.get(conf);
172     if (fs.exists(path)) {
173       if (!fs.delete(path, true)) {
174         throw new IOException("Failed delete of " + path);
175       }
176     }
177     return HRegion.createHRegion(info, path, conf, htd);
178   }
179 }