View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapred;
21  
22  import java.io.IOException;
23  import java.util.Map;
24  
25  import org.apache.hadoop.conf.Configured;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseConfiguration;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.hbase.client.Result;
31  import org.apache.hadoop.mapred.FileOutputFormat;
32  import org.apache.hadoop.mapred.JobClient;
33  import org.apache.hadoop.mapred.JobConf;
34  import org.apache.hadoop.mapred.OutputCollector;
35  import org.apache.hadoop.mapred.Reporter;
36  import org.apache.hadoop.mapred.lib.IdentityReducer;
37  import org.apache.hadoop.util.Tool;
38  import org.apache.hadoop.util.ToolRunner;
39  
40  /**
41   * A job with a map to count rows.
42   * Map outputs table rows IF the input row has columns that have content.
43   * Uses an {@link IdentityReducer}
44   */
45  @Deprecated
46  public class RowCounter extends Configured implements Tool {
47    // Name of this 'program'
48    static final String NAME = "rowcounter";
49  
50    /**
51     * Mapper that runs the count.
52     */
53    static class RowCounterMapper
54    implements TableMap<ImmutableBytesWritable, Result> {
55      private static enum Counters {ROWS}
56  
57      public void map(ImmutableBytesWritable row, Result values,
58          OutputCollector<ImmutableBytesWritable, Result> output,
59          Reporter reporter)
60      throws IOException {
61          // Count every row containing data, whether it's in qualifiers or values
62          reporter.incrCounter(Counters.ROWS, 1);
63      }
64  
65      public void configure(JobConf jc) {
66        // Nothing to do.
67      }
68  
69      public void close() throws IOException {
70        // Nothing to do.
71      }
72    }
73  
74    /**
75     * @param args
76     * @return the JobConf
77     * @throws IOException
78     */
79    public JobConf createSubmittableJob(String[] args) throws IOException {
80      JobConf c = new JobConf(getConf(), getClass());
81      c.setJobName(NAME);
82      // Columns are space delimited
83      StringBuilder sb = new StringBuilder();
84      final int columnoffset = 2;
85      for (int i = columnoffset; i < args.length; i++) {
86        if (i > columnoffset) {
87          sb.append(" ");
88        }
89        sb.append(args[i]);
90      }
91      // Second argument is the table name.
92      TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
93        RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
94      c.setNumReduceTasks(0);
95      // First arg is the output directory.
96      FileOutputFormat.setOutputPath(c, new Path(args[0]));
97      return c;
98    }
99  
100   static int printUsage() {
101     System.out.println(NAME +
102       " <outputdir> <tablename> <column1> [<column2>...]");
103     return -1;
104   }
105 
106   public int run(final String[] args) throws Exception {
107     // Make sure there are at least 3 parameters
108     if (args.length < 3) {
109       System.err.println("ERROR: Wrong number of parameters: " + args.length);
110       return printUsage();
111     }
112     JobClient.runJob(createSubmittableJob(args));
113     return 0;
114   }
115 
116   /**
117    * @param args
118    * @throws Exception
119    */
120   public static void main(String[] args) throws Exception {
121     int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
122     System.exit(errCode);
123   }
124 }