View Javadoc

/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20  package org.apache.hadoop.hbase.mapred;
21  
22  import java.io.IOException;
23  import java.util.Map;
24  
25  import org.apache.hadoop.conf.Configured;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseConfiguration;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.hbase.client.Result;
31  import org.apache.hadoop.mapred.FileOutputFormat;
32  import org.apache.hadoop.mapred.JobClient;
33  import org.apache.hadoop.mapred.JobConf;
34  import org.apache.hadoop.mapred.OutputCollector;
35  import org.apache.hadoop.mapred.Reporter;
36  import org.apache.hadoop.mapred.lib.IdentityReducer;
37  import org.apache.hadoop.util.Tool;
38  import org.apache.hadoop.util.ToolRunner;
39  
40  /**
41   * A job with a map to count rows.
42   * Map outputs table rows IF the input row has columns that have content.
43   * Uses an {@link IdentityReducer}
44   */
45  @Deprecated
46  public class RowCounter extends Configured implements Tool {
47    // Name of this 'program'
48    static final String NAME = "rowcounter";
49  
50    /**
51     * Mapper that runs the count.
52     */
53    static class RowCounterMapper
54    implements TableMap<ImmutableBytesWritable, Result> {
55      private static enum Counters {ROWS}
56  
57      public void map(ImmutableBytesWritable row, Result values,
58          OutputCollector<ImmutableBytesWritable, Result> output,
59          Reporter reporter)
60      throws IOException {
61        boolean content = false;
62  
63        for (KeyValue value: values.list()) {
64          if (value.getValue().length > 0) {
65            content = true;
66            break;
67          }
68        }
69        if (!content) {
70          // Don't count rows that are all empty values.
71          return;
72        }
73        // Give out same value every time.  We're only interested in the row/key
74        reporter.incrCounter(Counters.ROWS, 1);
75      }
76  
77      public void configure(JobConf jc) {
78        // Nothing to do.
79      }
80  
81      public void close() throws IOException {
82        // Nothing to do.
83      }
84    }
85  
86    /**
87     * @param args
88     * @return the JobConf
89     * @throws IOException
90     */
91    public JobConf createSubmittableJob(String[] args) throws IOException {
92      JobConf c = new JobConf(getConf(), getClass());
93      c.setJobName(NAME);
94      // Columns are space delimited
95      StringBuilder sb = new StringBuilder();
96      final int columnoffset = 2;
97      for (int i = columnoffset; i < args.length; i++) {
98        if (i > columnoffset) {
99          sb.append(" ");
100       }
101       sb.append(args[i]);
102     }
103     // Second argument is the table name.
104     TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
105       RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
106     c.setNumReduceTasks(0);
107     // First arg is the output directory.
108     FileOutputFormat.setOutputPath(c, new Path(args[0]));
109     return c;
110   }
111 
112   static int printUsage() {
113     System.out.println(NAME +
114       " <outputdir> <tablename> <column1> [<column2>...]");
115     return -1;
116   }
117 
118   public int run(final String[] args) throws Exception {
119     // Make sure there are at least 3 parameters
120     if (args.length < 3) {
121       System.err.println("ERROR: Wrong number of parameters: " + args.length);
122       return printUsage();
123     }
124     JobClient.runJob(createSubmittableJob(args));
125     return 0;
126   }
127 
128   /**
129    * @param args
130    * @throws Exception
131    */
132   public static void main(String[] args) throws Exception {
133     int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
134     System.exit(errCode);
135   }
136 }