|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.apache.hadoop.conf.Configured
org.apache.mahout.common.AbstractJob
public abstract class AbstractJob
Superclass of many Mahout Hadoop "jobs". A job drives configuration and launch of one or more maps and reduces in order to accomplish some task.
Command line arguments available to all subclasses are:
In addition, note some key command line parameters that are parsed by Hadoop, which jobs may need to set:
Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other arguments.
Constructor Summary | |
---|---|
protected |
AbstractJob()
|
Method Summary | |
---|---|
protected void |
addFlag(String name,
String shortName,
String description)
Add an option with no argument whose presence can be checked for using containsKey method on the map returned by parseArguments(String[]) ; |
protected void |
addInputOption()
Add the default input directory option, '-i' which takes a directory name as an argument. |
protected org.apache.commons.cli2.Option |
addOption(org.apache.commons.cli2.Option option)
Add an arbitrary option to the set of options this job will parse when parseArguments(String[]) is called. |
protected void |
addOption(String name,
String shortName,
String description)
Add an option to the set of options this job will parse when parseArguments(String[]) is called. |
protected void |
addOption(String name,
String shortName,
String description,
boolean required)
Add an option to the set of options this job will parse when parseArguments(String[]) is called. |
protected void |
addOption(String name,
String shortName,
String description,
String defaultValue)
Add an option to the set of options this job will parse when parseArguments(String[]) is called. |
protected void |
addOutputOption()
Add the default output directory option, '-o' which takes a directory name as an argument. |
protected static org.apache.commons.cli2.Option |
buildOption(String name,
String shortName,
String description,
boolean hasArg,
boolean required,
String defaultValue)
Build an option with the given parameters. |
protected Class<? extends org.apache.lucene.analysis.Analyzer> |
getAnalyzerClassFromOption()
|
protected org.apache.commons.cli2.Option |
getCLIOption(String name)
|
protected org.apache.hadoop.fs.Path |
getCombinedTempPath(String directory1,
String directory2)
|
protected org.apache.commons.cli2.Group |
getGroup()
|
protected org.apache.hadoop.fs.Path |
getInputPath()
Returns the input path established by a call to parseArguments(String[]) . |
String |
getOption(String optionName)
|
String |
getOption(String optionName,
String defaultVal)
Get the option, else the default |
protected org.apache.hadoop.fs.Path |
getOutputPath()
Returns the output path established by a call to parseArguments(String[]) . |
protected org.apache.hadoop.fs.Path |
getOutputPath(String path)
|
protected org.apache.hadoop.fs.Path |
getTempPath()
|
protected org.apache.hadoop.fs.Path |
getTempPath(String directory)
|
boolean |
hasOption(String optionName)
|
static String |
keyFor(String optionName)
Build the option key (--name) from the option name |
protected static void |
maybePut(Map<String,String> args,
org.apache.commons.cli2.CommandLine cmdLine,
org.apache.commons.cli2.Option... opt)
|
Map<String,String> |
parseArguments(String[] args)
Parse the arguments specified based on the options defined using the various addOption methods. |
protected void |
parseDirectories(org.apache.commons.cli2.CommandLine cmdLine)
Obtain input and output directories from command-line options or hadoop properties. |
protected org.apache.hadoop.mapreduce.Job |
prepareJob(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputPath,
Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.io.Writable> mapperKey,
Class<? extends org.apache.hadoop.io.Writable> mapperValue,
Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat)
|
protected org.apache.hadoop.mapreduce.Job |
prepareJob(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputPath,
Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.io.Writable> mapperKey,
Class<? extends org.apache.hadoop.io.Writable> mapperValue,
Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer,
Class<? extends org.apache.hadoop.io.Writable> reducerKey,
Class<? extends org.apache.hadoop.io.Writable> reducerValue,
Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat)
|
protected org.apache.hadoop.mapreduce.Job |
prepareJob(org.apache.hadoop.fs.Path inputPath,
org.apache.hadoop.fs.Path outputPath,
Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
Class<? extends org.apache.hadoop.io.Writable> mapperKey,
Class<? extends org.apache.hadoop.io.Writable> mapperValue,
Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer,
Class<? extends org.apache.hadoop.io.Writable> reducerKey,
Class<? extends org.apache.hadoop.io.Writable> reducerValue)
|
static void |
setS3SafeCombinedInputPath(org.apache.hadoop.mapreduce.Job job,
org.apache.hadoop.fs.Path referencePath,
org.apache.hadoop.fs.Path inputPathOne,
org.apache.hadoop.fs.Path inputPathTwo)
necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is obsolete when MultipleInputs is available again |
protected static boolean |
shouldRunNextPhase(Map<String,String> args,
AtomicInteger currentPhase)
|
Methods inherited from class org.apache.hadoop.conf.Configured |
---|
getConf, setConf |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Methods inherited from interface org.apache.hadoop.util.Tool |
---|
run |
Methods inherited from interface org.apache.hadoop.conf.Configurable |
---|
getConf, setConf |
Constructor Detail |
---|
protected AbstractJob()
Method Detail |
---|
protected org.apache.hadoop.fs.Path getInputPath()
parseArguments(String[])
.
The source of the path may be an input option added using addInputOption()
or it may be the value of the mapred.input.dir
configuration
property.
protected org.apache.hadoop.fs.Path getOutputPath()
parseArguments(String[])
.
The source of the path may be an output option added using addOutputOption()
or it may be the value of the mapred.output.dir
configuration
property.
protected org.apache.hadoop.fs.Path getOutputPath(String path)
protected org.apache.hadoop.fs.Path getTempPath()
protected org.apache.hadoop.fs.Path getTempPath(String directory)
protected org.apache.hadoop.fs.Path getCombinedTempPath(String directory1, String directory2)
protected void addFlag(String name, String shortName, String description)
containsKey
method on the map returned by parseArguments(String[])
;
protected void addOption(String name, String shortName, String description)
parseArguments(String[])
is called. This option has an argument
with null as its default value.
protected void addOption(String name, String shortName, String description, boolean required)
parseArguments(String[])
is called.
required
- if true, parseArguments(String[])
will fail with an error and usage message if this option is not specified
on the command line.
on the command line.protected void addOption(String name, String shortName, String description, String defaultValue)
parseArguments(String[])
is called. If this option is not
specified on the command line the default value will be
used.
defaultValue
- the default argument value if this argument is not
found on the command-line. null is allowed.protected org.apache.commons.cli2.Option addOption(org.apache.commons.cli2.Option option)
parseArguments(String[])
is called. If this option has no
argument, use containsKey
on the map returned by
parseArguments
to check for its presence. Otherwise, the
string value of the option will be placed in the map using a key
equal to this option's long name preceded by '--'.
protected org.apache.commons.cli2.Group getGroup()
protected void addInputOption()
parseArguments(String[])
is
called, the inputPath will be set based upon the value for this option.
If this method is called, the input is required.
protected void addOutputOption()
parseArguments(String[])
is
called, the outputPath will be set based upon the value for this option.
If this method is called, the output is required.
protected static org.apache.commons.cli2.Option buildOption(String name, String shortName, String description, boolean hasArg, boolean required, String defaultValue)
name
- the long name of the option prefixed with '--' on the command-lineshortName
- the short name of the option, prefixed with '-' on the command-linedescription
- description of the option displayed in help methodhasArg
- true if the option has an argument.required
- true if the option is required.defaultValue
- default argument value, can be null.
protected org.apache.commons.cli2.Option getCLIOption(String name)
name
- The name of the option
Option
with the name, else nullpublic Map<String,String> parseArguments(String[] args) throws IOException
addOption
methods. If -h is specified or an
exception is encountered print help and return null. Has the
side effect of setting inputPath and outputPath
if addInputOption
or addOutputOption
or mapred.input.dir
or mapred.output.dir
are present in the Configuration.
Map<String,String>
containing options and their argument values.
The presence of a flag can be tested using containsKey
, while
argument values can be retrieved using get(optionName)
. The
names used for keys are the option name parameter prefixed by '--'.
IOException
public static String keyFor(String optionName)
public String getOption(String optionName)
public String getOption(String optionName, String defaultVal)
optionName
- The name of the option to look up, without the --defaultVal
- The default value.
public boolean hasOption(String optionName)
protected void parseDirectories(org.apache.commons.cli2.CommandLine cmdLine)
addInputOption
or addOutputOption
has been called, this method will throw an OptionException
if
no source (command-line or property) for that value is present.
Otherwise, inputPath
or outputPath
will be
non-null only if specified as a hadoop property. Command-line options
take precedence over hadoop properties.
cmdLine
-
IllegalArgumentException
- if either inputOption is present,
and neither --input
nor -Dmapred.input.dir
are
specified or outputOption is present and neither --output
nor -Dmapred.output.dir
are specified.protected static void maybePut(Map<String,String> args, org.apache.commons.cli2.CommandLine cmdLine, org.apache.commons.cli2.Option... opt)
protected static boolean shouldRunNextPhase(Map<String,String> args, AtomicInteger currentPhase)
protected org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat) throws IOException
IOException
protected org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer, Class<? extends org.apache.hadoop.io.Writable> reducerKey, Class<? extends org.apache.hadoop.io.Writable> reducerValue) throws IOException
IOException
protected org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer, Class<? extends org.apache.hadoop.io.Writable> reducerKey, Class<? extends org.apache.hadoop.io.Writable> reducerValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat) throws IOException
IOException
public static void setS3SafeCombinedInputPath(org.apache.hadoop.mapreduce.Job job, org.apache.hadoop.fs.Path referencePath, org.apache.hadoop.fs.Path inputPathOne, org.apache.hadoop.fs.Path inputPathTwo) throws IOException
IOException
protected Class<? extends org.apache.lucene.analysis.Analyzer> getAnalyzerClassFromOption() throws ClassNotFoundException
ClassNotFoundException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |