public class ParserJob extends NutchTool implements org.apache.hadoop.util.Tool
Modifier and Type | Class and Description |
---|---|
static class |
ParserJob.ParserMapper |
Modifier and Type | Field and Description |
---|---|
static org.slf4j.Logger |
LOG |
static String |
SKIP_TRUNCATED |
currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
ParserJob() |
ParserJob(org.apache.hadoop.conf.Configuration conf) |
Modifier and Type | Method and Description |
---|---|
org.apache.hadoop.conf.Configuration |
getConf() |
Collection<WebPage.Field> |
getFields(org.apache.hadoop.mapreduce.Job job) |
static boolean |
isTruncated(String url,
WebPage page)
Checks if the page's content is truncated.
|
static void |
main(String[] args) |
int |
parse(String batchId,
boolean shouldResume,
boolean force) |
Map<String,Object> |
run(Map<String,Object> args)
Runs the tool, using a map of arguments.
|
int |
run(String[] args) |
void |
setConf(org.apache.hadoop.conf.Configuration conf) |
getProgress, getStatus, killJob, stopJob
public static final org.slf4j.Logger LOG
public static final String SKIP_TRUNCATED
public ParserJob()
public ParserJob(org.apache.hadoop.conf.Configuration conf)
public static boolean isTruncated(String url, WebPage page)
Parameters:
url -
page -
Returns:
true if the page's content is truncated. When it is not, or when it could not be determined, false.
public Collection<WebPage.Field> getFields(org.apache.hadoop.mapreduce.Job job)
public org.apache.hadoop.conf.Configuration getConf()
Specified by: getConf in interface org.apache.hadoop.conf.Configurable
Overrides: getConf in class org.apache.hadoop.conf.Configured
public void setConf(org.apache.hadoop.conf.Configuration conf)
Specified by: setConf in interface org.apache.hadoop.conf.Configurable
Overrides: setConf in class org.apache.hadoop.conf.Configured
public Map<String,Object> run(Map<String,Object> args) throws Exception
NutchTool
public int parse(String batchId, boolean shouldResume, boolean force) throws Exception
Throws: Exception
public int run(String[] args) throws Exception
Specified by: run in interface org.apache.hadoop.util.Tool
Throws: Exception
Copyright © 2015 The Apache Software Foundation