public class ParseSegment extends NutchTool implements Tool
Modifier and Type | Class and Description |
---|---|
static class | ParseSegment.ParseSegmentMapper |
static class | ParseSegment.ParseSegmentReducer |
Modifier and Type | Field and Description |
---|---|
static java.lang.String | SKIP_TRUNCATED |
currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
ParseSegment() |
ParseSegment(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
static boolean | isTruncated(Content content) Checks if the page's content is truncated. |
static void | main(java.lang.String[] args) |
void | parse(Path segment) |
java.util.Map<java.lang.String,java.lang.Object> | run(java.util.Map<java.lang.String,java.lang.Object> args, java.lang.String crawlId) Runs the tool, using a map of arguments. |
int | run(java.lang.String[] args) |
getProgress, getStatus, killJob, stopJob
getConf, setConf
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getConf, setConf
public static final java.lang.String SKIP_TRUNCATED
public ParseSegment()
public ParseSegment(Configuration conf)
public static boolean isTruncated(Content content)
Parameters: content - the page content to check
Returns: true if the page's content is truncated. When it is not, or when it could not be determined, false.
public void parse(Path segment) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
public int run(java.lang.String[] args) throws java.lang.Exception
public java.util.Map<java.lang.String,java.lang.Object> run(java.util.Map<java.lang.String,java.lang.Object> args, java.lang.String crawlId) throws java.lang.Exception
NutchTool
Copyright © 2018 The Apache Software Foundation