public class CrawlDb extends NutchTool implements Tool
Modifier and Type | Field and Description |
---|---|
static java.lang.String | CRAWLDB_ADDITIONS_ALLOWED |
static java.lang.String | CRAWLDB_PURGE_404 |
static java.lang.String | CRAWLDB_PURGE_ORPHANS |
static java.lang.String | CURRENT_NAME |
static java.lang.String | LOCK_NAME |
Fields inherited from class NutchTool: currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
CrawlDb() |
CrawlDb(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
static Job | createJob(Configuration config, Path crawlDb) |
static void | install(Job job, Path crawlDb) |
static Path | lock(Configuration job, Path crawlDb, boolean force) |
static void | main(java.lang.String[] args) |
java.util.Map<java.lang.String,java.lang.Object> | run(java.util.Map<java.lang.String,java.lang.Object> args, java.lang.String crawlId) — Runs the tool, using a map of arguments. |
int | run(java.lang.String[] args) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) |
Methods inherited from class NutchTool: getProgress, getStatus, killJob, stopJob
getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getConf, setConf
public static final java.lang.String CRAWLDB_ADDITIONS_ALLOWED
public static final java.lang.String CRAWLDB_PURGE_404
public static final java.lang.String CRAWLDB_PURGE_ORPHANS
public static final java.lang.String CURRENT_NAME
public static final java.lang.String LOCK_NAME
public CrawlDb()
public CrawlDb(Configuration conf)
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
Throws:
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public static Job createJob(Configuration config, Path crawlDb) throws java.io.IOException
Throws:
java.io.IOException
public static Path lock(Configuration job, Path crawlDb, boolean force) throws java.io.IOException
Throws:
java.io.IOException
public static void install(Job job, Path crawlDb) throws java.io.IOException
Throws:
java.io.IOException
public static void main(java.lang.String[] args) throws java.lang.Exception
Throws:
java.lang.Exception
public int run(java.lang.String[] args) throws java.lang.Exception
public java.util.Map<java.lang.String,java.lang.Object> run(java.util.Map<java.lang.String,java.lang.Object> args, java.lang.String crawlId) throws java.lang.Exception
NutchTool
Copyright © 2019 The Apache Software Foundation