public class CrawlDbReader extends AbstractChecker implements java.io.Closeable
Modifier and Type | Class and Description |
---|---|
static class | CrawlDbReader.CrawlDatumCsvOutputFormat |
static class | CrawlDbReader.CrawlDbDumpMapper |
static class | CrawlDbReader.CrawlDbStatMapper |
static class | CrawlDbReader.CrawlDbStatReducer |
static class | CrawlDbReader.CrawlDbTopNMapper |
static class | CrawlDbReader.CrawlDbTopNReducer |
Modifier and Type | Field and Description |
---|---|
protected java.lang.String | crawlDb |
Fields inherited from class AbstractChecker: keepClientCnxOpen, stdin, tcpPort, usage
Constructor and Description |
---|
CrawlDbReader() |
Modifier and Type | Method and Description |
---|---|
void | close() |
CrawlDatum | get(java.lang.String crawlDb, java.lang.String url, Configuration config) |
static void | main(java.lang.String[] args) |
protected int | process(java.lang.String line, java.lang.StringBuilder output) |
void | processDumpJob(java.lang.String crawlDb, java.lang.String output, Configuration config, java.lang.String format, java.lang.String regex, java.lang.String status, java.lang.Integer retry, java.lang.String expr, java.lang.Float sample) |
void | processStatJob(java.lang.String crawlDb, Configuration config, boolean sort) |
void | processTopNJob(java.lang.String crawlDb, long topN, float min, java.lang.String output, Configuration config) |
java.lang.Object | query(java.util.Map<java.lang.String,java.lang.String> args, Configuration conf, java.lang.String type, java.lang.String crawlId) |
void | readUrl(java.lang.String crawlDb, java.lang.String url, Configuration config, java.lang.StringBuilder output) |
int | run(java.lang.String[] args) |
Methods inherited from class AbstractChecker: getProtocolOutput, parseArgs, processSingle, processStdin, processTCP, run
Methods inherited from class Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Configurable: getConf, setConf
public void close()
Specified by: close in interface java.io.Closeable
Specified by: close in interface java.lang.AutoCloseable
public void processStatJob(java.lang.String crawlDb, Configuration config, boolean sort) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
Throws: java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
public CrawlDatum get(java.lang.String crawlDb, java.lang.String url, Configuration config) throws java.io.IOException
Throws: java.io.IOException
protected int process(java.lang.String line, java.lang.StringBuilder output) throws java.lang.Exception
Specified by: process in class AbstractChecker
Throws: java.lang.Exception
public void readUrl(java.lang.String crawlDb, java.lang.String url, Configuration config, java.lang.StringBuilder output) throws java.io.IOException
Throws: java.io.IOException
public void processDumpJob(java.lang.String crawlDb, java.lang.String output, Configuration config, java.lang.String format, java.lang.String regex, java.lang.String status, java.lang.Integer retry, java.lang.String expr, java.lang.Float sample) throws java.io.IOException, java.lang.ClassNotFoundException, java.lang.InterruptedException
Throws: java.io.IOException, java.lang.ClassNotFoundException, java.lang.InterruptedException
public void processTopNJob(java.lang.String crawlDb, long topN, float min, java.lang.String output, Configuration config) throws java.io.IOException, java.lang.ClassNotFoundException, java.lang.InterruptedException
Throws: java.io.IOException, java.lang.ClassNotFoundException, java.lang.InterruptedException
public int run(java.lang.String[] args) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException, java.lang.Exception
public static void main(java.lang.String[] args) throws java.lang.Exception
Throws: java.lang.Exception
public java.lang.Object query(java.util.Map<java.lang.String,java.lang.String> args, Configuration conf, java.lang.String type, java.lang.String crawlId) throws java.lang.Exception
Throws: java.lang.Exception
Copyright © 2018 The Apache Software Foundation