public class UpdateHostDbMapper extends Object implements Mapper<Text,Writable,Text,NutchWritable>
Modifier and Type | Field and Description |
---|---|
protected String[] |
args |
protected String |
buffer |
protected CrawlDatum |
crawlDatum |
protected boolean |
filter |
protected URLFilters |
filters |
protected Text |
host |
protected HostDatum |
hostDatum |
protected boolean |
normalize |
protected URLNormalizers |
normalizers |
protected boolean |
readingCrawlDb |
protected String |
reprUrl |
Constructor and Description |
---|
UpdateHostDbMapper() |
Modifier and Type | Method and Description |
---|---|
void |
close() |
void |
configure(JobConf job) |
protected String |
filterNormalize(String url)
Filters and/or normalizes the input URL.
|
void |
map(Text key,
Writable value,
OutputCollector<Text,NutchWritable> output,
Reporter reporter)
Mapper ingesting records from the HostDB, CrawlDB and plaintext host
scores file.
|
protected Text host
protected HostDatum hostDatum
protected CrawlDatum crawlDatum
protected String reprUrl
protected String buffer
protected String[] args
protected boolean filter
protected boolean normalize
protected boolean readingCrawlDb
protected URLFilters filters
protected URLNormalizers normalizers
public void close()
close
in interface Closeable
close
in interface AutoCloseable
public void configure(JobConf job)
configure
in interface JobConfigurable
JobConf
protected String filterNormalize(String url)
String
public void map(Text key, Writable value, OutputCollector<Text,NutchWritable> output, Reporter reporter) throws IOException
map
in interface Mapper<Text,Writable,Text,NutchWritable>
Text - key
Writable - value
OutputCollector - output
Reporter - reporter
IOException
Copyright © 2017 The Apache Software Foundation