org.apache.nutch.crawl
Class LinkDb

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.crawl.LinkDb
All Implemented Interfaces:
Closeable, Configurable, JobConfigurable, Mapper, Reducer

public class LinkDb
extends Configured
implements Mapper, Reducer

Maintains an inverted link map, listing incoming links for each URL.


Nested Class Summary
static class LinkDb.Merger
           
 
Field Summary
static String CURRENT_NAME
           
static org.apache.commons.logging.Log LOG
           
 
Constructor Summary
LinkDb()
           
LinkDb(Configuration conf)
          Construct a LinkDb.
 
Method Summary
 void close()
           
 void configure(JobConf job)
           
static JobConf createMergeJob(Configuration config, Path linkDb)
           
static void install(JobConf job, Path linkDb)
           
 void invert(Path linkDb, Path segmentsDir)
           
 void invert(Path linkDb, Path[] segments)
           
static void main(String[] args)
           
 void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
           
 void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
           
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public static final org.apache.commons.logging.Log LOG

CURRENT_NAME

public static String CURRENT_NAME
Constructor Detail

LinkDb

public LinkDb()

LinkDb

public LinkDb(Configuration conf)
Construct a LinkDb.

Method Detail

configure

public void configure(JobConf job)
Specified by:
configure in interface JobConfigurable

close

public void close()
Specified by:
close in interface Closeable

map

public void map(WritableComparable key,
                Writable value,
                OutputCollector output,
                Reporter reporter)
         throws IOException
Specified by:
map in interface Mapper
Throws:
IOException

reduce

public void reduce(WritableComparable key,
                   Iterator values,
                   OutputCollector output,
                   Reporter reporter)
            throws IOException
Specified by:
reduce in interface Reducer
Throws:
IOException

invert

public void invert(Path linkDb,
                   Path segmentsDir)
            throws IOException
Throws:
IOException

invert

public void invert(Path linkDb,
                   Path[] segments)
            throws IOException
Throws:
IOException

createMergeJob

public static JobConf createMergeJob(Configuration config,
                                     Path linkDb)

install

public static void install(JobConf job,
                           Path linkDb)
                    throws IOException
Throws:
IOException

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception


Copyright © 2006 The Apache Software Foundation