org.apache.mahout.text
Class SequenceFilesFromDirectoryFilter

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.mahout.common.AbstractJob
          extended by org.apache.mahout.text.SequenceFilesFromDirectory
              extended by org.apache.mahout.text.SequenceFilesFromDirectoryFilter
All Implemented Interfaces:
org.apache.hadoop.conf.Configurable, org.apache.hadoop.fs.PathFilter, org.apache.hadoop.util.Tool
Direct Known Subclasses:
PrefixAdditionFilter, SequenceFilesFromCsvFilter

public abstract class SequenceFilesFromDirectoryFilter
extends SequenceFilesFromDirectory
implements org.apache.hadoop.fs.PathFilter

Extend this abstract class and implement its abstract process(FileStatus, Path) method if you wish to extend SequenceFilesFromDirectory with your own parsing logic.


Field Summary
protected  Charset charset
           
protected  org.apache.hadoop.conf.Configuration conf
           
protected  org.apache.hadoop.fs.FileSystem fs
           
protected  Map<String,String> options
           
protected  String prefix
           
protected  ChunkedWriter writer
           
 
Fields inherited from class org.apache.mahout.text.SequenceFilesFromDirectory
CHARSET_OPTION, CHUNK_SIZE_OPTION, FILE_FILTER_CLASS_OPTION, KEY_PREFIX_OPTION
 
Constructor Summary
protected SequenceFilesFromDirectoryFilter()
           
protected SequenceFilesFromDirectoryFilter(org.apache.hadoop.conf.Configuration conf, String keyPrefix, Map<String,String> options, ChunkedWriter writer)
           
 
Method Summary
 boolean accept(org.apache.hadoop.fs.Path current)
           
protected  Map<String,String> getOptions()
           
protected abstract  void process(org.apache.hadoop.fs.FileStatus in, org.apache.hadoop.fs.Path current)
           
 
Methods inherited from class org.apache.mahout.text.SequenceFilesFromDirectory
addOptions, main, parseOptions, run, run
 
Methods inherited from class org.apache.mahout.common.AbstractJob
addFlag, addInputOption, addOption, addOption, addOption, addOption, addOutputOption, buildOption, getInputPath, getOption, getOutputPath, hasOption, keyFor, maybePut, parseArguments, parseDirectories, prepareJob, shouldRunNextPhase
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
 

Field Detail

prefix

protected final String prefix

writer

protected final ChunkedWriter writer

charset

protected final Charset charset

conf

protected final org.apache.hadoop.conf.Configuration conf

fs

protected final org.apache.hadoop.fs.FileSystem fs

options

protected final Map<String,String> options
Constructor Detail

SequenceFilesFromDirectoryFilter

protected SequenceFilesFromDirectoryFilter()

SequenceFilesFromDirectoryFilter

protected SequenceFilesFromDirectoryFilter(org.apache.hadoop.conf.Configuration conf,
                                           String keyPrefix,
                                           Map<String,String> options,
                                           ChunkedWriter writer)
                                    throws IOException
Throws:
IOException
Method Detail

getOptions

protected final Map<String,String> getOptions()

accept

public final boolean accept(org.apache.hadoop.fs.Path current)
Specified by:
accept in interface org.apache.hadoop.fs.PathFilter

process

protected abstract void process(org.apache.hadoop.fs.FileStatus in,
                                org.apache.hadoop.fs.Path current)
                         throws IOException
Throws:
IOException


Copyright © 2008-2011 The Apache Software Foundation. All Rights Reserved.