org.apache.mahout.common
Class HadoopUtil

java.lang.Object
  extended by org.apache.mahout.common.HadoopUtil

public final class HadoopUtil
extends Object


Method Summary
static org.apache.hadoop.fs.Path cachedFile(org.apache.hadoop.conf.Configuration conf)
           
static void cacheFiles(org.apache.hadoop.fs.Path fileToCache, org.apache.hadoop.conf.Configuration conf)
           
static long countRecords(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
           
static long countRecords(org.apache.hadoop.fs.Path path, PathType pt, org.apache.hadoop.fs.PathFilter filter, org.apache.hadoop.conf.Configuration conf)
          Counts all the records in a directory using a SequenceFileDirValueIterator.
static void delete(org.apache.hadoop.conf.Configuration conf, Iterable<org.apache.hadoop.fs.Path> paths)
           
static void delete(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path... paths)
           
static String getCustomJobName(String className, org.apache.hadoop.mapreduce.JobContext job, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer)
           
static org.apache.hadoop.fs.FileStatus[] getFileStatus(org.apache.hadoop.fs.Path path, PathType pathType, org.apache.hadoop.fs.PathFilter filter, Comparator<org.apache.hadoop.fs.FileStatus> ordering, org.apache.hadoop.conf.Configuration conf)
           
static InputStream openStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
           
static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat, org.apache.hadoop.conf.Configuration conf)
          Creates a map-only Hadoop Job out of the passed-in parameters.
static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath, org.apache.hadoop.fs.Path outputPath, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat, Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper, Class<? extends org.apache.hadoop.io.Writable> mapperKey, Class<? extends org.apache.hadoop.io.Writable> mapperValue, Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer, Class<? extends org.apache.hadoop.io.Writable> reducerKey, Class<? extends org.apache.hadoop.io.Writable> reducerValue, Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat, org.apache.hadoop.conf.Configuration conf)
          Create a map and reduce Hadoop job.
static int readInt(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
           
static void setSerializations(org.apache.hadoop.conf.Configuration conf)
           
static void writeInt(int value, org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Method Detail

prepareJob

public static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath,
                                                         org.apache.hadoop.fs.Path outputPath,
                                                         Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
                                                         Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
                                                         Class<? extends org.apache.hadoop.io.Writable> mapperKey,
                                                         Class<? extends org.apache.hadoop.io.Writable> mapperValue,
                                                         Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat,
                                                         org.apache.hadoop.conf.Configuration conf)
                                                  throws IOException
Creates a map-only Hadoop Job out of the passed-in parameters. Does not set the Job name.

Throws:
IOException
See Also:
getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)

prepareJob

public static org.apache.hadoop.mapreduce.Job prepareJob(org.apache.hadoop.fs.Path inputPath,
                                                         org.apache.hadoop.fs.Path outputPath,
                                                         Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormat,
                                                         Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
                                                         Class<? extends org.apache.hadoop.io.Writable> mapperKey,
                                                         Class<? extends org.apache.hadoop.io.Writable> mapperValue,
                                                         Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer,
                                                         Class<? extends org.apache.hadoop.io.Writable> reducerKey,
                                                         Class<? extends org.apache.hadoop.io.Writable> reducerValue,
                                                         Class<? extends org.apache.hadoop.mapreduce.OutputFormat> outputFormat,
                                                         org.apache.hadoop.conf.Configuration conf)
                                                  throws IOException
Creates a map-and-reduce Hadoop job. Does not set the name on the job.

Parameters:
inputPath - The input Path
outputPath - The output Path
inputFormat - The InputFormat
mapper - The Mapper class to use
mapperKey - The Writable key class. If the Mapper is a no-op, this value may be null
mapperValue - The Writable value class. If the Mapper is a no-op, this value may be null
reducer - The Reducer to use
reducerKey - The reducer key class.
reducerValue - The reducer value class.
outputFormat - The OutputFormat.
conf - The Configuration to use.
Returns:
The Job.
Throws:
IOException - if there is a problem with the IO.
See Also:
getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class), prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, org.apache.hadoop.conf.Configuration)

getCustomJobName

public static String getCustomJobName(String className,
                                      org.apache.hadoop.mapreduce.JobContext job,
                                      Class<? extends org.apache.hadoop.mapreduce.Mapper> mapper,
                                      Class<? extends org.apache.hadoop.mapreduce.Reducer> reducer)

delete

public static void delete(org.apache.hadoop.conf.Configuration conf,
                          Iterable<org.apache.hadoop.fs.Path> paths)
                   throws IOException
Throws:
IOException

delete

public static void delete(org.apache.hadoop.conf.Configuration conf,
                          org.apache.hadoop.fs.Path... paths)
                   throws IOException
Throws:
IOException

countRecords

public static long countRecords(org.apache.hadoop.fs.Path path,
                                org.apache.hadoop.conf.Configuration conf)
                         throws IOException
Throws:
IOException

countRecords

public static long countRecords(org.apache.hadoop.fs.Path path,
                                PathType pt,
                                org.apache.hadoop.fs.PathFilter filter,
                                org.apache.hadoop.conf.Configuration conf)
                         throws IOException
Counts all the records in a directory using a SequenceFileDirValueIterator.

Parameters:
path - The Path to count
pt - The PathType
filter - Apply the PathFilter. May be null
conf - The Hadoop Configuration
Returns:
The number of records
Throws:
IOException - if there was an IO error

openStream

public static InputStream openStream(org.apache.hadoop.fs.Path path,
                                     org.apache.hadoop.conf.Configuration conf)
                              throws IOException
Throws:
IOException

getFileStatus

public static org.apache.hadoop.fs.FileStatus[] getFileStatus(org.apache.hadoop.fs.Path path,
                                                              PathType pathType,
                                                              org.apache.hadoop.fs.PathFilter filter,
                                                              Comparator<org.apache.hadoop.fs.FileStatus> ordering,
                                                              org.apache.hadoop.conf.Configuration conf)
                                                       throws IOException
Throws:
IOException

cacheFiles

public static void cacheFiles(org.apache.hadoop.fs.Path fileToCache,
                              org.apache.hadoop.conf.Configuration conf)

cachedFile

public static org.apache.hadoop.fs.Path cachedFile(org.apache.hadoop.conf.Configuration conf)
                                            throws IOException
Throws:
IOException

setSerializations

public static void setSerializations(org.apache.hadoop.conf.Configuration conf)

writeInt

public static void writeInt(int value,
                            org.apache.hadoop.fs.Path path,
                            org.apache.hadoop.conf.Configuration conf)
                     throws IOException
Throws:
IOException

readInt

public static int readInt(org.apache.hadoop.fs.Path path,
                          org.apache.hadoop.conf.Configuration conf)
                   throws IOException
Throws:
IOException


Copyright © 2008-2012 The Apache Software Foundation. All Rights Reserved.