org.apache.hadoop.hive.ql.exec
Class Utilities

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Utilities

public class Utilities
extends Object


Nested Class Summary
static class Utilities.EnumDelegate
          Java 1.5 workaround.
static class Utilities.ReduceField
          The objects in the reducer are composed of these top-level fields.
static class Utilities.StreamPrinter
           
static class Utilities.streamStatus
           
static class Utilities.Tuple<T,V>
           
 
Field Summary
static int ctrlaCode
           
static tableDesc defaultTd
           
static int newLineCode
           
static String NSTR
           
static String nullStringOutput
           
static String nullStringStorage
           
static Random randGen
           
static int tabCode
           
 
Constructor Summary
Utilities()
           
 
Method Summary
static String abbreviate(String str, int max)
          Convert "From src insert blah blah" to "From src insert ... blah".
static void addMapWork(mapredWork mr, Table tbl, String alias, Operator<?> work)
           
static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths)
          Add new elements to the classpath
static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace)
           
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out)
          Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out, boolean isCompressed)
          Convert an output stream to a compressed output stream based on codecs in the Job Configuration.
static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, boolean isCompressed)
          Create an RCFile output stream based on job configuration. Uses a user-supplied compression flag (rather than obtaining it from the Job Configuration).
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass)
          Create a sequencefile output stream based on job configuration
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, boolean isCompressed)
          Create a sequencefile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration)
static mapredWork deserializeMapRedWork(InputStream in, org.apache.hadoop.conf.Configuration conf)
           
static List<String> getFieldSchemaString(List<FieldSchema> fl)
           
static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed)
          Based on compression option and configured output codec - get extension for output file.
static mapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static String getNameMessage(Exception e)
           
static String getOpTreeSkel(Operator<?> op)
           
static partitionDesc getPartitionDesc(Partition part)
           
static tableDesc getTableDesc(Table tbl)
           
static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
          Gets the task id if we are running as a Hadoop job.
static String getTaskIdFromFilename(String filename)
          Get the task id from the filename.
static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
          Detect if the supplied file is a temporary path
static ArrayList makeList(Object... olist)
           
static HashMap makeMap(Object... olist)
           
static Properties makeProperties(String... olist)
           
static List<String> mergeUniqElems(List<String> src, List<String> dest)
           
static Utilities.streamStatus readColumn(DataInput in, OutputStream out)
           
static String realFile(String newFile, org.apache.hadoop.conf.Configuration conf)
          Shamelessly cloned from GenericOptionsParser
static void removeFromClassPath(String[] pathsToRemove)
          remove elements from the classpath
static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path)
          Remove all temporary files and duplicate (double-committed) files from a given directory.
static void rename(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or in the case dst already exists, move files in src to dst.
static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or in the case dst already exists, move files in src to dst.
static void serializeMapRedWork(mapredWork w, OutputStream out)
          Serialize the plan object to an output stream.
static void serializeTasks(Task<? extends Serializable> t, OutputStream out)
           
static void setMapRedWork(org.apache.hadoop.conf.Configuration job, mapredWork w)
           
static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)
           
static org.apache.hadoop.fs.Path toTempPath(String orig)
          Given a path, convert to a temporary path
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

defaultTd

public static tableDesc defaultTd

newLineCode

public static final int newLineCode
See Also:
Constant Field Values

tabCode

public static final int tabCode
See Also:
Constant Field Values

ctrlaCode

public static final int ctrlaCode
See Also:
Constant Field Values

nullStringStorage

public static String nullStringStorage

nullStringOutput

public static String nullStringOutput

randGen

public static Random randGen

NSTR

public static final String NSTR
See Also:
Constant Field Values
Constructor Detail

Utilities

public Utilities()
Method Detail

clearMapRedWork

public static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)

getMapRedWork

public static mapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)

getFieldSchemaString

public static List<String> getFieldSchemaString(List<FieldSchema> fl)

setMapRedWork

public static void setMapRedWork(org.apache.hadoop.conf.Configuration job,
                                 mapredWork w)

serializeTasks

public static void serializeTasks(Task<? extends Serializable> t,
                                  OutputStream out)

serializeMapRedWork

public static void serializeMapRedWork(mapredWork w,
                                       OutputStream out)
Serialize the plan object to an output stream. DO NOT use this to write to standard output, since it closes the output stream; use mapredWork.toXML() instead.


deserializeMapRedWork

public static mapredWork deserializeMapRedWork(InputStream in,
                                               org.apache.hadoop.conf.Configuration conf)

getTaskId

public static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
Gets the task id if we are running as a Hadoop job. Gets a random number otherwise.


makeMap

public static HashMap makeMap(Object... olist)

makeProperties

public static Properties makeProperties(String... olist)

makeList

public static ArrayList makeList(Object... olist)

getTableDesc

public static tableDesc getTableDesc(Table tbl)

getPartitionDesc

public static partitionDesc getPartitionDesc(Partition part)

addMapWork

public static void addMapWork(mapredWork mr,
                              Table tbl,
                              String alias,
                              Operator<?> work)

getOpTreeSkel

public static String getOpTreeSkel(Operator<?> op)

contentsEqual

public static boolean contentsEqual(InputStream is1,
                                    InputStream is2,
                                    boolean ignoreWhitespace)
                             throws IOException
Throws:
IOException

abbreviate

public static String abbreviate(String str,
                                int max)
Convert "From src insert blah blah" to "From src insert ... blah".


readColumn

public static Utilities.streamStatus readColumn(DataInput in,
                                                OutputStream out)
                                         throws IOException
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out)
                                           throws IOException
Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
Returns:
compressed output stream
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out,
                                                  boolean isCompressed)
                                           throws IOException
Convert an output stream to a compressed output stream based on codecs in the Job Configuration. The caller specifies directly whether the file is compressed or not.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
isCompressed - whether the output stream needs to be compressed or not
Returns:
compressed output stream
Throws:
IOException

getFileExtension

public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc,
                                      boolean isCompressed)
Based on compression option and configured output codec - get extension for output file. This is only required for text files - not sequencefiles

Parameters:
jc - Job Configuration
isCompressed - Whether the output file is compressed or not
Returns:
the required file extension (example: .gz)

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass)
                                                                     throws IOException
Create a sequencefile output stream based on job configuration

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass,
                                                                            boolean isCompressed)
                                                                     throws IOException
Create a sequencefile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration)

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException

createRCFileWriter

public static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc,
                                               org.apache.hadoop.fs.FileSystem fs,
                                               org.apache.hadoop.fs.Path file,
                                               boolean isCompressed)
                                        throws IOException
Create an RCFile output stream based on job configuration. Uses a user-supplied compression flag (rather than obtaining it from the Job Configuration).

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
Returns:
output stream over the created rcfile
Throws:
IOException

realFile

public static String realFile(String newFile,
                              org.apache.hadoop.conf.Configuration conf)
                       throws IOException
Shamelessly cloned from GenericOptionsParser

Throws:
IOException

mergeUniqElems

public static List<String> mergeUniqElems(List<String> src,
                                          List<String> dest)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(String orig)
Given a path, convert to a temporary path


isTempPath

public static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
Detect if the supplied file is a temporary path


rename

public static void rename(org.apache.hadoop.fs.FileSystem fs,
                          org.apache.hadoop.fs.Path src,
                          org.apache.hadoop.fs.Path dst)
                   throws IOException,
                          HiveException
Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

renameOrMoveFiles

public static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs,
                                     org.apache.hadoop.fs.Path src,
                                     org.apache.hadoop.fs.Path dst)
                              throws IOException,
                                     HiveException
Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

getTaskIdFromFilename

public static String getTaskIdFromFilename(String filename)
Get the task id from the filename. E.g., get "000000" out of "24931_r_000000_0" or "24931_r_000000_0.gz"


removeTempOrDuplicateFiles

public static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
                                              org.apache.hadoop.fs.Path path)
                                       throws IOException
Remove all temporary files and duplicate (double-committed) files from a given directory.

Throws:
IOException

getNameMessage

public static String getNameMessage(Exception e)

addToClassPath

public static ClassLoader addToClassPath(ClassLoader cloader,
                                         String[] newPaths)
                                  throws Exception
Add new elements to the classpath

Parameters:
newPaths - Array of classpath elements
Throws:
Exception

removeFromClassPath

public static void removeFromClassPath(String[] pathsToRemove)
                                throws Exception
remove elements from the classpath

Parameters:
pathsToRemove - Array of classpath elements
Throws:
Exception


Copyright © 2009 The Apache Software Foundation