org.apache.hadoop.hive.ql.exec
Class Utilities

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Utilities

public final class Utilities
extends Object

Utilities.


Nested Class Summary
static class Utilities.CollectionPersistenceDelegate
           
static class Utilities.EnumDelegate
          Java 1.5 workaround.
static class Utilities.ListDelegate
           
static class Utilities.MapDelegate
           
static class Utilities.ReduceField
          ReduceField.
static class Utilities.SetDelegate
           
static class Utilities.StreamPrinter
          StreamPrinter.
static class Utilities.StreamStatus
          StreamStatus.
static class Utilities.Tuple<T,V>
          Tuple.
 
Field Summary
static int ctrlaCode
           
static TableDesc defaultTd
           
static String HADOOP_LOCAL_FS
          The objects in the reducer are composed of these top-level fields.
static String INDENT
           
static int newLineCode
           
static String NSTR
           
static String nullStringOutput
           
static String nullStringStorage
           
static Random randGen
           
static int tabCode
           
 
Method Summary
static String abbreviate(String str, int max)
          Convert "From src insert blah blah" to "From src insert ...
static void addMapWork(MapredWork mr, Table tbl, String alias, Operator<?> work)
           
static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths)
          Add new elements to the classpath.
static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace)
           
static void copyTableJobPropertiesToConf(TableDesc tbl, org.apache.hadoop.mapred.JobConf job)
          Copies the storage handler properties configured for a table descriptor to a runtime job configuration.
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out)
          Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out, boolean isCompressed)
          Convert an output stream to a compressed output stream based on codecs in the Job Configuration.
static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, boolean isCompressed)
          Create a RCFile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration).
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass)
          Create a sequencefile output stream based on job configuration.
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, boolean isCompressed)
          Create a sequencefile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration).
static MapredWork deserializeMapRedWork(InputStream in, org.apache.hadoop.conf.Configuration conf)
           
static QueryPlan deserializeQueryPlan(InputStream in, org.apache.hadoop.conf.Configuration conf)
          Deserialize the whole query plan.
static String formatBinaryString(byte[] array, int start, int length)
           
static List<String> getColumnNames(Properties props)
           
static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partCols)
           
static List<String> getColumnNamesFromSortCols(List<Order> sortCols)
           
static List<String> getColumnTypes(Properties props)
           
static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
          Gets the default notification interval to send progress updates to the tracker.
static List<String> getFieldSchemaString(List<FieldSchema> fl)
           
static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed)
          Based on compression option and configured output codec - get extension for output file.
static org.apache.hadoop.fs.FileStatus[] getFileStatusRecurse(org.apache.hadoop.fs.Path path, int level, org.apache.hadoop.fs.FileSystem fs)
          Get all file status from a root path and recursively go deep into certain levels.
static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static String getNameMessage(Exception e)
           
static String getOpTreeSkel(Operator<?> op)
           
static PartitionDesc getPartitionDesc(Partition part)
           
static TableDesc getTableDesc(String cols, String colTypes)
           
static TableDesc getTableDesc(Table tbl)
           
static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
          Gets the task id if we are running as a Hadoop job.
static String getTaskIdFromFilename(String filename)
          Get the task id from the filename.
static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
          Detect if the supplied file is a temporary path.
static ArrayList makeList(Object... olist)
           
static HashMap makeMap(Object... olist)
           
static Properties makeProperties(String... olist)
           
static List<String> mergeUniqElems(List<String> src, List<String> dest)
           
static Utilities.StreamStatus readColumn(DataInput in, OutputStream out)
           
static String realFile(String newFile, org.apache.hadoop.conf.Configuration conf)
          Shamelessly cloned from GenericOptionsParser.
static void removeFromClassPath(String[] pathsToRemove)
          Remove elements from the classpath.
static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items, org.apache.hadoop.fs.FileSystem fs)
           
static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path)
          Remove all temporary files and duplicate (double-committed) files from a given directory.
static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, DynamicPartitionCtx dpCtx)
          Remove all temporary files and duplicate (double-committed) files from a given directory.
static void rename(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or in the case dst already exists, move files in src to dst.
static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or in the case dst already exists, move files in src to dst.
static String replaceTaskId(String taskId, int bucketNum)
           
static String replaceTaskIdFromFilename(String filename, int bucketNum)
          Replace the task id from the filename.
static String replaceTaskIdFromFilename(String filename, String oldTaskId, String newTaskId)
          Replace the oldTaskId appearing in the filename by the newTaskId.
static void serializeMapRedWork(MapredWork w, OutputStream out)
          Serialize the mapredWork object to an output stream.
static void serializeQueryPlan(QueryPlan plan, OutputStream out)
          Serialize the whole query plan.
static void serializeTasks(Task<? extends Serializable> t, OutputStream out)
          Serialize a single Task.
static void setMapRedWork(org.apache.hadoop.conf.Configuration job, MapredWork w, String hiveScratchDir)
           
static boolean supportCombineFileInputFormat()
           
static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)
           
static org.apache.hadoop.fs.Path toTempPath(String orig)
          Given a path, convert to a temporary path.
static void validateColumnNames(List<String> colNames, List<String> checkCols)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

HADOOP_LOCAL_FS

public static String HADOOP_LOCAL_FS
The objects in the reducer are composed of these top-level fields.


defaultTd

public static TableDesc defaultTd

newLineCode

public static final int newLineCode
See Also:
Constant Field Values

tabCode

public static final int tabCode
See Also:
Constant Field Values

ctrlaCode

public static final int ctrlaCode
See Also:
Constant Field Values

INDENT

public static final String INDENT
See Also:
Constant Field Values

nullStringStorage

public static String nullStringStorage

nullStringOutput

public static String nullStringOutput

randGen

public static Random randGen

NSTR

public static final String NSTR
See Also:
Constant Field Values
Method Detail

clearMapRedWork

public static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)

getMapRedWork

public static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)

getFieldSchemaString

public static List<String> getFieldSchemaString(List<FieldSchema> fl)

setMapRedWork

public static void setMapRedWork(org.apache.hadoop.conf.Configuration job,
                                 MapredWork w,
                                 String hiveScratchDir)

serializeTasks

public static void serializeTasks(Task<? extends Serializable> t,
                                  OutputStream out)
Serialize a single Task.


serializeQueryPlan

public static void serializeQueryPlan(QueryPlan plan,
                                      OutputStream out)
Serialize the whole query plan.


deserializeQueryPlan

public static QueryPlan deserializeQueryPlan(InputStream in,
                                             org.apache.hadoop.conf.Configuration conf)
Deserialize the whole query plan.


serializeMapRedWork

public static void serializeMapRedWork(MapredWork w,
                                       OutputStream out)
Serialize the mapredWork object to an output stream. DO NOT use this to write to standard output since it closes the output stream. DO USE mapredWork.toXML() instead.


deserializeMapRedWork

public static MapredWork deserializeMapRedWork(InputStream in,
                                               org.apache.hadoop.conf.Configuration conf)

getTaskId

public static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
Gets the task id if we are running as a Hadoop job. Gets a random number otherwise.


makeMap

public static HashMap makeMap(Object... olist)

makeProperties

public static Properties makeProperties(String... olist)

makeList

public static ArrayList makeList(Object... olist)

getTableDesc

public static TableDesc getTableDesc(Table tbl)

getTableDesc

public static TableDesc getTableDesc(String cols,
                                     String colTypes)

getPartitionDesc

public static PartitionDesc getPartitionDesc(Partition part)
                                      throws HiveException
Throws:
HiveException

addMapWork

public static void addMapWork(MapredWork mr,
                              Table tbl,
                              String alias,
                              Operator<?> work)

getOpTreeSkel

public static String getOpTreeSkel(Operator<?> op)

contentsEqual

public static boolean contentsEqual(InputStream is1,
                                    InputStream is2,
                                    boolean ignoreWhitespace)
                             throws IOException
Throws:
IOException

abbreviate

public static String abbreviate(String str,
                                int max)
Convert "From src insert blah blah" to "From src insert ... blah"


readColumn

public static Utilities.StreamStatus readColumn(DataInput in,
                                                OutputStream out)
                                         throws IOException
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out)
                                           throws IOException
Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
Returns:
compressed output stream
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out,
                                                  boolean isCompressed)
                                           throws IOException
Convert an output stream to a compressed output stream based on codecs in the Job Configuration. The caller specifies directly whether the file is compressed or not.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
isCompressed - whether the output stream needs to be compressed or not
Returns:
compressed output stream
Throws:
IOException

getFileExtension

public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc,
                                      boolean isCompressed)
Based on compression option and configured output codec - get extension for output file. This is only required for text files - not sequencefiles

Parameters:
jc - Job Configuration
isCompressed - Whether the output file is compressed or not
Returns:
the required file extension (example: .gz)

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass)
                                                                     throws IOException
Create a sequencefile output stream based on job configuration.

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass,
                                                                            boolean isCompressed)
                                                                     throws IOException
Create a sequencefile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration).

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException

createRCFileWriter

public static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc,
                                               org.apache.hadoop.fs.FileSystem fs,
                                               org.apache.hadoop.fs.Path file,
                                               boolean isCompressed)
                                        throws IOException
Create a RCFile output stream based on job configuration Uses user supplied compression flag (rather than obtaining it from the Job Configuration).

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
Returns:
output stream over the created rcfile
Throws:
IOException

realFile

public static String realFile(String newFile,
                              org.apache.hadoop.conf.Configuration conf)
                       throws IOException
Shamelessly cloned from GenericOptionsParser.

Throws:
IOException

mergeUniqElems

public static List<String> mergeUniqElems(List<String> src,
                                          List<String> dest)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(String orig)
Given a path, convert to a temporary path.


isTempPath

public static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
Detect if the supplied file is a temporary path.


rename

public static void rename(org.apache.hadoop.fs.FileSystem fs,
                          org.apache.hadoop.fs.Path src,
                          org.apache.hadoop.fs.Path dst)
                   throws IOException,
                          HiveException
Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

renameOrMoveFiles

public static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs,
                                     org.apache.hadoop.fs.Path src,
                                     org.apache.hadoop.fs.Path dst)
                              throws IOException,
                                     HiveException
Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

getTaskIdFromFilename

public static String getTaskIdFromFilename(String filename)
Get the task id from the filename. E.g., get "000000" out of "24931_r_000000_0" or "24931_r_000000_0.gz"


replaceTaskIdFromFilename

public static String replaceTaskIdFromFilename(String filename,
                                               int bucketNum)
Replace the task id from the filename. E.g., replace "000000" out of "24931_r_000000_0" or "24931_r_000000_0.gz" by 33 to "24931_r_000033_0" or "24931_r_000033_0.gz"


replaceTaskId

public static String replaceTaskId(String taskId,
                                   int bucketNum)

replaceTaskIdFromFilename

public static String replaceTaskIdFromFilename(String filename,
                                               String oldTaskId,
                                               String newTaskId)
Replace the oldTaskId appearing in the filename by the newTaskId. The string oldTaskId could appear multiple times, we should only replace the last one.

Parameters:
filename -
oldTaskId -
newTaskId -
Returns:

getFileStatusRecurse

public static org.apache.hadoop.fs.FileStatus[] getFileStatusRecurse(org.apache.hadoop.fs.Path path,
                                                                     int level,
                                                                     org.apache.hadoop.fs.FileSystem fs)
                                                              throws IOException
Get all file status from a root path and recursively go deep into certain levels.

Parameters:
path - the root path
level - the depth of directories to explore
fs - the file system
Returns:
array of FileStatus
Throws:
IOException

removeTempOrDuplicateFiles

public static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
                                              org.apache.hadoop.fs.Path path)
                                       throws IOException
Remove all temporary files and duplicate (double-committed) files from a given directory.

Throws:
IOException

removeTempOrDuplicateFiles

public static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
                                                           org.apache.hadoop.fs.Path path,
                                                           DynamicPartitionCtx dpCtx)
                                                    throws IOException
Remove all temporary files and duplicate (double-committed) files from a given directory.

Returns:
a list of path names corresponding to should-be-created empty buckets.
Throws:
IOException

removeTempOrDuplicateFiles

public static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items,
                                                                                         org.apache.hadoop.fs.FileSystem fs)
                                                                                  throws IOException
Throws:
IOException

getNameMessage

public static String getNameMessage(Exception e)

addToClassPath

public static ClassLoader addToClassPath(ClassLoader cloader,
                                         String[] newPaths)
                                  throws Exception
Add new elements to the classpath.

Parameters:
newPaths - Array of classpath elements
Throws:
Exception

removeFromClassPath

public static void removeFromClassPath(String[] pathsToRemove)
                                throws Exception
Remove elements from the classpath.

Parameters:
pathsToRemove - Array of classpath elements
Throws:
Exception

formatBinaryString

public static String formatBinaryString(byte[] array,
                                        int start,
                                        int length)

getColumnNamesFromSortCols

public static List<String> getColumnNamesFromSortCols(List<Order> sortCols)

getColumnNamesFromFieldSchema

public static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partCols)

getColumnNames

public static List<String> getColumnNames(Properties props)

getColumnTypes

public static List<String> getColumnTypes(Properties props)

validateColumnNames

public static void validateColumnNames(List<String> colNames,
                                       List<String> checkCols)
                                throws SemanticException
Throws:
SemanticException

getDefaultNotificationInterval

public static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
Gets the default notification interval to send progress updates to the tracker. Useful for operators that may not output data for a while.

Parameters:
hconf -
Returns:
the interval in milliseconds

copyTableJobPropertiesToConf

public static void copyTableJobPropertiesToConf(TableDesc tbl,
                                                org.apache.hadoop.mapred.JobConf job)
Copies the storage handler properties configured for a table descriptor to a runtime job configuration.

Parameters:
tbl - table descriptor from which to read
job - configuration which receives configured properties

supportCombineFileInputFormat

public static boolean supportCombineFileInputFormat()


Copyright © 2010 The Apache Software Foundation