org.apache.hadoop.hive.ql.exec
Class Operator<T extends Serializable>

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Operator<T>
All Implemented Interfaces:
Serializable, Node
Direct Known Subclasses:
CollectOperator, CommonJoinOperator, ExtractOperator, FilterOperator, ForwardOperator, GroupByOperator, LimitOperator, MapOperator, ScriptOperator, SelectOperator, TableScanOperator, TerminalOperator, UnionOperator

public abstract class Operator<T extends Serializable>
extends Object
implements Serializable, Node

Base operator implementation

See Also:
Serialized Form

Nested Class Summary
static interface Operator.OperatorFunc
           
static class Operator.State
           
 
Field Summary
protected  String alias
           
protected  List<Operator<? extends Serializable>> childOperators
           
protected  Operator<? extends Serializable>[] childOperatorsArray
          Cache childOperators in an array for faster access.
protected  int[] childOperatorsTag
           
protected  Map<String,exprNodeDesc> colExprMap
          A map of output column name to input expression map.
protected  T conf
           
protected  boolean done
           
protected  String id
           
protected  ObjectInspector[] inputObjInspectors
           
protected  org.apache.commons.logging.Log LOG
           
protected  org.apache.hadoop.mapred.OutputCollector out
           
protected  ObjectInspector outputObjInspector
           
protected  List<Operator<? extends Serializable>> parentOperators
           
protected  org.apache.hadoop.mapred.Reporter reporter
           
protected  Operator.State state
           
protected  HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> statsMap
           
 
Constructor Summary
Operator()
           
Operator(org.apache.hadoop.mapred.Reporter reporter)
          Create an operator with a reporter.
 
Method Summary
protected  boolean areAllParentsInitialized()
          checks whether all parent operators are initialized or not
 void close(boolean abort)
           
protected  void closeOp(boolean abort)
          Operator specific close routine.
 String dump(int level)
           
 void endGroup()
           
protected  void forward(Object row, ObjectInspector rowInspector)
           
 List<Operator<? extends Serializable>> getChildOperators()
           
 Vector<Node> getChildren()
          Implements the getChildren function for the Node Interface.
 Map<String,exprNodeDesc> getColumnExprMap()
          Returns a map of output column name to input expression. Note that currently it returns only key columns for ReduceSink and GroupBy operators.
 T getConf()
           
 boolean getDone()
           
 String getIdentifier()
          This function is not named getId(), to make sure java serialization does NOT serialize it.
 String getName()
          Implements the getName function for the Node Interface.
 List<Operator<? extends Serializable>> getParentOperators()
           
 RowSchema getSchema()
           
 Map<Enum<?>,Long> getStats()
           
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, ObjectInspector rowInspector)
          Initialize an array of ExprNodeEvaluator and return the result ObjectInspectors.
protected static StructObjectInspector initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals, List<String> outputColName, ObjectInspector rowInspector)
          Initialize an array of ExprNodeEvaluator and put the return values into a StructObjectInspector with integer field names.
 void initialize(org.apache.hadoop.conf.Configuration hconf, ObjectInspector[] inputOIs)
          Initializes operators only if all parents have been initialized.
protected  void initializeChildren(org.apache.hadoop.conf.Configuration hconf)
          Calls initialize on each of the children with outputObjInspector as the output row format.
protected  void initializeOp(org.apache.hadoop.conf.Configuration hconf)
          Operator specific initialization.
 void jobClose(org.apache.hadoop.conf.Configuration conf, boolean success)
          Unlike other operator interfaces which are called from map or reduce task, jobClose is called from the jobclient side once the job has completed
 void logStats()
           
 void preorderMap(Operator.OperatorFunc opFunc)
           
abstract  void process(Object row, int tag)
          Process the row.
 void removeChild(Operator<? extends Serializable> child)
           
 void replaceChild(Operator<? extends Serializable> child, Operator<? extends Serializable> newChild)
          Replace one child with another at the same position.
 void replaceParent(Operator<? extends Serializable> parent, Operator<? extends Serializable> newParent)
          Replace one parent with another at the same position.
 void resetStats()
           
 void setAlias(String alias)
          Store the alias this operator is working on behalf of
 void setChildOperators(List<Operator<? extends Serializable>> childOperators)
           
 void setColumnExprMap(Map<String,exprNodeDesc> colExprMap)
           
 void setConf(T conf)
           
 void setDone(boolean done)
           
 void setId(String id)
           
 void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)
           
 void setParentOperators(List<Operator<? extends Serializable>> parentOperators)
           
 void setReporter(org.apache.hadoop.mapred.Reporter rep)
           
 void setSchema(RowSchema rowSchema)
           
 void startGroup()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

childOperators

protected List<Operator<? extends Serializable>> childOperators

parentOperators

protected List<Operator<? extends Serializable>> parentOperators

state

protected transient Operator.State state

conf

protected T conf

done

protected boolean done

statsMap

protected transient HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> statsMap

out

protected transient org.apache.hadoop.mapred.OutputCollector out

LOG

protected transient org.apache.commons.logging.Log LOG

alias

protected transient String alias

reporter

protected transient org.apache.hadoop.mapred.Reporter reporter

id

protected transient String id

inputObjInspectors

protected transient ObjectInspector[] inputObjInspectors

outputObjInspector

protected transient ObjectInspector outputObjInspector

colExprMap

protected transient Map<String,exprNodeDesc> colExprMap
A map of output column name to input expression. This is used by the optimizer and is built during semantic analysis; it contains only key elements for reduce sink and group by operators.


childOperatorsArray

protected transient Operator<? extends Serializable>[] childOperatorsArray
Cache childOperators in an array for faster access. childOperatorsArray is accessed per row, so it's important to make the access efficient.


childOperatorsTag

protected transient int[] childOperatorsTag
Constructor Detail

Operator

public Operator()

Operator

public Operator(org.apache.hadoop.mapred.Reporter reporter)
Create an operator with a reporter.

Parameters:
reporter - Used to report progress of certain operators.
Method Detail

setChildOperators

public void setChildOperators(List<Operator<? extends Serializable>> childOperators)

getChildOperators

public List<Operator<? extends Serializable>> getChildOperators()

getChildren

public Vector<Node> getChildren()
Implements the getChildren function for the Node Interface.

Specified by:
getChildren in interface Node
Returns:
Vector

setParentOperators

public void setParentOperators(List<Operator<? extends Serializable>> parentOperators)

getParentOperators

public List<Operator<? extends Serializable>> getParentOperators()

setConf

public void setConf(T conf)

getConf

public T getConf()

getDone

public boolean getDone()

setDone

public void setDone(boolean done)

setSchema

public void setSchema(RowSchema rowSchema)

getSchema

public RowSchema getSchema()

setId

public void setId(String id)

getIdentifier

public String getIdentifier()
This function is not named getId(), to make sure java serialization does NOT serialize it. Some TestParse tests will fail if we serialize this field, since the Operator ID will change based on the number of query tests.


setReporter

public void setReporter(org.apache.hadoop.mapred.Reporter rep)

setOutputCollector

public void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)

setAlias

public void setAlias(String alias)
Store the alias this operator is working on behalf of


getStats

public Map<Enum<?>,Long> getStats()

areAllParentsInitialized

protected boolean areAllParentsInitialized()
checks whether all parent operators are initialized or not

Returns:
true if there are no parents or all parents are initialized. false otherwise

initialize

public void initialize(org.apache.hadoop.conf.Configuration hconf,
                       ObjectInspector[] inputOIs)
                throws HiveException
Initializes operators only if all parents have been initialized. Calls operator specific initializer which then initializes child ops.

Parameters:
hconf -
inputOIs - input object inspector array indexed by tag id. A null value is ignored.
Throws:
HiveException

initializeOp

protected void initializeOp(org.apache.hadoop.conf.Configuration hconf)
                     throws HiveException
Operator specific initialization.

Throws:
HiveException

initializeChildren

protected void initializeChildren(org.apache.hadoop.conf.Configuration hconf)
                           throws HiveException
Calls initialize on each of the children with outputObjInspector as the output row format.

Throws:
HiveException

process

public abstract void process(Object row,
                             int tag)
                      throws HiveException
Process the row.

Parameters:
row - The object representing the row.
tag - The tag of the row usually means which parent this row comes from. Rows with the same tag should have exactly the same rowInspector all the time.
Throws:
HiveException

startGroup

public void startGroup()
                throws HiveException
Throws:
HiveException

endGroup

public void endGroup()
              throws HiveException
Throws:
HiveException

close

public void close(boolean abort)
           throws HiveException
Throws:
HiveException

closeOp

protected void closeOp(boolean abort)
                throws HiveException
Operator specific close routine. Operators that inherit from this class should override this function for their specific cleanup routine.

Throws:
HiveException

jobClose

public void jobClose(org.apache.hadoop.conf.Configuration conf,
                     boolean success)
              throws HiveException
Unlike other operator interfaces which are called from map or reduce task, jobClose is called from the jobclient side once the job has completed

Parameters:
conf - Configuration with which the job was submitted
success - whether the job was completed successfully or not
Throws:
HiveException

replaceChild

public void replaceChild(Operator<? extends Serializable> child,
                         Operator<? extends Serializable> newChild)
Replace one child with another at the same position. The parent of the child is not changed

Parameters:
child - the old child
newChild - the new child

removeChild

public void removeChild(Operator<? extends Serializable> child)

replaceParent

public void replaceParent(Operator<? extends Serializable> parent,
                          Operator<? extends Serializable> newParent)
Replace one parent with another at the same position. Children of the new parent are not updated.

Parameters:
parent - the old parent
newParent - the new parent

forward

protected void forward(Object row,
                       ObjectInspector rowInspector)
                throws HiveException
Throws:
HiveException

resetStats

public void resetStats()

preorderMap

public void preorderMap(Operator.OperatorFunc opFunc)

logStats

public void logStats()

getName

public String getName()
Implements the getName function for the Node Interface.

Specified by:
getName in interface Node
Returns:
the name of the operator

getColumnExprMap

public Map<String,exprNodeDesc> getColumnExprMap()
Returns a map of output column name to input expression. Note that currently it returns only key columns for ReduceSink and GroupBy operators.

Returns:
null if the operator doesn't change columns

setColumnExprMap

public void setColumnExprMap(Map<String,exprNodeDesc> colExprMap)

dump

public String dump(int level)

initEvaluators

protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals,
                                                  ObjectInspector rowInspector)
                                           throws HiveException
Initialize an array of ExprNodeEvaluator and return the result ObjectInspectors.

Throws:
HiveException

initEvaluatorsAndReturnStruct

protected static StructObjectInspector initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals,
                                                                     List<String> outputColName,
                                                                     ObjectInspector rowInspector)
                                                              throws HiveException
Initialize an array of ExprNodeEvaluator and put the return values into a StructObjectInspector with integer field names.

Throws:
HiveException


Copyright © 2009 The Apache Software Foundation