org.apache.hadoop.hive.ql.exec
Class Operator<T extends Serializable>

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Operator<T>
All Implemented Interfaces:
Serializable, Node
Direct Known Subclasses:
CollectOperator, ExtractOperator, FilterOperator, ForwardOperator, GroupByOperator, JoinOperator, LimitOperator, MapOperator, ScriptOperator, SelectOperator, TableScanOperator, TerminalOperator, UnionOperator

public abstract class Operator<T extends Serializable>
extends Object
implements Serializable, Node

Base operator implementation

See Also:
Serialized Form

Nested Class Summary
static interface Operator.OperatorFunc
           
static class Operator.State
           
 
Field Summary
protected  String alias
           
protected  List<Operator<? extends Serializable>> childOperators
           
protected  T conf
           
protected  boolean done
           
protected  mapredWork gWork
           
protected  String id
           
protected  String joinAlias
           
protected  org.apache.commons.logging.Log LOG
           
protected  org.apache.hadoop.mapred.OutputCollector out
           
protected  List<Operator<? extends Serializable>> parentOperators
           
protected  org.apache.hadoop.mapred.Reporter reporter
           
protected  HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> statsMap
           
 
Constructor Summary
Operator()
           
Operator(org.apache.hadoop.mapred.Reporter reporter)
          Create an operator with a reporter.
 
Method Summary
 void close(boolean abort)
           
 String dump()
           
 void endGroup()
           
protected  void forward(Object row, ObjectInspector rowInspector)
           
 List<Operator<? extends Serializable>> getChildOperators()
           
 Vector<Node> getChildren()
          Implements the getChildren function for the Node Interface.
 T getConf()
           
 boolean getDone()
           
 String getName()
          Implements the getName function for the Node Interface.
 List<Operator<? extends Serializable>> getParentOperators()
           
 RowSchema getSchema()
           
 Map<Enum<?>,Long> getStats()
           
 void initialize(org.apache.hadoop.conf.Configuration hconf, org.apache.hadoop.mapred.Reporter reporter)
           
 void jobClose(org.apache.hadoop.conf.Configuration conf, boolean success)
          Unlike other operator interfaces, which are called from a map or reduce task, jobClose is called from the jobclient side once the job has completed.
 void logStats()
           
 void preorderMap(Operator.OperatorFunc opFunc)
           
abstract  void process(Object row, ObjectInspector rowInspector)
           
 void resetStats()
           
 void setAlias(String alias)
          Store the alias this operator is working on behalf of
 void setChildOperators(List<Operator<? extends Serializable>> childOperators)
           
 void setConf(T conf)
           
 void setDone(boolean done)
           
 void setId(String id)
           
 void setJoinAlias(String joinAlias)
          Store the join alias this operator is working on behalf of
 void setMapredWork(mapredWork gWork)
          Operators often need access to global variables.
 void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)
           
 void setParentOperators(List<Operator<? extends Serializable>> parentOperators)
           
 void setSchema(RowSchema rowSchema)
           
 void startGroup()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

childOperators

protected List<Operator<? extends Serializable>> childOperators

parentOperators

protected List<Operator<? extends Serializable>> parentOperators

conf

protected T conf

done

protected boolean done

statsMap

protected transient HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> statsMap

out

protected transient org.apache.hadoop.mapred.OutputCollector out

LOG

protected transient org.apache.commons.logging.Log LOG

gWork

protected transient mapredWork gWork

alias

protected transient String alias

joinAlias

protected transient String joinAlias

reporter

protected transient org.apache.hadoop.mapred.Reporter reporter

id

protected transient String id
Constructor Detail

Operator

public Operator()

Operator

public Operator(org.apache.hadoop.mapred.Reporter reporter)
Create an operator with a reporter.

Parameters:
reporter - Used to report progress of certain operators.
Method Detail

setChildOperators

public void setChildOperators(List<Operator<? extends Serializable>> childOperators)

getChildOperators

public List<Operator<? extends Serializable>> getChildOperators()

getChildren

public Vector<Node> getChildren()
Implements the getChildren function for the Node Interface.

Specified by:
getChildren in interface Node
Returns:
Vector

setParentOperators

public void setParentOperators(List<Operator<? extends Serializable>> parentOperators)

getParentOperators

public List<Operator<? extends Serializable>> getParentOperators()

setConf

public void setConf(T conf)

getConf

public T getConf()

getDone

public boolean getDone()

setDone

public void setDone(boolean done)

setSchema

public void setSchema(RowSchema rowSchema)

getSchema

public RowSchema getSchema()

setId

public void setId(String id)

setOutputCollector

public void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)

setMapredWork

public void setMapredWork(mapredWork gWork)
Operators often need access to global variables. This allows us to put global config information in the root configuration object and have that be accessible to all the operators in the tree.


setAlias

public void setAlias(String alias)
Store the alias this operator is working on behalf of


setJoinAlias

public void setJoinAlias(String joinAlias)
Store the join alias this operator is working on behalf of


getStats

public Map<Enum<?>,Long> getStats()

initialize

public void initialize(org.apache.hadoop.conf.Configuration hconf,
                       org.apache.hadoop.mapred.Reporter reporter)
                throws HiveException
Throws:
HiveException

process

public abstract void process(Object row,
                             ObjectInspector rowInspector)
                      throws HiveException
Throws:
HiveException

startGroup

public void startGroup()
                throws HiveException
Throws:
HiveException

endGroup

public void endGroup()
              throws HiveException
Throws:
HiveException

close

public void close(boolean abort)
           throws HiveException
Throws:
HiveException

jobClose

public void jobClose(org.apache.hadoop.conf.Configuration conf,
                     boolean success)
              throws HiveException
Unlike other operator interfaces, which are called from a map or reduce task, jobClose is called from the jobclient side once the job has completed.

Parameters:
conf - Configuration with which the job was submitted
success - whether the job was completed successfully or not
Throws:
HiveException

forward

protected void forward(Object row,
                       ObjectInspector rowInspector)
                throws HiveException
Throws:
HiveException

resetStats

public void resetStats()

preorderMap

public void preorderMap(Operator.OperatorFunc opFunc)

logStats

public void logStats()

getName

public String getName()
Implements the getName function for the Node Interface.

Specified by:
getName in interface Node
Returns:
the name of the operator

dump

public String dump()


Copyright © 2009 The Apache Software Foundation