|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.apache.hadoop.hive.ql.exec.Operator<T>
public abstract class Operator<T extends Serializable>
Base operator implementation.
Nested Class Summary | |
---|---|
static interface |
Operator.OperatorFunc
OperatorFunc. |
static class |
Operator.ProgressCounter
TODO This is a hack for hadoop 0.17 which only supports enum counters. |
static class |
Operator.State
State. |
Field Summary | |
---|---|
protected String |
alias
|
protected long |
beginTime
|
protected List<Operator<? extends Serializable>> |
childOperators
|
protected Operator<? extends Serializable>[] |
childOperatorsArray
Cache childOperators in an array for faster access. |
protected int[] |
childOperatorsTag
|
protected Map<String,ExprNodeDesc> |
colExprMap
A map of output column name to input expression map. |
protected T |
conf
|
protected ArrayList<String> |
counterNames
List of counter names associated with the operator. |
protected HashMap<String,Operator.ProgressCounter> |
counterNameToEnum
Each operator has its own map of its counter names to disjoint ProgressCounter - it is populated at compile time and is read in at run-time while extracting the operator specific counts. |
protected HashMap<String,Long> |
counters
Populated at run time in the client from Hadoop counters. |
protected boolean |
done
|
protected static String |
fatalErrorCntr
|
protected Object |
groupKeyObject
|
protected String |
id
|
protected ObjectInspector[] |
inputObjInspectors
|
protected long |
inputRows
|
protected org.apache.commons.logging.Log |
LOG
|
protected static String |
numInputRowsCntr
|
protected static String |
numOutputRowsCntr
|
protected String |
operatorId
|
protected org.apache.hadoop.mapred.OutputCollector |
out
|
protected ObjectInspector |
outputObjInspector
|
protected long |
outputRows
|
protected List<Operator<? extends Serializable>> |
parentOperators
|
protected org.apache.hadoop.mapred.Reporter |
reporter
|
protected Operator.State |
state
|
protected HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> |
statsMap
|
protected static String |
timeTakenCntr
|
protected long |
totalTime
|
Constructor Summary | |
---|---|
Operator()
|
|
Operator(org.apache.hadoop.mapred.Reporter reporter)
Create an operator with a reporter. |
Method Summary | |
---|---|
protected boolean |
allInitializedParentsAreClosed()
|
protected boolean |
areAllParentsInitialized()
checks whether all parent operators are initialized or not. |
void |
assignCounterNameToEnum()
Called only in SemanticAnalyzer after all operators have added their own set of counter names. |
void |
augmentPlan()
Called during semantic analysis as operators are being added in order to give them a chance to compute any additional plan information needed. |
boolean |
checkFatalErrors(org.apache.hadoop.mapred.Counters ctrs,
StringBuilder errMsg)
Recursively check this operator and its descendants to see if the fatal error counter is set to non-zero. |
void |
close(boolean abort)
|
protected void |
closeOp(boolean abort)
Operator specific close routine. |
String |
dump(int level)
|
String |
dump(int level,
HashSet<Integer> seenOpts)
|
void |
endGroup()
|
protected void |
fatalErrorMessage(StringBuilder errMsg,
long counterValue)
Get the fatal error message based on counter's code. |
protected void |
forward(Object row,
ObjectInspector rowInspector)
|
List<Operator<? extends Serializable>> |
getChildOperators()
|
ArrayList<Node> |
getChildren()
Implements the getChildren function for the Node Interface. |
Map<String,ExprNodeDesc> |
getColumnExprMap()
Returns a map of output column name to input expression. Note that currently it returns only key columns for ReduceSink and GroupBy operators. |
T |
getConf()
|
ArrayList<String> |
getCounterNames()
|
HashMap<String,Operator.ProgressCounter> |
getCounterNameToEnum()
|
HashMap<String,Long> |
getCounters()
|
boolean |
getDone()
|
ExecMapperContext |
getExecContext()
|
Object |
getGroupKeyObject()
|
String |
getIdentifier()
This function is not named getId(), to make sure java serialization does NOT serialize it. |
String |
getName()
Implements the getName function for the Node Interface. |
String |
getOperatorId()
|
List<Operator<? extends Serializable>> |
getParentOperators()
|
RowSchema |
getSchema()
|
Map<Enum<?>,Long> |
getStats()
|
int |
getType()
Should be overridden to return the type of the specific operator among the types in OperatorType. |
protected void |
incrCounter(String name,
long amount)
this is called in operators in map or reduce tasks. |
protected static ObjectInspector[] |
initEvaluators(ExprNodeEvaluator[] evals,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and return the result ObjectInspectors. |
protected static StructObjectInspector |
initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals,
List<String> outputColName,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and put the return values into a StructObjectInspector with integer field names. |
void |
initialize(org.apache.hadoop.conf.Configuration hconf,
ObjectInspector[] inputOIs)
Initializes operators only if all parents have been initialized. |
protected void |
initializeChildren(org.apache.hadoop.conf.Configuration hconf)
Calls initialize on each of the children with outputObjInspector as the output row format. |
void |
initializeCounters()
|
void |
initializeLocalWork(org.apache.hadoop.conf.Configuration hconf)
|
protected void |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization. |
void |
initOperatorId()
|
void |
jobClose(org.apache.hadoop.conf.Configuration conf,
boolean success,
JobCloseFeedBack feedBack)
Unlike other operator interfaces which are called from map or reduce task, jobClose is called from the jobclient side once the job has completed. |
void |
logStats()
|
void |
passExecContext(ExecMapperContext execContext)
Pass the execContext reference to every child operator. |
void |
preorderMap(Operator.OperatorFunc opFunc)
|
void |
process(Object row,
int tag)
Process the row. |
abstract void |
processOp(Object row,
int tag)
Process the row. |
void |
removeChild(Operator<? extends Serializable> child)
|
void |
replaceChild(Operator<? extends Serializable> child,
Operator<? extends Serializable> newChild)
Replace one child with another at the same position. |
void |
replaceParent(Operator<? extends Serializable> parent,
Operator<? extends Serializable> newParent)
Replace one parent with another at the same position. |
static void |
resetId()
|
static void |
resetLastEnumUsed()
|
void |
resetStats()
|
void |
setAlias(String alias)
Store the alias this operator is working on behalf of. |
void |
setChildOperators(List<Operator<? extends Serializable>> childOperators)
|
void |
setColumnExprMap(Map<String,ExprNodeDesc> colExprMap)
|
void |
setConf(T conf)
|
void |
setCounterNames(ArrayList<String> counterNames)
|
void |
setCounterNameToEnum(HashMap<String,Operator.ProgressCounter> counterNameToEnum)
|
void |
setDone(boolean done)
|
void |
setExecContext(ExecMapperContext execContext)
|
void |
setGroupKeyObject(Object keyObject)
|
void |
setId(String id)
|
void |
setOperatorId(String operatorId)
|
void |
setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)
|
void |
setParentOperators(List<Operator<? extends Serializable>> parentOperators)
|
void |
setReporter(org.apache.hadoop.mapred.Reporter rep)
|
void |
setSchema(RowSchema rowSchema)
|
void |
startGroup()
|
void |
updateCounters(org.apache.hadoop.mapred.Counters ctrs)
called in ExecDriver.progress periodically. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected List<Operator<? extends Serializable>> childOperators
protected List<Operator<? extends Serializable>> parentOperators
protected String operatorId
protected ArrayList<String> counterNames
protected HashMap<String,Operator.ProgressCounter> counterNameToEnum
protected transient Operator.State state
protected T extends Serializable conf
protected boolean done
protected transient HashMap<Enum<?>,org.apache.hadoop.io.LongWritable> statsMap
protected transient org.apache.hadoop.mapred.OutputCollector out
protected transient org.apache.commons.logging.Log LOG
protected transient String alias
protected transient org.apache.hadoop.mapred.Reporter reporter
protected transient String id
protected transient ObjectInspector[] inputObjInspectors
protected transient ObjectInspector outputObjInspector
protected transient Map<String,ExprNodeDesc> colExprMap
protected transient Operator<? extends Serializable>[] childOperatorsArray
protected transient int[] childOperatorsTag
protected transient HashMap<String,Long> counters
protected transient long inputRows
protected transient long outputRows
protected transient long beginTime
protected transient long totalTime
protected transient Object groupKeyObject
protected static String numInputRowsCntr
protected static String numOutputRowsCntr
protected static String timeTakenCntr
protected static String fatalErrorCntr
Constructor Detail |
---|
public Operator()
public Operator(org.apache.hadoop.mapred.Reporter reporter)
reporter
- Used to report progress of certain operators.
Method Detail |
---|
public static void resetId()
public void setChildOperators(List<Operator<? extends Serializable>> childOperators)
public List<Operator<? extends Serializable>> getChildOperators()
public ArrayList<Node> getChildren()
getChildren
in interface Node
public void setParentOperators(List<Operator<? extends Serializable>> parentOperators)
public List<Operator<? extends Serializable>> getParentOperators()
public void setConf(T conf)
public T getConf()
public boolean getDone()
public void setDone(boolean done)
public void setSchema(RowSchema rowSchema)
public RowSchema getSchema()
public void setId(String id)
public String getIdentifier()
public void setReporter(org.apache.hadoop.mapred.Reporter rep)
public void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)
public void setAlias(String alias)
public Map<Enum<?>,Long> getStats()
protected boolean areAllParentsInitialized()
public void initialize(org.apache.hadoop.conf.Configuration hconf, ObjectInspector[] inputOIs) throws HiveException
hconf
-
inputOIs
- input object inspector array indexed by tag id. null value is
ignored.
HiveException
public void initializeLocalWork(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
protected void initializeOp(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
protected void initializeChildren(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
public void passExecContext(ExecMapperContext execContext)
public abstract void processOp(Object row, int tag) throws HiveException
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
public void process(Object row, int tag) throws HiveException
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
public void startGroup() throws HiveException
HiveException
public void endGroup() throws HiveException
HiveException
protected boolean allInitializedParentsAreClosed()
public void close(boolean abort) throws HiveException
HiveException
protected void closeOp(boolean abort) throws HiveException
HiveException
public void jobClose(org.apache.hadoop.conf.Configuration conf, boolean success, JobCloseFeedBack feedBack) throws HiveException
conf
- Configuration with which the job was submitted
success
- whether the job was completed successfully or not
HiveException
public void replaceChild(Operator<? extends Serializable> child, Operator<? extends Serializable> newChild)
child
- the old child
newChild
- the new child
public void removeChild(Operator<? extends Serializable> child)
public void replaceParent(Operator<? extends Serializable> parent, Operator<? extends Serializable> newParent)
parent
- the old parent
newParent
- the new parent
protected void forward(Object row, ObjectInspector rowInspector) throws HiveException
HiveException
public void resetStats()
public void preorderMap(Operator.OperatorFunc opFunc)
public void logStats()
public String getName()
getName
in interface Node
public Map<String,ExprNodeDesc> getColumnExprMap()
public void setColumnExprMap(Map<String,ExprNodeDesc> colExprMap)
public String dump(int level)
public String dump(int level, HashSet<Integer> seenOpts)
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, ObjectInspector rowInspector) throws HiveException
HiveException
protected static StructObjectInspector initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals, List<String> outputColName, ObjectInspector rowInspector) throws HiveException
HiveException
protected void incrCounter(String name, long amount)
name
-
amount
-
public ArrayList<String> getCounterNames()
public void setCounterNames(ArrayList<String> counterNames)
public String getOperatorId()
public void initOperatorId()
public void setOperatorId(String operatorId)
public HashMap<String,Long> getCounters()
public void updateCounters(org.apache.hadoop.mapred.Counters ctrs)
ctrs
- counters from the running job
public boolean checkFatalErrors(org.apache.hadoop.mapred.Counters ctrs, StringBuilder errMsg)
ctrs
-
protected void fatalErrorMessage(StringBuilder errMsg, long counterValue)
errMsg
- error message should be appended to this output parameter.
counterValue
- input counter code.
public static void resetLastEnumUsed()
public void assignCounterNameToEnum()
public void initializeCounters()
public HashMap<String,Operator.ProgressCounter> getCounterNameToEnum()
public void setCounterNameToEnum(HashMap<String,Operator.ProgressCounter> counterNameToEnum)
public int getType()
public void setGroupKeyObject(Object keyObject)
public Object getGroupKeyObject()
public void augmentPlan()
public ExecMapperContext getExecContext()
public void setExecContext(ExecMapperContext execContext)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |