public final class MapReduceAlgebra extends Object
Constructor and Description |
---|
MapReduceAlgebra() |
Modifier and Type | Method and Description |
---|---|
static MRData |
aggregate(Function accumulator,
MRData zero,
Bag s)
aggregate the Bag elements
|
static MRData |
BSP(int[] source,
Function superstep,
MRData init_state,
boolean order,
Bag[] inputs)
The BSP operation
|
static Bag |
closure(Function loop,
Bag init,
int max_num)
transitive closure: repeat the loop until the new set is equal to the previous set
or until we reach the max num of steps
|
static Bag |
cmap(Function f,
Bag s)
lazy concat-map (stream-based)
|
static Bag |
crossProduct(Function mx,
Function my,
Function r,
Bag X,
Bag Y)
A cross-product
|
static void |
dump(String file,
Tree type,
MRData value)
Dump the value of some type to a binary local file;
The type is dumped to a separate file.type
|
static Bag |
filter(Function p,
Function f,
Bag s)
lazy filter combined with a map
|
static MRData |
fold(Function acc,
MRData zero,
Bag s)
general reduction using an accumulator function and a zero element
|
static Bag |
generator(int source_num,
long min,
long max)
generate a lazy bag of long numbers {min...max} and tag each long number with a source num
|
static Bag |
generator(long min,
long max)
generate a lazy bag of long numbers {min...max}
|
static Tree |
get_type(String file)
return the type of the dumped binary local file from file.type
|
static MRData |
getCache(int loc)
return the cache element at location loc
|
static Bag |
groupBy(Bag s)
strict group-by
|
static Bag |
groupByJoin(Function kx,
Function ky,
Function gx,
Function gy,
Function acc,
MRData zero,
Function r,
Bag X,
Bag Y)
An equi-join combined with a group-by (see GroupByJoinPlan)
|
static Bag |
hash_join(Function kx,
Function ky,
Function f,
Bag X,
Bag Y)
A hash-based equi-join
|
static Tuple |
loop(Function loop,
Tuple init,
int num)
repetition: repeat the loop until we reach the num of steps
|
static Bag |
map(Function f,
Bag s)
lazy map
|
static Bag |
mapJoin(Function kx,
Function ky,
Function r,
Bag X,
Bag Y)
The fragment-replicate join (map-side join)
|
static Bag |
mapReduce(Function m,
Function r,
Bag s)
the MapReduce operation
|
static Bag |
mapReduce2(Function mx,
Function my,
Function r,
Bag X,
Bag Y)
A map-reduce operation with two mappers (a join)
|
static MRData |
materialize(MRData x) |
static Bag |
mergeGroupByJoin(Function kx,
Function ky,
Function gx,
Function gy,
Function acc,
MRData zero,
Function r,
Bag X,
Bag Y)
An equi-join combined with a group-by implemented using a sort-merge join
combined with hash-based groupby/aggregation
|
static Bag |
mergeGroupByJoin2(Function kx,
Function ky,
Function gx,
Function gy,
Function acc,
MRData zero,
Function r,
Bag X,
Bag Y)
An equi-join combined with a group-by implemented using hashing
|
static Bag |
parsedSource(int source_num,
Parser parser,
String file,
Trees args)
parse a text document using a given parser and tag output data with a source num
|
static Bag |
parsedSource(int source_num,
String parser,
String file,
Trees args)
parse a text document using a given parser and tag output data with a source num
|
static Bag |
parsedSource(Parser parser,
String path,
Trees args)
parse a text document using a given parser
|
static Bag |
parsedSource(String parser,
String file,
Trees args)
parse a text document using a given parser
|
static Bag |
read_binary(int source_num,
String file)
read the contents of a dumped local binary file and tag data with a source num
|
static MRData |
read_binary(String file)
read the contents of a dumped local binary file
|
static Bag |
repeat(Function loop,
Bag init,
int max_num)
repeat the loop until all termination conditions are true or until we reach the max num of steps
|
static MRData |
setCache(int loc,
MRData value,
MRData ret)
set the cache element at location loc to value and return ret
|
public static Bag cmap(Function f, Bag s)
f
- a function from a to {b}
s
- the input of type {a}
public static Bag map(Function f, Bag s)
f
- a function from a to b
s
- the input of type {a}
public static Bag filter(Function p, Function f, Bag s)
p
- a function from a to boolean
f
- a function from a to b
s
- the input of type {a}
public static MRData fold(Function acc, MRData zero, Bag s)
acc
- a function from (a,b) to b
zero
- a value of type b
s
- the input of type {a}
public static Bag groupBy(Bag s)
s
- the input of type {(a,b)}
public static Bag mapReduce(Function m, Function r, Bag s)
m
- a map function from a to {(k,b)}
r
- a reduce function from (k,{b}) to {c}
s
- the input of type {a}
public static Bag hash_join(Function kx, Function ky, Function f, Bag X, Bag Y)
kx
- left key function from a to k
ky
- right key function from b to k
f
- reducer from (a,b) to c
X
- left input of type {a}
Y
- right input of type {b}
public static Bag crossProduct(Function mx, Function my, Function r, Bag X, Bag Y)
mx
- left map function from a to {a'}
my
- right map function from b to {b'}
r
- reducer from (a',b') to {c}
X
- left input of type {a}
Y
- right input of type {b}
public static Bag mapReduce2(Function mx, Function my, Function r, Bag X, Bag Y)
mx
- left map function from a to {(k,a')}
my
- right map function from b to {(k,b')}
r
- reducer from ({a'},{b'}) to {c}
X
- left input of type {a}
Y
- right input of type {b}
public static Bag mapJoin(Function kx, Function ky, Function r, Bag X, Bag Y)
kx
- left key function from a to k
ky
- right key function from b to k
r
- reducer from (a,{b}) to {c}
X
- left input of type {a}
Y
- right input of type {b}
public static Bag groupByJoin(Function kx, Function ky, Function gx, Function gy, Function acc, MRData zero, Function r, Bag X, Bag Y)
kx
- left key function from a to k
ky
- right key function from b to k
gx
- group-by key function from a to k1
gy
- group-by key function from b to k2
acc
- accumulator from (c,(a,b)) to c
zero
- of type c
r
- reducer from ((k1,k2),c) to d
X
- left input of type {a}
Y
- right input of type {b}
public static final Bag mergeGroupByJoin(Function kx, Function ky, Function gx, Function gy, Function acc, MRData zero, Function r, Bag X, Bag Y)
kx
- left key function from a to k
ky
- right key function from b to k
gx
- group-by key function from a to k1
gy
- group-by key function from b to k2
acc
- accumulator from (c,(a,b)) to c
zero
- of type c
r
- reducer from ((k1,k2),c) to d
X
- left input of type {a}
Y
- right input of type {b}
public static final Bag mergeGroupByJoin2(Function kx, Function ky, Function gx, Function gy, Function acc, MRData zero, Function r, Bag X, Bag Y)
kx
- left key function from a to k
ky
- right key function from b to k
gx
- group-by key function from a to k1
gy
- group-by key function from b to k2
acc
- accumulator from (c,(a,b)) to c
zero
- of type c
r
- reducer from ((k1,k2),c) to d
X
- left input of type {a}
Y
- right input of type {b}
public static Bag repeat(Function loop, Bag init, int max_num) throws Exception
loop
- a function from {a} to {(a,boolean)}
init
- the initial value of type {a}
max_num
- the maximum number of steps
Exception
public static Bag closure(Function loop, Bag init, int max_num) throws Exception
loop
- a function from {a} to {a}
init
- the initial value of type {a}
max_num
- the maximum number of steps
Exception
public static Tuple loop(Function loop, Tuple init, int num)
loop
- a function from ({a1},...,{ak}) to ({a1},...,{ak})
init
- the initial value of type ({a1},...,{ak})
num
- the number of steps
public static Bag parsedSource(Parser parser, String path, Trees args)
parser
- the parser
path
- the text document (local file or directory of files)
args
- the arguments to pass to the parser
public static Bag parsedSource(String parser, String file, Trees args)
parser
- the name of the parser
file
- the text document (local file)
args
- the arguments to pass to the parser
public static Bag parsedSource(int source_num, Parser parser, String file, Trees args)
source_num
- the source id
parser
- the parser
file
- the text document (local file)
args
- the arguments to pass to the parser
public static Bag parsedSource(int source_num, String parser, String file, Trees args)
source_num
- the source id
parser
- the name of the parser
file
- the text document (local file)
args
- the arguments to pass to the parser
public static MRData aggregate(Function accumulator, MRData zero, Bag s)
accumulator
- a function from (b,a) to b
zero
- a value of type b
s
- a Bag of type {a}
public static void dump(String file, Tree type, MRData value) throws IOException
IOException
public static Tree get_type(String file)
public static MRData read_binary(String file)
public static Bag read_binary(int source_num, String file)
public static Bag generator(long min, long max)
public static Bag generator(int source_num, long min, long max)
public static MRData getCache(int loc)
public static MRData setCache(int loc, MRData value, MRData ret)
public static MRData BSP(int[] source, Function superstep, MRData init_state, boolean order, Bag[] inputs)
source
- the source ids of the input Bags
superstep
- the BSP superstep is a function from ({M},S) to ({M},S,boolean)
init_state
- is the initial state of type S
order
- do we need to order the result?
inputs
- the input Bags
Copyright © 2013-2016 The Apache Software Foundation. All Rights Reserved.