|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.apache.mahout.fpm.pfpgrowth.PFPGrowth
public final class PFPGrowth
Parallel FP Growth Driver Class. Runs each stage of PFPGrowth as described in the paper http://infolab.stanford.edu/~echang/recsys08-69.pdf
Field Summary | |
---|---|
static java.lang.String |
ENCODING
|
static java.lang.String |
F_LIST
|
static java.lang.String |
FILE_PATTERN
|
static java.lang.String |
FPGROWTH
|
static java.lang.String |
FREQUENT_PATTERNS
|
static java.lang.String |
G_LIST
|
static java.lang.String |
INPUT
|
static java.lang.String |
MAX_HEAPSIZE
|
static java.lang.String |
MIN_SUPPORT
|
static java.lang.String |
NUM_GROUPS
|
static java.lang.String |
OUTPUT
|
static java.lang.String |
PARALLEL_COUNTING
|
static java.lang.String |
PFP_PARAMETERS
|
static java.lang.String |
SORTED_OUTPUT
|
static java.lang.String |
SPLIT_PATTERN
|
static java.util.regex.Pattern |
SPLITTER
|
static java.lang.String |
TREE_CACHE_SIZE
|
Method Summary | |
---|---|
static java.util.List<Pair<java.lang.String,java.lang.Long>> |
deserializeList(Parameters params,
java.lang.String key,
org.apache.hadoop.conf.Configuration conf)
Generates the fList from the serialized string representation |
static java.util.Map<java.lang.String,java.lang.Long> |
deserializeMap(Parameters params,
java.lang.String key,
org.apache.hadoop.conf.Configuration conf)
Generates the gList (group ID mapping of the various frequent features) Map from the corresponding serialized representation |
static java.util.List<Pair<java.lang.String,java.lang.Long>> |
readFList(Parameters params)
Reads the feature frequency list that is built at the end of the parallel counting job |
static java.util.List<Pair<java.lang.String,TopKStringPatterns>> |
readFrequentPattern(Parameters params)
Read the Frequent Patterns generated from Text |
static void |
runPFPGrowth(Parameters params)
|
static void |
startAggregating(Parameters params)
Run the aggregation Job to aggregate the different TopK patterns and group each Pattern by the features present in it and thus calculate the final Top K frequent Patterns for each feature |
static void |
startGroupingItems(Parameters params)
Group the given Features into g groups as defined by the numGroups parameter in params |
static void |
startParallelCounting(Parameters params)
Count the frequencies of various features in parallel using Map/Reduce |
static void |
startParallelFPGrowth(Parameters params)
Run the Parallel FPGrowth Map/Reduce Job to calculate the Top K features of group dependent shards |
static void |
startTransactionSorting(Parameters params)
Run the transaction sorting Map/Reduce Job (note: the published description duplicated that of startParallelFPGrowth; verify the exact behavior against the source)
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final java.lang.String ENCODING
public static final java.lang.String F_LIST
public static final java.lang.String G_LIST
public static final java.lang.String NUM_GROUPS
public static final java.lang.String OUTPUT
public static final java.lang.String MIN_SUPPORT
public static final java.lang.String MAX_HEAPSIZE
public static final java.lang.String INPUT
public static final java.lang.String PFP_PARAMETERS
public static final java.lang.String FILE_PATTERN
public static final java.lang.String FPGROWTH
public static final java.lang.String FREQUENT_PATTERNS
public static final java.lang.String PARALLEL_COUNTING
public static final java.lang.String SORTED_OUTPUT
public static final java.lang.String SPLIT_PATTERN
public static final java.lang.String TREE_CACHE_SIZE
public static final java.util.regex.Pattern SPLITTER
Method Detail |
---|
public static java.util.List<Pair<java.lang.String,java.lang.Long>> deserializeList(Parameters params, java.lang.String key, org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
Parameters: params, key, conf
Throws: java.io.IOException
public static java.util.Map<java.lang.String,java.lang.Long> deserializeMap(Parameters params, java.lang.String key, org.apache.hadoop.conf.Configuration conf) throws java.io.IOException
Parameters: params, key, conf
Throws: java.io.IOException
public static java.util.List<Pair<java.lang.String,java.lang.Long>> readFList(Parameters params) throws java.io.IOException
java.io.IOException
public static java.util.List<Pair<java.lang.String,TopKStringPatterns>> readFrequentPattern(Parameters params) throws java.io.IOException
java.io.IOException
public static void runPFPGrowth(Parameters params) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
Parameters: params - should contain the input and output locations as string values; the additional parameters include minSupport(3), maxHeapSize(50), numGroups(1000)
Throws: java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
public static void startAggregating(Parameters params) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public static void startGroupingItems(Parameters params) throws java.io.IOException
Parameters: params
Throws: java.io.IOException
public static void startParallelCounting(Parameters params) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public static void startTransactionSorting(Parameters params) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
public static void startParallelFPGrowth(Parameters params) throws java.io.IOException, java.lang.InterruptedException, java.lang.ClassNotFoundException
java.io.IOException
java.lang.InterruptedException
java.lang.ClassNotFoundException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |