org.apache.mahout.utils.clustering
Class ClusterDumper
java.lang.Object
org.apache.hadoop.conf.Configured
org.apache.mahout.common.AbstractJob
org.apache.mahout.utils.clustering.ClusterDumper
All Implemented Interfaces: org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool
public final class ClusterDumper
extends AbstractJob
Methods inherited from class org.apache.mahout.common.AbstractJob:
addFlag, addInputOption, addOption, addOption, addOption, addOption, addOutputOption, buildOption, getAnalyzerClassFromOption, getCLIOption, getCombinedTempPath, getGroup, getInputPath, getOption, getOption, getOutputPath, getOutputPath, getTempPath, getTempPath, hasOption, keyFor, maybePut, parseArguments, parseDirectories, prepareJob, prepareJob, prepareJob, setS3SafeCombinedInputPath, shouldRunNextPhase
Methods inherited from class org.apache.hadoop.conf.Configured:
getConf, setConf
Methods inherited from class java.lang.Object:
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.hadoop.conf.Configurable:
getConf, setConf
SAMPLE_POINTS
public static final String SAMPLE_POINTS
- See Also:
- Constant Field Values
measure
protected DistanceMeasure measure
OUTPUT_OPTION
public static final String OUTPUT_OPTION
- See Also:
- Constant Field Values
DICTIONARY_TYPE_OPTION
public static final String DICTIONARY_TYPE_OPTION
- See Also:
- Constant Field Values
DICTIONARY_OPTION
public static final String DICTIONARY_OPTION
- See Also:
- Constant Field Values
POINTS_DIR_OPTION
public static final String POINTS_DIR_OPTION
- See Also:
- Constant Field Values
NUM_WORDS_OPTION
public static final String NUM_WORDS_OPTION
- See Also:
- Constant Field Values
SUBSTRING_OPTION
public static final String SUBSTRING_OPTION
- See Also:
- Constant Field Values
SEQ_FILE_DIR_OPTION
public static final String SEQ_FILE_DIR_OPTION
- See Also:
- Constant Field Values
EVALUATE_CLUSTERS
public static final String EVALUATE_CLUSTERS
- See Also:
- Constant Field Values
OUTPUT_FORMAT_OPT
public static final String OUTPUT_FORMAT_OPT
- See Also:
- Constant Field Values
ClusterDumper
public ClusterDumper(org.apache.hadoop.fs.Path seqFileDir,
org.apache.hadoop.fs.Path pointsDir)
ClusterDumper
public ClusterDumper()
main
public static void main(String[] args)
throws Exception
- Throws:
Exception
run
public int run(String[] args)
throws Exception
- Throws:
Exception
printClusters
public void printClusters(String[] dictionary)
throws Exception
- Throws:
Exception
getOutputFile
public String getOutputFile()
setOutputFile
public void setOutputFile(String outputFile)
getSubString
public int getSubString()
setSubString
public void setSubString(int subString)
getClusterIdToPoints
public Map<Integer,List<WeightedVectorWritable>> getClusterIdToPoints()
getTermDictionary
public String getTermDictionary()
setTermDictionary
public void setTermDictionary(String termDictionary,
String dictionaryType)
setNumTopFeatures
public void setNumTopFeatures(int num)
getNumTopFeatures
public int getNumTopFeatures()
getMaxPointsPerCluster
public long getMaxPointsPerCluster()
setMaxPointsPerCluster
public void setMaxPointsPerCluster(long maxPointsPerCluster)
readPoints
public static Map<Integer,List<WeightedVectorWritable>> readPoints(org.apache.hadoop.fs.Path pointsPathDir,
long maxPointsPerCluster,
org.apache.hadoop.conf.Configuration conf)
Copyright © 2008-2012 The Apache Software Foundation. All Rights Reserved.