|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,BlurMutate>
org.apache.blur.mapreduce.lib.BlurOutputFormat
public class BlurOutputFormat
BlurOutputFormat
is used to index data and deliver the indexes to
the proper Blur table for searching. A typical usage of this class would be
as follows.
Blur.Iface
client = BlurClient
.getClient("controller1:40010");
TableDescriptor tableDescriptor = client.describe(tableName);
Job job = new Job(jobConf, "blur index");
job.setJarByClass(BlurOutputFormatTest.class);
job.setMapperClass(CsvBlurMapper.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.addInputPath(job, new Path(input));
CsvBlurMapper.addColumns(job, "cf1", "col");
BlurOutputFormat.setupJob(job, tableDescriptor);
BlurOutputFormat.setIndexLocally(job, true);
BlurOutputFormat.setOptimizeInFlight(job, false);
job.waitForCompletion(true);
Field Summary | |
---|---|
static String |
BLUR_OUTPUT_DOCUMENT_BUFFER_STRATEGY
|
static String |
BLUR_OUTPUT_INDEXLOCALLY
|
static String |
BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_HEAP_SIZE
|
static String |
BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_SIZE
|
static String |
BLUR_OUTPUT_OPTIMIZEINFLIGHT
|
static String |
BLUR_OUTPUT_PATH
|
static String |
BLUR_OUTPUT_REDUCER_MULTIPLIER
|
static String |
BLUR_TABLE_DESCRIPTOR
|
Constructor Summary | |
---|---|
BlurOutputFormat()
|
Method Summary | |
---|---|
void |
checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext context)
|
static DocumentBufferStrategy |
getDocumentBufferStrategy(org.apache.hadoop.conf.Configuration configuration)
|
static GetCounter |
getGetCounter()
|
static int |
getMaxDocumentBufferHeapSize(org.apache.hadoop.conf.Configuration configuration)
|
static int |
getMaxDocumentBufferSize(org.apache.hadoop.conf.Configuration configuration)
|
org.apache.hadoop.mapreduce.OutputCommitter |
getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
|
static org.apache.hadoop.fs.Path |
getOutputPath(org.apache.hadoop.conf.Configuration configuration)
|
static org.apache.hadoop.util.Progressable |
getProgressable()
|
org.apache.hadoop.mapreduce.RecordWriter<org.apache.hadoop.io.Text,BlurMutate> |
getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
|
static int |
getReducerMultiplier(org.apache.hadoop.conf.Configuration configuration)
|
static TableDescriptor |
getTableDescriptor(org.apache.hadoop.conf.Configuration configuration)
|
static boolean |
isIndexLocally(org.apache.hadoop.conf.Configuration configuration)
|
static boolean |
isOptimizeInFlight(org.apache.hadoop.conf.Configuration configuration)
|
static void |
setDocumentBufferStrategy(org.apache.hadoop.conf.Configuration configuration,
Class<? extends DocumentBufferStrategy> documentBufferStrategyClass)
|
static void |
setDocumentBufferStrategy(org.apache.hadoop.mapreduce.Job job,
Class<? extends DocumentBufferStrategy> documentBufferStrategyClass)
|
static void |
setGetCounter(GetCounter getCounter)
|
static void |
setIndexLocally(org.apache.hadoop.conf.Configuration configuration,
boolean b)
Enabled by default, this will enable local indexing on the machine where the task is running. |
static void |
setIndexLocally(org.apache.hadoop.mapreduce.Job job,
boolean b)
Enabled by default, this will enable local indexing on the machine where the task is running. |
static void |
setMaxDocumentBufferHeapSize(org.apache.hadoop.conf.Configuration configuration,
int maxDocumentBufferHeapSize)
|
static void |
setMaxDocumentBufferHeapSize(org.apache.hadoop.mapreduce.Job job,
int maxDocumentBufferHeapSize)
|
static void |
setMaxDocumentBufferSize(org.apache.hadoop.conf.Configuration configuration,
int maxDocumentBufferSize)
Sets the maximum number of documents that the buffer will hold in memory before overflowing to disk. |
static void |
setMaxDocumentBufferSize(org.apache.hadoop.mapreduce.Job job,
int maxDocumentBufferSize)
Sets the maximum number of documents that the buffer will hold in memory before overflowing to disk. |
static void |
setOptimizeInFlight(org.apache.hadoop.conf.Configuration configuration,
boolean b)
Enabled by default, this will optimize the index while copying from the local index to the remote destination in HDFS. |
static void |
setOptimizeInFlight(org.apache.hadoop.mapreduce.Job job,
boolean b)
Enabled by default, this will optimize the index while copying from the local index to the remote destination in HDFS. |
static void |
setOutputPath(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path path)
|
static void |
setOutputPath(org.apache.hadoop.mapreduce.Job job,
org.apache.hadoop.fs.Path path)
|
static void |
setProgressable(org.apache.hadoop.util.Progressable progressable)
|
static void |
setReducerMultiplier(org.apache.hadoop.mapreduce.Job job,
int multiple)
This will multiply the number of reducers for this job. |
static void |
setTableDescriptor(org.apache.hadoop.conf.Configuration configuration,
TableDescriptor tableDescriptor)
Sets the TableDescriptor for this job. |
static void |
setTableDescriptor(org.apache.hadoop.mapreduce.Job job,
TableDescriptor tableDescriptor)
Sets the TableDescriptor for this job. |
static void |
setupJob(org.apache.hadoop.mapreduce.Job job,
TableDescriptor tableDescriptor)
Sets up the output portion of the map reduce job. |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final String BLUR_OUTPUT_REDUCER_MULTIPLIER
public static final String BLUR_OUTPUT_OPTIMIZEINFLIGHT
public static final String BLUR_OUTPUT_INDEXLOCALLY
public static final String BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_SIZE
public static final String BLUR_OUTPUT_MAX_DOCUMENT_BUFFER_HEAP_SIZE
public static final String BLUR_OUTPUT_DOCUMENT_BUFFER_STRATEGY
public static final String BLUR_TABLE_DESCRIPTOR
public static final String BLUR_OUTPUT_PATH
Constructor Detail |
---|
public BlurOutputFormat()
Method Detail |
---|
public static void setProgressable(org.apache.hadoop.util.Progressable progressable)
public static org.apache.hadoop.util.Progressable getProgressable()
public static void setGetCounter(GetCounter getCounter)
public static GetCounter getGetCounter()
public void checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext context) throws IOException, InterruptedException
checkOutputSpecs
in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,BlurMutate>
IOException
InterruptedException
public org.apache.hadoop.mapreduce.RecordWriter<org.apache.hadoop.io.Text,BlurMutate> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
getRecordWriter
in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,BlurMutate>
IOException
InterruptedException
public org.apache.hadoop.mapreduce.OutputCommitter getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
getOutputCommitter
in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,BlurMutate>
IOException
InterruptedException
public static TableDescriptor getTableDescriptor(org.apache.hadoop.conf.Configuration configuration) throws IOException
IOException
public static void setReducerMultiplier(org.apache.hadoop.mapreduce.Job job, int multiple) throws IOException
job
- the job to set up.
multiple
- the multiple to use.
IOException
public static int getReducerMultiplier(org.apache.hadoop.conf.Configuration configuration)
public static void setTableDescriptor(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor) throws IOException
Sets the TableDescriptor for this job.
job
- the job to set up.
tableDescriptor
- the TableDescriptor.
IOException
public static void setTableDescriptor(org.apache.hadoop.conf.Configuration configuration, TableDescriptor tableDescriptor) throws IOException
Sets the TableDescriptor for this job.
configuration
- the configuration to set up.
tableDescriptor
- the TableDescriptor.
IOException
public static void setMaxDocumentBufferSize(org.apache.hadoop.mapreduce.Job job, int maxDocumentBufferSize)
job
- the job to set up.
maxDocumentBufferSize
- the maxDocumentBufferSize.
public static void setMaxDocumentBufferSize(org.apache.hadoop.conf.Configuration configuration, int maxDocumentBufferSize)
configuration
- the configuration to set up.
maxDocumentBufferSize
- the maxDocumentBufferSize.
public static int getMaxDocumentBufferSize(org.apache.hadoop.conf.Configuration configuration)
public static int getMaxDocumentBufferHeapSize(org.apache.hadoop.conf.Configuration configuration)
public static void setMaxDocumentBufferHeapSize(org.apache.hadoop.conf.Configuration configuration, int maxDocumentBufferHeapSize)
public static void setMaxDocumentBufferHeapSize(org.apache.hadoop.mapreduce.Job job, int maxDocumentBufferHeapSize)
public static DocumentBufferStrategy getDocumentBufferStrategy(org.apache.hadoop.conf.Configuration configuration)
public static void setDocumentBufferStrategy(org.apache.hadoop.mapreduce.Job job, Class<? extends DocumentBufferStrategy> documentBufferStrategyClass)
public static void setDocumentBufferStrategy(org.apache.hadoop.conf.Configuration configuration, Class<? extends DocumentBufferStrategy> documentBufferStrategyClass)
public static void setOutputPath(org.apache.hadoop.mapreduce.Job job, org.apache.hadoop.fs.Path path)
public static void setOutputPath(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path path)
public static org.apache.hadoop.fs.Path getOutputPath(org.apache.hadoop.conf.Configuration configuration)
public static void setIndexLocally(org.apache.hadoop.mapreduce.Job job, boolean b)
Enabled by default, this will enable local indexing on the machine where the task is running. When the RecordWriter closes, the index is copied to the remote destination in HDFS.
job
- the job to set up.
b
- true to enable, false to disable.
public static void setIndexLocally(org.apache.hadoop.conf.Configuration configuration, boolean b)
Enabled by default, this will enable local indexing on the machine where the task is running. When the RecordWriter closes, the index is copied to the remote destination in HDFS.
configuration
- the configuration to set up.
b
- true to enable, false to disable.
public static boolean isIndexLocally(org.apache.hadoop.conf.Configuration configuration)
public static void setOptimizeInFlight(org.apache.hadoop.mapreduce.Job job, boolean b)
job
- the job to set up.
b
- true to enable, false to disable.
public static void setOptimizeInFlight(org.apache.hadoop.conf.Configuration configuration, boolean b)
configuration
- the configuration to set up.
b
- true to enable, false to disable.
public static boolean isOptimizeInFlight(org.apache.hadoop.conf.Configuration configuration)
public static void setupJob(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor) throws IOException
job
- the job to set up.
tableDescriptor
- the table descriptor to write the output of the indexing job.
IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |