|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.apache.hadoop.mapreduce.Mapper<KEY,VALUE,org.apache.hadoop.io.Text,BlurMutate>
org.apache.blur.mapreduce.lib.BaseBlurMapper<org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text>
org.apache.blur.mapreduce.lib.CsvBlurMapper
public class CsvBlurMapper
This will parse a standard CSV file into a BlurMutate object.
Use the static addColumns and setSeparator methods to configure the class.
Nested Class Summary |
---|
Nested classes/interfaces inherited from class org.apache.hadoop.mapreduce.Mapper |
---|
org.apache.hadoop.mapreduce.Mapper.Context |
Field Summary | |
---|---|
static String |
BLUR_CSV_AUTO_GENERATE_RECORD_ID_AS_HASH_OF_DATA
|
static String |
BLUR_CSV_AUTO_GENERATE_ROW_ID_AS_HASH_OF_DATA
|
static String |
BLUR_CSV_FAMILIES
|
static String |
BLUR_CSV_FAMILY_COLUMN_PREFIX
|
static String |
BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES
|
static String |
BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX
|
static String |
BLUR_CSV_SEPARATOR_BASE64
|
static String |
HIVE_NULL
|
static String |
UTF_8
|
Constructor Summary | |
---|---|
CsvBlurMapper()
|
Method Summary | |
---|---|
static void |
addColumns(org.apache.hadoop.conf.Configuration configuration,
String family,
String... columns)
Adds the column layout for the given family. |
static void |
addColumns(org.apache.hadoop.mapreduce.Job job,
String family,
String... columns)
Adds the column layout for the given family. |
static void |
addFamilyPath(org.apache.hadoop.conf.Configuration configuration,
String family,
org.apache.hadoop.fs.Path path)
Add a mapping for a family to a path. |
static void |
addFamilyPath(org.apache.hadoop.mapreduce.Job job,
String family,
org.apache.hadoop.fs.Path path)
Add a mapping for a family to a path. |
static Map<String,List<String>> |
getFamilyAndColumnNameMap(org.apache.hadoop.conf.Configuration configuration)
|
static Collection<String> |
getFamilyNames(org.apache.hadoop.conf.Configuration configuration)
|
static boolean |
isAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration)
Gets whether or not to generate a recordid for the record based on the data. |
static boolean |
isAutoGenerateRowIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration)
Gets whether or not to generate a rowid for the row based on the data. |
static void |
setAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration,
boolean autoGenerateRecordIdAsHashOfData)
If set to true the record id will be automatically generated as a hash of the data that the record contains. |
static void |
setAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.mapreduce.Job job,
boolean autoGenerateRecordIdAsHashOfData)
If set to true the record id will be automatically generated as a hash of the data that the record contains. |
static void |
setAutoGenerateRowIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration,
boolean autoGenerateRowIdAsHashOfData)
If set to true the row id will be automatically generated as a hash of the data that the record contains. |
static void |
setAutoGenerateRowIdAsHashOfData(org.apache.hadoop.mapreduce.Job job,
boolean autoGenerateRowIdAsHashOfData)
If set to true the row id will be automatically generated as a hash of the data that the record contains. |
static void |
setColumns(org.apache.hadoop.conf.Configuration configuration,
String strDefinition)
Sets all the family and column definitions. |
static void |
setColumns(org.apache.hadoop.mapreduce.Job job,
String strDefinition)
Sets all the family and column definitions. |
void |
setFamilyFromPath(String familyFromPath)
|
static void |
setSeparator(org.apache.hadoop.conf.Configuration configuration,
String separator)
Sets the separator of the file, by default it is ",". |
static void |
setSeparator(org.apache.hadoop.mapreduce.Job job,
String separator)
Sets the separator of the file, by default it is ",". |
Methods inherited from class org.apache.hadoop.mapreduce.Mapper |
---|
run |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final String UTF_8
public static final String BLUR_CSV_AUTO_GENERATE_RECORD_ID_AS_HASH_OF_DATA
public static final String BLUR_CSV_AUTO_GENERATE_ROW_ID_AS_HASH_OF_DATA
public static final String BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES
public static final String BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX
public static final String BLUR_CSV_SEPARATOR_BASE64
public static final String BLUR_CSV_FAMILY_COLUMN_PREFIX
public static final String BLUR_CSV_FAMILIES
public static final String HIVE_NULL
Constructor Detail |
---|
public CsvBlurMapper()
Method Detail |
---|
public static void addFamilyPath(org.apache.hadoop.mapreduce.Job job, String family, org.apache.hadoop.fs.Path path)
job - the job to set up.
family - the family.
path - the path.
public static void addFamilyPath(org.apache.hadoop.conf.Configuration configuration, String family, org.apache.hadoop.fs.Path path)
configuration - the configuration to set up.
family - the family.
path - the path.
public static void setAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.mapreduce.Job job, boolean autoGenerateRecordIdAsHashOfData)
job - the job to set up.
autoGenerateRecordIdAsHashOfData - boolean.
public static void setAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration, boolean autoGenerateRecordIdAsHashOfData)
configuration - the configuration to set up.
autoGenerateRecordIdAsHashOfData - boolean.
public static boolean isAutoGenerateRecordIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration)
configuration - the configuration.
public static void setAutoGenerateRowIdAsHashOfData(org.apache.hadoop.mapreduce.Job job, boolean autoGenerateRowIdAsHashOfData)
job - the job to set up.
autoGenerateRowIdAsHashOfData - boolean.
public static void setAutoGenerateRowIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration, boolean autoGenerateRowIdAsHashOfData)
configuration - the configuration to set up.
autoGenerateRowIdAsHashOfData - boolean.
public static boolean isAutoGenerateRowIdAsHashOfData(org.apache.hadoop.conf.Configuration configuration)
configuration - the configuration.
public static void setColumns(org.apache.hadoop.mapreduce.Job job, String strDefinition)
job - the job to set up.
strDefinition - the string definition.
public static void setColumns(org.apache.hadoop.conf.Configuration configuration, String strDefinition)
configuration - the configuration to set up.
strDefinition - the string definition.
public static void addColumns(org.apache.hadoop.mapreduce.Job job, String family, String... columns)
job - the job to apply the layout.
family - the family name.
columns - the column names.
public static void addColumns(org.apache.hadoop.conf.Configuration configuration, String family, String... columns)
configuration - the configuration to apply the layout.
family - the family name.
columns - the column names.
public static Collection<String> getFamilyNames(org.apache.hadoop.conf.Configuration configuration)
public static Map<String,List<String>> getFamilyAndColumnNameMap(org.apache.hadoop.conf.Configuration configuration)
public static void setSeparator(org.apache.hadoop.mapreduce.Job job, String separator)
job - the job to apply the separator change.
separator - the separator.
public static void setSeparator(org.apache.hadoop.conf.Configuration configuration, String separator)
configuration - the configuration to apply the separator change.
separator - the separator.
public void setFamilyFromPath(String familyFromPath)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |