public class WARCUtils extends Object
Modifier and Type | Field and Description |
---|---|
static String | CONFORMS_TO |
static String | FORMAT |
static org.archive.uid.UUIDGenerator | generator |
static String | HOSTNAME |
static String | HTTP_HEADER_FROM |
static String | HTTP_HEADER_USER_AGENT |
static String | IP |
static String | OPERATOR |
static String | ROBOTS |
static String | SOFTWARE |
Constructor and Description |
---|
WARCUtils() |
Modifier and Type | Method and Description |
---|---|
static org.archive.io.warc.WARCRecordInfo | docToMetadata(NutchDocument doc) |
static String | getAgentString(String name, String version, String description, String URL, String email) |
static String | getHostname(Configuration conf) |
static String | getIPAddress(Configuration conf) |
static org.archive.util.anvl.ANVLRecord | getWARCInfoContent(Configuration conf) |
static byte[] | toByteArray(org.archive.format.http.HttpHeaders headers) |
public static final String SOFTWARE
public static final String HTTP_HEADER_FROM
public static final String HTTP_HEADER_USER_AGENT
public static final String HOSTNAME
public static final String ROBOTS
public static final String OPERATOR
public static final String FORMAT
public static final String CONFORMS_TO
public static final String IP
public static final org.archive.uid.UUIDGenerator generator
public static final org.archive.util.anvl.ANVLRecord getWARCInfoContent(Configuration conf)
public static final String getHostname(Configuration conf) throws UnknownHostException
Throws: UnknownHostException
public static final String getIPAddress(Configuration conf) throws UnknownHostException
Throws: UnknownHostException
public static final byte[] toByteArray(org.archive.format.http.HttpHeaders headers) throws IOException
Throws: IOException
public static final String getAgentString(String name, String version, String description, String URL, String email)
public static final org.archive.io.warc.WARCRecordInfo docToMetadata(NutchDocument doc) throws UnsupportedEncodingException
Throws: UnsupportedEncodingException
Copyright © 2015 The Apache Software Foundation