public class WARCUtils
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static java.lang.String | CONFORMS_TO |
static java.lang.String | FORMAT |
static org.archive.uid.UUIDGenerator | generator |
static java.lang.String | HOSTNAME |
static java.lang.String | HTTP_HEADER_FROM |
static java.lang.String | HTTP_HEADER_USER_AGENT |
static java.lang.String | IP |
static java.lang.String | OPERATOR |
static java.lang.String | ROBOTS |
static java.lang.String | SOFTWARE |
Constructor and Description |
---|
WARCUtils() |
Modifier and Type | Method and Description |
---|---|
static org.archive.io.warc.WARCRecordInfo | docToMetadata(NutchDocument doc) |
static java.lang.String | getAgentString(java.lang.String name, java.lang.String version, java.lang.String description, java.lang.String URL, java.lang.String email) |
static java.lang.String | getHostname(Configuration conf) |
static java.lang.String | getIPAddress(Configuration conf) |
static org.archive.util.anvl.ANVLRecord | getWARCInfoContent(Configuration conf) |
static byte[] | toByteArray(org.archive.format.http.HttpHeaders headers) |
public static final java.lang.String SOFTWARE
public static final java.lang.String HTTP_HEADER_FROM
public static final java.lang.String HTTP_HEADER_USER_AGENT
public static final java.lang.String HOSTNAME
public static final java.lang.String ROBOTS
public static final java.lang.String OPERATOR
public static final java.lang.String FORMAT
public static final java.lang.String CONFORMS_TO
public static final java.lang.String IP
public static final org.archive.uid.UUIDGenerator generator
public static final org.archive.util.anvl.ANVLRecord getWARCInfoContent(Configuration conf)
public static final java.lang.String getHostname(Configuration conf) throws java.net.UnknownHostException
Throws: java.net.UnknownHostException
public static final java.lang.String getIPAddress(Configuration conf) throws java.net.UnknownHostException
Throws: java.net.UnknownHostException
public static final byte[] toByteArray(org.archive.format.http.HttpHeaders headers) throws java.io.IOException
Throws: java.io.IOException
public static final java.lang.String getAgentString(java.lang.String name, java.lang.String version, java.lang.String description, java.lang.String URL, java.lang.String email)
public static final org.archive.io.warc.WARCRecordInfo docToMetadata(NutchDocument doc) throws java.io.UnsupportedEncodingException
Throws: java.io.UnsupportedEncodingException
Copyright © 2018 The Apache Software Foundation