public interface CommonCrawlFormat
extends java.io.Closeable
Modifier and Type | Method and Description |
---|---|
void |
close()
Optional method that could be implemented if the actual format needs some
close procedure.
|
java.util.List<java.lang.String> |
getInLinks()
Gets the list of inlinks of this document.
|
java.lang.String |
getJsonData() |
java.lang.String |
getJsonData(java.lang.String url,
Content content,
Metadata metadata)
Returns a string representation of the JSON structure of the URL content
|
java.lang.String |
getJsonData(java.lang.String url,
Content content,
Metadata metadata,
ParseData parseData)
Returns a string representation of the JSON structure of the URL content,
taking into account the parsed metadata about the URL.
|
void |
setInLinks(java.util.List<java.lang.String> inLinks)
Sets the inlinks of this document.
|
java.lang.String getJsonData() throws java.io.IOException
Throws: java.io.IOException
java.lang.String getJsonData(java.lang.String url, Content content, Metadata metadata) throws java.io.IOException
url -
content -
metadata -
Throws: java.io.IOException
java.lang.String getJsonData(java.lang.String url, Content content, Metadata metadata, ParseData parseData) throws java.io.IOException
url -
content -
metadata -
Throws: java.io.IOException
void setInLinks(java.util.List<java.lang.String> inLinks)
inLinks - list of inlinks
java.util.List<java.lang.String> getInLinks()
void close()
close
in interface java.lang.AutoCloseable
close
in interface java.io.Closeable
Copyright © 2018 The Apache Software Foundation