public class NaiveBayesParseFilter extends java.lang.Object implements HtmlParseFilter
| Modifier and Type | Field and Description |
|---|---|
| static java.lang.String | DICTFILE_MODELFILTER |
| static java.lang.String | TRAINFILE_MODELFILTER |
Fields inherited from interface HtmlParseFilter: X_POINT_ID
Constructor and Description |
---|
NaiveBayesParseFilter() |
| Modifier and Type | Method and Description |
|---|---|
| boolean | classify(java.lang.String text) |
| boolean | containsWord(java.lang.String url, java.util.ArrayList<java.lang.String> wordlist) |
| ParseResult | filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, org.w3c.dom.DocumentFragment doc) Adds metadata or otherwise modifies a parse of HTML content, given the DOM tree of a page. |
| boolean | filterParse(java.lang.String text) |
| boolean | filterUrl(java.lang.String url) |
| Configuration | getConf() |
| void | setConf(Configuration conf) |
| void | train() |
public static final java.lang.String TRAINFILE_MODELFILTER
public static final java.lang.String DICTFILE_MODELFILTER
public boolean filterParse(java.lang.String text)
public boolean filterUrl(java.lang.String url)
public boolean classify(java.lang.String text) throws java.io.IOException
Throws: java.io.IOException
public void train() throws java.lang.Exception
Throws: java.lang.Exception
public boolean containsWord(java.lang.String url, java.util.ArrayList<java.lang.String> wordlist)
public void setConf(Configuration conf)
Specified by: setConf in interface Configurable
public Configuration getConf()
Specified by: getConf in interface Configurable
public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, org.w3c.dom.DocumentFragment doc)
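Description copied from interface: HtmlParseFilter
Adds metadata or otherwise modifies a parse of HTML content, given the DOM tree of a page.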
Specified by: filter in interface HtmlParseFilter
Copyright © 2018 The Apache Software Foundation