@MetaInfServices(value=org.apache.any23.extractor.Extractor.class) public class HTMLScraperExtractor extends Object implements Extractor.ContentExtractor
See Also: HTMLScraperPlugin
Nested classes/interfaces inherited from interface Extractor: Extractor.BlindExtractor, Extractor.ContentExtractor, Extractor.TagSoupDOMExtractor
Modifier and Type | Field and Description |
---|---|
static org.openrdf.model.URI | PAGE_CONTENT_AE_PROPERTY |
static org.openrdf.model.URI | PAGE_CONTENT_CE_PROPERTY |
static org.openrdf.model.URI | PAGE_CONTENT_DE_PROPERTY |
static org.openrdf.model.URI | PAGE_CONTENT_LCE_PROPERTY |
Constructor and Description |
---|
HTMLScraperExtractor() |
Modifier and Type | Method and Description |
---|---|
void | addTextExtractor(String name, org.openrdf.model.URI property, de.l3s.boilerpipe.BoilerpipeExtractor extractor) |
ExtractorDescription | getDescription() |
String[] | getTextExtractors() |
void | run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) |
void | setStopAtFirstError(boolean b) |
public static final org.openrdf.model.URI PAGE_CONTENT_DE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_AE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_LCE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_CE_PROPERTY
public void addTextExtractor(String name, org.openrdf.model.URI property, de.l3s.boilerpipe.BoilerpipeExtractor extractor)
public String[] getTextExtractors()
public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) throws IOException, ExtractionException
Specified by: run in interface Extractor<InputStream>
Throws: IOException, ExtractionException
public ExtractorDescription getDescription()
Specified by: getDescription in interface Extractor<InputStream>
public void setStopAtFirstError(boolean b)
Specified by: setStopAtFirstError in interface Extractor.ContentExtractor
Copyright © 2010-2014 The Apache Software Foundation. All Rights Reserved.