package org.apache.uima.tools.components;

import java.io.InputStream;
import java.net.URL;
import java.util.Iterator;
import javax.xml.parsers.SAXParserFactory;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.internal.util.XMLUtils;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:uimaj-tools-3.0.1.jar:org/apache/uima/tools/components/XmlDetagger.class */
/**
 * CAS annotator that strips the markup from an XML document and stores the
 * remaining character data as plain text in a new CAS view.
 *
 * <p>The XML is read from the {@code "xmlDocument"} view and the detagged text is
 * written to a newly created {@code "plainTextDocument"} view. If the
 * {@link #PARAM_TEXT_TAG} configuration parameter is set, only character data that
 * occurs inside elements with that name is kept; otherwise all character data of
 * the document is kept.
 */
public class XmlDetagger extends CasAnnotator_ImplBase {
    /**
     * Name of the configuration parameter holding the XML element name whose content
     * is extracted. When the parameter has no value, all character data is extracted.
     */
    public static final String PARAM_TEXT_TAG = "XmlTagContainingText";

    /** Name of the input view that holds the XML document. */
    private static final String XML_VIEW_NAME = "xmlDocument";

    /** Name of the output view that receives the detagged text. */
    private static final String PLAIN_TEXT_VIEW_NAME = "plainTextDocument";

    /** Name of the view whose document language is copied to the output view. */
    private static final String INITIAL_VIEW_NAME = "_InitialView";

    /** The SourceDocumentInformation type, or {@code null} if the type system does not define it. */
    private Type sourceDocInfoType;

    private final SAXParserFactory parserFactory = XMLUtils.createSAXParserFactory();

    /** Value of {@link #PARAM_TEXT_TAG}; {@code null} means "extract everything". */
    private String mXmlTagContainingText = null;

    /* loaded from: input_file:uimaj-tools-3.0.1.jar:org/apache/uima/tools/components/XmlDetagger$DetagHandler.class */
    /**
     * SAX handler that accumulates the character data to keep.
     *
     * <p>Element names are compared case-insensitively against the configured tag
     * name using the qualified name reported by the parser.
     */
    class DetagHandler extends DefaultHandler {
        // A SAX handler instance is driven by a single parse call, so the
        // unsynchronized StringBuilder is sufficient here.
        private final StringBuilder detaggedText = new StringBuilder();

        /** True when no tag name is configured, i.e. all character data is kept. */
        private final boolean captureEverything;

        /**
         * Number of currently open elements whose name matches the configured tag.
         * A counter (rather than a flag) keeps capturing active after a nested
         * element with the same name has been closed.
         */
        private int textTagDepth;

        public DetagHandler() {
            this.captureEverything = XmlDetagger.this.mXmlTagContainingText == null;
        }

        /** Returns whether character data at the current parse position must be kept. */
        private boolean isCapturing() {
            return this.captureEverything || this.textTagDepth > 0;
        }

        /** Returns whether the given qualified element name is the configured text tag. */
        private boolean isTextTag(String qName) {
            // equalsIgnoreCase(null) is false, so nothing matches when no tag is configured.
            return qName.equalsIgnoreCase(XmlDetagger.this.mXmlTagContainingText);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (isTextTag(qName)) {
                this.textTagDepth++;
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (isTextTag(qName) && this.textTagDepth > 0) {
                this.textTagDepth--;
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (isCapturing()) {
                this.detaggedText.append(ch, start, length);
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (isCapturing()) {
                this.detaggedText.append(ch, start, length);
            }
        }

        /** Returns the text collected so far. */
        String getDetaggedText() {
            return this.detaggedText.toString();
        }
    }

    /**
     * Initializes the annotator and reads the {@link #PARAM_TEXT_TAG} configuration parameter.
     *
     * @param uimaContext the context supplying the configuration parameters
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.mXmlTagContainingText = (String) getContext().getConfigParameterValue(PARAM_TEXT_TAG);
    }

    /**
     * Looks up the SourceDocumentInformation type in the given type system.
     *
     * @param typeSystem the type system to look the type up in
     */
    @Override
    public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
        this.sourceDocInfoType = typeSystem.getType(SourceDocumentInformation._TypeName);
    }

    /**
     * Parses the XML held in the {@code "xmlDocument"} view and creates the
     * {@code "plainTextDocument"} view containing the detagged text.
     *
     * <p>The new view gets the document language of the {@code "_InitialView"} view.
     * If the XML view indexes an annotation of the SourceDocumentInformation type, the
     * first such annotation is also added to the index repository of the new view.
     *
     * @param cas the CAS containing the XML view
     * @throws AnalysisEngineProcessException if parsing the XML or populating the new view fails
     */
    @Override
    public void process(CAS cas) throws AnalysisEngineProcessException {
        CAS xmlView = cas.getView(XML_VIEW_NAME);
        InputStream sofaDataStream = xmlView.getSofa().getSofaDataStream();
        DetagHandler handler = new DetagHandler();
        // Close the stream ourselves instead of relying on the parser implementation to do it.
        try (InputStream xmlStream = sofaDataStream) {
            this.parserFactory.newSAXParser().parse(xmlStream, handler);

            CAS plainTextView = cas.createView(PLAIN_TEXT_VIEW_NAME);
            plainTextView.setDocumentText(handler.getDetaggedText());
            plainTextView.setDocumentLanguage(cas.getView(INITIAL_VIEW_NAME).getDocumentLanguage());

            // The type lookup yields null when the type system does not define
            // SourceDocumentInformation; in that case there is nothing to copy.
            if (this.sourceDocInfoType != null) {
                Iterator<?> sourceDocInfos = xmlView.getAnnotationIndex(this.sourceDocInfoType).iterator();
                if (sourceDocInfos.hasNext()) {
                    plainTextView.getIndexRepository().addFS((FeatureStructure) sourceDocInfos.next());
                }
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Parses the {@code XmlDetagger.xml} descriptor located next to this class.
     *
     * @return the analysis engine description parsed from the descriptor resource
     * @throws InvalidXMLException if the descriptor cannot be parsed
     */
    public static AnalysisEngineDescription getDescription() throws InvalidXMLException {
        InputStream descriptorStream = XmlDetagger.class.getResourceAsStream("XmlDetagger.xml");
        XMLInputSource descriptorSource = new XMLInputSource(descriptorStream, null);
        return UIMAFramework.getXMLParser().parseAnalysisEngineDescription(descriptorSource);
    }

    /**
     * Returns the location of the {@code XmlDetagger.xml} descriptor resource.
     *
     * @return the URL of the descriptor, as resolved relative to this class
     */
    public static URL getDescriptorURL() {
        return XmlDetagger.class.getResource("XmlDetagger.xml");
    }
}
