package opennlp.tools.formats.ad;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.lang.Factory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

/* loaded from: input_file:resources/install/10/tika-bundle-1.10.jar:opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceSampleStream.class */
public class ADSentenceSampleStream implements ObjectStream<SentenceSample> {
    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    private int text;
    private int para;
    private boolean isSameText;
    private boolean isSamePara;
    private ADSentenceStream.Sentence sent;
    private boolean isIncludeTitles;
    private boolean isTitle;
    private final char[] ptEosCharacters;
    private Pattern meta1;

    public ADSentenceSampleStream(ObjectStream<String> objectStream, boolean z) {
        this.text = -1;
        this.para = -1;
        this.isIncludeTitles = true;
        this.meta1 = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
        this.adSentenceStream = new ADSentenceStream(objectStream);
        this.ptEosCharacters = Factory.ptEosCharacters;
        Arrays.sort(this.ptEosCharacters);
        this.isIncludeTitles = z;
    }

    public ADSentenceSampleStream(FileInputStream fileInputStream, String str, boolean z) {
        this.text = -1;
        this.para = -1;
        this.isIncludeTitles = true;
        this.meta1 = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(fileInputStream, str));
            this.ptEosCharacters = Factory.ptEosCharacters;
            Arrays.sort(this.ptEosCharacters);
            this.isIncludeTitles = z;
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /* JADX WARN: Can't rename method to resolve collision */
    @Override // opennlp.tools.util.ObjectStream
    public SentenceSample read() throws IOException {
        if (this.sent == null) {
            this.sent = this.adSentenceStream.read();
            updateMeta();
            if (this.sent == null) {
                return null;
            }
        }
        StringBuilder sb = new StringBuilder();
        ArrayList arrayList = new ArrayList();
        while (true) {
            if ((!this.isTitle || (this.isTitle && this.isIncludeTitles)) && hasPunctuation(this.sent.getText())) {
                int length = sb.length();
                sb.append(this.sent.getText());
                arrayList.add(new Span(length, sb.length()));
                sb.append(" ");
            }
            this.sent = this.adSentenceStream.read();
            updateMeta();
            if (!this.isSamePara && !this.isSameText) {
                break;
            }
        }
        return new SentenceSample(sb.length() > 0 ? sb.substring(0, sb.length() - 1) : sb.toString(), (Span[]) arrayList.toArray(new Span[arrayList.size()]));
    }

    private boolean hasPunctuation(String str) {
        String trim = str.trim();
        if (trim.length() > 0) {
            return Arrays.binarySearch(this.ptEosCharacters, trim.charAt(trim.length() - 1)) >= 0;
        }
        return false;
    }

    private void updateMeta() {
        if (this.sent == null) {
            this.isSameText = false;
            this.isSamePara = false;
            return;
        }
        String metadata = this.sent.getMetadata();
        Matcher matcher = this.meta1.matcher(metadata);
        if (!matcher.matches()) {
            throw new RuntimeException("Invalid metadata: " + metadata);
        }
        int parseInt = Integer.parseInt(matcher.group(1));
        int parseInt2 = Integer.parseInt(matcher.group(2));
        this.isSameText = false;
        this.isSamePara = false;
        if (parseInt == this.text) {
            this.isSameText = true;
        }
        if (this.isSameText && parseInt2 == this.para) {
            this.isSamePara = true;
        }
        this.isTitle = metadata.contains("title");
        this.text = parseInt;
        this.para = parseInt2;
    }

    @Override // opennlp.tools.util.ObjectStream
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
    }

    @Override // opennlp.tools.util.ObjectStream
    public void close() throws IOException {
        this.adSentenceStream.close();
    }
}
