package opennlp.tools.tokenize;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import opennlp.tools.tokenize.Detokenizer;
import opennlp.tools.util.Span;
import org.antlr.v4.runtime.tree.xpath.XPath;

/* loaded from: input_file:resources/install/20/tika-bundle-1.21.jar:opennlp-tools-1.9.1.jar:opennlp/tools/tokenize/TokenSample.class */
/**
 * A piece of text together with the character spans of the tokens it contains.
 *
 * <p>The text and the list of spans are assigned once at construction time; the span list is
 * wrapped as unmodifiable and {@link #getTokenSpans()} returns a fresh array on every call.
 */
public class TokenSample implements Serializable {
    /** Default marker placed between two tokens that are not separated by whitespace. */
    public static final String DEFAULT_SEPARATOR_CHARS = "<SPLIT>";

    /** Separator emitted by {@link #toString()} between directly adjacent tokens. */
    private static final String separatorChars = DEFAULT_SEPARATOR_CHARS;

    private final String text;
    private final List<Span> tokenSpans;

    /**
     * Creates a sample from a text and the spans of its tokens.
     *
     * @param str the text the spans refer to
     * @param spanArr the token spans; the array is copied, so later changes to it are not seen
     * @throws NullPointerException if {@code str}, {@code spanArr} or any element of
     *     {@code spanArr} is {@code null}
     * @throws IllegalArgumentException if a span starts or ends outside {@code [0, str.length()]}
     */
    public TokenSample(String str, Span[] spanArr) {
        Objects.requireNonNull(spanArr, "tokenSpans must not be null");
        this.text = Objects.requireNonNull(str, "text must not be null");

        final int textLength = str.length();
        for (Span span : spanArr) {
            Objects.requireNonNull(span, "tokenSpans must not contain null elements");
            boolean startOutside = span.getStart() < 0 || span.getStart() > textLength;
            boolean endOutside = span.getEnd() < 0 || span.getEnd() > textLength;
            if (startOutside || endOutside) {
                // Plain "!" literal: the decompiled source pulled this character from an
                // unrelated ANTLR constant (XPath.NOT), which has the same value.
                throw new IllegalArgumentException(
                        "Span " + span + " is out of bounds, text length: " + textLength + "!");
            }
        }
        this.tokenSpans = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(spanArr)));
    }

    /**
     * Creates a sample by joining tokens according to the operations a detokenizer reports.
     *
     * <p>A single space is inserted before a token unless it is the first one, the previous
     * operation merges to the right, or the token's own operation merges to the left.
     *
     * @param detokenizer supplies one operation per token
     * @param strArr the tokens to join
     * @throws NullPointerException if {@code detokenizer} or {@code strArr} is {@code null}
     */
    public TokenSample(Detokenizer detokenizer, String[] strArr) {
        Objects.requireNonNull(detokenizer, "detokenizer must not be null");
        Objects.requireNonNull(strArr, "tokens must not be null");

        Detokenizer.DetokenizationOperation[] operations = detokenizer.detokenize(strArr);
        StringBuilder sentence = new StringBuilder();
        List<Span> spans = new ArrayList<>(operations.length);

        // NOTE(review): assumes the detokenizer returns exactly one operation per token;
        // the loop is driven by the operations array, as in the original code.
        for (int i = 0; i < operations.length; i++) {
            boolean separatedFromPrevious = i > 0
                    && !isMergeToRight(operations[i - 1])
                    && !isMergeToLeft(operations[i]);
            if (separatedFromPrevious) {
                sentence.append(' ');
            }
            int begin = sentence.length();
            sentence.append(strArr[i]);
            spans.add(new Span(begin, sentence.length()));
        }

        this.text = sentence.toString();
        this.tokenSpans = Collections.unmodifiableList(spans);
    }

    /** Returns whether the operation attaches its token to the token on the right. */
    private static boolean isMergeToRight(Detokenizer.DetokenizationOperation operation) {
        return Detokenizer.DetokenizationOperation.MERGE_TO_RIGHT.equals(operation)
                || Detokenizer.DetokenizationOperation.MERGE_BOTH.equals(operation);
    }

    /** Returns whether the operation attaches its token to the token on the left. */
    private static boolean isMergeToLeft(Detokenizer.DetokenizationOperation operation) {
        return Detokenizer.DetokenizationOperation.MERGE_TO_LEFT.equals(operation)
                || Detokenizer.DetokenizationOperation.MERGE_BOTH.equals(operation);
    }

    /**
     * Returns the text of this sample.
     *
     * @return the text the token spans refer to
     */
    public String getText() {
        return this.text;
    }

    /**
     * Returns the token spans of this sample.
     *
     * @return a newly allocated array holding the spans in order
     */
    public Span[] getTokenSpans() {
        return this.tokenSpans.toArray(new Span[0]);
    }

    /**
     * Renders the tokens in order. Two tokens are joined by {@value #DEFAULT_SEPARATOR_CHARS}
     * when the second starts exactly where the first ends, and by a single space otherwise.
     */
    @Override
    public String toString() {
        StringBuilder rendered = new StringBuilder();
        int previousEnd = -1; // -1 marks "no token written yet"
        for (Span span : this.tokenSpans) {
            if (previousEnd != -1) {
                rendered.append(previousEnd == span.getStart() ? separatorChars : " ");
            }
            rendered.append(span.getCoveredText(this.text));
            previousEnd = span.getEnd();
        }
        return rendered.toString();
    }

    /**
     * Appends a token to the text being built and records its span.
     *
     * @param sample the text built so far
     * @param spans receives the span of the appended token
     * @param token the token text to append
     * @param isNextMerged when {@code false}, a single space is appended after the token
     */
    private static void addToken(StringBuilder sample, List<Span> spans, String token,
            boolean isNextMerged) {
        int begin = sample.length();
        sample.append(token);
        spans.add(new Span(begin, sample.length()));
        if (!isNextMerged) {
            sample.append(" ");
        }
    }

    /**
     * Parses a sample from a string in which tokens are separated either by whitespace or by
     * the given separator.
     *
     * <p>The input is first split with {@code WhitespaceTokenizer}; every resulting chunk is then
     * split at each occurrence of the separator. Separators are removed from the resulting text.
     * A single space is appended after the last piece of every chunk, so the resulting text ends
     * with a space whenever the input contains at least one chunk.
     *
     * @param str the annotated sample string
     * @param str2 the separator marking a token boundary without whitespace
     * @return the parsed sample
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalArgumentException if {@code str2} is empty
     */
    public static TokenSample parse(String str, String str2) {
        Objects.requireNonNull(str, "sampleString must not be null");
        Objects.requireNonNull(str2, "separatorChars must not be null");
        if (str2.isEmpty()) {
            // indexOf("", from) always matches at 'from', so the split loop below would never
            // advance and would keep adding empty tokens until memory is exhausted.
            throw new IllegalArgumentException("separatorChars must not be empty");
        }
        final String separator = str2;

        Span[] whitespaceSpans = WhitespaceTokenizer.INSTANCE.tokenizePos(str);
        // Reserve 20% extra room for the tokens created by splitting at separators.
        List<Span> spans = new ArrayList<>((int) (whitespaceSpans.length * 1.2d));
        StringBuilder plainText = new StringBuilder();

        for (Span whitespaceSpan : whitespaceSpans) {
            String chunk = whitespaceSpan.getCoveredText(str).toString();
            int from = 0;
            int at;
            while ((at = chunk.indexOf(separator, from)) > -1) {
                addToken(plainText, spans, chunk.substring(from, at), true);
                from = at + separator.length();
            }
            // Remainder after the last separator, or the whole chunk when none was found.
            addToken(plainText, spans, chunk.substring(from), false);
        }
        return new TokenSample(plainText.toString(), spans.toArray(new Span[0]));
    }

    /** Computes a hash code from the text and the token spans. */
    @Override
    public int hashCode() {
        return Objects.hash(getText(), Arrays.hashCode(getTokenSpans()));
    }

    /**
     * Compares this sample with another object.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code TokenSample} with equal text and equal
     *     token spans in the same order
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TokenSample)) {
            return false;
        }
        TokenSample other = (TokenSample) obj;
        return getText().equals(other.getText())
                && Arrays.equals(getTokenSpans(), other.getTokenSpans());
    }
}
