package org.apache.tika.langdetect.opennlp;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.NoSuchElementException;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageDetector;
import opennlp.tools.langdetect.LanguageDetectorModel;
import opennlp.tools.util.normalizer.AggregateCharSequenceNormalizer;
import opennlp.tools.util.normalizer.CharSequenceNormalizer;

/* loaded from: input_file:org/apache/tika/langdetect/opennlp/ProbingLanguageDetector.class */
/**
 * A {@link LanguageDetector} that reads its input chunk by chunk, accumulates the
 * set of character n-grams seen so far, and re-evaluates the model at regular
 * intervals. Detection stops early once {@link #seenEnough} reports that the
 * recent predictions are stable; otherwise it runs until the input (or
 * {@code maxLength} code points of it) is exhausted.
 *
 * <p>Instances hold mutable configuration and no synchronization is performed.
 */
class ProbingLanguageDetector implements LanguageDetector {
    public static final int DEFAULT_CHUNK_SIZE = 300;
    public static final int DEFAULT_MIN_CONSEC_IMPROVEMENTS = 2;
    public static final double DEFAULT_MIN_DIFF = 0.2d;
    public static final int DEFAULT_MAX_LENGTH = 10000;
    private static final String SPACE = " ";

    /** Shortest n-gram (in code points) fed to the model. */
    private static final int MIN_NGRAM = 1;
    /** Longest n-gram (in code points) fed to the model. */
    private static final int MAX_NGRAM = 3;
    /** The model is re-evaluated each time this many n-grams have been counted. */
    private static final int NGRAMS_PER_PROBE = 110;

    private int chunkSize = DEFAULT_CHUNK_SIZE;
    private int minConsecImprovements = DEFAULT_MIN_CONSEC_IMPROVEMENTS;
    private double minDiff = DEFAULT_MIN_DIFF;
    private int maxLength = DEFAULT_MAX_LENGTH;
    private CharSequenceNormalizer normalizer;
    private LanguageDetectorModel model;

    /**
     * Pairs a normalized chunk with the number of code points the chunk occupied
     * in the original input before normalization.
     */
    public static class CSAndLength {
        private final CharSequence normed;
        private final int originalLength;

        public CSAndLength(CharSequence charSequence, int i) {
            this.normed = charSequence;
            this.originalLength = i;
        }
    }

    /**
     * Iterates over the n-grams of a code point buffer. For every start position
     * it yields the n-grams of length {@code minGram} through {@code maxGram}
     * (as far as they fit in the buffer) before moving to the next position.
     *
     * <p>The buffer passed to {@link #reset(int[])} is lower-cased in place,
     * lazily, as the n-gram window advances. Iteration ends once the start
     * position reaches the last index of the buffer, so for buffers longer than
     * one code point no n-gram starting at the final code point is produced.
     */
    public static class CharIntNGrammer implements Iterator<String> {
        private final int minGram;
        private final int maxGram;
        private String next;
        private int pos = 0;
        private int[] buffer;
        private int currGram;

        CharIntNGrammer(int i, int i2) {
            this.minGram = i;
            this.maxGram = i2;
            this.currGram = i;
        }

        @Override
        public boolean hasNext() {
            return this.next != null;
        }

        /**
         * Returns the pending n-gram and prepares the following one.
         *
         * @return the next n-gram
         * @throws NoSuchElementException if the iteration has no more n-grams
         */
        @Override
        public String next() {
            if (this.next == null) {
                throw new NoSuchElementException();
            }
            String current = this.next;
            this.currGram++;
            if (this.currGram > this.maxGram) {
                // All lengths at this position are done: slide the window by one.
                this.currGram = this.minGram;
                this.pos++;
                // The window now covers [pos, pos + maxGram - 1]; only its last
                // code point has not been lower-cased yet.
                int entering = this.pos + this.maxGram - 1;
                if (entering < this.buffer.length) {
                    this.buffer[entering] = Character.toLowerCase(this.buffer[entering]);
                }
            }
            if (this.pos + this.currGram > this.buffer.length) {
                // The n-gram would run past the end of the buffer; move on.
                this.currGram = this.minGram;
                this.pos++;
            }
            if (this.pos >= this.buffer.length - 1) {
                this.next = null;
            } else {
                this.next = new String(this.buffer, this.pos, this.currGram);
            }
            return current;
        }

        /**
         * Restarts iteration over a new buffer.
         *
         * @param iArr code points to iterate over; modified in place (lower-cased)
         */
        void reset(int[] iArr) {
            this.next = null;
            this.pos = 0;
            this.currGram = this.minGram;
            this.buffer = iArr;
            if (iArr.length < this.minGram) {
                return;
            }
            // Lower-case the first window up front; next() handles the rest.
            int firstWindow = Math.min(iArr.length, this.maxGram);
            for (int idx = 0; idx < firstWindow; idx++) {
                iArr[idx] = Character.toLowerCase(iArr[idx]);
            }
            this.next = new String(iArr, 0, this.minGram);
        }
    }

    /** A minimal mutable counter used as a map value. */
    public static class MutableInt {
        private int i;

        MutableInt() {
            this(0);
        }

        MutableInt(int i) {
            this.i = i;
        }

        void increment() {
            this.i++;
        }
    }

    /**
     * Creates a detector backed by the given model.
     *
     * @param languageDetectorModel model used to score n-grams
     * @param charSequenceNormalizerArr normalizers, wrapped in an
     *        {@link AggregateCharSequenceNormalizer} and applied to each chunk
     */
    public ProbingLanguageDetector(LanguageDetectorModel languageDetectorModel, CharSequenceNormalizer... charSequenceNormalizerArr) {
        this.model = languageDetectorModel;
        this.normalizer = new AggregateCharSequenceNormalizer(charSequenceNormalizerArr);
    }

    /**
     * Returns the highest-confidence entry of {@link #predictLanguages(CharSequence)}.
     */
    @Override
    public Language predictLanguage(CharSequence charSequence) {
        return predictLanguages(charSequence)[0];
    }

    /**
     * Predicts languages for the given text, sorted by descending confidence.
     *
     * <p>The text is consumed in chunks of at most {@code chunkSize} code points,
     * up to {@code maxLength} code points in total. After every
     * {@value #NGRAMS_PER_PROBE} counted n-grams the model is evaluated and the
     * result is returned immediately if {@link #seenEnough} accepts it. When the
     * input runs out, the most recent evaluation is returned; if none has been
     * made yet, the model is evaluated once on whatever was collected.
     *
     * @param charSequence the text to analyze
     * @return predictions ordered from most to least confident
     */
    @Override
    public Language[] predictLanguages(CharSequence charSequence) {
        LinkedList<Language[]> recentPredictions = new LinkedList<>();
        Map<String, MutableInt> ngramCounts = new HashMap<>();
        CharIntNGrammer ngrammer = new CharIntNGrammer(MIN_NGRAM, MAX_NGRAM);
        Language[] latest = null;
        int start = 0; // offset of the next chunk, in code points
        int countedNGrams = 0;
        while (true) {
            // Never read past maxLength; written as a subtraction so that a very
            // large chunkSize cannot overflow.
            int length = Math.min(this.chunkSize, this.maxLength - start);
            CSAndLength chunk = chunk(charSequence, start, length);
            if (chunk.originalLength == 0) {
                return latest == null ? predict(ngramCounts) : latest;
            }
            start += chunk.originalLength;
            ngrammer.reset(chunk.normed.codePoints().toArray());
            while (ngrammer.hasNext()) {
                String ngram = ngrammer.next();
                if (SPACE.equals(ngram)) {
                    continue;
                }
                MutableInt count = ngramCounts.get(ngram);
                if (count == null) {
                    ngramCounts.put(ngram, new MutableInt(1));
                } else {
                    count.increment();
                }
                countedNGrams++;
                if (countedNGrams % NGRAMS_PER_PROBE == 0) {
                    latest = predict(ngramCounts);
                    if (seenEnough(recentPredictions, latest, ngramCounts)) {
                        return latest;
                    }
                }
            }
        }
    }

    /**
     * Evaluates the model on the distinct n-grams collected so far. Every n-gram
     * is passed with a weight of {@code 1.0}; the accumulated counts are not used.
     *
     * @param map n-grams seen so far
     * @return one {@link Language} per model outcome, sorted by descending confidence
     */
    private Language[] predict(Map<String, MutableInt> map) {
        String[] ngrams = new String[map.size()];
        float[] weights = new float[map.size()];
        int idx = 0;
        for (String ngram : map.keySet()) {
            ngrams[idx] = ngram;
            weights[idx] = 1.0f;
            idx++;
        }
        double[] probabilities = this.model.getMaxentModel().eval(ngrams, weights);
        Language[] predictions = new Language[probabilities.length];
        for (int outcome = 0; outcome < probabilities.length; outcome++) {
            predictions[outcome] = new Language(this.model.getMaxentModel().getOutcome(outcome), probabilities[outcome]);
        }
        Arrays.sort(predictions, (a, b) -> Double.compare(b.getConfidence(), a.getConfidence()));
        return predictions;
    }

    public int getChunkSize() {
        return this.chunkSize;
    }

    /**
     * Sets the number of code points read per chunk.
     *
     * @throws IllegalArgumentException if {@code i} is negative
     */
    public void setChunkSize(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("chunkSize must be >= 0");
        }
        this.chunkSize = i;
    }

    public int getMinConsecImprovements() {
        return this.minConsecImprovements;
    }

    /**
     * Sets how many earlier predictions must be on record before
     * {@link #seenEnough} starts comparing them.
     *
     * @throws IllegalArgumentException if {@code i} is negative
     */
    public void setMinConsecImprovements(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("minConsecImprovements must be >= 0");
        }
        this.minConsecImprovements = i;
    }

    public double getMinDiff() {
        return this.minDiff;
    }

    /**
     * Sets the minimum confidence gap between the best and second-best language
     * required to stop early. A value of {@code 0.0} disables the check.
     *
     * @throws IllegalArgumentException if {@code d} is negative
     */
    public void setMinDiff(double d) {
        if (d < 0.0d) {
            throw new IllegalArgumentException("minDiff must be >= 0.0");
        }
        this.minDiff = d;
    }

    public int getMaxLength() {
        return this.maxLength;
    }

    /**
     * Sets the maximum number of code points of input that will be examined.
     *
     * @throws IllegalArgumentException if {@code i} is negative
     */
    public void setMaxLength(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("maxLength must be >= 0");
        }
        this.maxLength = i;
    }

    /** Replaces the normalizer applied to each chunk before n-gram extraction. */
    public void setNormalizer(CharSequenceNormalizer charSequenceNormalizer) {
        this.normalizer = charSequenceNormalizer;
    }

    /** Returns the outcome labels of the underlying model, in model order. */
    @Override
    public String[] getSupportedLanguages() {
        int numOutcomes = this.model.getMaxentModel().getNumOutcomes();
        String[] languages = new String[numOutcomes];
        for (int outcome = 0; outcome < numOutcomes; outcome++) {
            languages[outcome] = this.model.getMaxentModel().getOutcome(outcome);
        }
        return languages;
    }

    /**
     * Records the newest predictions and decides whether detection may stop.
     *
     * <p>Returns {@code false} until {@code minConsecImprovements} earlier
     * predictions are on record. After that the list is kept at
     * {@code minConsecImprovements + 1} entries and the method returns
     * {@code true} only if (a) the gap between the best and second-best
     * confidence of the newest predictions is at least {@code minDiff} (skipped
     * when {@code minDiff} is {@code 0.0} or there is no second-best), and
     * (b) every recorded prediction has the same top language with a confidence
     * that never decreases from one entry to the next.
     *
     * @param linkedList recent predictions, oldest first; modified by this call
     * @param languageArr newest predictions, sorted by descending confidence
     * @param map n-gram counts collected so far (currently not consulted)
     * @return {@code true} if the caller can return {@code languageArr}
     */
    boolean seenEnough(LinkedList<Language[]> linkedList, Language[] languageArr, Map<String, MutableInt> map) {
        if (linkedList.size() < this.minConsecImprovements) {
            linkedList.add(languageArr);
            return false;
        }
        if (linkedList.size() > this.minConsecImprovements) {
            linkedList.removeFirst();
        }
        linkedList.add(languageArr);
        // A model with a single outcome has no runner-up to compare against.
        if (this.minDiff > 0.0d && languageArr.length > 1
                && languageArr[0].getConfidence() - languageArr[1].getConfidence() < this.minDiff) {
            return false;
        }
        String topLang = null;
        double topConfidence = -1.0d;
        for (Language[] predictions : linkedList) {
            Language best = predictions[0];
            if (topLang != null && (!topLang.equals(best.getLang()) || topConfidence > best.getConfidence())) {
                return false;
            }
            topLang = best.getLang();
            topConfidence = best.getConfidence();
        }
        return true;
    }

    /**
     * Extracts and normalizes one chunk of the input.
     *
     * @param charSequence the full input
     * @param i offset of the chunk, in code points
     * @param i2 maximum chunk length, in code points
     * @return the normalized chunk together with the number of code points it
     *         covered in the original input ({@code 0} when the input is exhausted)
     */
    private CSAndLength chunk(CharSequence charSequence, int i, int i2) {
        if (i == 0 && i2 > charSequence.length()) {
            // The whole input fits in the first chunk; no need to copy it.
            int codePointCount = Character.codePointCount(charSequence, 0, charSequence.length());
            return new CSAndLength(this.normalizer.normalize(charSequence), codePointCount);
        }
        int[] codePoints = charSequence.codePoints().skip(i).limit(i2).toArray();
        return new CSAndLength(this.normalizer.normalize(new String(codePoints, 0, codePoints.length)), codePoints.length);
    }
}
