package org.apache.tika.eval.app;

import com.ibm.icu.text.PluralRules;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.logging.log4j.core.pattern.NotANumber;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.tika.batch.FileResource;
import org.apache.tika.batch.FileResourceConsumer;
import org.apache.tika.batch.fs.FSProperties;
import org.apache.tika.eval.app.db.ColInfo;
import org.apache.tika.eval.app.db.Cols;
import org.apache.tika.eval.app.db.TableInfo;
import org.apache.tika.eval.app.io.ExtractReaderException;
import org.apache.tika.eval.app.io.IDBWriter;
import org.apache.tika.eval.core.langid.LanguageIDWrapper;
import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
import org.apache.tika.eval.core.textstats.CommonTokens;
import org.apache.tika.eval.core.textstats.CompositeTextStatsCalculator;
import org.apache.tika.eval.core.textstats.ContentLengthCalculator;
import org.apache.tika.eval.core.textstats.TokenEntropy;
import org.apache.tika.eval.core.textstats.TokenLengths;
import org.apache.tika.eval.core.textstats.TopNTokens;
import org.apache.tika.eval.core.textstats.UnicodeBlockCounter;
import org.apache.tika.eval.core.tokens.AnalyzerManager;
import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
import org.apache.tika.eval.core.tokens.CommonTokenResult;
import org.apache.tika.eval.core.tokens.TokenCounts;
import org.apache.tika.eval.core.tokens.TokenIntPair;
import org.apache.tika.eval.core.util.ContentTagParser;
import org.apache.tika.eval.core.util.ContentTags;
import org.apache.tika.eval.core.util.EvalExceptionUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.detect.LanguageResult;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.ToXMLContentHandler;
import org.h2.api.ErrorCode;
import org.h2.engine.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/eval/app/AbstractProfiler.class */
/**
 * Shared base for the tika-eval profilers: a {@link FileResourceConsumer} that derives
 * per-file rows (profile data, parse exceptions, content statistics, tag counts) from
 * Tika {@link Metadata} and hands them to an {@link IDBWriter}.
 *
 * <p>Instances hold mutable configuration (content/token limits) and a cached
 * extract-extension hint; nothing in this class synchronizes access to them.
 */
public abstract class AbstractProfiler extends FileResourceConsumer {
    /** Sentinel returned by the length helpers when a length cannot be determined. */
    static final long NON_EXISTENT_FILE_LENGTH = -1;
    static final int FILE_PATH_MAX_LEN = 1024;
    private static final String ZERO = "0";
    private static final String UNKNOWN_EXTENSION = "unk";
    /** Metadata key read for the {@link Cols#MD5} column. */
    private static final String DIGEST_KEY = "X-TIKA:digest:MD5";

    // Defaults applied by the constructor; each can be overridden through its setter.
    private static final int DEFAULT_MAX_CONTENT_LENGTH = 10000000;
    private static final int DEFAULT_MAX_CONTENT_LENGTH_FOR_LANG_ID = 50000;
    private static final int DEFAULT_MAX_TOKENS = 200000;

    /** Number of top tokens requested from {@link TopNTokens}. */
    private static final int TOP_N_TOKENS = 10;
    /** Maximum number of unicode blocks reported in {@link Cols#UNICODE_CHAR_BLOCKS}. */
    private static final int MAX_UNICODE_BLOCKS = 20;
    /** Separator between the segments of an embedded resource path. */
    private static final String PATH_SEPARATOR = "/";
    /** Separator between a key and its count in the serialized top-N / unicode-block columns. */
    private static final String KEY_VALUE_SEPARATOR = ": ";
    /** Separator between entries in the serialized top-N / unicode-block columns. */
    private static final String ENTRY_SEPARATOR = " | ";

    /** Set by {@link #loadCommonTokens(Path, String)}; read when a profiler instance is constructed. */
    private static CommonTokenCountManager COMMON_TOKEN_COUNT_MANAGER;
    protected IDBWriter writer;
    AnalyzerManager analyzerManager;
    int maxContentLength;
    int maxContentLengthForLangId;
    int maxTokens;
    CompositeTextStatsCalculator compositeTextStatsCalculator;
    /** Extension (extract + compression suffix) that matched on the previous {@link #findFile} hit. */
    private String lastExtractExtension;
    /** Fallback exception key consulted when {@link TikaCoreProperties#CONTAINER_EXCEPTION} is absent. */
    private static final Property CONTAINER_EXCEPTION_1X = Property.externalText("X-TIKA:EXCEPTION:runtime");
    public static final String TRUE = Boolean.toString(true);
    public static final String FALSE = Boolean.toString(false);
    protected static final AtomicInteger ID = new AtomicInteger();
    private static final Logger LOG = LoggerFactory.getLogger(AbstractProfiler.class);
    // findFile() probes every extract extension combined with every compression extension, in this order.
    private static final String[] EXTRACT_EXTENSIONS = {".json", ".txt", ""};
    private static final String[] COMPRESSION_EXTENSIONS = {"", ".bz2", ".gzip", ".zip"};
    private static final Map<String, Cols> UC_TAGS_OF_INTEREST = initTags();
    private static final Pattern ACCESS_PERMISSION_EXCEPTION = Pattern.compile("org\\.apache\\.tika\\.exception\\.AccessPermissionException");
    // Every dot is escaped so that only the literal class name matches.
    private static final Pattern ENCRYPTION_EXCEPTION = Pattern.compile("org\\.apache\\.tika\\.exception\\.EncryptedDocumentException");
    // NOTE(review): 4 and 12 equal java.sql.Types.INTEGER and java.sql.Types.VARCHAR; presumably
    // ColInfo interprets them as JDBC type codes - confirm against ColInfo.
    public static TableInfo REF_EXTRACT_EXCEPTION_TYPES = new TableInfo("ref_extract_exception_types", new ColInfo(Cols.EXTRACT_EXCEPTION_ID, 4), new ColInfo(Cols.EXTRACT_EXCEPTION_DESCRIPTION, 12, (Integer) 128));
    public static TableInfo REF_PARSE_ERROR_TYPES = new TableInfo("ref_parse_error_types", new ColInfo(Cols.PARSE_ERROR_ID, 4), new ColInfo(Cols.PARSE_ERROR_DESCRIPTION, 12, (Integer) 128));
    public static TableInfo REF_PARSE_EXCEPTION_TYPES = new TableInfo("ref_parse_exception_types", new ColInfo(Cols.PARSE_EXCEPTION_ID, 4), new ColInfo(Cols.PARSE_EXCEPTION_DESCRIPTION, 12, (Integer) 128));
    public static TableInfo MIME_TABLE = new TableInfo("mimes", new ColInfo(Cols.MIME_ID, 4, "PRIMARY KEY"), new ColInfo(Cols.MIME_STRING, 12, (Integer) 256), new ColInfo(Cols.FILE_EXTENSION, 12, (Integer) 12));
    // NOTE(review): this strips ".gz" while COMPRESSION_EXTENSIONS probes ".gzip" - confirm which is intended.
    private static final Pattern FILE_NAME_CLEANER = Pattern.compile("\\.(json|txt)(\\.(bz2|gz|zip))?$");
    private static final LanguageIDWrapper LANG_ID = new LanguageIDWrapper();

    /**
     * Categories of parse exception. {@link #getExceptionStrings} writes {@code ordinal()} of these
     * constants into {@link Cols#PARSE_EXCEPTION_ID}, so the declaration order must not change.
     */
    /* loaded from: input_file:org/apache/tika/eval/app/AbstractProfiler$EXCEPTION_TYPE.class */
    public enum EXCEPTION_TYPE {
        RUNTIME,
        ENCRYPTION,
        ACCESS_PERMISSION,
        UNSUPPORTED_VERSION
    }

    /**
     * Categories of parse error. Not referenced inside this class.
     * NOTE(review): presumably the ids of {@code ref_parse_error_types}; keep the order until confirmed.
     */
    /* loaded from: input_file:org/apache/tika/eval/app/AbstractProfiler$PARSE_ERROR_TYPE.class */
    public enum PARSE_ERROR_TYPE {
        OOM,
        TIMEOUT
    }

    /**
     * Creates a profiler with the default limits and builds the text-statistics calculator.
     *
     * @param arrayBlockingQueue queue of file resources, passed to the superclass
     * @param iDBWriter writer that receives every row produced by this profiler
     */
    public AbstractProfiler(ArrayBlockingQueue<FileResource> arrayBlockingQueue, IDBWriter iDBWriter) {
        super(arrayBlockingQueue);
        this.maxContentLength = DEFAULT_MAX_CONTENT_LENGTH;
        this.maxContentLengthForLangId = DEFAULT_MAX_CONTENT_LENGTH_FOR_LANG_ID;
        this.maxTokens = DEFAULT_MAX_TOKENS;
        this.lastExtractExtension = null;
        this.writer = iDBWriter;
        LanguageIDWrapper.setMaxTextLength(this.maxContentLengthForLangId);
        this.compositeTextStatsCalculator = initAnalyzersAndTokenCounter(this.maxTokens, LANG_ID);
    }

    /** Builds the immutable map from upper-case tag name to the column that stores its count. */
    private static Map<String, Cols> initTags() {
        Map<String, Cols> tags = new HashMap<>();
        tags.put("A", Cols.TAGS_A);
        tags.put("B", Cols.TAGS_B);
        tags.put("DIV", Cols.TAGS_DIV);
        tags.put("I", Cols.TAGS_I);
        tags.put("IMG", Cols.TAGS_IMG);
        tags.put("LI", Cols.TAGS_LI);
        tags.put("OL", Cols.TAGS_OL);
        tags.put("P", Cols.TAGS_P);
        tags.put("TABLE", Cols.TAGS_TABLE);
        tags.put("TD", Cols.TAGS_TD);
        tags.put("TITLE", Cols.TAGS_TITLE);
        tags.put("TR", Cols.TAGS_TR);
        tags.put("U", Cols.TAGS_U);
        tags.put("UL", Cols.TAGS_UL);
        return Collections.unmodifiableMap(tags);
    }

    /**
     * Replaces the shared {@link CommonTokenCountManager}. Only profilers whose calculator is
     * built after this call pick up the new manager.
     *
     * @param path passed through to the {@link CommonTokenCountManager} constructor
     * @param str passed through to the {@link CommonTokenCountManager} constructor
     * @throws IOException if the manager cannot be created
     */
    public static void loadCommonTokens(Path path, String str) throws IOException {
        COMMON_TOKEN_COUNT_MANAGER = new CommonTokenCountManager(path, str);
    }

    /**
     * Extracts the file-name portion of a path, tolerating paths that commons-io rejects.
     *
     * @param str path to inspect; may be {@code null}
     * @return the file name, or {@code ""} when {@code str} is {@code null} or still rejected
     *         after NUL characters have been replaced with spaces
     */
    private static String getFileName(String str) {
        if (str == null) {
            return "";
        }
        try {
            return FilenameUtils.getName(str);
        } catch (IllegalArgumentException e) {
            LOG.warn("{} in {}", e.getMessage(), str);
            // Retry once with NUL (U+0000) characters replaced by spaces.
            String cleaned = str.replace('\u0000', ' ');
            try {
                return FilenameUtils.getName(cleaned);
            } catch (IllegalArgumentException e2) {
                LOG.warn("Again: {} in {}", e2.getMessage(), cleaned);
                return "";
            }
        }
    }

    /**
     * Returns the content of {@code contentTags}, cut to at most {@code i} characters, and records
     * in {@code map} whether truncation happened.
     *
     * <p>The flag values are the upper-case literals {@code "TRUE"}/{@code "FALSE"}, which differ
     * from this class's lower-case {@link #TRUE}/{@link #FALSE} constants.
     * NOTE(review): confirm that the case difference is intended.
     *
     * @param contentTags source of the content; may be {@code null}
     * @param i maximum length; a negative value disables truncation
     * @param map row that receives {@link Cols#CONTENT_TRUNCATED_AT_MAX_LEN}
     * @return the (possibly truncated) content; {@code ""} when there is no content
     */
    protected static String truncateContent(ContentTags contentTags, int i, Map<Cols, String> map) {
        map.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "FALSE");
        if (contentTags == null) {
            return "";
        }
        String content = contentTags.getContent();
        if (content == null) {
            // Treat missing content like missing tags instead of failing on content.length().
            return "";
        }
        if (i > -1 && content.length() > i) {
            content = content.substring(0, i);
            map.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "TRUE");
        }
        return content;
    }

    /**
     * Parses content and tags out of {@code metadata}.
     * (Decompiler note: access was changed from protected.)
     *
     * @return {@link ContentTags#EMPTY_CONTENT_TAGS} when {@code metadata} is {@code null},
     *         otherwise the result of parsing its content
     */
    public static ContentTags getContent(EvalFilePaths evalFilePaths, Metadata metadata) {
        if (metadata == null) {
            return ContentTags.EMPTY_CONTENT_TAGS;
        }
        return parseContentAndTags(evalFilePaths, metadata);
    }

    /**
     * Counts attachments per metadata entry.
     * (Decompiler note: access was changed from package-private.)
     *
     * <p>Index 0 holds {@code list.size() - 1}. Every later index holds how many other entries
     * have that entry's embedded resource path as a proper ancestor path (0 when none, or when
     * the entry has no embedded path).
     *
     * @param list metadata entries, container first; may be {@code null}
     * @return one count per entry; empty when {@code list} is {@code null} or empty
     */
    public static List<Integer> countAttachments(List<Metadata> list) {
        List<Integer> counts = new ArrayList<>();
        if (list == null || list.isEmpty()) {
            return counts;
        }
        counts.add(list.size() - 1);

        // First pass: credit every proper ancestor path of every embedded entry.
        Map<String, Integer> descendantsByPath = new HashMap<>();
        StringBuilder prefix = new StringBuilder();
        for (int i = 1; i < list.size(); i++) {
            String embeddedPath = list.get(i).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH);
            if (embeddedPath == null) {
                continue;
            }
            String[] segments = embeddedPath.split(PATH_SEPARATOR);
            // segments[0] is skipped; the last segment is the entry itself, not an ancestor.
            for (int end = 1; end < segments.length - 1; end++) {
                prefix.setLength(0);
                join(PATH_SEPARATOR, prefix, segments, 1, end);
                descendantsByPath.merge(prefix.toString(), 1, Integer::sum);
            }
        }

        // Second pass: look up each entry's own path among the credited ancestors.
        for (int i = 1; i < list.size(); i++) {
            String embeddedPath = list.get(i).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH);
            counts.add(descendantsByPath.getOrDefault(embeddedPath, 0));
        }
        return counts;
    }

    /**
     * Appends {@code strArr[i..i2]} (inclusive) to {@code sb}, writing {@code str} before each element.
     */
    private static void join(String str, StringBuilder sb, String[] strArr, int i, int i2) {
        for (int idx = i; idx <= i2; idx++) {
            sb.append(str).append(strArr[idx]);
        }
    }

    /**
     * Chooses a parser for the extracted content based on the extract file name and the
     * recorded content handler.
     *
     * <ul>
     *   <li>{@code .html} extract: HTML parser, falling back to plain text.</li>
     *   <li>{@code .xhtml} extract, or content handler named like {@link ToXMLContentHandler}:
     *       XML parser, falling back to the HTML parser (flagged as a parse exception), then
     *       to plain text.</li>
     *   <li>anything else: plain text.</li>
     * </ul>
     */
    private static ContentTags parseContentAndTags(EvalFilePaths evalFilePaths, Metadata metadata) {
        String content = metadata.get(TikaCoreProperties.TIKA_CONTENT);
        if (content == null || content.isEmpty()) {
            return ContentTags.EMPTY_CONTENT_TAGS;
        }
        String handlerName = metadata.get(TikaCoreProperties.TIKA_CONTENT_HANDLER);
        String extractName = evalFilePaths.getExtractFile().getFileName().toString().toLowerCase(Locale.ENGLISH);
        if (extractName.endsWith(".html")) {
            return parseHtmlOrTreatAsText(content, evalFilePaths, false);
        }
        boolean treatAsXml = extractName.endsWith(".xhtml")
                || ToXMLContentHandler.class.getSimpleName().equals(handlerName);
        if (!treatAsXml) {
            return new ContentTags(content);
        }
        try {
            return ContentTagParser.parseXML(content, UC_TAGS_OF_INTEREST.keySet());
        } catch (IOException | TikaException | SAXException e) {
            LOG.warn("Problem parsing xhtml in {}; backing off to html parser", evalFilePaths.getExtractFile().toAbsolutePath().toString(), e);
            return parseHtmlOrTreatAsText(content, evalFilePaths, true);
        }
    }

    /**
     * Parses {@code content} as HTML; on failure logs a warning and wraps the raw string in a
     * {@link ContentTags} flagged as a parse exception.
     *
     * @param flagParseException whether a successful HTML parse should still be flagged as a
     *        parse exception (used when HTML is itself the fallback)
     */
    private static ContentTags parseHtmlOrTreatAsText(String content, EvalFilePaths evalFilePaths, boolean flagParseException) {
        try {
            ContentTags parsed = ContentTagParser.parseHTML(content, UC_TAGS_OF_INTEREST.keySet());
            if (flagParseException) {
                parsed.setParseException(true);
            }
            return parsed;
        } catch (IOException | SAXException e) {
            LOG.warn("Problem parsing html in {}; backing off to treat string as text", evalFilePaths.getExtractFile().toAbsolutePath().toString(), e);
            return new ContentTags(content, true);
        }
    }

    /**
     * Replaces {@link #analyzerManager} with one built for {@code i} tokens and returns a new
     * calculator wired to it. The caller is responsible for storing the returned calculator.
     *
     * @param i token limit handed to {@link AnalyzerManager#newInstance}
     * @param languageIDWrapper language identifier handed to the calculator
     */
    // The element type of the calculator list is not imported by this file, so the list stays raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private CompositeTextStatsCalculator initAnalyzersAndTokenCounter(int i, LanguageIDWrapper languageIDWrapper) {
        this.analyzerManager = AnalyzerManager.newInstance(i);
        List calculators = new ArrayList();
        calculators.add(new CommonTokens(COMMON_TOKEN_COUNT_MANAGER));
        calculators.add(new TokenEntropy());
        calculators.add(new TokenLengths());
        calculators.add(new TopNTokens(TOP_N_TOKENS));
        calculators.add(new BasicTokenCountStatsCalculator());
        calculators.add(new ContentLengthCalculator());
        // The unicode block counter receives the current lang-id content limit at construction.
        calculators.add(new UnicodeBlockCounter(this.maxContentLengthForLangId));
        return new CompositeTextStatsCalculator(calculators, this.analyzerManager.getGeneralAnalyzer(), languageIDWrapper);
    }

    /** Sets the maximum number of characters kept by {@link #calcTextStats}. */
    public void setMaxContentLength(int i) {
        this.maxContentLength = i;
    }

    /**
     * Sets the content limit used for language identification and rebuilds the calculator so
     * that the {@link UnicodeBlockCounter}, which receives the limit at construction, sees the
     * new value as well.
     */
    public void setMaxContentLengthForLangId(int i) {
        this.maxContentLengthForLangId = i;
        LanguageIDWrapper.setMaxTextLength(i);
        this.compositeTextStatsCalculator = initAnalyzersAndTokenCounter(this.maxTokens, LANG_ID);
    }

    /**
     * Sets the token limit and rebuilds both the analyzer manager and the calculator that uses
     * its analyzer, so the new limit actually takes effect.
     */
    public void setMaxTokens(int i) {
        this.maxTokens = i;
        this.compositeTextStatsCalculator = initAnalyzersAndTokenCounter(i, new LanguageIDWrapper());
    }

    /**
     * Writes one extract-exception row.
     * (Decompiler note: access was changed from protected.)
     *
     * @param tableInfo target table
     * @param str container id
     * @param str2 file path
     * @param type exception type; its {@code ordinal()} is stored as the exception id
     * @throws IOException if the writer fails
     */
    public void writeExtractException(TableInfo tableInfo, String str, String str2, ExtractReaderException.TYPE type) throws IOException {
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.CONTAINER_ID, str);
        row.put(Cols.FILE_PATH, str2);
        row.put(Cols.EXTRACT_EXCEPTION_ID, Integer.toString(type.ordinal()));
        this.writer.writeRow(tableInfo, row);
    }

    /**
     * Writes the profile row for one metadata entry.
     * (Decompiler note: access was changed from protected.)
     *
     * @param evalFilePaths paths of the file under evaluation; supplies the file name for the container
     * @param i index of the entry; 0 is the container, anything else is treated as embedded
     * @param contentTags parsed content, used only for the has-content flag
     * @param metadata metadata of the entry
     * @param str row id
     * @param str2 container id
     * @param list attachment counts indexed like the metadata list
     * @param tableInfo target table
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    public void writeProfileData(EvalFilePaths evalFilePaths, int i, ContentTags contentTags, Metadata metadata, String str, String str2, List<Integer> list, TableInfo tableInfo) {
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.ID, str);
        row.put(Cols.CONTAINER_ID, str2);
        row.put(Cols.MD5, metadata.get(DIGEST_KEY));
        if (i < list.size()) {
            row.put(Cols.NUM_ATTACHMENTS, Integer.toString(list.get(i)));
        }
        row.put(Cols.ELAPSED_TIME_MILLIS, getTime(metadata));
        row.put(Cols.NUM_METADATA_VALUES, Integer.toString(countMetadataValues(metadata)));
        Integer numPages = metadata.getInt(PagedText.N_PAGES);
        if (numPages != null) {
            row.put(Cols.NUM_PAGES, Integer.toString(numPages));
        }

        String fileName;
        if (i == 0) {
            row.put(Cols.IS_EMBEDDED, FALSE);
            fileName = evalFilePaths.getRelativeSourceFilePath().getFileName().toString();
        } else {
            row.put(Cols.IS_EMBEDDED, TRUE);
            fileName = getFileName(metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));
        }
        row.put(Cols.FILE_NAME, fileName);
        String extension = FilenameUtils.getExtension(fileName);
        row.put(Cols.FILE_EXTENSION, extension == null ? "" : extension.toLowerCase(Locale.US));

        long sourceFileLength = getSourceFileLength(metadata);
        row.put(Cols.LENGTH, sourceFileLength > NON_EXISTENT_FILE_LENGTH ? Long.toString(sourceFileLength) : "");

        String content = contentTags.getContent();
        boolean hasContent = content != null && !content.trim().isEmpty();
        row.put(Cols.HAS_CONTENT, hasContent ? TRUE : FALSE);

        getFileTypes(metadata, row);
        try {
            this.writer.writeRow(tableInfo, row);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes an exception row for {@code metadata} if it records an exception; otherwise does nothing.
     * (Decompiler note: access was changed from protected.)
     *
     * @param str row id
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    public void writeExceptionData(String str, Metadata metadata, TableInfo tableInfo) {
        Map<Cols, String> row = new HashMap<>();
        getExceptionStrings(metadata, row);
        if (row.isEmpty()) {
            return;
        }
        row.put(Cols.ID, str);
        try {
            this.writer.writeRow(tableInfo, row);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs the composite calculator over the content, truncated to {@link #maxContentLength}.
     * Blank content is passed to the calculator as {@code ""}.
     * (Decompiler note: access was changed from protected.)
     *
     * @return the calculator results keyed by calculator class
     */
    public Map<Class, Object> calcTextStats(ContentTags contentTags) {
        String text = truncateContent(contentTags, this.maxContentLength, new HashMap<Cols, String>());
        if (text == null || text.trim().isEmpty()) {
            text = "";
        }
        return this.compositeTextStatsCalculator.calculate(text);
    }

    /**
     * Writes the content-statistics row built from calculator results. Nothing is written when
     * the recorded content length is 0.
     * (Decompiler note: access was changed from protected.)
     *
     * @param str row id
     * @param map calculator results keyed by calculator class, as returned by {@link #calcTextStats}
     * @param tableInfo target table
     * @throws IOException declared by the signature; a writer failure is rethrown wrapped in a
     *         {@link RuntimeException}
     */
    public void writeContentData(String str, Map<Class, Object> map, TableInfo tableInfo) throws IOException {
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.ID, str);

        Integer contentLength = (Integer) map.get(ContentLengthCalculator.class);
        if (contentLength != null) {
            if (contentLength == 0) {
                return;
            }
            row.put(Cols.CONTENT_LENGTH, Integer.toString(contentLength));
        }

        langid(map, row);
        writeTokenCounts(map, row);

        CommonTokenResult commonTokens = (CommonTokenResult) map.get(CommonTokens.class);
        if (commonTokens != null) {
            row.put(Cols.COMMON_TOKENS_LANG, commonTokens.getLangCode());
            row.put(Cols.NUM_UNIQUE_COMMON_TOKENS, Integer.toString(commonTokens.getUniqueCommonTokens()));
            row.put(Cols.NUM_COMMON_TOKENS, Integer.toString(commonTokens.getCommonTokens()));
            row.put(Cols.NUM_UNIQUE_ALPHABETIC_TOKENS, Integer.toString(commonTokens.getUniqueAlphabeticTokens()));
            row.put(Cols.NUM_ALPHABETIC_TOKENS, Integer.toString(commonTokens.getAlphabeticTokens()));
        }

        TokenCounts tokenCounts = (TokenCounts) map.get(BasicTokenCountStatsCalculator.class);
        if (tokenCounts != null) {
            row.put(Cols.NUM_UNIQUE_TOKENS, Integer.toString(tokenCounts.getTotalUniqueTokens()));
            row.put(Cols.NUM_TOKENS, Integer.toString(tokenCounts.getTotalTokens()));
        }

        Double entropy = (Double) map.get(TokenEntropy.class);
        if (entropy != null) {
            row.put(Cols.TOKEN_ENTROPY_RATE, Double.toString(entropy));
        }

        SummaryStatistics tokenLengths = (SummaryStatistics) map.get(TokenLengths.class);
        if (tokenLengths != null) {
            row.put(Cols.TOKEN_LENGTH_SUM, Integer.toString((int) tokenLengths.getSum()));
            row.put(Cols.TOKEN_LENGTH_MEAN, Double.toString(tokenLengths.getMean()));
            row.put(Cols.TOKEN_LENGTH_STD_DEV, Double.toString(tokenLengths.getStandardDeviation()));
        }

        unicodeBlocks(map, row);
        try {
            this.writer.writeRow(tableInfo, row);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes the tag-count row. Nothing is written when no tags were found and parsing did not fail.
     * (Decompiler note: access was changed from package-private.)
     *
     * @param str row id
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    public void writeTagData(String str, ContentTags contentTags, TableInfo tableInfo) {
        Map<String, Integer> tags = contentTags.getTags();
        if (tags.isEmpty() && !contentTags.getParseException()) {
            return;
        }
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.ID, str);
        // Every tag of interest gets a value; tags that were not seen are written as "0".
        for (Map.Entry<String, Cols> tagColumn : UC_TAGS_OF_INTEREST.entrySet()) {
            Integer count = tags.get(tagColumn.getKey());
            row.put(tagColumn.getValue(), count == null ? ZERO : Integer.toString(count));
        }
        row.put(Cols.TAGS_PARSE_EXCEPTION, contentTags.getParseException() ? TRUE : FALSE);
        try {
            this.writer.writeRow(tableInfo, row);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the recorded parse time in milliseconds, or {@code "-1"} when none is recorded. */
    String getTime(Metadata metadata) {
        String parseTime = metadata.get(TikaCoreProperties.PARSE_TIME_MILLIS);
        if (parseTime == null) {
            return "-1";
        }
        return parseTime;
    }

    /** Returns the total number of values across all metadata names; 0 for {@code null} metadata. */
    int countMetadataValues(Metadata metadata) {
        if (metadata == null) {
            return 0;
        }
        int total = 0;
        for (String name : metadata.names()) {
            total += metadata.getValues(name).length;
        }
        return total;
    }

    /**
     * Classifies the exception recorded in {@code metadata}, if any, and stores the result in {@code map}.
     *
     * <p>The stack trace is looked up under the container key, then the 1.x container key, then
     * the embedded key. Access-permission and encryption exceptions only get an id; every other
     * exception is recorded as {@link EXCEPTION_TYPE#RUNTIME} together with the original and
     * normalized stack trace. {@code map} is left untouched when no exception is recorded.
     */
    void getExceptionStrings(Metadata metadata, Map<Cols, String> map) {
        String stackTrace = metadata.get(TikaCoreProperties.CONTAINER_EXCEPTION);
        if (stackTrace == null) {
            stackTrace = metadata.get(CONTAINER_EXCEPTION_1X);
        }
        if (stackTrace == null) {
            stackTrace = metadata.get(TikaCoreProperties.EMBEDDED_EXCEPTION);
        }
        if (stackTrace == null) {
            return;
        }
        if (ACCESS_PERMISSION_EXCEPTION.matcher(stackTrace).find()) {
            map.put(Cols.PARSE_EXCEPTION_ID, Integer.toString(EXCEPTION_TYPE.ACCESS_PERMISSION.ordinal()));
        } else if (ENCRYPTION_EXCEPTION.matcher(stackTrace).find()) {
            map.put(Cols.PARSE_EXCEPTION_ID, Integer.toString(EXCEPTION_TYPE.ENCRYPTION.ordinal()));
        } else {
            map.put(Cols.PARSE_EXCEPTION_ID, Integer.toString(EXCEPTION_TYPE.RUNTIME.ordinal()));
            map.put(Cols.ORIG_STACK_TRACE, stackTrace);
            map.put(Cols.SORT_STACK_TRACE, EvalExceptionUtils.normalize(stackTrace));
        }
    }

    /**
     * Serializes the most frequent unicode blocks (at most {@value #MAX_UNICODE_BLOCKS}, highest
     * count first) as {@code "block: count | block: count"} into {@link Cols#UNICODE_CHAR_BLOCKS}.
     * Does nothing when {@code map} holds no unicode-block result.
     */
    void unicodeBlocks(Map<Class, Object> map, Map<Cols, String> map2) {
        @SuppressWarnings("unchecked")
        Map<String, MutableInt> blockCounts = (Map<String, MutableInt>) map.get(UnicodeBlockCounter.class);
        if (blockCounts == null) {
            // Consistent with the other optional calculator results in writeContentData.
            return;
        }
        List<Pair<String, Integer>> blocks = new ArrayList<>(blockCounts.size());
        for (Map.Entry<String, MutableInt> entry : blockCounts.entrySet()) {
            blocks.add(Pair.of(entry.getKey(), entry.getValue().intValue()));
        }
        // Descending by count.
        blocks.sort((left, right) -> right.getValue().compareTo(left.getValue()));

        StringBuilder sb = new StringBuilder();
        int limit = Math.min(MAX_UNICODE_BLOCKS, blocks.size());
        for (int i = 0; i < limit; i++) {
            if (i > 0) {
                sb.append(ENTRY_SEPARATOR);
            }
            Pair<String, Integer> block = blocks.get(i);
            sb.append(block.getKey()).append(KEY_VALUE_SEPARATOR).append(block.getValue());
        }
        map2.put(Cols.UNICODE_CHAR_BLOCKS, sb.toString());
    }

    /**
     * Copies the first two language-id results (language and raw score) into {@code map2}.
     * Does nothing when {@code map} holds no language-id result.
     */
    void langid(Map<Class, Object> map, Map<Cols, String> map2) {
        @SuppressWarnings("unchecked")
        List<LanguageResult> results = (List<LanguageResult>) map.get(LanguageIDWrapper.class);
        if (results == null) {
            return;
        }
        if (results.size() > 0) {
            LanguageResult best = results.get(0);
            map2.put(Cols.LANG_ID_1, best.getLanguage());
            map2.put(Cols.LANG_ID_PROB_1, Double.toString(best.getRawScore()));
        }
        if (results.size() > 1) {
            LanguageResult second = results.get(1);
            map2.put(Cols.LANG_ID_2, second.getLanguage());
            map2.put(Cols.LANG_ID_PROB_2, Double.toString(second.getRawScore()));
        }
    }

    /**
     * Stores the mime id for the metadata's content type in {@code map}; does nothing when
     * {@code metadata} is {@code null} or has no content type.
     */
    void getFileTypes(Metadata metadata, Map<Cols, String> map) {
        if (metadata == null) {
            return;
        }
        String contentType = metadata.get(HttpHeaders.CONTENT_TYPE);
        if (contentType == null) {
            return;
        }
        map.put(Cols.MIME_ID, Integer.toString(this.writer.getMimeId(contentType)));
    }

    /**
     * Serializes the top-N tokens as {@code "token: count | token: count"} into
     * {@link Cols#TOP_N_TOKENS}. Does nothing when {@code map} holds no top-N result.
     */
    void writeTokenCounts(Map<Class, Object> map, Map<Cols, String> map2) {
        TokenIntPair[] topTokens = (TokenIntPair[]) map.get(TopNTokens.class);
        if (topTokens == null) {
            return;
        }
        StringBuilder sb = new StringBuilder();
        for (TokenIntPair token : topTokens) {
            if (sb.length() > 0) {
                sb.append(ENTRY_SEPARATOR);
            }
            sb.append(token.getToken()).append(KEY_VALUE_SEPARATOR).append(token.getValue());
        }
        map2.put(Cols.TOP_N_TOKENS, sb.toString());
    }

    /**
     * Closes the underlying writer.
     *
     * @throws IOException if closing fails
     */
    public void closeWriter() throws IOException {
        this.writer.close();
    }

    /**
     * Resolves paths when the crawl ran over the extract directory.
     * (Decompiler note: access was changed from protected.)
     *
     * <p>The relative source path is the relative extract path with its extract/compression
     * suffix removed. The extract file is the relative path resolved against {@code path}; when
     * that is not a regular file, {@link #findFile} searches for it.
     *
     * @param metadata metadata carrying {@link FSProperties#FS_REL_PATH}
     * @param path root directory of the extracts
     */
    public EvalFilePaths getPathsFromExtractCrawl(Metadata metadata, Path path) {
        String relExtractPath = metadata.get(FSProperties.FS_REL_PATH);
        Path relSourcePath = Paths.get(FILE_NAME_CLEANER.matcher(relExtractPath).replaceAll(""));
        Path extractFile = path.resolve(relExtractPath);
        if (!Files.isRegularFile(extractFile)) {
            extractFile = findFile(path, relSourcePath);
        }
        return new EvalFilePaths(relSourcePath, extractFile);
    }

    /**
     * Resolves paths when the crawl ran over the source directory.
     * (Decompiler note: access was changed from protected.)
     *
     * @param metadata metadata carrying {@link FSProperties#FS_REL_PATH}
     * @param path root directory of the source files
     * @param path2 root directory of the extracts
     * @return the paths plus the source file length, or -1 as the length when it cannot be read
     */
    public EvalFilePaths getPathsFromSrcCrawl(Metadata metadata, Path path, Path path2) {
        Path relSourcePath = Paths.get(metadata.get(FSProperties.FS_REL_PATH));
        Path extractFile = findFile(path2, relSourcePath);
        Path sourceFile = path.resolve(relSourcePath);
        long sourceLength = NON_EXISTENT_FILE_LENGTH;
        try {
            sourceLength = Files.size(sourceFile);
        } catch (IOException e) {
            // Best effort: keep the sentinel length, but log the cause.
            LOG.warn("Couldn't get length for: {}", sourceFile.toAbsolutePath(), e);
        }
        return new EvalFilePaths(relSourcePath, extractFile, sourceLength);
    }

    /**
     * Looks for the extract of {@code path2} under {@code path}.
     *
     * <p>The suffix that matched last time is tried first; after that every combination of
     * extract and compression extension is probed in declaration order, and the first hit is
     * remembered for the next call.
     *
     * @return the extract file, or {@code null} when no candidate is a regular file
     */
    private Path findFile(Path path, Path path2) {
        String relPath = path2.toString();
        if (this.lastExtractExtension != null) {
            Path cachedCandidate = path.resolve(relPath + this.lastExtractExtension);
            if (Files.isRegularFile(cachedCandidate)) {
                return cachedCandidate;
            }
        }
        for (String extractExt : EXTRACT_EXTENSIONS) {
            for (String compressionExt : COMPRESSION_EXTENSIONS) {
                String suffix = extractExt + compressionExt;
                Path candidate = path.resolve(relPath + suffix);
                if (Files.isRegularFile(candidate)) {
                    this.lastExtractExtension = suffix;
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the source length recorded in {@code evalFilePaths} when it is non-negative,
     * otherwise the length recorded in the first metadata entry.
     * (Decompiler note: access was changed from protected.)
     */
    public long getSourceFileLength(EvalFilePaths evalFilePaths, List<Metadata> list) {
        if (evalFilePaths.getSourceFileLength() > NON_EXISTENT_FILE_LENGTH) {
            return evalFilePaths.getSourceFileLength();
        }
        return getSourceFileLength(list);
    }

    /**
     * Returns the length recorded in the first metadata entry, or -1 when the list is
     * {@code null} or empty.
     * (Decompiler note: access was changed from package-private.)
     */
    public long getSourceFileLength(List<Metadata> list) {
        if (list == null || list.isEmpty()) {
            return NON_EXISTENT_FILE_LENGTH;
        }
        return getSourceFileLength(list.get(0));
    }

    /** Parses the content-length header; returns -1 when it is missing or not a number. */
    long getSourceFileLength(Metadata metadata) {
        String contentLength = metadata.get(HttpHeaders.CONTENT_LENGTH);
        if (contentLength == null) {
            return NON_EXISTENT_FILE_LENGTH;
        }
        try {
            return Long.parseLong(contentLength);
        } catch (NumberFormatException ignored) {
            // A malformed header is treated like a missing one.
            return NON_EXISTENT_FILE_LENGTH;
        }
    }

    /**
     * Returns the size of {@code path} in bytes, or -1 when it is {@code null}, not a regular
     * file, or cannot be read.
     * (Decompiler note: access was changed from protected.)
     */
    public long getFileLength(Path path) {
        if (path == null || !Files.isRegularFile(path)) {
            return NON_EXISTENT_FILE_LENGTH;
        }
        try {
            return Files.size(path);
        } catch (IOException ignored) {
            // Unreadable size is reported with the same sentinel as a missing file.
            return NON_EXISTENT_FILE_LENGTH;
        }
    }
}
