package org.apache.tika.eval.app;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.concurrent.ArrayBlockingQueue;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.Tika;
import org.apache.tika.batch.FileResource;
import org.apache.tika.batch.builders.BatchProcessBuilder;
import org.apache.tika.batch.fs.FSProperties;
import org.apache.tika.detect.FileCommandDetector;
import org.apache.tika.eval.app.db.ColInfo;
import org.apache.tika.eval.app.db.Cols;
import org.apache.tika.eval.app.db.TableInfo;
import org.apache.tika.eval.app.io.IDBWriter;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/tika/eval/app/FileProfiler.class */
/**
 * Profiler that writes one row per input file to {@link #FILE_PROFILES}.
 *
 * <p>For every {@link FileResource} it records the relative path, file name, extension,
 * length, SHA-256 digest and the id of the MIME string detected by {@link Tika}. When
 * {@link FileCommandDetector#checkHasFile()} returned {@code true} at class-load time, the id
 * of the MIME string detected by {@link FileCommandDetector} is recorded as well.
 */
public class FileProfiler extends AbstractProfiler {
    /** MIME string recorded when a detector throws an {@link IOException}. */
    public static final String DETECT_EXCEPTION = "detect-exception";

    // Evaluated once at class load; decides both the table layout and whether detectFile is run.
    private static final boolean HAS_FILE = FileCommandDetector.checkHasFile();
    private static final Logger LOG = LoggerFactory.getLogger(FileProfiler.class);
    private static final Tika TIKA = new Tika();
    private static final FileCommandDetector FILE_COMMAND_DETECTOR = new FileCommandDetector();

    /**
     * Layout of the per-file table. The {@code FILE_MIME_ID} column is only present when
     * {@link #HAS_FILE} is {@code true}.
     *
     * <p>NOTE(review): the integer type codes match {@code java.sql.Types}
     * (12 = VARCHAR, -5 = BIGINT, 4 = INTEGER) -- presumably that is what ColInfo expects;
     * confirm against ColInfo.
     */
    public static TableInfo FILE_PROFILES = HAS_FILE
            ? new TableInfo("file_profiles",
                    new ColInfo(Cols.FILE_PATH, 12, 2048, "PRIMARY KEY"),
                    new ColInfo(Cols.FILE_NAME, 12, (Integer) 2048),
                    new ColInfo(Cols.FILE_EXTENSION, 12, (Integer) 24),
                    new ColInfo(Cols.LENGTH, -5),
                    new ColInfo(Cols.SHA256, 12, (Integer) 64),
                    new ColInfo(Cols.TIKA_MIME_ID, 4),
                    new ColInfo(Cols.FILE_MIME_ID, 4))
            : new TableInfo("file_profiles",
                    new ColInfo(Cols.FILE_PATH, 12, 2048, "PRIMARY KEY"),
                    new ColInfo(Cols.FILE_NAME, 12, (Integer) 2048),
                    new ColInfo(Cols.FILE_EXTENSION, 12, (Integer) 24),
                    new ColInfo(Cols.LENGTH, -5),
                    new ColInfo(Cols.SHA256, 12, (Integer) 64),
                    new ColInfo(Cols.TIKA_MIME_ID, 4));

    /** Layout of the table that maps MIME ids to MIME strings and extensions. */
    public static TableInfo FILE_MIME_TABLE = new TableInfo("file_mimes",
            new ColInfo(Cols.MIME_ID, 4, "PRIMARY KEY"),
            new ColInfo(Cols.MIME_STRING, 12, (Integer) 256),
            new ColInfo(Cols.FILE_EXTENSION, 12, (Integer) 12));

    /** Command-line options printed by {@link #USAGE()}. */
    static Options OPTIONS = new Options()
            .addOption(new Option("inputDir", true,
                    "optional: directory for original binary input documents. If not specified, -extracts is crawled as is."))
            .addOption("bc", "optional: tika-batch config file")
            .addOption(BatchProcessBuilder.NUM_CONSUMERS_KEY, true, "optional: number of consumer threads")
            .addOption("db", true, "db file to which to write results")
            .addOption("jdbc", true, "EXPERT: full jdbc connection string. Must specify this or -db <h2db>")
            .addOption("jdbcDriver", true, "EXPERT: jdbc driver, or specify via -Djdbc.driver")
            .addOption("tablePrefix", true, "EXPERT: optional prefix for table names")
            .addOption("drop", false, "drop tables if they exist")
            .addOption("maxFilesToAdd", true, "maximum number of files to add to the crawler");

    private final Path inputDir;

    /**
     * @param arrayBlockingQueue queue of file resources, handed to the superclass
     * @param path               input directory; stored but not read anywhere in this class
     * @param iDBWriter          writer handed to the superclass and used to store the rows
     */
    public FileProfiler(ArrayBlockingQueue<FileResource> arrayBlockingQueue, Path path, IDBWriter iDBWriter) {
        super(arrayBlockingQueue, iDBWriter);
        this.inputDir = path;
    }

    /** Prints the command-line help for this tool. */
    public static void USAGE() {
        new HelpFormatter().printHelp(80,
                "java -jar tika-eval-x.y.jar FileProfiler -inputDir docs -db mydb [-inputDir input]",
                "Tool: Profile",
                OPTIONS,
                "Note: for the default h2 db, do not include the .mv.db at the end of the db name.");
    }

    /**
     * Profiles a single file and writes the resulting row to {@link #FILE_PROFILES}.
     *
     * <p>A file name, extension or size that cannot be determined is logged and replaced by
     * its default ({@code ""}, {@code ""} and {@code -1} respectively); the row is still written.
     *
     * @param fileResource the file to profile; its {@link FSProperties#FS_REL_PATH} metadata
     *                     value is used as the row's file path
     * @return {@code true} if the row was written and both streams closed cleanly,
     *         {@code false} if an {@link IOException} occurred
     */
    @Override // org.apache.tika.batch.FileResourceConsumer
    public boolean processFileResource(FileResource fileResource) {
        String relPath = fileResource.getMetadata().get(FSProperties.FS_REL_PATH);
        // try-with-resources closes both streams on every path, including when the body throws.
        try (InputStream rawStream = fileResource.openInputStream();
             TikaInputStream tis = TikaInputStream.get(rawStream)) {
            Path path = tis.getPath();
            HashMap<Cols, String> row = new HashMap<>();
            int tikaMimeId = this.writer.getMimeId(detectTika(tis));

            String fileName = "";
            try {
                fileName = FilenameUtils.getName(relPath);
            } catch (IllegalArgumentException e) {
                LOG.warn("bad file name: " + relPath, e);
            }

            String extension = "";
            try {
                extension = FilenameUtils.getExtension(relPath);
            } catch (IllegalArgumentException e) {
                LOG.warn("bad extension: " + relPath, e);
            }

            long length = -1;
            try {
                length = Files.size(path);
            } catch (IOException e) {
                LOG.warn("problem getting size: " + relPath, e);
            }

            row.put(Cols.FILE_PATH, relPath);
            row.put(Cols.FILE_NAME, fileName);
            row.put(Cols.FILE_EXTENSION, extension);
            row.put(Cols.LENGTH, Long.toString(length));
            row.put(Cols.TIKA_MIME_ID, Integer.toString(tikaMimeId));
            row.put(Cols.SHA256, DigestUtils.sha256Hex(tis));
            if (HAS_FILE) {
                // NOTE(review): the digest above has already read the stream; presumably
                // FileCommandDetector works from the underlying file rather than the current
                // stream position -- confirm.
                int fileMimeId = this.writer.getMimeId(detectFile(tis));
                row.put(Cols.FILE_MIME_ID, Integer.toString(fileMimeId));
            }
            this.writer.writeRow(FILE_PROFILES, row);
            return true;
        } catch (IOException e) {
            // Report the failure instead of dropping it silently; the return value is unchanged.
            LOG.warn("problem profiling file: " + relPath, e);
            return false;
        }
    }

    /**
     * Detects the MIME type with {@link FileCommandDetector}.
     *
     * @return the detected type as a string, or {@link #DETECT_EXCEPTION} on {@link IOException}
     */
    private String detectFile(TikaInputStream tikaInputStream) {
        try {
            return FILE_COMMAND_DETECTOR.detect(tikaInputStream, new Metadata()).toString();
        } catch (IOException e) {
            // Best effort: the sentinel is stored in place of a MIME string.
            return DETECT_EXCEPTION;
        }
    }

    /**
     * Detects the MIME type with {@link Tika}.
     *
     * @return the detected type as a string, or {@link #DETECT_EXCEPTION} on {@link IOException}
     */
    private String detectTika(TikaInputStream tikaInputStream) {
        try {
            return TIKA.detect(tikaInputStream);
        } catch (IOException e) {
            // Best effort: the sentinel is stored in place of a MIME string.
            return DETECT_EXCEPTION;
        }
    }
}
