package org.apache.tika.server.resource;

import com.ctc.wstx.cfg.XmlConsts;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.mail.internet.ContentDisposition;
import javax.mail.internet.ParseException;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import javax.ws.rs.core.UriInfo;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.james.mime4j.dom.field.ContentDispositionField;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.html.BoilerpipeContentHandler;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
import org.apache.tika.server.InputStreamFactory;
import org.apache.tika.server.TikaServerParseException;
import org.ccil.cowan.tagsoup.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * JAX-RS resource mounted at {@code /tika} that runs a request body through a Tika parser and
 * streams the extracted content back as plain text, HTML or XML.
 *
 * <p>The static helpers ({@link #createParser()}, {@link #fillMetadata}, {@link #fillParseContext},
 * {@link #parse}, ...) are public so that other code can reuse them. {@link #init} must be called
 * before any request is served because it supplies the configuration the helpers read.
 */
@Path("/tika")
public class TikaResource {
    /** Prefix of request headers whose suffix names a {@link TesseractOCRConfig} property. */
    public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
    /** Prefix of request headers whose suffix names a {@link PDFParserConfig} property. */
    public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
    private static TikaConfig tikaConfig;
    /** Body returned by a plain {@code GET} on this resource. */
    public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n";
    private static final Logger LOG = LoggerFactory.getLogger(TikaResource.class);
    private static DigestingParser.Digester digester = null;
    private static InputStreamFactory inputStreamFactory = null;

    /**
     * Stores the shared configuration used by every subsequent request.
     *
     * @param tikaConfig2 configuration handed to each {@link AutoDetectParser} built by
     *     {@link #createParser()}
     * @param digester2 digester used to wrap the parser in a {@link DigestingParser}; may be
     *     {@code null}, in which case no digesting wrapper is added
     * @param inputStreamFactory2 factory used by {@link #getInputStream} to obtain the stream to parse
     */
    public static void init(TikaConfig tikaConfig2, DigestingParser.Digester digester2, InputStreamFactory inputStreamFactory2) {
        tikaConfig = tikaConfig2;
        digester = digester2;
        inputStreamFactory = inputStreamFactory2;
    }

    /**
     * Builds a fresh parser for one request.
     *
     * <p>The parser is an {@link AutoDetectParser} over the configured {@link TikaConfig} in which
     * {@code application/xml} is mapped to {@link HtmlParser}, and whose fallback rejects the
     * document with HTTP 415 (Unsupported Media Type). When a digester was supplied to
     * {@link #init}, the result is wrapped in a {@link DigestingParser}.
     *
     * @return the parser to use for a single request
     */
    public static Parser createParser() {
        final AutoDetectParser autoDetectParser = new AutoDetectParser(tikaConfig);
        Map<MediaType, Parser> parsers = autoDetectParser.getParsers();
        parsers.put(MediaType.APPLICATION_XML, new HtmlParser());
        autoDetectParser.setParsers(parsers);
        autoDetectParser.setFallback(new Parser() {
            @Override
            public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
                // Report the same types as the enclosing auto-detect parser.
                return autoDetectParser.getSupportedTypes(parseContext);
            }

            @Override
            public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
                throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
            }
        });
        if (digester == null) {
            return autoDetectParser;
        }
        return new DigestingParser(autoDetectParser, digester);
    }

    /**
     * Returns the configuration last passed to {@link #init}.
     *
     * @return the stored configuration, or {@code null} if {@link #init} has not been called
     */
    public static TikaConfig getConfig() {
        return tikaConfig;
    }

    /**
     * Determines the client-supplied file name from the request headers.
     *
     * <p>The file name parameter of a {@code Content-Disposition} header is used when that header
     * parses and its disposition is {@code attachment}; otherwise the value of the
     * {@code File-Name} header is returned. An unparseable {@code Content-Disposition} is logged
     * and otherwise ignored.
     *
     * @param multivaluedMap request (or attachment) headers
     * @return the detected file name, or {@code null} if neither header provides one
     */
    public static String detectFilename(MultivaluedMap<String, String> multivaluedMap) {
        String disposition = multivaluedMap.getFirst("Content-Disposition");
        if (disposition != null) {
            try {
                ContentDisposition contentDisposition = new ContentDisposition(disposition);
                if ("attachment".equals(contentDisposition.getDisposition())) {
                    String fileName = contentDisposition.getParameter(ContentDispositionField.PARAM_FILENAME);
                    if (fileName != null) {
                        return fileName;
                    }
                }
            } catch (ParseException e) {
                LOG.warn("Parse exception {} determining content disposition", e.getMessage(), e);
            }
        }
        return multivaluedMap.getFirst("File-Name");
    }

    /**
     * Populates the parse context from the request headers.
     *
     * <p>Headers starting with {@link #X_TIKA_OCR_HEADER_PREFIX} are applied to a new
     * {@link TesseractOCRConfig}, headers starting with {@link #X_TIKA_PDF_HEADER_PREFIX} to a new
     * {@link PDFParserConfig}; both configs are always registered in the context. The parser is
     * registered under {@code Parser.class} when it is not {@code null}.
     *
     * @param parseContext context to populate
     * @param multivaluedMap request (or attachment) headers
     * @param parser parser to register in the context; may be {@code null}
     * @throws WebApplicationException if one of the prefixed headers cannot be applied
     */
    public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> multivaluedMap, Parser parser) {
        TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
        PDFParserConfig pdfConfig = new PDFParserConfig();
        for (String headerName : multivaluedMap.keySet()) {
            if (StringUtils.startsWith(headerName, X_TIKA_OCR_HEADER_PREFIX)) {
                processHeaderConfig(multivaluedMap, ocrConfig, headerName, X_TIKA_OCR_HEADER_PREFIX);
            } else if (StringUtils.startsWith(headerName, X_TIKA_PDF_HEADER_PREFIX)) {
                processHeaderConfig(multivaluedMap, pdfConfig, headerName, X_TIKA_PDF_HEADER_PREFIX);
            }
        }
        parseContext.set(TesseractOCRConfig.class, ocrConfig);
        parseContext.set(PDFParserConfig.class, pdfConfig);
        if (parser != null) {
            parseContext.set(Parser.class, parser);
        }
    }

    /**
     * Passes the raw request stream through the configured {@link InputStreamFactory}.
     *
     * @param inputStream raw request body
     * @param httpHeaders headers of the request
     * @return the stream produced by the factory
     * @throws TikaServerParseException if the factory fails with an {@link IOException}
     */
    public static InputStream getInputStream(InputStream inputStream, HttpHeaders httpHeaders) {
        try {
            return inputStreamFactory.getInputSteam(inputStream, httpHeaders);
        } catch (IOException e) {
            throw new TikaServerParseException(e);
        }
    }

    /**
     * Applies a single prefixed header to a configuration object via reflection.
     *
     * <p>The header name minus {@code prefix}, with its first character lower-cased, is looked up
     * as a declared field of {@code config}. {@code String}, {@code int}, {@code double} and
     * {@code boolean} fields are written directly; for any other field type a public
     * {@code setXxx(String)} method is invoked if one exists, and the header is silently skipped
     * if it does not.
     *
     * @param headers request headers
     * @param config configuration object to modify
     * @param headerName full name of the header being applied
     * @param prefix prefix to strip from {@code headerName} to obtain the property name
     * @throws WebApplicationException if the field does not exist, the value cannot be converted,
     *     or the reflective access fails
     */
    private static void processHeaderConfig(MultivaluedMap<String, String> headers, Object config, String headerName, String prefix) {
        try {
            String property = StringUtils.uncapitalize(StringUtils.removeStart(headerName, prefix));
            Field field = config.getClass().getDeclaredField(property);
            field.setAccessible(true);
            Class<?> fieldType = field.getType();
            if (fieldType == String.class) {
                field.set(config, headers.getFirst(headerName));
            } else if (fieldType == Integer.TYPE) {
                field.setInt(config, Integer.parseInt(headers.getFirst(headerName)));
            } else if (fieldType == Double.TYPE) {
                field.setDouble(config, Double.parseDouble(headers.getFirst(headerName)));
            } else if (fieldType == Boolean.TYPE) {
                field.setBoolean(config, Boolean.parseBoolean(headers.getFirst(headerName)));
            } else {
                String setterName = "set" + property.substring(0, 1).toUpperCase(Locale.US) + property.substring(1);
                Method setter = null;
                try {
                    setter = config.getClass().getMethod(setterName, String.class);
                } catch (NoSuchMethodException ignored) {
                    // Deliberately best-effort: a field of another type without a String setter
                    // cannot be configured from a header, so the header is skipped.
                }
                if (setter != null) {
                    setter.invoke(config, headers.getFirst(headerName));
                }
            }
        } catch (Exception e) {
            // Name the prefix that was actually being processed (OCR or PDF) and keep the cause.
            throw new WebApplicationException(String.format(Locale.ROOT, "%s is an invalid %s header", headerName, prefix), e);
        }
    }

    /**
     * Populates metadata, detector and password provider from the request headers.
     *
     * <ul>
     *   <li>The file name found by {@link #detectFilename} is stored as the resource name.</li>
     *   <li>A {@code Content-Type} header is honoured unless its subtype is {@code xml} or it equals
     *       {@code application/octet-stream}. When honoured it is added to the metadata and the
     *       parser's detector is replaced by one that returns the metadata's content type,
     *       delegating to the previous detector only when that value is absent or unparseable.</li>
     *   <li>A {@code Password} header is exposed to parsers through a {@link PasswordProvider}.</li>
     * </ul>
     *
     * @param parser parser whose detector may be replaced; must be, or wrap, an
     *     {@link AutoDetectParser} when a content type is honoured
     * @param metadata metadata to populate
     * @param parseContext context that receives the password provider
     * @param multivaluedMap request (or attachment) headers
     */
    public static void fillMetadata(Parser parser, Metadata metadata, ParseContext parseContext, MultivaluedMap<String, String> multivaluedMap) {
        String fileName = detectFilename(multivaluedMap);
        if (fileName != null) {
            metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
        }

        String contentTypeHeader = multivaluedMap.getFirst("Content-Type");
        javax.ws.rs.core.MediaType requestType =
                contentTypeHeader == null ? null : javax.ws.rs.core.MediaType.valueOf(contentTypeHeader);
        if (requestType != null && "xml".equals(requestType.getSubtype())) {
            requestType = null;
        }
        if (requestType != null && requestType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
            requestType = null;
        }
        if (requestType != null) {
            metadata.add("Content-Type", requestType.toString());
            final Detector detector = getDetector(parser);
            setDetector(parser, new Detector() {
                @Override
                public MediaType detect(InputStream inputStream, Metadata metadata2) throws IOException {
                    String declared = metadata2.get("Content-Type");
                    MediaType mediaType = null;
                    if (declared != null) {
                        mediaType = MediaType.parse(declared);
                    }
                    // Fall back to the detector that was installed before this one.
                    return mediaType != null ? mediaType : detector.detect(inputStream, metadata2);
                }
            });
        }

        final String password = multivaluedMap.getFirst("Password");
        if (password != null) {
            parseContext.set(PasswordProvider.class, new PasswordProvider() {
                @Override
                public String getPassword(Metadata metadata2) {
                    return password;
                }
            });
        }
    }

    /**
     * Installs a detector on the {@link AutoDetectParser} behind {@code parser}.
     *
     * @param parser an {@link AutoDetectParser} or a {@link ParserDecorator} directly wrapping one
     * @param detector detector to install
     * @throws RuntimeException if no {@link AutoDetectParser} can be found
     */
    public static void setDetector(Parser parser, Detector detector) {
        getAutoDetectParser(parser).setDetector(detector);
    }

    /**
     * Returns the detector of the {@link AutoDetectParser} behind {@code parser}.
     *
     * @param parser an {@link AutoDetectParser} or a {@link ParserDecorator} directly wrapping one
     * @return the detector currently installed
     * @throws RuntimeException if no {@link AutoDetectParser} can be found
     */
    public static Detector getDetector(Parser parser) {
        return getAutoDetectParser(parser).getDetector();
    }

    /**
     * Unwraps at most one {@link ParserDecorator} level to reach an {@link AutoDetectParser}.
     *
     * @param parser parser to inspect
     * @return {@code parser} itself or the parser it directly wraps
     * @throws RuntimeException if neither is an {@link AutoDetectParser}
     */
    private static AutoDetectParser getAutoDetectParser(Parser parser) {
        if (parser instanceof AutoDetectParser) {
            return (AutoDetectParser) parser;
        }
        if (!(parser instanceof ParserDecorator)) {
            throw new RuntimeException("Couldn't find AutoDetectParser within: " + parser.getClass());
        }
        Parser wrappedParser = ((ParserDecorator) parser).getWrappedParser();
        if (wrappedParser instanceof AutoDetectParser) {
            return (AutoDetectParser) wrappedParser;
        }
        throw new RuntimeException("Couldn't find AutoDetectParser within: " + wrappedParser.getClass());
    }

    /**
     * Parses the stream and converts every failure into a {@link TikaServerParseException}.
     *
     * <p>The stream is wrapped in a {@link TikaInputStream} that is closed when parsing finishes,
     * whether it succeeds or fails. Encrypted-document and generic failures are logged at warn
     * level with {@code str} as the message prefix; a {@link SAXException} is wrapped without
     * logging.
     *
     * @param parser parser to run
     * @param logger logger that receives the warnings
     * @param str label identifying the request in log messages (callers in this class pass the
     *     request path)
     * @param inputStream document to parse
     * @param contentHandler receiver of the SAX events produced by the parser
     * @param metadata metadata passed to, and filled by, the parser
     * @param parseContext context passed to the parser
     * @throws IOException declared for callers; failures surface as {@link TikaServerParseException}
     */
    public static void parse(Parser parser, Logger logger, String str, InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException {
        try (TikaInputStream tikaInputStream = TikaInputStream.get(inputStream)) {
            parser.parse(tikaInputStream, contentHandler, metadata, parseContext);
        } catch (EncryptedDocumentException e) {
            logger.warn("{}: Encrypted document", str, e);
            throw new TikaServerParseException(e);
        } catch (SAXException e) {
            throw new TikaServerParseException(e);
        } catch (Exception e) {
            logger.warn("{}: Text extraction failed", str, e);
            throw new TikaServerParseException(e);
        }
    }

    /**
     * Logs the request path together with the declared content type, or a note that the type
     * will be auto-detected when the metadata carries none.
     *
     * @param logger logger to write to
     * @param uriInfo request URI information
     * @param metadata metadata already populated from the request headers
     */
    public static void logRequest(Logger logger, UriInfo uriInfo, Metadata metadata) {
        String contentType = metadata.get("Content-Type");
        if (contentType == null) {
            logger.info("{} (autodetecting type)", uriInfo.getPath());
        } else {
            logger.info("{} ({})", uriInfo.getPath(), contentType);
        }
    }

    /**
     * Returns the greeting shown for a plain {@code GET}.
     *
     * @return {@link #GREETING}
     */
    @GET
    @Produces({"text/plain"})
    public String getMessage() {
        return GREETING;
    }

    /**
     * Extracts plain text from a multipart form upload.
     *
     * @param attachment uploaded part; its headers are used as the request headers
     * @param uriInfo request URI information
     * @return streaming output writing the extracted text
     */
    @Path("form")
    @Consumes({javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA})
    @POST
    @Produces({"text/plain"})
    public StreamingOutput getTextFromMultipart(Attachment attachment, @Context UriInfo uriInfo) {
        return produceText((InputStream) attachment.getObject(InputStream.class), attachment.getHeaders(), uriInfo);
    }

    /**
     * Extracts the main content of a document as plain text (see {@link #produceTextMain}).
     *
     * @param inputStream request body
     * @param httpHeaders request headers
     * @param uriInfo request URI information
     * @return streaming output writing the extracted text
     */
    @Path("main")
    @Consumes({javax.ws.rs.core.MediaType.WILDCARD})
    @Produces({"text/plain"})
    @PUT
    public StreamingOutput getTextMain(InputStream inputStream, @Context HttpHeaders httpHeaders, @Context UriInfo uriInfo) {
        return produceTextMain(inputStream, httpHeaders.getRequestHeaders(), uriInfo);
    }

    /**
     * Multipart variant of {@link #getTextMain}.
     *
     * @param attachment uploaded part; its headers are used as the request headers
     * @param uriInfo request URI information
     * @return streaming output writing the extracted text
     */
    @Path("form/main")
    @Consumes({javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA})
    @POST
    @Produces({"text/plain"})
    public StreamingOutput getTextMainFromMultipart(Attachment attachment, @Context UriInfo uriInfo) {
        return produceTextMain((InputStream) attachment.getObject(InputStream.class), attachment.getHeaders(), uriInfo);
    }

    /**
     * Prepares a parse whose output is routed through a {@link BoilerpipeContentHandler} and
     * written as UTF-8 text.
     *
     * <p>Parser, metadata and parse context are set up immediately; the actual parse runs when
     * the returned object is written. The input stream is closed once writing ends.
     *
     * @param inputStream document to parse
     * @param multivaluedMap request (or attachment) headers
     * @param uriInfo request URI information, used for logging
     * @return streaming output performing the parse
     */
    public StreamingOutput produceTextMain(final InputStream inputStream, @Context MultivaluedMap<String, String> multivaluedMap, @Context final UriInfo uriInfo) {
        final Parser createParser = createParser();
        final Metadata metadata = new Metadata();
        final ParseContext parseContext = new ParseContext();
        fillMetadata(createParser, metadata, parseContext, multivaluedMap);
        fillParseContext(parseContext, multivaluedMap, createParser);
        logRequest(LOG, uriInfo, metadata);
        return new StreamingOutput() {
            @Override
            public void write(OutputStream outputStream) throws IOException, WebApplicationException {
                BoilerpipeContentHandler handler =
                        new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8));
                try (InputStream in = inputStream) {
                    TikaResource.parse(createParser, TikaResource.LOG, uriInfo.getPath(), in, handler, metadata, parseContext);
                }
            }
        };
    }

    /**
     * Extracts plain text from the request body.
     *
     * @param inputStream request body
     * @param httpHeaders request headers
     * @param uriInfo request URI information
     * @return streaming output writing the extracted text
     */
    @Produces({"text/plain"})
    @PUT
    @Consumes({javax.ws.rs.core.MediaType.WILDCARD})
    public StreamingOutput getText(InputStream inputStream, @Context HttpHeaders httpHeaders, @Context UriInfo uriInfo) {
        return produceText(getInputStream(inputStream, httpHeaders), httpHeaders.getRequestHeaders(), uriInfo);
    }

    /**
     * Prepares a parse whose body content is written as UTF-8 text through a
     * {@link RichTextContentHandler}.
     *
     * <p>Parser, metadata and parse context are set up immediately; the actual parse runs when
     * the returned object is written. The input stream is closed once writing ends.
     *
     * @param inputStream document to parse
     * @param multivaluedMap request (or attachment) headers
     * @param uriInfo request URI information, used for logging
     * @return streaming output performing the parse
     */
    public StreamingOutput produceText(final InputStream inputStream, MultivaluedMap<String, String> multivaluedMap, final UriInfo uriInfo) {
        final Parser createParser = createParser();
        final Metadata metadata = new Metadata();
        final ParseContext parseContext = new ParseContext();
        fillMetadata(createParser, metadata, parseContext, multivaluedMap);
        fillParseContext(parseContext, multivaluedMap, createParser);
        logRequest(LOG, uriInfo, metadata);
        return new StreamingOutput() {
            @Override
            public void write(OutputStream outputStream) throws IOException, WebApplicationException {
                BodyContentHandler handler = new BodyContentHandler(
                        new RichTextContentHandler(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)));
                try (InputStream in = inputStream) {
                    TikaResource.parse(createParser, TikaResource.LOG, uriInfo.getPath(), in, handler, metadata, parseContext);
                }
            }
        };
    }

    /**
     * Extracts HTML from a multipart form upload.
     *
     * @param attachment uploaded part; its headers are used as the request headers
     * @param uriInfo request URI information
     * @return streaming output writing the HTML
     */
    @Path("form")
    @Consumes({javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA})
    @POST
    @Produces({"text/html"})
    public StreamingOutput getHTMLFromMultipart(Attachment attachment, @Context UriInfo uriInfo) {
        return produceOutput((InputStream) attachment.getObject(InputStream.class), attachment.getHeaders(), uriInfo, "html");
    }

    /**
     * Extracts HTML from the request body.
     *
     * @param inputStream request body
     * @param httpHeaders request headers
     * @param uriInfo request URI information
     * @return streaming output writing the HTML
     */
    @Produces({"text/html"})
    @PUT
    @Consumes({javax.ws.rs.core.MediaType.WILDCARD})
    public StreamingOutput getHTML(InputStream inputStream, @Context HttpHeaders httpHeaders, @Context UriInfo uriInfo) {
        return produceOutput(getInputStream(inputStream, httpHeaders), httpHeaders.getRequestHeaders(), uriInfo, "html");
    }

    /**
     * Extracts XML from a multipart form upload.
     *
     * @param attachment uploaded part; its headers are used as the request headers
     * @param uriInfo request URI information
     * @return streaming output writing the XML
     */
    @Path("form")
    @Consumes({javax.ws.rs.core.MediaType.MULTIPART_FORM_DATA})
    @POST
    @Produces({"text/xml"})
    public StreamingOutput getXMLFromMultipart(Attachment attachment, @Context UriInfo uriInfo) {
        return produceOutput((InputStream) attachment.getObject(InputStream.class), attachment.getHeaders(), uriInfo, "xml");
    }

    /**
     * Extracts XML from the request body.
     *
     * @param inputStream request body
     * @param httpHeaders request headers
     * @param uriInfo request URI information
     * @return streaming output writing the XML
     */
    @Produces({"text/xml"})
    @PUT
    @Consumes({javax.ws.rs.core.MediaType.WILDCARD})
    public StreamingOutput getXML(InputStream inputStream, @Context HttpHeaders httpHeaders, @Context UriInfo uriInfo) {
        return produceOutput(getInputStream(inputStream, httpHeaders), httpHeaders.getRequestHeaders(), uriInfo, "xml");
    }

    /**
     * Prepares a parse whose SAX events are serialised by a JAXP transformer.
     *
     * <p>Parser, metadata and parse context are set up immediately; the actual parse runs when
     * the returned object is written. The transformer is configured with the given output
     * method, indentation enabled and UTF-8 encoding. The input stream is closed once writing
     * ends, including when the transformer cannot be created.
     *
     * @param inputStream document to parse
     * @param multivaluedMap request (or attachment) headers
     * @param uriInfo request URI information, used for logging
     * @param str transformer output method; callers in this class pass {@code "html"} or {@code "xml"}
     * @return streaming output performing the parse
     */
    private StreamingOutput produceOutput(final InputStream inputStream, MultivaluedMap<String, String> multivaluedMap, final UriInfo uriInfo, final String str) {
        final Parser createParser = createParser();
        final Metadata metadata = new Metadata();
        final ParseContext parseContext = new ParseContext();
        fillMetadata(createParser, metadata, parseContext, multivaluedMap);
        fillParseContext(parseContext, multivaluedMap, createParser);
        logRequest(LOG, uriInfo, metadata);
        return new StreamingOutput() {
            @Override
            public void write(OutputStream outputStream) throws IOException, WebApplicationException {
                OutputStreamWriter writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8);
                try (InputStream in = inputStream) {
                    TransformerHandler transformerHandler =
                            ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
                    transformerHandler.getTransformer().setOutputProperty(javax.xml.transform.OutputKeys.METHOD, str);
                    transformerHandler.getTransformer().setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
                    transformerHandler.getTransformer().setOutputProperty(javax.xml.transform.OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
                    transformerHandler.setResult(new StreamResult(writer));
                    TikaResource.parse(createParser, TikaResource.LOG, uriInfo.getPath(), in, new ExpandedTitleContentHandler(transformerHandler), metadata, parseContext);
                } catch (TransformerConfigurationException e) {
                    throw new WebApplicationException(e);
                }
            }
        };
    }

    static {
        // Applied once, when this class is loaded.
        ExtractorFactory.setAllThreadsPreferEventExtractors(true);
    }
}
