package org.apache.tika.parser.mbox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.felix.bundlerepository.impl.RepositoryParser;
import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.mortbay.jetty.HttpHeaders;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:resources/bundles/15/tika-bundle-0.6.jar:tika-parsers-0.6.jar:org/apache/tika/parser/mbox/MboxParser.class */
/**
 * Line-oriented parser for mbox mailbox files.
 *
 * <p>A mailbox is read as a sequence of messages, each introduced by a line that starts with
 * {@link #MBOX_RECORD_DIVIDER}. The headers of the first message are copied into the supplied
 * {@link Metadata}; the body lines of every message are emitted through an
 * {@link XHTMLContentHandler} as one {@code div class="email-entry"} per message, with runs of
 * lines starting with {@code ">"} wrapped in {@code q} elements.
 */
public class MboxParser implements Parser {
    public static final String MBOX_MIME_TYPE = "application/mbox";
    public static final String MBOX_RECORD_DIVIDER = "From ";
    private static final Logger LOGGER = Logger.getLogger(MboxParser.class);
    /** Splits a header line into name (group 1) and value (group 2). */
    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
    /** Prefix of the metadata keys used for headers that have no dedicated metadata property. */
    private static final String EMAIL_HEADER_METADATA_PREFIX = MboxParser.class.getSimpleName() + "-";
    /** Metadata key under which the text following the "From " divider is stored. */
    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";

    /** Position of the line scanner within the mailbox. */
    public enum ParseStates {
        /** No message divider has been seen yet. */
        START,
        /** Reading the header lines of a message. */
        IN_HEADER,
        /** Reading the body lines of a message. */
        IN_CONTENT
    }

    /**
     * Parses an mbox stream, filling {@code metadata} and emitting XHTML events to
     * {@code contentHandler}.
     *
     * <p>The stream is decoded as us-ascii. Lines that precede the first message divider are
     * ignored.
     *
     * @param inputStream    the mailbox data; it is read to the end but not closed here
     * @param contentHandler receiver of the generated XHTML SAX events
     * @param metadata       receives the content type/encoding and the first message's headers
     * @param parseContext   not used by this implementation
     * @throws IOException   if reading from the stream fails
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException declared by the {@link Parser} interface; not thrown directly here
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        InputStreamReader decoder;
        try {
            decoder = new InputStreamReader(inputStream, "us-ascii");
        } catch (UnsupportedEncodingException e) {
            // Best effort: fall back to the platform default charset rather than failing.
            LOGGER.error("Unexpected exception setting up MboxParser", e);
            decoder = new InputStreamReader(inputStream);
        }
        // Not closed here: closing the reader would also close the caller's stream.
        BufferedReader reader = new BufferedReader(decoder);
        metadata.set("Content-Type", MBOX_MIME_TYPE);
        metadata.set("Content-Encoding", "us-ascii");
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();

        ParseStates state = ParseStates.START;
        // Header currently being assembled; continuation lines are appended to it before it is saved.
        String pendingHeader = null;
        boolean inQuote = false;
        int messageCount = 0;

        // The for-loop header advances the reader on every iteration, whatever branch is taken.
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            boolean newMessage = line.startsWith(MBOX_RECORD_DIVIDER);
            switch (state) {
                case START:
                    // Anything before the first divider is not part of a message: skip it.
                    if (newMessage) {
                        messageCount++;
                        pendingHeader = line;
                        state = ParseStates.IN_HEADER;
                    }
                    break;

                case IN_HEADER:
                    if (newMessage) {
                        // Flush before counting the new message: the pending header still
                        // belongs to the message that is ending.
                        saveHeaderInMetadata(messageCount, metadata, pendingHeader);
                        messageCount++;
                        pendingHeader = line;
                    } else if (line.length() == 0) {
                        // A blank line separates the headers from the body.
                        saveHeaderInMetadata(messageCount, metadata, pendingHeader);
                        state = ParseStates.IN_CONTENT;
                        xhtml.startElement("div", "class", "email-entry");
                        // NOTE(review): RepositoryParser.P comes from an unrelated library and is
                        // presumably the literal "p" substituted by the decompiler - confirm.
                        xhtml.startElement(RepositoryParser.P);
                        inQuote = false;
                    } else if (line.startsWith(" ") || line.startsWith("\t")) {
                        // Folded header: the line continues the pending header.
                        pendingHeader = pendingHeader + " " + line.trim();
                    } else {
                        saveHeaderInMetadata(messageCount, metadata, pendingHeader);
                        pendingHeader = line;
                    }
                    break;

                case IN_CONTENT:
                    if (newMessage) {
                        endMessage(xhtml, inQuote);
                        messageCount++;
                        pendingHeader = line;
                        state = ParseStates.IN_HEADER;
                    } else {
                        inQuote = writeContentLine(xhtml, line, inQuote);
                    }
                    break;

                default:
                    break;
            }
        }

        // End of input: flush whatever was still open.
        if (state == ParseStates.IN_HEADER) {
            saveHeaderInMetadata(messageCount, metadata, pendingHeader);
        } else if (state == ParseStates.IN_CONTENT) {
            endMessage(xhtml, inQuote);
        }
        xhtml.endDocument();
    }

    /**
     * Emits one body line followed by a {@code br} element, opening or closing a {@code q}
     * element when the line enters or leaves a run of lines starting with {@code ">"}.
     *
     * @param xhtml   handler receiving the events
     * @param line    the body line to emit
     * @param inQuote whether a {@code q} element is currently open
     * @return whether a {@code q} element is open after this line
     * @throws SAXException if the handler rejects an event
     */
    private boolean writeContentLine(XHTMLContentHandler xhtml, String line, boolean inQuote) throws SAXException {
        boolean quoted = line.startsWith(">");
        if (inQuote && !quoted) {
            xhtml.endElement("q");
        } else if (!inQuote && quoted) {
            xhtml.startElement("q");
        }
        xhtml.characters(line);
        xhtml.element("br", "");
        return quoted;
    }

    /**
     * Closes the elements opened for a message body, including a still-open {@code q} element.
     *
     * @param xHTMLContentHandler handler receiving the events
     * @param z                   whether a {@code q} element is currently open
     * @throws SAXException if the handler rejects an event
     */
    private void endMessage(XHTMLContentHandler xHTMLContentHandler, boolean z) throws SAXException {
        if (z) {
            xHTMLContentHandler.endElement("q");
        }
        xHTMLContentHandler.endElement(RepositoryParser.P);
        xHTMLContentHandler.endElement("div");
    }

    /**
     * Records one header line in the metadata. Only headers of the first message are recorded;
     * a {@code null} header or a header of any later message is ignored.
     *
     * @param i        1-based number of the message the header belongs to
     * @param metadata metadata to add the header to
     * @param str      the complete (unfolded) header line, or the "From " divider line; may be null
     */
    private void saveHeaderInMetadata(int i, Metadata metadata, String str) {
        if (str == null || i > 1) {
            return;
        }
        if (str.startsWith(MBOX_RECORD_DIVIDER)) {
            metadata.add(EMAIL_FROMLINE_METADATA, str.substring(MBOX_RECORD_DIVIDER.length()));
            return;
        }
        Matcher matcher = EMAIL_HEADER_PATTERN.matcher(str);
        if (!matcher.matches()) {
            LOGGER.warn("Malformed email header in mbox file: " + str);
            return;
        }
        // Locale.ROOT keeps the generated metadata keys independent of the platform locale.
        String headerName = matcher.group(1).toLowerCase(Locale.ROOT);
        String headerValue = matcher.group(2);
        // NOTE(review): HttpHeaders.FROM comes from an unrelated library and is presumably the
        // literal "From" substituted by the decompiler - confirm.
        if (headerName.equalsIgnoreCase(HttpHeaders.FROM)) {
            metadata.add(MSOffice.AUTHOR, headerValue);
            metadata.add(DublinCore.CREATOR, headerValue);
        } else if (headerName.equalsIgnoreCase("Subject")) {
            metadata.add("subject", headerValue);
            metadata.add("title", headerValue);
        } else if (headerName.equalsIgnoreCase("Date")) {
            metadata.add("date", headerValue);
        } else if (headerName.equalsIgnoreCase("Message-Id")) {
            metadata.add(DublinCore.IDENTIFIER, headerValue);
        } else if (headerName.equalsIgnoreCase("In-Reply-To")) {
            metadata.add(DublinCore.RELATION, headerValue);
        } else if (headerName.equalsIgnoreCase("Content-Type")) {
            metadata.add("Content-Type", headerValue);
            metadata.add(DublinCore.FORMAT, headerValue);
        } else {
            // Any other header is kept under a parser-specific key.
            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerName, headerValue);
        }
    }

    /**
     * Parses an mbox stream using a fresh, empty {@link ParseContext}.
     *
     * @param inputStream    the mailbox data
     * @param contentHandler receiver of the generated XHTML SAX events
     * @param metadata       receives the content type/encoding and the first message's headers
     * @throws IOException   if reading from the stream fails
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException declared by the {@link Parser} interface
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        parse(inputStream, contentHandler, metadata, new ParseContext());
    }
}
