package org.apache.tika.parser.hwp;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.Key;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import javax.crypto.Cipher;
import javax.crypto.CipherInputStream;
import javax.crypto.NoSuchPaddingException;
import javax.crypto.spec.SecretKeySpec;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.Property;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.UnsupportedFormatException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.XHTMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/hwp/HwpTextExtractorV5.class */
public class HwpTextExtractorV5 implements Serializable {
    private static final long serialVersionUID = 1;
    // Not referenced by the visible code. NOTE(review): 16 + 51 equals the tag id 67 that parse() decodes,
    // so this is presumably the base value of the HWP record tag ids — confirm against the format spec.
    private static final int HWPTAG_BEGIN = 16;
    // I, C and X are the three values stored in HWP_CHAR_TYPE. The visible code never references these
    // names; writeParaText() compares table entries against the literals 1, 2 and 3 instead:
    // entries equal to 1 or 3 make it skip the next 7 code units, entries equal to 2 make it emit a space.
    private static final int I = 1;
    private static final int C = 2;
    private static final int X = 3;
    // Shared logger; non-final and protected, so it is visible to (and reassignable by) subclasses.
    protected static Logger LOG = LoggerFactory.getLogger((Class<?>) HwpTextExtractorV5.class);
    // ASCII bytes that getHeader() requires at the very start of the "FileHeader" stream.
    private static final byte[] HWP_V5_SIGNATURE = "HWP Document File".getBytes(StandardCharsets.US_ASCII);
    // 32-entry lookup table indexed by a 16-bit code unit below 32 (see writeParaText()).
    // Code unit 9 is handled before the lookup there, so its entry is never consulted.
    private static final int[] HWP_CHAR_TYPE = {2, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 3, 3, 2, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2};

    /**
     * Values decoded from the 256-byte "FileHeader" stream by {@code getHeader}.
     * Plain mutable holder; instances are only created through the package-private constructor.
     */
    public static class FileHeader {
        /** Version decoded from the unsigned 32-bit little-endian value at header offset 32. */
        HwpVersion version;
        /** Bit 0 of the flag word at header offset 36; when set, section streams are inflated before parsing. */
        boolean compressed;
        /** Bit 1 of the flag word; extraction is refused with an EncryptedDocumentException when set. */
        boolean encrypted;
        /** Bit 2 of the flag word; when set, text is read from the "ViewText" storage instead of "BodyText". */
        boolean viewtext;

        FileHeader() {
        }
    }

    /**
     * Four-part version number unpacked from a 32-bit value, one byte per part.
     * {@code m} is taken from bits 24-31, {@code n} from bits 16-23, {@code p} from bits 8-15
     * and {@code r} from bits 0-7.
     */
    public static class HwpVersion {
        int m;
        int n;
        int p;
        int r;

        HwpVersion() {
        }

        /**
         * Formats the version as {@code m.n.p.r} using decimal digits.
         *
         * @return the dotted version string
         */
        @Override
        public String toString() {
            return String.format(Locale.US, "%d.%d.%d.%d", m, n, p, r);
        }

        /**
         * Splits the low 32 bits of {@code j} into the four version bytes.
         * Any bits above bit 31 are ignored.
         *
         * @param j packed version value
         * @return a new instance holding the four parts
         */
        public static HwpVersion parseVersion(long j) {
            HwpVersion parsed = new HwpVersion();
            parsed.r = (int) (j & 0xFF);
            parsed.p = (int) ((j >> 8) & 0xFF);
            parsed.n = (int) ((j >> 16) & 0xFF);
            parsed.m = (int) ((j >> 24) & 0xFF);
            return parsed;
        }
    }

    /**
     * Small deterministic pseudo-random generator used by {@code readKey}.
     * Each step replaces the state with {@code state * 214013 + 2531011} (32-bit wrap-around)
     * and yields bits 16-30 of the new state.
     */
    public static class SRand {
        private int random_seed;

        private SRand(int i) {
            random_seed = i;
        }

        /**
         * Advances the generator by one step.
         *
         * @return a value in the range 0..32767
         */
        public int rand() {
            // int arithmetic already wraps to 32 bits, so no explicit masking of the state is needed
            int advanced = random_seed * 214013 + 2531011;
            random_seed = advanced;
            return (advanced >> 16) & 0x7FFF;
        }
    }

    /**
     * Reusable holder for one record header; {@code readTag} overwrites all three fields
     * each time it successfully reads a header.
     */
    public static class TagInfo {
        /** Bits 0-9 of the 32-bit record header. */
        long id;
        /** Bits 10-19 of the 32-bit record header. */
        long level;
        /**
         * Bits 20-31 of the record header, or, when those bits are all set (4095),
         * the value of the 32-bit word that follows the header.
         */
        long length;

        TagInfo() {
        }
    }

    /**
     * Opens {@code inputStream} as an OLE2 container and extracts metadata and paragraph text from it.
     * The caller's stream is wrapped in a close shield, so closing the container does not close it.
     *
     * @param inputStream         document bytes; must not be null
     * @param metadata            receives the summary-information properties
     * @param xHTMLContentHandler receives one {@code p} element per non-empty paragraph; must not be null
     * @throws IllegalArgumentException if {@code inputStream} or {@code xHTMLContentHandler} is null
     * @throws TikaException            if the container cannot be read (any IOException is wrapped),
     *                                  or if the document is unsupported or encrypted
     * @throws SAXException             if the content handler fails
     */
    public void extract(InputStream inputStream, Metadata metadata, XHTMLContentHandler xHTMLContentHandler) throws FileNotFoundException, IOException, TikaException, SAXException {
        if (inputStream == null || xHTMLContentHandler == null) {
            throw new IllegalArgumentException();
        }
        POIFSFileSystem container = null;
        try {
            container = new POIFSFileSystem(new CloseShieldInputStream(inputStream));
            DirectoryNode root = container.getRoot();
            extract0(root, metadata, xHTMLContentHandler);
        } catch (IOException e) {
            throw new TikaException("error occurred when parsing HWP Format, It may not HWP Format.", e);
        } finally {
            // runs on success and on every failure path; tolerates a null container
            IOUtils.closeQuietly(container);
        }
    }

    /**
     * Validates the "FileHeader" stream, copies summary metadata, then extracts text from either
     * the "ViewText" or the "BodyText" storage depending on the header's viewtext flag.
     *
     * @param directoryNode       root directory of the OLE2 container
     * @param metadata            receives the summary-information properties
     * @param xHTMLContentHandler receives the extracted paragraphs
     * @throws UnsupportedFormatException if "FileHeader" is not a document entry or cannot be decoded
     * @throws EncryptedDocumentException if the header's encrypted flag is set
     */
    private void extract0(DirectoryNode directoryNode, Metadata metadata, XHTMLContentHandler xHTMLContentHandler) throws IOException, SAXException, TikaException {
        Entry headerEntry = directoryNode.getEntry("FileHeader");
        // a non-document entry and an undecodable header are reported identically
        FileHeader header = headerEntry.isDocumentEntry() ? getHeader(headerEntry) : null;
        if (header == null) {
            throw new UnsupportedFormatException("cannot parse the File Header");
        }
        if (header.encrypted) {
            throw new EncryptedDocumentException("document is encrypted");
        }

        parseSummaryInformation(directoryNode, metadata);

        if (!header.viewtext) {
            parseBodyText(header, directoryNode, xHTMLContentHandler);
            return;
        }
        parseViewText(header, directoryNode, xHTMLContentHandler);
    }

    /**
     * Copies the properties of the "\u0005HwpSummaryInformation" stream into {@code metadata}.
     *
     * @param directoryNode root directory of the OLE2 container
     * @param metadata      receives the recognised properties
     * @throws TikaException an {@link UnsupportedFormatException} when the stream is missing or is not
     *                       a readable property set; the underlying exception is attached as its cause
     */
    private void parseSummaryInformation(DirectoryNode directoryNode, Metadata metadata) throws TikaException {
        try {
            populateMatadata(directoryNode.getEntry("\u0005HwpSummaryInformation"), metadata);
        } catch (IOException | NoPropertySetStreamException e) {
            UnsupportedFormatException failure = new UnsupportedFormatException("cannot parse the Summary Information");
            // keep the root cause: the message-only constructor used above leaves the cause unset
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Reads the property set stored in {@code entry} and maps the recognised property ids
     * (2-6, 8, 12-14) onto Tika metadata keys.
     *
     * <p>Property values come from the document itself, so a value whose runtime type is not the one
     * expected for its id (String, Date or Integer) is skipped instead of being cast blindly.
     * The stream opened on the entry is closed before returning.
     *
     * @param entry    the summary-information entry; cast to {@link DocumentEntry} without a check
     * @param metadata receives the recognised properties
     * @throws IOException                  if the stream cannot be opened or read
     * @throws NoPropertySetStreamException if the stream does not contain a property set
     */
    private void populateMatadata(Entry entry, Metadata metadata) throws IOException, NoPropertySetStreamException {
        try (DocumentInputStream summaryStream = new DocumentInputStream((DocumentEntry) entry)) {
            for (Property property : new PropertySet(summaryStream).getProperties()) {
                int id = (int) property.getID();
                Object value = property.getValue();
                String text = value instanceof String ? (String) value : null;
                Date date = value instanceof Date ? (Date) value : null;
                switch (id) {
                    case 2:
                        if (text != null) {
                            metadata.set(TikaCoreProperties.TITLE, text);
                        }
                        break;
                    case 3:
                        if (text != null) {
                            metadata.set(OfficeOpenXMLCore.SUBJECT, text);
                        }
                        break;
                    case 4:
                        if (text != null) {
                            metadata.set(TikaCoreProperties.CREATOR, text);
                        }
                        break;
                    case 5:
                        if (text != null) {
                            metadata.set(Office.KEYWORDS, text);
                        }
                        break;
                    case 6:
                        if (text != null) {
                            metadata.set(TikaCoreProperties.COMMENTS, text);
                        }
                        break;
                    case 8:
                        if (text != null) {
                            metadata.set(TikaCoreProperties.MODIFIER, text);
                        }
                        break;
                    case 12:
                        if (date != null) {
                            metadata.set(TikaCoreProperties.CREATED, date);
                        }
                        break;
                    case 13:
                        if (date != null) {
                            metadata.set(TikaCoreProperties.MODIFIED, date);
                        }
                        break;
                    case 14:
                        if (value instanceof Integer) {
                            metadata.set(Office.PAGE_COUNT, ((Integer) value).intValue());
                        }
                        break;
                    default:
                        // other property ids are not mapped
                        break;
                }
            }
        }
    }

    /**
     * Decodes the 256-byte "FileHeader" stream.
     *
     * <p>The stream must supply the full 256 bytes and begin with {@code HWP_V5_SIGNATURE}.
     * The version is the unsigned 32-bit little-endian value at offset 32; the flag word at
     * offset 36 supplies {@code compressed} (bit 0), {@code encrypted} (bit 1) and
     * {@code viewtext} (bit 2).
     *
     * @param entry the "FileHeader" entry; cast to {@link DocumentEntry} without a check
     * @return the decoded header, or {@code null} if the stream is too short or the signature differs
     * @throws IOException if the stream cannot be opened or read
     */
    private FileHeader getHeader(Entry entry) throws IOException {
        byte[] raw = new byte[256];
        try (DocumentInputStream headerStream = new DocumentInputStream((DocumentEntry) entry)) {
            // readFully keeps reading until the buffer is filled or the stream ends (same helper readKey uses)
            if (IOUtils.readFully(headerStream, raw, 0, raw.length) != raw.length) {
                return null;
            }
        }
        if (!Arrays.equals(HWP_V5_SIGNATURE, Arrays.copyOfRange(raw, 0, HWP_V5_SIGNATURE.length))) {
            return null;
        }

        FileHeader fileHeader = new FileHeader();
        fileHeader.version = HwpVersion.parseVersion(LittleEndian.getUInt(raw, 32));
        long flags = LittleEndian.getUInt(raw, 36);
        if (LOG.isDebugEnabled()) {
            // Long.toBinaryString emits no leading zeros or spaces, so pad to 32 columns first;
            // only then does replacing ' ' with '0' produce a fixed-width bit string.
            LOG.debug("Flags={}", String.format(Locale.ROOT, "%32s", Long.toBinaryString(flags)).replace(' ', '0'));
        }
        fileHeader.compressed = (flags & 1) == 1;
        fileHeader.encrypted = (flags & 2) == 2;
        fileHeader.viewtext = (flags & 4) == 4;
        return fileHeader;
    }

    /**
     * Extracts paragraph text from every document entry under "BodyText" whose name starts with
     * "Section". Other entries are logged at warn level and skipped.
     *
     * <p>Each section stream is closed after it has been parsed, and the raw-deflate
     * {@link Inflater} created for a compressed section is ended explicitly because
     * {@link InflaterInputStream#close()} does not end an inflater supplied by the caller.
     *
     * @param fileHeader          decoded file header; only {@code compressed} is read here
     * @param directoryNode       root directory of the OLE2 container
     * @param xHTMLContentHandler receives the extracted paragraphs
     * @throws IOException  if "BodyText" is not a directory or a section cannot be read
     * @throws SAXException if the content handler fails
     */
    private void parseBodyText(FileHeader fileHeader, DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) throws IOException, SAXException {
        Entry entry = directoryNode.getEntry("BodyText");
        if (entry == null || !entry.isDirectoryEntry()) {
            throw new IOException("Invalid BodyText");
        }
        Iterator<Entry> entries = ((DirectoryEntry) entry).getEntries();
        while (entries.hasNext()) {
            Entry next = entries.next();
            if (next.getName().startsWith("Section") && (next instanceof DocumentEntry)) {
                LOG.debug("extract {}", next.getName());
                InputStream sectionStream = new DocumentInputStream((DocumentEntry) next);
                Inflater inflater = null;
                try {
                    if (fileHeader.compressed) {
                        inflater = new Inflater(true);
                        sectionStream = new InflaterInputStream(sectionStream, inflater);
                    }
                    parse(new HwpStreamReader(sectionStream), xHTMLContentHandler);
                } finally {
                    // same quiet close that parseViewText applies to its section streams
                    IOUtils.closeQuietly(sectionStream);
                    if (inflater != null) {
                        inflater.end();
                    }
                }
            } else {
                LOG.warn("Unknown Entry '{}'({})", next.getName(), next);
            }
        }
    }

    /**
     * Extracts paragraph text from every document entry under "ViewText" whose name starts with
     * "Section". Each section is decrypted with the key derived by {@code readKey} from the start
     * of the stream, then inflated when the header's compressed flag is set. Other entries are
     * logged at warn level and skipped.
     *
     * <p>The section stream is always closed quietly, and the raw-deflate {@link Inflater} is ended
     * explicitly because {@link InflaterInputStream#close()} does not end an inflater supplied by
     * the caller.
     *
     * @param fileHeader          decoded file header; only {@code compressed} is read here
     * @param directoryNode       root directory of the OLE2 container
     * @param xHTMLContentHandler receives the extracted paragraphs
     * @throws IOException if "ViewText" is not a directory or a section cannot be read; cipher set-up
     *                     failures and SAXExceptions from the handler are wrapped in an IOException
     */
    private void parseViewText(FileHeader fileHeader, DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) throws IOException {
        Entry entry = directoryNode.getEntry("ViewText");
        if (entry == null || !entry.isDirectoryEntry()) {
            throw new IOException("Invalid ViewText");
        }
        Iterator<Entry> entries = ((DirectoryEntry) entry).getEntries();
        while (entries.hasNext()) {
            Entry next = entries.next();
            if (next.getName().startsWith("Section") && (next instanceof DocumentEntry)) {
                LOG.debug("extract {}", next.getName());
                InputStream sectionStream = new DocumentInputStream((DocumentEntry) next);
                Inflater inflater = null;
                try {
                    // readKey consumes the key material from the raw stream before the cipher wraps it
                    sectionStream = createDecryptStream(sectionStream, readKey(sectionStream));
                    if (fileHeader.compressed) {
                        inflater = new Inflater(true);
                        sectionStream = new InflaterInputStream(sectionStream, inflater);
                    }
                    parse(new HwpStreamReader(sectionStream), xHTMLContentHandler);
                } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | SAXException e) {
                    throw new IOException(e);
                } finally {
                    IOUtils.closeQuietly(sectionStream);
                    if (inflater != null) {
                        inflater.end();
                    }
                }
            } else {
                LOG.warn("unknown Entry '{}'({})", next.getName(), next);
            }
        }
    }

    /**
     * Derives the AES key for a "ViewText" section from the start of its stream.
     *
     * <p>Consumes 4 bytes that are discarded, then a 256-byte block. The first four bytes of that
     * block (little-endian int) seed an {@code SRand}; the generator supplies an XOR mask byte
     * that is refreshed every 1-16 positions, and the mask is applied to block bytes 4..255.
     * The key is the 16 bytes starting at offset {@code 4 + (block[0] & 15)}.
     *
     * @param inputStream raw section stream, positioned at its start
     * @return the 16-byte key wrapped as an "AES" key spec
     * @throws EOFException if fewer than 4 + 256 bytes are available
     * @throws IOException  if reading fails
     */
    private Key readKey(InputStream inputStream) throws IOException {
        byte[] block = new byte[260];
        // leading 4 bytes are read only to be skipped; the next read overwrites them
        if (IOUtils.readFully(inputStream, block, 0, 4) != 4) {
            throw new EOFException();
        }
        if (IOUtils.readFully(inputStream, block, 0, 256) != 256) {
            throw new EOFException();
        }

        SRand generator = new SRand(LittleEndian.getInt(block));
        byte mask = 0;
        int runLeft = 0;
        for (int pos = 0; pos < 256; pos++, runLeft--) {
            if (runLeft == 0) {
                // order matters: first draw is the mask, second draw is the run length
                mask = (byte) (generator.rand() & 255);
                runLeft = (generator.rand() & 15) + 1;
            }
            if (pos >= 4) {
                block[pos] ^= mask;
            }
        }

        int keyStart = 4 + (block[0] & 15);
        byte[] keyBytes = Arrays.copyOfRange(block, keyStart, keyStart + 16);
        return new SecretKeySpec(keyBytes, "AES");
    }

    /**
     * Wraps {@code inputStream} in a stream that decrypts it with AES in ECB mode without padding.
     *
     * @param inputStream encrypted bytes
     * @param key         AES key
     * @return a stream yielding the decrypted bytes
     * @throws NoSuchAlgorithmException if the AES/ECB/NoPadding transformation is unavailable
     * @throws NoSuchPaddingException   if the padding scheme is unavailable
     * @throws InvalidKeyException      if {@code key} is not usable with the cipher
     */
    public InputStream createDecryptStream(InputStream inputStream, Key key) throws NoSuchAlgorithmException, NoSuchPaddingException, InvalidKeyException {
        Cipher aes = Cipher.getInstance("AES/ECB/NoPadding");
        aes.init(Cipher.DECRYPT_MODE, key);
        return new CipherInputStream(inputStream, aes);
    }

    /**
     * Walks the record stream and emits one {@code p} element for every record with tag id 67
     * that yields non-empty text. The payload of every other record is skipped.
     *
     * <p>NOTE(review): 67 equals HWPTAG_BEGIN (16) + 51 — presumably the paragraph-text tag of the
     * HWP 5 record format; confirm against the format spec.
     *
     * @param hwpStreamReader     reader positioned at the first record header
     * @param xHTMLContentHandler receives the paragraphs; each emitted text ends with a newline
     * @throws IOException  if a tag-67 record has an odd payload length, or reading fails
     * @throws SAXException if the content handler fails
     */
    private void parse(HwpStreamReader hwpStreamReader, XHTMLContentHandler xHTMLContentHandler) throws IOException, SAXException {
        StringBuffer paragraph = new StringBuffer(1024);
        TagInfo tagInfo = new TagInfo();
        while (readTag(hwpStreamReader, tagInfo)) {
            if (67 != tagInfo.id) {
                hwpStreamReader.ensureSkip(tagInfo.length);
                continue;
            }
            // the payload is decoded as 16-bit units, so its byte length must be even
            if (tagInfo.length % 2 != 0) {
                throw new IOException("Invalid block size");
            }
            paragraph.setLength(0);
            writeParaText(hwpStreamReader, tagInfo.length, paragraph);
            if (paragraph.length() > 0) {
                paragraph.append('\n');
                // Literal XHTML element name. The decompiled code borrowed an unrelated
                // OpenNLP constant that merely happens to have the value "p".
                xHTMLContentHandler.startElement("p");
                xHTMLContentHandler.characters(paragraph.toString());
                xHTMLContentHandler.endElement("p");
            }
        }
    }

    /**
     * Reads {@code j / 2} 16-bit units from the reader and appends their text to {@code stringBuffer}.
     *
     * <ul>
     *   <li>units of 32 or more are appended as a {@code char};</li>
     *   <li>unit 9 appends a tab and skips the following 7 units;</li>
     *   <li>other units below 32 are classified through {@code HWP_CHAR_TYPE}: types 1 and 3 skip
     *       the following 7 units without output, type 2 appends a single space.</li>
     * </ul>
     *
     * @param hwpStreamReader reader positioned at the start of the record payload
     * @param j               payload length in bytes
     * @param stringBuffer    receives the decoded text
     * @throws IOException if reading fails
     */
    private void writeParaText(HwpStreamReader hwpStreamReader, long j, StringBuffer stringBuffer) throws IOException {
        int[] units = hwpStreamReader.uint16((int) (j / 2));
        for (int pos = 0; pos < units.length; pos++) {
            int unit = units[pos];
            if (unit >= 32) {
                stringBuffer.append((char) unit);
                continue;
            }
            if (unit == 9) {
                stringBuffer.append('\t');
                pos += 7;
                continue;
            }
            switch (HWP_CHAR_TYPE[unit]) {
                case 1:
                case 3:
                    // both types occupy 8 units in total: this one plus 7 to skip
                    pos += 7;
                    break;
                case 2:
                    stringBuffer.append(' ');
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Reads the next record header into {@code tagInfo}.
     *
     * <p>The 32-bit header packs the id (bits 0-9), level (bits 10-19) and length (bits 20-31).
     * A length field with all bits set (4095) means the real length follows as a separate
     * 32-bit word.
     *
     * @param hwpStreamReader reader positioned at a record header
     * @param tagInfo         holder that is overwritten with the decoded header
     * @return {@code false} when the reader reports -1 (treated here as end of stream) instead of
     *         a header, {@code true} otherwise
     * @throws EOFException if the stream ends where the extended length word is expected
     * @throws IOException  if reading fails
     */
    private boolean readTag(HwpStreamReader hwpStreamReader, TagInfo tagInfo) throws IOException {
        long header = hwpStreamReader.uint32();
        if (header == -1) {
            return false;
        }
        tagInfo.id = header & 1023;
        tagInfo.level = (header >> 10) & 1023;
        long length = (header >> 20) & 4095;
        if (length == 4095) {
            length = hwpStreamReader.uint32();
            // -1 is the same sentinel checked above; never store it as a record length
            if (length == -1) {
                throw new EOFException("stream ended before the extended record length");
            }
        }
        tagInfo.length = length;
        return true;
    }
}
