package org.apache.jackrabbit.core.query;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import javax.jcr.RepositoryException;
import org.apache.jackrabbit.core.query.lucene.FieldNames;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.BLOBFileValue;
import org.apache.jackrabbit.core.value.InternalValue;
import org.textmining.text.extraction.WordExtractor;

/* loaded from: input_file:org/apache/jackrabbit/core/query/MsWordTextFilter.class */
public class MsWordTextFilter implements TextFilter {
    public boolean canFilter(String str) {
        return "application/vnd.ms-word".equalsIgnoreCase(str) || "application/msword".equalsIgnoreCase(str);
    }

    public Map doFilter(PropertyState propertyState, String str) throws RepositoryException {
        InternalValue[] values = propertyState.getValues();
        if (values.length <= 0) {
            throw new RepositoryException("Multi-valued binary properties not supported.");
        }
        try {
            String extractText = new WordExtractor().extractText(((BLOBFileValue) values[0].internalValue()).getStream());
            HashMap hashMap = new HashMap();
            hashMap.put(FieldNames.FULLTEXT, new StringReader(extractText));
            return hashMap;
        } catch (Exception e) {
            throw new RepositoryException(e);
        }
    }
}
