package hivemall.tools.text;

import hivemall.utils.lang.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;

/**
 * Hive UDF that returns the singular form of an English word.
 *
 * <p>The input is resolved in the following order:
 * <ol>
 * <li>{@code null} and the empty string are returned as-is.</li>
 * <li>Words listed in {@link #UNCHANGED} are returned as-is.</li>
 * <li>Hyphenated compounds whose second part is a preposition have only their first part
 * singularized (e.g. {@code "mothers-in-law"} becomes {@code "mother-in-law"}).</li>
 * <li>A trailing apostrophe is treated as a plural possessive: the stem is singularized and
 * {@code "'s"} is appended (e.g. {@code "dogs'"} becomes {@code "dog's"}).</li>
 * <li>Words found in {@link #IRREGULAR} are mapped to their listed singular form.</li>
 * <li>Otherwise the first matching regular-expression rule in {@link #RULE_SPECS} is applied.</li>
 * </ol>
 *
 * <p>The {@link #UNCHANGED}, {@link #PREPOSITIONS} and {@link #IRREGULAR} lookups are
 * case-sensitive and contain lower-case entries only, whereas the regular-expression rules match
 * case-insensitively.
 */
@UDFType(deterministic = true, stateful = false)
@Description(name = "singularize", value = "_FUNC_(string word) - Returns singular form of a given English word", extended = "SELECT singularize(lower(\"Apples\"));\n\n \"apple\"")
public final class SingularizeUDF extends UDF {

    /** Prepositions that may follow the head noun of a hyphenated compound. MUST stay sorted: searched with {@link Arrays#binarySearch}. */
    private static final String[] PREPOSITIONS = {"about", "above", "across", "after", "among", "around", "at", "athwart", "before", "behind", "below", "beneath", "beside", "besides", "between", "betwixt", "beyond", "but", "by", "during", "except", "for", "from", "in", "into", "near", "of", "off", "on", "onto", "out", "over", "since", "till", "to", "under", "until", "unto", "upon", "with"};

    /** Words that are returned unmodified. MUST stay sorted: searched with {@link Arrays#binarySearch}. */
    private static final String[] UNCHANGED = {"advice", "bison", "bread", "bream", "breeches", "britches", "butter", "carp", "chassis", "cheese", "christmas", "clippers", "cod", "contretemps", "corps", "debris", "diabetes", "djinn", "eland", "electricity", "elk", "equipment", "flounder", "fruit", "furniture", "gallows", "garbage", "georgia", "graffiti", "gravel", "happiness", "headquarters", "herpes", "high-jinks", "homework", "information", "innings", "jackanapes", "ketchup", "knowledge", "love", "luggage", "mackerel", "mathematics", "mayonnaise", "measles", "meat", "mews", "mumps", "mustard", "news", "pincers", "pliers", "proceedings", "progress", "rabies", "research", "rice", "salmon", "sand", "scissors", "series", "shears", "software", "species", "swine", "swiss", "trout", "tuna", "understanding", "water", "whiting", "wildebeest"};

    /** Irregular plural form mapped to its singular form. Consulted only after {@link #UNCHANGED}. */
    private static final Map<String, String> IRREGULAR = new HashMap<>();

    /**
     * Flattened (regex, replacement) pairs: even indexes hold the pattern source, odd indexes the
     * replacement string. Order matters because the first matching pattern wins.
     */
    private static final String[] RULE_SPECS = {
            "(quiz)zes$", "$1",
            "(matr)ices$", "$1ix",
            "(vert|ind)ices$", "$1ex",
            "^(ox)en", "$1",
            "(alias|status)$", "$1",
            "(alias|status)es$", "$1",
            "(octop|vir)us$", "$1us",
            "(octop|vir)i$", "$1us",
            "(cris|ax|test)es$", "$1is",
            "(cris|ax|test)is$", "$1is",
            "(shoe)s$", "$1",
            "(o)es$", "$1",
            "(bus)es$", "$1",
            // Character class is [ml]; a '|' inside the brackets would be matched literally.
            "([ml])ice$", "$1ouse",
            "(x|ch|ss|sh)es$", "$1",
            "(m)ovies$", "$1ovie",
            "(s)eries$", "$1eries",
            "([^aeiouy]|qu)ies$", "$1y",
            "([lr])ves$", "$1f",
            "(tive)s$", "$1",
            "(hive)s$", "$1",
            "([^f])ves$", "$1fe",
            "(^analy)sis$", "$1sis",
            "(^analy)ses$", "$1sis",
            // Group 1 already contains the complete stem. Appending the nested group as well
            // would duplicate its letter (e.g. "psychoanalyses" -> "psychoanalyasis").
            "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1sis",
            "([ti])a$", "$1um",
            "(n)ews$", "$1ews",
            "(s|si|u)s$", "$1s",
            "s$", ""};

    /** Patterns of {@link #RULE_SPECS}, compiled once; element {@code i} belongs to spec pair {@code i}. */
    private static final Pattern[] RULE_PATTERNS = compileRules(RULE_SPECS);

    static {
        IRREGULAR.put("atlantes", "atlas");
        IRREGULAR.put("atlases", "atlas");
        IRREGULAR.put("axes", "axe");
        IRREGULAR.put("beeves", "beef");
        IRREGULAR.put("brethren", "brother");
        IRREGULAR.put("children", "child");
        IRREGULAR.put("corpora", "corpus");
        IRREGULAR.put("corpuses", "corpus");
        IRREGULAR.put("ephemerides", "ephemeris");
        IRREGULAR.put("feet", "foot");
        IRREGULAR.put("ganglia", "ganglion");
        IRREGULAR.put("geese", "goose");
        IRREGULAR.put("genera", "genus");
        IRREGULAR.put("genii", "genie");
        IRREGULAR.put("graffiti", "graffito");
        IRREGULAR.put("helves", "helve");
        IRREGULAR.put("kine", "cow");
        IRREGULAR.put("leaves", "leaf");
        IRREGULAR.put("loaves", "loaf");
        IRREGULAR.put("men", "man");
        IRREGULAR.put("mongooses", "mongoose");
        IRREGULAR.put("monies", "money");
        IRREGULAR.put("moves", "move");
        IRREGULAR.put("mythoi", "mythos");
        IRREGULAR.put("numena", "numen");
        IRREGULAR.put("occipita", "occiput");
        IRREGULAR.put("octopodes", "octopus");
        IRREGULAR.put("opera", "opus");
        IRREGULAR.put("opuses", "opus");
        IRREGULAR.put("our", "my");
        IRREGULAR.put("oxen", "ox");
        IRREGULAR.put("penes", "penis");
        IRREGULAR.put("penises", "penis");
        IRREGULAR.put("people", "person");
        IRREGULAR.put("sexes", "sex");
        IRREGULAR.put("soliloquies", "soliloquy");
        IRREGULAR.put("teeth", "tooth");
        IRREGULAR.put("testes", "testis");
        IRREGULAR.put("trilbys", "trilby");
        IRREGULAR.put("turves", "turf");
        IRREGULAR.put("zoa", "zoon");
    }

    /**
     * UDF entry point.
     *
     * @param str the word to singularize; may be {@code null}
     * @return the singular form, or {@code null} when {@code str} is {@code null}
     */
    @Nullable
    public String evaluate(@Nullable String str) {
        return singularize(str);
    }

    /**
     * Compiles the pattern half of each (regex, replacement) pair, case-insensitively.
     *
     * @param specs flattened (regex, replacement) pairs
     * @return one compiled pattern per pair, in the same order
     */
    private static Pattern[] compileRules(String[] specs) {
        final Pattern[] compiled = new Pattern[specs.length / 2];
        for (int i = 0; i < compiled.length; i++) {
            compiled[i] = Pattern.compile(specs[2 * i], Pattern.CASE_INSENSITIVE);
        }
        return compiled;
    }

    /**
     * Returns the singular form of {@code str} following the lookup order described on the class.
     *
     * @param str the word to singularize; may be {@code null}
     * @return the singular form; {@code null} for {@code null} input; the input itself when it is
     *         empty, listed as unchanged, or matched by no rule
     */
    @Nullable
    private static String singularize(@Nullable String str) {
        if (str == null) {
            return null;
        }
        if (str.isEmpty() || Arrays.binarySearch(UNCHANGED, str) >= 0) {
            return str;
        }

        if (str.contains("-")) {
            // String#split drops trailing empty strings, so e.g. "-" yields no parts at all.
            final List<String> parts = new ArrayList<>(Arrays.asList(str.split("-")));
            if (parts.size() > 1 && Arrays.binarySearch(PREPOSITIONS, parts.get(1)) >= 0) {
                // Only the head of the compound is inflected; the remaining parts are re-joined.
                final String head = singularize(parts.remove(0));
                return head + "-" + StringUtils.join(parts, "-");
            }
        }

        if (str.endsWith("'")) {
            // Plural possessive: singularize the stem and re-attach a singular possessive suffix.
            return singularize(str.substring(0, str.length() - 1)) + "'s";
        }

        // IRREGULAR never stores null values, so a null result means "no entry".
        final String irregularSingular = IRREGULAR.get(str);
        if (irregularSingular != null) {
            return irregularSingular;
        }

        for (int i = 0; i < RULE_PATTERNS.length; i++) {
            final Matcher matcher = RULE_PATTERNS[i].matcher(str);
            if (matcher.find()) {
                return matcher.replaceAll(RULE_SPECS[2 * i + 1]);
            }
        }
        return str;
    }
}
