package org.apache.sysml.runtime.transform;

import java.io.EOFException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.matrix.CSVReblockMR;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/* loaded from: input_file:org/apache/sysml/runtime/transform/TfUtils.class */
public class TfUtils implements Serializable {
    private static final long serialVersionUID = 526252850872633125L;
    // Transformation method identifiers (not referenced inside this class;
    // presumably matched against the transformation spec by the agents -- verify).
    public static final String TXMETHOD_IMPUTE = "impute";
    public static final String TXMETHOD_RECODE = "recode";
    public static final String TXMETHOD_BIN = "bin";
    public static final String TXMETHOD_DUMMYCODE = "dummycode";
    public static final String TXMETHOD_SCALE = "scale";
    public static final String TXMETHOD_OMIT = "omit";
    public static final String TXMETHOD_MVRCD = "mvrcd";
    // Separator, file names and suffixes for transformation metadata
    // (not referenced inside this class).
    public static final String TXMTD_SEP = ",";
    public static final String TXMTD_COLTYPES = "coltypes.csv";
    public static final String TXMTD_COLNAMES = "column.names";
    public static final String TXMTD_DC_COLNAMES = "dummycoded.column.names";
    public static final String TXMTD_RCD_MAP_SUFFIX = ".map";
    public static final String TXMTD_RCD_DISTINCT_SUFFIX = ".ndistinct";
    // Key names, presumably for the JSON transformation specification -- TODO confirm
    // (not referenced inside this class).
    public static final String JSON_ATTRS = "attributes";
    public static final String JSON_MTHD = "methods";
    public static final String JSON_CONSTS = "constants";
    public static final String JSON_NBINS = "numbins";
    // File names/suffixes visible to the package and subclasses
    // (not referenced inside this class).
    protected static final String MV_FILE_SUFFIX = ".impute";
    protected static final String MODE_FILE_SUFFIX = ".mode";
    protected static final String BIN_FILE_SUFFIX = ".bin";
    protected static final String SCALE_FILE_SUFFIX = ".scale";
    protected static final String DCD_FILE_NAME = "dummyCodeMaps.csv";
    protected static final String DCD_NAME_SEP = "_";
    // Per-method agents; created by createAgents() or injected through setupAgents().
    private OmitAgent _oa;
    private MVImputeAgent _mia;
    private RecodeAgent _ra;
    private BinAgent _ba;
    private DummycodeAgent _da;
    // Counters: total records (incrTotal/setTotal), valid records (incrValid/setValid),
    // and rows passed through apply(). _numTransformedColumns is only reset in init()
    // and never otherwise modified in this class.
    private long _numRecordsInPartFile;
    private long _numValidRecords;
    private long _numTransformedRows;
    private long _numTransformedColumns;
    // Input description: raw header line, header flag, delimiter (compiled as a literal
    // pattern in init() and kept as the raw string), and the NA strings.
    private String _headerLine;
    private boolean _hasHeader;
    private Pattern _delim;
    private String _delimString;
    private String[] _NAstrings;
    // Column names obtained by splitting the header line (see parseColumnNames()).
    private String[] _outputColumnNames;
    // Number of input columns; the constructors set it to -1 until a real value is supplied.
    private long _numInputCols;
    // Locations and the raw spec string; _spec is only assigned by the
    // (JobConf, boolean) constructor.
    private String _tfMtdDir;
    private String _spec;
    private String _offsetFile;
    private String _tmpDir;
    private String _outputPath;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/apache/sysml/runtime/transform/TfUtils$ColumnTypes.class */
    public enum ColumnTypes {
        SCALE,
        NOMINAL,
        ORDINAL,
        DUMMYCODED,
        INVALID;

        /**
         * Returns the numeric identifier of this column type.
         *
         * <p>SCALE and DUMMYCODED both map to 1, NOMINAL maps to 2 and ORDINAL to 3.
         *
         * @return the byte identifier of this type
         * @throws RuntimeException if called on a type without an identifier (INVALID)
         */
        public byte toID() {
            // DUMMYCODED deliberately shares the identifier of SCALE.
            if (this == SCALE || this == DUMMYCODED) {
                return (byte) 1;
            }
            if (this == NOMINAL) {
                return (byte) 2;
            }
            if (this == ORDINAL) {
                return (byte) 3;
            }
            throw new RuntimeException("Invalid Column Type: " + this);
        }
    }

    public TfUtils(JobConf jobConf, boolean z) throws IOException, JSONException {
        this._oa = null;
        this._mia = null;
        this._ra = null;
        this._ba = null;
        this._da = null;
        this._headerLine = null;
        this._delim = null;
        this._delimString = null;
        this._NAstrings = null;
        this._outputColumnNames = null;
        this._numInputCols = -1L;
        this._tfMtdDir = null;
        this._spec = null;
        this._offsetFile = null;
        this._tmpDir = null;
        this._outputPath = null;
        if (!InfrastructureAnalyzer.isLocalMode(jobConf)) {
            ConfigurationManager.setCachedJobConf(jobConf);
        }
        this._NAstrings = parseNAStrings(jobConf);
        this._spec = jobConf.get(MRJobConfiguration.TF_SPEC);
        this._oa = new OmitAgent(new JSONObject(this._spec));
    }

    /**
     * Builds a fully initialised instance from a job configuration: header, delimiter,
     * NA strings, spec, column count, offsets file, temporary location and output path
     * are all read from {@code jobConf} and handed to {@code init}.
     *
     * @param jobConf job configuration to read the transformation settings from
     * @throws IOException   propagated from {@code init}
     * @throws JSONException if the spec cannot be turned into a JSONObject
     */
    public TfUtils(JobConf jobConf) throws IOException, JSONException {
        if (!InfrastructureAnalyzer.isLocalMode(jobConf)) {
            ConfigurationManager.setCachedJobConf(jobConf);
        }

        boolean hasHeader = Boolean.parseBoolean(jobConf.get(MRJobConfiguration.TF_HAS_HEADER));
        String[] naStrings = parseNAStrings(jobConf);
        long numCols = UtilFunctions.parseToLong(jobConf.get(MRJobConfiguration.TF_NUM_COLS));
        String specText = jobConf.get(MRJobConfiguration.TF_SPEC);
        String offsetFile = jobConf.get(MRJobConfiguration.TF_OFFSETS_FILE);
        String tmpLocation = jobConf.get(MRJobConfiguration.TF_TMP_LOC);
        String outputPath = FileOutputFormat.getOutputPath(jobConf).toString();
        String headerLine = jobConf.get(MRJobConfiguration.TF_HEADER);
        String delimiter = jobConf.get(MRJobConfiguration.TF_DELIM);
        JSONObject spec = new JSONObject(specText);

        init(headerLine, hasHeader, delimiter, naStrings, spec, numCols, offsetFile, tmpLocation, outputPath);
    }

    /**
     * Same as {@link #TfUtils(JobConf)}, additionally recording the transformation
     * metadata directory.
     *
     * @param jobConf job configuration to read the transformation settings from
     * @param str     transformation metadata directory
     * @throws IOException   propagated from the delegated constructor
     * @throws JSONException propagated from the delegated constructor
     */
    public TfUtils(JobConf jobConf, String str) throws IOException, JSONException {
        this(jobConf);
        _tfMtdDir = str;
    }

    /**
     * Builds an instance from explicit settings instead of a job configuration.
     * The output path is left {@code null}.
     *
     * @param str        header line
     * @param z          whether the input has a header
     * @param str2       delimiter (treated as a literal string)
     * @param strArr     NA strings, may be {@code null}
     * @param jSONObject transformation spec
     * @param j          number of input columns
     * @param str3       transformation metadata directory
     * @param str4       offsets file
     * @param str5       temporary directory
     * @throws IOException   propagated from {@code init}
     * @throws JSONException propagated from {@code init}
     */
    public TfUtils(String str, boolean z, String str2, String[] strArr, JSONObject jSONObject, long j, String str3, String str4, String str5) throws IOException, JSONException {
        // Note the argument order: str4 is the offsets file, str5 the temporary directory.
        init(str, z, str2, strArr, jSONObject, j, str4, str5, null);
        _tfMtdDir = str3;
    }

    public TfUtils(JSONObject jSONObject, long j) throws IOException, JSONException {
        this._oa = null;
        this._mia = null;
        this._ra = null;
        this._ba = null;
        this._da = null;
        this._headerLine = null;
        this._delim = null;
        this._delimString = null;
        this._NAstrings = null;
        this._outputColumnNames = null;
        this._numInputCols = -1L;
        this._tfMtdDir = null;
        this._spec = null;
        this._offsetFile = null;
        this._tmpDir = null;
        this._outputPath = null;
        this._numInputCols = j;
        createAgents(jSONObject, new String[0]);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Checks that {@code path} exists on {@code fileSystem} and is not empty.
     *
     * @param fileSystem file system to query
     * @param path       file to check
     * @param z          if {@code true}, a failed check raises an exception instead of returning {@code false}
     * @return {@code true} if the file exists and is non-empty; {@code false} otherwise (only when {@code z} is false)
     * @throws IOException  if the file does not exist and {@code z} is true
     * @throws EOFException if the file is empty and {@code z} is true
     */
    public static boolean checkValidInputFile(FileSystem fileSystem, Path path, boolean z) throws IOException {
        if (fileSystem.exists(path)) {
            boolean empty = MapReduceTool.isFileEmpty(fileSystem, path.toString());
            if (!empty) {
                return true;
            }
            if (z) {
                throw new EOFException("Empty input file " + path.toString() + ".");
            }
            return false;
        }

        if (z) {
            throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
        }
        return false;
    }

    /**
     * Returns the fully qualified name of the map input file configured in {@code jobConf}.
     *
     * @param jobConf job configuration holding the map input file
     * @return the qualified path as a string
     * @throws IOException if the file system cannot be obtained
     */
    public static String getPartFileName(JobConf jobConf) throws IOException {
        Path inputFile = new Path(jobConf.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = FileSystem.get(jobConf);
        return inputFile.makeQualified(fs).toString();
    }

    /**
     * Tells whether the current map input file is the file recorded under
     * {@code MRJobConfiguration.TF_SMALLEST_FILE}; both paths are compared in qualified form.
     *
     * @param jobConf job configuration
     * @return {@code true} if both qualified paths are equal
     * @throws IOException if the file system cannot be obtained
     */
    public static boolean isPartFileWithHeader(JobConf jobConf) throws IOException {
        String currentFile = getPartFileName(jobConf);
        Path smallest = new Path(jobConf.get(MRJobConfiguration.TF_SMALLEST_FILE));
        String headerFile = smallest.makeQualified(FileSystem.get(jobConf)).toString();
        return currentFile.equals(headerFile);
    }

    /**
     * Appends the NA-string separator followed by the literal {@code dummy} to {@code str}.
     * A {@code null} argument is rendered as the text {@code null}, as with string concatenation.
     *
     * @param str NA-strings list to extend
     * @return {@code str} followed by the separator and {@code dummy}
     */
    public static String prepNAStrings(String str) {
        StringBuilder prepared = new StringBuilder();
        prepared.append(str);
        prepared.append("·dummy");
        return prepared.toString();
    }

    /**
     * Splits an NA-strings list on the literal separator, keeping trailing empty entries.
     *
     * @param str separator-joined NA strings, may be {@code null}
     * @return the individual strings, or {@code null} if {@code str} is {@code null}
     */
    public static String[] parseNAStrings(String str) {
        if (str != null) {
            // The separator is quoted so it is matched literally; limit -1 keeps trailing empties.
            return str.split(Pattern.quote("·"), -1);
        }
        return null;
    }

    /**
     * Reads the NA-strings entry from the job configuration and splits it.
     *
     * @param jobConf job configuration
     * @return the individual NA strings, or {@code null} if the entry is absent
     */
    public static String[] parseNAStrings(JobConf jobConf) {
        String joined = jobConf.get(MRJobConfiguration.TF_NA_STRINGS);
        return parseNAStrings(joined);
    }

    /**
     * Instantiates all five agents from the given spec.
     *
     * @param jSONObject transformation spec
     * @param strArr     NA strings handed to the impute agent
     * @throws IOException   propagated from agent construction
     * @throws JSONException propagated from agent construction
     */
    private void createAgents(JSONObject jSONObject, String[] strArr) throws IOException, JSONException {
        _oa = new OmitAgent(jSONObject);
        _mia = new MVImputeAgent(jSONObject, strArr);
        _ra = new RecodeAgent(jSONObject);
        _ba = new BinAgent(jSONObject);
        // The dummycode agent also needs the number of input columns.
        _da = new DummycodeAgent(jSONObject, _numInputCols);
    }

    /**
     * Replaces all five agents with the supplied instances.
     *
     * @param omitAgent      omit agent
     * @param mVImputeAgent  impute agent
     * @param recodeAgent    recode agent
     * @param binAgent       bin agent
     * @param dummycodeAgent dummycode agent
     */
    public void setupAgents(OmitAgent omitAgent, MVImputeAgent mVImputeAgent, RecodeAgent recodeAgent, BinAgent binAgent, DummycodeAgent dummycodeAgent) {
        _da = dummycodeAgent;
        _ba = binAgent;
        _ra = recodeAgent;
        _mia = mVImputeAgent;
        _oa = omitAgent;
    }

    /**
     * Splits the header line on the delimiter (keeping trailing empty fields) and
     * stores the unquoted column names.
     */
    private void parseColumnNames() {
        String[] names = _delim.split(_headerLine, -1);
        _outputColumnNames = names;
        int pos = 0;
        while (pos < names.length) {
            names[pos] = UtilFunctions.unquote(names[pos]);
            pos++;
        }
    }

    /**
     * Shared initialisation: resets the counters, stores the input description and
     * locations, parses the column names from the header and creates the agents.
     * Note that {@code _spec} is not assigned here.
     *
     * @param str        header line
     * @param z          whether the input has a header
     * @param str2       delimiter, compiled as a literal pattern
     * @param strArr     NA strings
     * @param jSONObject transformation spec
     * @param j          number of input columns
     * @param str3       offsets file
     * @param str4       temporary directory
     * @param str5       output path
     * @throws IOException   propagated from agent construction
     * @throws JSONException propagated from agent construction
     */
    private void init(String str, boolean z, String str2, String[] strArr, JSONObject jSONObject, long j, String str3, String str4, String str5) throws IOException, JSONException {
        // Counters start from zero.
        _numRecordsInPartFile = 0L;
        _numValidRecords = 0L;
        _numTransformedRows = 0L;
        _numTransformedColumns = 0L;

        // Locations.
        _offsetFile = str3;
        _tmpDir = str4;
        _outputPath = str5;

        // Input description.
        _headerLine = str;
        _hasHeader = z;
        _NAstrings = strArr;
        _numInputCols = j;
        _delimString = str2;
        _delim = Pattern.compile(Pattern.quote(str2));

        // Both steps depend on the fields assigned above.
        parseColumnNames();
        createAgents(jSONObject, strArr);
    }

    /** Increments the number of valid records by one. */
    public void incrValid() {
        _numValidRecords += 1;
    }

    /** @return the number of valid records counted so far */
    public long getValid() {
        return _numValidRecords;
    }

    /** @return the total number of records counted so far */
    public long getTotal() {
        return _numRecordsInPartFile;
    }

    /** @return the number of rows that have been passed through {@code apply} */
    public long getNumTransformedRows() {
        return _numTransformedRows;
    }

    /** @return the stored transformed-column count */
    public long getNumTransformedColumns() {
        return _numTransformedColumns;
    }

    /** @return the raw header line, or {@code null} if none was set */
    public String getHeader() {
        return _headerLine;
    }

    /** @return the stored header flag */
    public boolean hasHeader() {
        return _hasHeader;
    }

    /** @return the delimiter as the raw string it was configured with */
    public String getDelimString() {
        return _delimString;
    }

    /** @return the delimiter compiled as a literal pattern, or {@code null} if not initialised */
    public Pattern getDelim() {
        return _delim;
    }

    /** @return the NA strings (the internal array, not a copy), or {@code null} if none */
    public String[] getNAStrings() {
        return _NAstrings;
    }

    /** @return the number of input columns, or -1 if it was never set */
    public long getNumCols() {
        return _numInputCols;
    }

    /** @return the raw spec string; only the (JobConf, boolean) constructor assigns it */
    public String getSpec() {
        return _spec;
    }

    /** @return the transformation metadata directory, or {@code null} if not set */
    public String getTfMtdDir() {
        return _tfMtdDir;
    }

    /** @return the offsets file, or {@code null} if not set */
    public String getOffsetFile() {
        return _offsetFile;
    }

    /** @return the temporary directory, or {@code null} if not set */
    public String getTmpDir() {
        return _tmpDir;
    }

    /** @return the output path, or {@code null} if not set */
    public String getOutputPath() {
        return _outputPath;
    }

    /**
     * Returns the name of a column given its 1-based position.
     *
     * @param i 1-based column index
     * @return the column name parsed from the header line
     */
    public String getName(int i) {
        int zeroBased = i - 1;
        return _outputColumnNames[zeroBased];
    }

    /**
     * Overwrites the number of valid records.
     *
     * @param j new valid-record count
     */
    public void setValid(long j) {
        _numValidRecords = j;
    }

    /** Increments the total number of records by one. */
    public void incrTotal() {
        _numRecordsInPartFile += 1;
    }

    /**
     * Overwrites the total number of records.
     *
     * @param j new total-record count
     */
    public void setTotal(long j) {
        _numRecordsInPartFile = j;
    }

    /** @return the omit agent, or {@code null} if none has been created */
    public OmitAgent getOmitAgent() {
        return _oa;
    }

    /** @return the impute agent, or {@code null} if none has been created */
    public MVImputeAgent getMVImputeAgent() {
        return _mia;
    }

    /** @return the recode agent, or {@code null} if none has been created */
    public RecodeAgent getRecodeAgent() {
        return _ra;
    }

    /** @return the bin agent, or {@code null} if none has been created */
    public BinAgent getBinAgent() {
        return _ba;
    }

    /** @return the dummycode agent, or {@code null} if none has been created */
    public DummycodeAgent getDummycodeAgent() {
        return _da;
    }

    /**
     * Tells whether {@code str} equals one of the configured NA strings.
     *
     * @param strArr NA strings, may be {@code null}
     * @param str    value to test, may be {@code null}
     * @return {@code true} if {@code str} equals an entry of {@code strArr};
     *         {@code false} if either argument is {@code null} or no entry matches
     */
    public static boolean isNA(String[] strArr, String str) {
        // A null value cannot equal any NA string; report "not NA" instead of
        // failing with a NullPointerException.
        if (strArr == null || str == null) {
            return false;
        }
        for (String na : strArr) {
            if (str.equals(na)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits a line given as Hadoop {@code Text} into its fields.
     *
     * @param text input line
     * @return the fields of the line
     */
    public String[] getWords(Text text) {
        String line = text.toString();
        return getWords(line);
    }

    /**
     * Trims the line and splits it on the delimiter, keeping trailing empty fields.
     *
     * @param str input line
     * @return the fields of the line
     */
    public String[] getWords(String str) {
        Pattern delimiter = getDelim();
        String trimmed = str.trim();
        return delimiter.split(trimmed, -1);
    }

    /**
     * Splits a line into fields and, unless the omit agent rejects the row, feeds it
     * to the impute, recode and bin agents and counts it as valid. Every row is
     * counted towards the total.
     *
     * @param str input line
     * @return the fields of the line
     * @throws IOException propagated from the agents
     */
    public String[] prepareTfMtd(String str) throws IOException {
        String[] fields = getWords(str);
        boolean omitted = getOmitAgent().omit(fields, this);
        if (!omitted) {
            getMVImputeAgent().prepare(fields);
            getRecodeAgent().prepare(fields, this);
            getBinAgent().prepare(fields, this);
            incrValid();
        }
        incrTotal();
        return fields;
    }

    /**
     * Loads the transformation metadata using the cached job configuration and the
     * non-local file system.
     *
     * @throws IOException propagated from the metadata loaders
     */
    public void loadTfMetadata() throws IOException {
        JobConf cachedConf = ConfigurationManager.getCachedJobConf();
        loadTfMetadata(cachedConf, false);
    }

    /**
     * Loads the transformation metadata into the impute, recode, bin and dummycode agents.
     *
     * @param jobConf job configuration
     * @param z       if {@code true}, metadata is read from the first local distributed-cache
     *                entry via the local file system; otherwise from {@link #getTfMtdDir()}
     *                on the file system configured in {@code jobConf}
     * @throws IOException if the distributed cache has no local files when {@code z} is
     *                     {@code true}, or if loading fails in one of the agents
     */
    public void loadTfMetadata(JobConf jobConf, boolean z) throws IOException {
        // Declared as the common supertype: the non-local branch yields a plain FileSystem,
        // which is not assignable to LocalFileSystem.
        FileSystem fs;
        Path mtdDir;
        if (z) {
            Path[] cached = DistributedCache.getLocalCacheFiles(jobConf);
            if (cached == null || cached.length == 0) {
                throw new IOException("No local distributed cache files found to load transformation metadata from.");
            }
            mtdDir = cached[0];
            fs = FileSystem.getLocal(jobConf);
        } else {
            fs = FileSystem.get(jobConf);
            mtdDir = new Path(getTfMtdDir());
        }

        getMVImputeAgent().loadTxMtd(jobConf, fs, mtdDir, this);
        getRecodeAgent().loadTxMtd(jobConf, fs, mtdDir, this);
        getBinAgent().loadTxMtd(jobConf, fs, mtdDir, this);

        // The dummycode agent is handed the recode maps and bin counts loaded above
        // before its own metadata is loaded.
        getDummycodeAgent().setRecodeMaps(getRecodeAgent().getRecodeMaps());
        getDummycodeAgent().setNumBins(getBinAgent().getColList(), getBinAgent().getNumBins());
        getDummycodeAgent().loadTxMtd(jobConf, fs, mtdDir, this);
    }

    /**
     * Builds the dummycoded header from the original header, writes the dummycode maps
     * and column types as well as the header files to the temporary directory, and
     * returns the dummycoded header.
     *
     * @return the dummycoded header line
     * @throws IOException propagated from the file-system operations
     */
    public String processHeaderLine() throws IOException {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        DummycodeAgent dummycoder = getDummycodeAgent();

        String dcdHeader = dummycoder.constructDummycodedHeader(getHeader(), getDelim());
        int numCols = (int) getNumCols();
        getDummycodeAgent().genDcdMapsAndColTypes(fs, getTmpDir(), numCols, this);
        DataTransform.generateHeaderFiles(fs, getTmpDir(), getHeader(), dcdHeader);
        return dcdHeader;
    }

    /**
     * Asks the omit agent whether the given row should be dropped.
     *
     * @param strArr fields of the row
     * @return the omit agent's decision, or {@code false} if there is no omit agent
     */
    public boolean omit(String[] strArr) {
        OmitAgent agent = getOmitAgent();
        return agent != null && agent.omit(strArr, this);
    }

    /**
     * Runs a row through the impute, recode, bin and dummycode agents, in that order,
     * and counts it as transformed.
     *
     * @param strArr fields of the row
     * @return the transformed fields
     */
    public String[] apply(String[] strArr) {
        String[] row = getMVImputeAgent().apply(strArr);
        row = getRecodeAgent().apply(row);
        row = getBinAgent().apply(row);
        row = getDummycodeAgent().apply(row);
        _numTransformedRows += 1;
        return row;
    }

    /**
     * When NA strings are configured, rejects rows that still contain an empty string.
     * Does nothing when no NA strings are configured.
     *
     * @param strArr fields of the row; {@code null} entries are ignored
     * @throws DMLRuntimeException if a field is the empty string
     */
    public void check(String[] strArr) throws DMLRuntimeException {
        if (getNAStrings() == null) {
            return;
        }
        int columnId = 0;
        for (String field : strArr) {
            columnId++; // column IDs are 1-based
            if (field != null && field.isEmpty()) {
                throw new DMLRuntimeException("When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: " + getDummycodeAgent().mapDcdColumnID(columnId));
            }
        }
    }

    /**
     * Convenience overload that formats the row using a fresh {@code StringBuilder}.
     *
     * @param strArr fields of the row
     * @return the delimiter-joined output line
     * @throws DMLRuntimeException propagated from the two-argument overload
     */
    public String checkAndPrepOutputString(String[] strArr) throws DMLRuntimeException {
        StringBuilder buffer = new StringBuilder();
        return checkAndPrepOutputString(strArr, buffer);
    }

    /**
     * Joins the fields of a row with the delimiter string, writing {@code "0"} for
     * {@code null} fields. When NA strings are configured, an empty-string field is
     * rejected. An empty row yields an empty string.
     *
     * @param strArr fields of the row
     * @param sb     buffer to build the line in; it is cleared first
     * @return the delimiter-joined output line
     * @throws DMLRuntimeException if NA strings are configured and a field is the empty string
     */
    public String checkAndPrepOutputString(String[] strArr, StringBuilder sb) throws DMLRuntimeException {
        final boolean rejectEmpty = getNAStrings() != null;
        sb.setLength(0);
        // A single loop handles every column, so a zero-length row no longer fails on strArr[0].
        for (int col = 0; col < strArr.length; col++) {
            if (col > 0) {
                sb.append(this._delimString);
            }
            String field = strArr[col];
            if (field == null) {
                sb.append("0");
                continue;
            }
            if (rejectEmpty && field.isEmpty()) {
                // Column IDs in the message are 1-based.
                throw new DMLRuntimeException("When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: " + getDummycodeAgent().mapDcdColumnID(col + 1));
            }
            sb.append(field);
        }
        return sb.toString();
    }

    /**
     * Opens a reader on the single sequence file found under the path configured as
     * {@code CSVReblockMR.ROWID_FILE_NAME}.
     *
     * @param jobConf job configuration
     * @return an open reader; the caller is responsible for closing it
     * @throws IOException if there is not exactly one sequence file, or on I/O failure
     */
    private SequenceFile.Reader initOffsetsReader(JobConf jobConf) throws IOException {
        Path countersPath = new Path(jobConf.get(CSVReblockMR.ROWID_FILE_NAME));
        FileSystem fs = FileSystem.get(jobConf);
        Path[] parts = MatrixReader.getSequenceFilePaths(fs, countersPath);
        if (parts.length == 1) {
            return new SequenceFile.Reader(fs, parts[0], jobConf);
        }
        throw new IOException("Expecting a single file under counters file: " + countersPath.toString());
    }

    /**
     * Determines the position of the current part file (identified by file name and
     * offset) within the offsets sequence file and returns it zero-padded to at least
     * five digits. If no entry matches, the number of entries read is returned.
     *
     * @param jobConf job configuration
     * @param j       file offset to match
     * @return the zero-based entry index, left-padded with {@code '0'} to five characters
     *         (longer if the index has more than five digits)
     * @throws IOException on failure to open or read the offsets file
     */
    public String getPartFileID(JobConf jobConf, long j) throws IOException {
        int index = 0;
        SequenceFile.Reader reader = initOffsetsReader(jobConf);
        try {
            ByteWritable key = new ByteWritable();
            CSVReblockMR.OffsetCount value = new CSVReblockMR.OffsetCount();
            String partFileName = getPartFileName(jobConf);
            while (reader.next(key, value)) {
                if (partFileName.equals(value.filename) && value.fileOffset == j) {
                    break;
                }
                index++;
            }
        } finally {
            // Always release the reader, including when reading fails.
            reader.close();
        }

        String digits = Integer.toString(index);
        // Clamp at zero so an index with more than five digits does not request a negative array size.
        char[] padding = new char[Math.max(0, 5 - digits.length())];
        Arrays.fill(padding, '0');
        return new String(padding).concat(digits);
    }
}
