package org.apache.sysml.runtime.transform;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.lops.CSVReBlock;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopProperties;
import org.apache.sysml.parser.Expression;
import org.apache.sysml.parser.ParameterizedBuiltinFunctionExpression;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.instructions.InstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
import org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysml.runtime.matrix.CSVReblockMR;
import org.apache.sysml.runtime.matrix.JobReturn;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.transform.encode.EncoderFactory;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.utils.JSONHelper;
import org.apache.wink.json4j.JSONArray;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/* loaded from: input_file:org/apache/sysml/runtime/transform/DataTransform.class */
public class DataTransform {
    /** Message of the DMLRuntimeException thrown when a transform job reports zero output rows. */
    private static final String ERROR_MSG_ZERO_ROWS = "Number of rows in the transformed output (potentially, after ommitting the ones with missing values) is zero. Cannot proceed.";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/sysml/runtime/transform/DataTransform$TransformOperands.class */
    /**
     * Bundle of the operands of a transform invocation: input location and CSV
     * properties, metadata paths, the transformation spec and the optional
     * output-names file. Instances are built either from a delimited
     * instruction string or from a parameter map.
     */
    public static class TransformOperands {
        private String inputPath;
        private String txMtdPath;
        private String applyTxPath;
        private String spec;
        private String outNamesFile;
        private boolean isApply;
        private CSVFileFormatProperties inputCSVProperties;

        /**
         * Parses the operands out of a "°"-delimited instruction string.
         * Token 3 is the metadata path; token 4 may be {@code applymtd=...};
         * it is optionally followed by {@code spec=...} and then
         * {@code outnames=...}.
         *
         * @param str           delimited instruction string
         * @param cacheableData input data object supplying the file name and CSV properties
         */
        private TransformOperands(String str, CacheableData<?> cacheableData) {
            this.inputPath = cacheableData.getFileName();
            this.inputCSVProperties = (CSVFileFormatProperties) cacheableData.getFileFormatProperties();
            String[] split = str.split("°");
            this.txMtdPath = split[3];
            this.applyTxPath = split[4].startsWith("applymtd=") ? split[4].substring(9) : null;
            this.isApply = this.applyTxPath != null;
            // The optional tokens start right after the applymtd token when it is present.
            int i = this.isApply ? 5 : 4;
            if (i < split.length && split[i].startsWith("spec=")) {
                // Read the value from the matched token BEFORE advancing; advancing
                // first would take the spec from the following token (or run off
                // the end of the array when "spec=" is the last token).
                this.spec = split[i].substring(5);
                i++;
            }
            if (i < split.length && split[i].startsWith("outnames=")) {
                this.outNamesFile = split[i].substring(9);
            }
        }

        /**
         * Reads the operands from an already-parsed parameter map.
         *
         * @param hashMap       transform parameters keyed by the TF_FN_PARAM_* names
         * @param cacheableData input data object supplying the file name and CSV properties
         */
        private TransformOperands(HashMap<String, String> hashMap, CacheableData<?> cacheableData) {
            this.inputPath = cacheableData.getFileName();
            this.txMtdPath = hashMap.get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_MTD);
            this.spec = hashMap.get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_SPEC);
            this.applyTxPath = hashMap.get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_APPLYMTD);
            this.isApply = this.applyTxPath != null;
            this.outNamesFile = hashMap.get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_OUTNAMES);
            this.inputCSVProperties = (CSVFileFormatProperties) cacheableData.getFileFormatProperties();
        }
    }

    /**
     * Returns the header line describing the columns of a CSV file.
     * The first line of the file is always read. If the CSV properties declare
     * a header, that line is returned as is; otherwise a synthetic header
     * {@code V1<delim>V2<delim>...} with one name per field of the first line
     * is generated.
     *
     * @param fileSystem              file system used to open the file
     * @param cSVFileFormatProperties CSV properties (header flag and delimiter)
     * @param str                     path of the file whose first line is read
     * @return the actual or synthesized header line
     * @throws IOException if the file cannot be opened or read
     */
    private static String readHeaderLine(FileSystem fileSystem, CSVFileFormatProperties cSVFileFormatProperties, String str) throws IOException {
        String readLine;
        // try-with-resources: the stream is released even when readLine() fails.
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(str))))) {
            readLine = bufferedReader.readLine();
        }
        if (cSVFileFormatProperties.hasHeader()) {
            return readLine;
        }
        String delim = cSVFileFormatProperties.getDelim();
        // Limit -1 keeps trailing empty fields so they still count as columns.
        int length = Pattern.compile(Pattern.quote(delim)).split(readLine, -1).length;
        StringBuilder sb = new StringBuilder("V1");
        for (int i = 2; i <= length; i++) {
            sb.append(delim).append('V').append(i);
        }
        return sb.toString();
    }

    /**
     * Builds a lookup from column name to 1-based column position.
     * The header is split on the literal CSV delimiter; every field is trimmed
     * and unquoted before being used as a key. A name that occurs more than
     * once keeps the position of its last occurrence.
     *
     * @param fileSystem              not used by this method
     * @param cSVFileFormatProperties CSV properties supplying the delimiter
     * @param str                     header line to split
     * @param str2                    not used by this method
     * @return map from column name to 1-based column position
     */
    private static HashMap<String, Integer> processColumnNames(FileSystem fileSystem, CSVFileFormatProperties cSVFileFormatProperties, String str, String str2) throws IllegalArgumentException, IOException {
        HashMap<String, Integer> positionsByName = new HashMap<>();
        Pattern delimiter = Pattern.compile(Pattern.quote(cSVFileFormatProperties.getDelim()));
        int position = 0;
        for (String field : delimiter.split(str, -1)) {
            position++;
            positionsByName.put(UtilFunctions.unquote(field.trim()), Integer.valueOf(position));
        }
        return positionsByName;
    }

    /**
     * Reorders up to three parallel arrays in place according to a permutation.
     * After the call, position {@code p} of each array holds the element that
     * was at position {@code numArr[p]} before the call. The permutation array
     * is consumed: every entry is overwritten with its own index.
     *
     * @param iArr   primary array; its length drives the iteration
     * @param bArr   array permuted alongside {@code iArr}
     * @param objArr optional third array permuted alongside; may be null
     * @param numArr permutation of source indexes, overwritten with the identity
     */
    private static void inplacePermute(int[] iArr, byte[] bArr, Object[] objArr, Integer[] numArr) {
        final boolean withObjects = objArr != null;
        Object heldObject = null;
        for (int start = 0; start < iArr.length; start++) {
            // Park the element at the head of the cycle; it is written back at the cycle's end.
            int heldInt = iArr[start];
            byte heldByte = bArr[start];
            if (withObjects) {
                heldObject = objArr[start];
            }
            int slot = start;
            int source = numArr[slot].intValue();
            numArr[slot] = Integer.valueOf(slot);
            // Walk the cycle, pulling each element from its source into the current slot.
            while (source != start) {
                iArr[slot] = iArr[source];
                bArr[slot] = bArr[source];
                if (withObjects) {
                    objArr[slot] = objArr[source];
                }
                slot = source;
                source = numArr[slot].intValue();
                numArr[slot] = Integer.valueOf(slot);
            }
            iArr[slot] = heldInt;
            bArr[slot] = heldByte;
            if (withObjects) {
                objArr[slot] = heldObject;
            }
        }
    }

    /**
     * Translates a user-written transformation specification into the
     * id-based JSON form that is returned as a string.
     * <p>
     * The input spec may reference columns by name or, when its {@code "ids"}
     * entry is true, by numeric id. Each transformation section (omit, impute,
     * recode, bin, dummycode, scale) is parsed into column ids sorted
     * ascending, with per-column method codes and parameters kept aligned.
     * The combinations are then validated, columns that are dummycoded but
     * neither recoded nor binned are added to the recode list, and columns
     * imputed with "global_mode" that were not listed for recoding are
     * emitted under the TXMETHOD_MVRCD key.
     *
     * @param fileSystem              not used by this method
     * @param str                     not used by this method
     * @param str2                    not used by this method
     * @param hashMap                 column name to column id lookup (used when the spec is name-based)
     * @param cSVFileFormatProperties not used by this method
     * @param str3                    the transformation specification as JSON text
     * @return the processed, id-based specification serialized as JSON text
     * @throws IllegalArgumentException if a column name is unknown, a bin count is not
     *                                  greater than 1, or transformations conflict on a column
     * @throws IOException              if a section names an unknown or unsupported method
     * @throws JSONException            if the specification cannot be parsed or built
     */
    private static String processSpecFile(FileSystem fileSystem, String str, String str2, HashMap<String, Integer> hashMap, CSVFileFormatProperties cSVFileFormatProperties, String str3) throws IllegalArgumentException, IOException, JSONException {
        JSONObject inSpec = new JSONObject(str3);
        boolean byId = inSpec.containsKey("ids") && ((Boolean) inSpec.get("ids")).booleanValue();

        // ---- omit: plain list of columns
        int[] omitIds = specColumnIdList(inSpec, TfUtils.TXMETHOD_OMIT, byId, hashMap);

        // ---- impute: column + method (+ optional constant value)
        int[] imputeIds = null;
        byte[] imputeMethods = null;
        Object[] imputeConsts = null;
        if (inSpec.containsKey(TfUtils.TXMETHOD_IMPUTE)) {
            JSONArray entries = (JSONArray) inSpec.get(TfUtils.TXMETHOD_IMPUTE);
            imputeIds = new int[entries.size()];
            imputeMethods = new byte[entries.size()];
            imputeConsts = new Object[entries.size()];
            for (int k = 0; k < entries.size(); k++) {
                JSONObject entry = (JSONObject) entries.get(k);
                imputeIds[k] = specColumnId(entry.get(byId ? "id" : "name"), byId, hashMap);
                String method = UtilFunctions.unquote((String) entry.get("method"));
                if (method.equals("global_mean")) {
                    imputeMethods[k] = 1;
                } else if (method.equals("global_mode")) {
                    imputeMethods[k] = 2;
                } else if (method.equals(MVImputeAgent.CONSTANT_PREFIX)) {
                    imputeMethods[k] = 3;
                } else {
                    throw new IOException("Unknown missing value imputation method (" + method + ") in transformation specification: " + str3);
                }
                imputeConsts[k] = entry.containsKey("value") ? entry.get("value") : null;
            }
            sortSpecEntriesByColumnId(imputeIds, imputeMethods, imputeConsts);
        }

        // ---- recode: plain list of columns
        int[] recodeIds = specColumnIdList(inSpec, TfUtils.TXMETHOD_RECODE, byId, hashMap);

        // ---- bin: column + method + number of bins
        int[] binIds = null;
        byte[] binMethods = null;
        Object[] binCounts = null;
        if (inSpec.containsKey(TfUtils.TXMETHOD_BIN)) {
            JSONArray entries = (JSONArray) inSpec.get(TfUtils.TXMETHOD_BIN);
            binIds = new int[entries.size()];
            binMethods = new byte[entries.size()];
            binCounts = new Object[entries.size()];
            for (int k = 0; k < entries.size(); k++) {
                JSONObject entry = (JSONObject) entries.get(k);
                binIds[k] = specColumnId(entry.get(byId ? "id" : "name"), byId, hashMap);
                String method = UtilFunctions.unquote((String) entry.get("method"));
                if (!method.equals("equi-width")) {
                    if (method.equals("equi-height")) {
                        throw new IOException("Equi-height binning method is not yet supported, in transformation specification: " + str3);
                    }
                    // Previously reported as an "imputation method", which pointed at the wrong spec section.
                    throw new IOException("Unknown binning method (" + method + ") in transformation specification: " + str3);
                }
                binMethods[k] = 1;
                binCounts[k] = entry.get(TfUtils.JSON_NBINS);
                if (((Integer) binCounts[k]).intValue() <= 1) {
                    throw new IllegalArgumentException("Invalid transformation on column \"" + ((String) entry.get("name")) + "\". Number of bins must be greater than 1.");
                }
            }
            sortSpecEntriesByColumnId(binIds, binMethods, binCounts);
        }

        // ---- dummycode: plain list of columns
        int[] dummycodeIds = specColumnIdList(inSpec, TfUtils.TXMETHOD_DUMMYCODE, byId, hashMap);

        // ---- scale: column + method
        int[] scaleIds = null;
        byte[] scaleMethods = null;
        if (inSpec.containsKey(TfUtils.TXMETHOD_SCALE)) {
            JSONArray entries = (JSONArray) inSpec.get(TfUtils.TXMETHOD_SCALE);
            scaleIds = new int[entries.size()];
            scaleMethods = new byte[entries.size()];
            for (int k = 0; k < entries.size(); k++) {
                JSONObject entry = (JSONObject) entries.get(k);
                scaleIds[k] = specColumnId(entry.get(byId ? "id" : "name"), byId, hashMap);
                String method = UtilFunctions.unquote((String) entry.get("method"));
                if (method.equals("mean-subtraction")) {
                    scaleMethods[k] = 1;
                } else if (method.equals("z-score")) {
                    scaleMethods[k] = 2;
                } else {
                    // Previously reported as an "imputation method", which pointed at the wrong spec section.
                    throw new IOException("Unknown scaling method (" + method + ") in transformation specification: " + str3);
                }
            }
            sortSpecEntriesByColumnId(scaleIds, scaleMethods, null);
        }

        // Columns imputed by global_mode that the user did not list for recoding.
        // This must be computed before the recode list is extended further below.
        ArrayList<Integer> modeImputedNotRecoded = new ArrayList<>();
        if (imputeIds != null) {
            for (int k = 0; k < imputeIds.length; k++) {
                if (imputeMethods[k] == 2 && !specListContains(recodeIds, imputeIds[k])) {
                    modeImputedNotRecoded.add(Integer.valueOf(imputeIds[k]));
                }
            }
        }
        int[] mvrcdIds = null;
        if (!modeImputedNotRecoded.isEmpty()) {
            mvrcdIds = new int[modeImputedNotRecoded.size()];
            for (int k = 0; k < mvrcdIds.length; k++) {
                mvrcdIds[k] = modeImputedNotRecoded.get(k).intValue();
            }
        }

        // ---- validation of conflicting transformations
        if (imputeIds != null) {
            for (int k = 0; k < imputeIds.length; k++) {
                int colId = imputeIds[k];
                if (specListContains(omitIds, colId)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A column can not be both omitted and imputed.");
                }
                if (imputeMethods[k] == 1) {
                    if (specListContains(recodeIds, colId)) {
                        throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A numeric column can not be recoded.");
                    }
                    if (specListContains(dummycodeIds, colId) && !specListContains(binIds, colId)) {
                        throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A numeric column can not be dummycoded.");
                    }
                }
            }
        }
        if (scaleIds != null) {
            for (int colId : scaleIds) {
                if (specListContains(recodeIds, colId)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A column can not be recoded and scaled.");
                }
                if (specListContains(binIds, colId)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A column can not be binned and scaled.");
                }
                if (specListContains(dummycodeIds, colId)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A column can not be dummycoded and scaled.");
                }
            }
        }
        if (recodeIds != null) {
            for (int colId : recodeIds) {
                if (specListContains(binIds, colId)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colId + ". A column can not be recoded and binned.");
                }
            }
        }

        // Dummycoded columns that are neither recoded nor binned are added to the recode list.
        ArrayList<Integer> dummycodeOnly = new ArrayList<>();
        if (dummycodeIds != null) {
            for (int colId : dummycodeIds) {
                if (!specListContains(recodeIds, colId) && !specListContains(binIds, colId)) {
                    dummycodeOnly.add(Integer.valueOf(colId));
                }
            }
        }
        if (!dummycodeOnly.isEmpty()) {
            int offset = recodeIds != null ? recodeIds.length : 0;
            int[] merged = recodeIds != null ? Arrays.copyOf(recodeIds, offset + dummycodeOnly.size()) : new int[dummycodeOnly.size()];
            for (int k = 0; k < dummycodeOnly.size(); k++) {
                merged[offset + k] = dummycodeOnly.get(k).intValue();
            }
            Arrays.sort(merged);
            recodeIds = merged;
        }

        // ---- assemble the output spec; the (Collection)/(Map) casts pin the put overloads
        JSONObject outSpec = new JSONObject();
        if (omitIds != null) {
            outSpec.put(TfUtils.TXMETHOD_OMIT, (Map) specAttrsEntry(omitIds));
        }
        if (imputeIds != null) {
            JSONObject imputeEntry = specAttrsEntry(imputeIds);
            imputeEntry.put(TfUtils.JSON_MTHD, (Collection) toJSONArray(imputeMethods));
            imputeEntry.put(TfUtils.JSON_CONSTS, (Collection) toJSONArray(imputeConsts));
            outSpec.put(TfUtils.TXMETHOD_IMPUTE, (Map) imputeEntry);
        }
        if (recodeIds != null) {
            outSpec.put(TfUtils.TXMETHOD_RECODE, (Map) specAttrsEntry(recodeIds));
        }
        if (binIds != null) {
            JSONObject binEntry = specAttrsEntry(binIds);
            binEntry.put(TfUtils.JSON_MTHD, (Collection) toJSONArray(binMethods));
            binEntry.put(TfUtils.JSON_NBINS, (Collection) toJSONArray(binCounts));
            outSpec.put(TfUtils.TXMETHOD_BIN, (Map) binEntry);
        }
        if (dummycodeIds != null) {
            outSpec.put(TfUtils.TXMETHOD_DUMMYCODE, (Map) specAttrsEntry(dummycodeIds));
        }
        if (scaleIds != null) {
            JSONObject scaleEntry = specAttrsEntry(scaleIds);
            scaleEntry.put(TfUtils.JSON_MTHD, (Collection) toJSONArray(scaleMethods));
            outSpec.put(TfUtils.TXMETHOD_SCALE, (Map) scaleEntry);
        }
        if (mvrcdIds != null) {
            outSpec.put(TfUtils.TXMETHOD_MVRCD, (Map) specAttrsEntry(mvrcdIds));
        }
        return outSpec.toString();
    }

    /** Resolves one column reference of a spec (numeric id or column name) to its column id. */
    private static int specColumnId(Object ref, boolean byId, HashMap<String, Integer> colNames) {
        if (byId) {
            return UtilFunctions.toInt(ref);
        }
        String name = UtilFunctions.unquote((String) ref);
        Integer colId = colNames.get(name);
        if (colId == null) {
            // Fail with the offending name instead of a bare NullPointerException.
            throw new IllegalArgumentException("Invalid transformation specification: column \"" + name + "\" is not found among the input column names.");
        }
        return colId.intValue();
    }

    /** Parses a spec section that is a plain list of columns into ascending column ids; null if the section is absent. */
    private static int[] specColumnIdList(JSONObject spec, String section, boolean byId, HashMap<String, Integer> colNames) throws JSONException {
        if (!spec.containsKey(section)) {
            return null;
        }
        JSONArray refs = (JSONArray) spec.get(section);
        int[] colIds = new int[refs.size()];
        for (int k = 0; k < refs.size(); k++) {
            colIds[k] = specColumnId(refs.get(k), byId, colNames);
        }
        Arrays.sort(colIds);
        return colIds;
    }

    /** Sorts column ids ascending and applies the same reordering to the aligned method and parameter arrays. */
    private static void sortSpecEntriesByColumnId(final int[] colIds, byte[] methods, Object[] params) {
        Integer[] order = new Integer[colIds.length];
        for (int k = 0; k < order.length; k++) {
            order[k] = Integer.valueOf(k);
        }
        Arrays.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                // Integer.compare avoids the overflow that subtracting the ids can produce.
                return Integer.compare(colIds[left.intValue()], colIds[right.intValue()]);
            }
        });
        inplacePermute(colIds, methods, params, order);
    }

    /** Tells whether an ascending id array (possibly null) contains the given column id. */
    private static boolean specListContains(int[] sortedIds, int colId) {
        return sortedIds != null && Arrays.binarySearch(sortedIds, colId) >= 0;
    }

    /** Creates an output-spec entry holding the given column ids under the JSON_ATTRS key. */
    private static JSONObject specAttrsEntry(int[] colIds) throws JSONException {
        JSONObject entry = new JSONObject();
        entry.put(TfUtils.JSON_ATTRS, (Collection) toJSONArray(colIds));
        return entry;
    }

    /**
     * Copies an int array into a new JSON array, boxing each value as an Integer.
     *
     * @param iArr values to copy, in order
     * @return JSON array holding the values of {@code iArr}
     */
    private static JSONArray toJSONArray(int[] iArr) {
        final int count = iArr.length;
        JSONArray boxed = new JSONArray(count);
        for (int pos = 0; pos < count; pos++) {
            boxed.add(Integer.valueOf(iArr[pos]));
        }
        return boxed;
    }

    /**
     * Copies a byte array into a new JSON array, boxing each value as a Byte.
     *
     * @param bArr values to copy, in order
     * @return JSON array holding the values of {@code bArr}
     */
    private static JSONArray toJSONArray(byte[] bArr) {
        final int count = bArr.length;
        JSONArray boxed = new JSONArray(count);
        for (int pos = 0; pos < count; pos++) {
            boxed.add(Byte.valueOf(bArr[pos]));
        }
        return boxed;
    }

    /**
     * Wraps an object array in a new JSON array using the JSONArray array constructor.
     *
     * @param objArr elements for the JSON array
     * @return JSON array built from {@code objArr}
     * @throws JSONException if the JSONArray constructor rejects the input
     */
    private static JSONArray toJSONArray(Object[] objArr) throws JSONException {
        JSONArray wrapped = new JSONArray(objArr);
        return wrapped;
    }

    /**
     * Moves the transform metadata files from a temporary directory into the
     * metadata directory: the column-names, dummycoded-column-names and
     * column-types files are always renamed; the dummycode map file is moved
     * only if it exists, creating the target "Dummycode" directory on demand.
     *
     * @param fileSystem file system used for the existence checks and mkdirs
     * @param str        source (temporary) directory
     * @param str2       destination (metadata) directory
     */
    private static void moveFilesFromTmp(FileSystem fileSystem, String str, String str2) throws IllegalArgumentException, IOException {
        final String fromDir = str + Lop.FILE_SEPARATOR;
        final String toDir = str2 + Lop.FILE_SEPARATOR;
        MapReduceTool.renameFileOnHDFS(fromDir + TfUtils.TXMTD_COLNAMES, toDir + TfUtils.TXMTD_COLNAMES);
        MapReduceTool.renameFileOnHDFS(fromDir + TfUtils.TXMTD_DC_COLNAMES, toDir + TfUtils.TXMTD_DC_COLNAMES);
        MapReduceTool.renameFileOnHDFS(fromDir + TfUtils.TXMTD_COLTYPES, toDir + TfUtils.TXMTD_COLTYPES);

        // The dummycode map is optional; nothing more to do when it was not produced.
        boolean hasDummycodeMap = fileSystem.exists(new Path(str + "/Dummycode/dummyCodeMaps.csv"));
        if (!hasDummycodeMap) {
            return;
        }
        Path targetDir = new Path(str2 + "/Dummycode/");
        if (!fileSystem.exists(targetDir)) {
            fileSystem.mkdirs(new Path(str2 + "/Dummycode/"));
        }
        MapReduceTool.renameFileOnHDFS(str + "/Dummycode/dummyCodeMaps.csv", str2 + "/Dummycode/dummyCodeMaps.csv");
    }

    /**
     * Computes the number of columns of the transformed output.
     * Starts from the number of fields in the header and, for every column
     * listed under the dummycode section of {@code <str3>/spec.json}, adds
     * (count - 1) where the count is read from the column's metadata: the
     * fifth comma-separated field of the first line of its file under
     * "/Bin/" if that file is valid, otherwise the first line of its file
     * under "/Recode/".
     *
     * @param fileSystem file system holding the transformation metadata
     * @param str        header line with the column names
     * @param str2       delimiter (treated literally) separating the header fields
     * @param str3       transformation metadata directory
     * @return number of columns after dummycoding
     * @throws DMLRuntimeException if neither bin nor recode metadata exists for a dummycoded column
     */
    private static int getNumColumnsTf(FileSystem fileSystem, String str, String str2, String str3) throws IllegalArgumentException, IOException, DMLRuntimeException, JSONException {
        String[] split = Pattern.compile(Pattern.quote(str2)).split(str, -1);
        int length = split.length;
        JSONObject parse;
        // All readers below use try-with-resources so streams are released even on failure.
        try (BufferedReader specReader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(str3 + "/spec.json"))))) {
            parse = JSONHelper.parse(specReader);
        }
        if (!parse.containsKey(TfUtils.TXMETHOD_DUMMYCODE)) {
            return length;
        }
        Iterator<?> it = ((JSONArray) ((JSONObject) parse.get(TfUtils.TXMETHOD_DUMMYCODE)).get(TfUtils.JSON_ATTRS)).iterator();
        while (it.hasNext()) {
            int i = UtilFunctions.toInt(it.next());
            // Column ids are 1-based; metadata files are named after the unquoted column name.
            String colName = UtilFunctions.unquote(split[i - 1]);
            Path binPath = new Path(str3 + "/Bin/" + colName + ".bin");
            Path recodePath = new Path(str3 + "/Recode/" + colName + TfUtils.TXMTD_RCD_DISTINCT_SUFFIX);
            int count;
            if (TfUtils.checkValidInputFile(fileSystem, binPath, false)) {
                try (BufferedReader binReader = new BufferedReader(new InputStreamReader(fileSystem.open(binPath)))) {
                    count = UtilFunctions.parseToInt(binReader.readLine().split(",")[4]);
                }
            } else if (TfUtils.checkValidInputFile(fileSystem, recodePath, false)) {
                try (BufferedReader recodeReader = new BufferedReader(new InputStreamReader(fileSystem.open(recodePath)))) {
                    count = UtilFunctions.parseToInt(recodeReader.readLine());
                }
            } else {
                throw new DMLRuntimeException("Relevant transformation metadata for column (id=" + i + ", name=" + split[i - 1] + ") is not found.");
            }
            // A dummycoded column expands into 'count' columns, one of which replaces the original.
            length += count - 1;
        }
        return length;
    }

    public static JobReturn mrDataTransform(MRJobInstruction mRJobInstruction, MatrixObject[] matrixObjectArr, String str, String str2, byte[] bArr, MatrixObject[] matrixObjectArr2, int i, int i2) throws Exception {
        int numColumnsTf;
        String[] split = str.split("‡");
        TransformOperands transformOperands = new TransformOperands(split[0], matrixObjectArr[0]);
        JobConf jobConf = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fileSystem = FileSystem.get(jobConf);
        String findSmallestFile = CSVReblockMR.findSmallestFile(jobConf, transformOperands.inputPath);
        String readHeaderLine = readHeaderLine(fileSystem, transformOperands.inputCSVProperties, findSmallestFile);
        HashMap<String, Integer> processColumnNames = processColumnNames(fileSystem, transformOperands.inputCSVProperties, readHeaderLine, findSmallestFile);
        String outputHeader = getOutputHeader(fileSystem, readHeaderLine, transformOperands);
        int size = processColumnNames.size();
        long j = 0;
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        for (int i3 = 0; i3 < matrixObjectArr2.length; i3++) {
            if (matrixObjectArr2[i3].getFileFormatProperties() == null || matrixObjectArr2[i3].getFileFormatProperties().getFileFormat() != FileFormatProperties.FileFormat.CSV) {
                arrayList2.add(Integer.valueOf(i3));
            } else {
                arrayList.add(Integer.valueOf(i3));
            }
        }
        boolean z = arrayList.size() > 0;
        boolean z2 = arrayList2.size() > 0;
        String constructTempOutputFilename = MRJobConfiguration.constructTempOutputFilename();
        checkIfOutputOverlapsWithTxMtd(matrixObjectArr2, transformOperands, z, z2, arrayList, arrayList2, fileSystem);
        JobReturn jobReturn = null;
        if (transformOperands.isApply) {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(transformOperands.txMtdPath), jobConf);
            MapReduceTool.copyFileOnHDFS(transformOperands.applyTxPath, transformOperands.txMtdPath);
            String readStringFromHDFSFile = transformOperands.spec != null ? transformOperands.spec : MapReduceTool.readStringFromHDFSFile(transformOperands.txMtdPath + Lop.FILE_SEPARATOR + "spec.json");
            numColumnsTf = getNumColumnsTf(fileSystem, outputHeader, transformOperands.inputCSVProperties.getDelim(), transformOperands.txMtdPath);
            if (z) {
                int intValue = ConfigurationManager.getDMLConfig().getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
                CSVReblockMR.AssignRowIDMRReturn runAssignRowIDMRJob = CSVReblockMR.runAssignRowIDMRJob(new String[]{transformOperands.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{intValue}, new int[]{intValue}, prepDummyReblockInstruction(transformOperands.inputCSVProperties, intValue).toString(), i2, new String[]{findSmallestFile}, true, transformOperands.inputCSVProperties.getNAStrings(), readStringFromHDFSFile);
                j = runAssignRowIDMRJob.rlens[0];
                if (runAssignRowIDMRJob.rlens[0] == 0) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                r43 = ApplyTfCSVMR.runJob(transformOperands.inputPath, readStringFromHDFSFile, transformOperands.applyTxPath, constructTempOutputFilename, matrixObjectArr2[((Integer) arrayList.get(0)).intValue()].getFileName(), runAssignRowIDMRJob.counterFile.toString(), transformOperands.inputCSVProperties, size, i2, outputHeader);
            }
            if (z2) {
                CSVReblockInstruction cSVReblockInstruction = (CSVReblockInstruction) ((CSVReblockInstruction) InstructionParser.parseSingleInstruction(split[1])).clone((byte) 0);
                CSVReblockMR.AssignRowIDMRReturn runAssignRowIDMRJob2 = CSVReblockMR.runAssignRowIDMRJob(new String[]{transformOperands.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{cSVReblockInstruction.brlen}, new int[]{cSVReblockInstruction.bclen}, cSVReblockInstruction.toString(), i2, new String[]{findSmallestFile}, true, transformOperands.inputCSVProperties.getNAStrings(), readStringFromHDFSFile);
                j = runAssignRowIDMRJob2.rlens[0];
                if (runAssignRowIDMRJob2.rlens[0] == 0) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                jobReturn = ApplyTfBBMR.runJob(transformOperands.inputPath, split[1], str2, readStringFromHDFSFile, transformOperands.txMtdPath, constructTempOutputFilename, matrixObjectArr2[((Integer) arrayList2.get(0)).intValue()].getFileName(), runAssignRowIDMRJob2.counterFile.toString(), transformOperands.inputCSVProperties, runAssignRowIDMRJob2.rlens[0], runAssignRowIDMRJob2.clens[0], numColumnsTf, i2, outputHeader);
            }
        } else {
            String processSpecFile = processSpecFile(fileSystem, transformOperands.inputPath, findSmallestFile, processColumnNames, transformOperands.inputCSVProperties, transformOperands.spec);
            String constructTempOutputFilename2 = MRJobConfiguration.constructTempOutputFilename();
            j = GenTfMtdMR.runJob(transformOperands.inputPath, transformOperands.txMtdPath, processSpecFile, findSmallestFile, constructTempOutputFilename2, transformOperands.inputCSVProperties, size, i2, outputHeader);
            if (j == 0) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            MapReduceTool.writeStringToHDFS(processSpecFile, transformOperands.txMtdPath + Lop.FILE_SEPARATOR + "spec.json");
            numColumnsTf = getNumColumnsTf(fileSystem, outputHeader, transformOperands.inputCSVProperties.getDelim(), transformOperands.txMtdPath);
            r43 = z ? ApplyTfCSVMR.runJob(transformOperands.inputPath, processSpecFile, transformOperands.txMtdPath, constructTempOutputFilename, matrixObjectArr2[((Integer) arrayList.get(0)).intValue()].getFileName(), constructTempOutputFilename2, transformOperands.inputCSVProperties, size, i2, outputHeader) : null;
            if (z2) {
                int intValue2 = ConfigurationManager.getDMLConfig().getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
                CSVReblockMR.AssignRowIDMRReturn runAssignRowIDMRJob3 = CSVReblockMR.runAssignRowIDMRJob(new String[]{transformOperands.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{intValue2}, new int[]{intValue2}, prepDummyReblockInstruction(transformOperands.inputCSVProperties, intValue2).toString(), i2, new String[]{findSmallestFile}, true, transformOperands.inputCSVProperties.getNAStrings(), processSpecFile);
                if (runAssignRowIDMRJob3.rlens[0] == 0) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                jobReturn = ApplyTfBBMR.runJob(transformOperands.inputPath, split[1], str2, processSpecFile, transformOperands.txMtdPath, constructTempOutputFilename, matrixObjectArr2[((Integer) arrayList2.get(0)).intValue()].getFileName(), runAssignRowIDMRJob3.counterFile.toString(), transformOperands.inputCSVProperties, j, size, numColumnsTf, i2, outputHeader);
            }
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(constructTempOutputFilename2), jobConf);
        }
        moveFilesFromTmp(fileSystem, constructTempOutputFilename, transformOperands.txMtdPath);
        if (r43 != null) {
            r43.getMatrixCharacteristics(0).setDimension(j, numColumnsTf);
            MapReduceTool.writeMetaDataFile(matrixObjectArr2[((Integer) arrayList.get(0)).intValue()].getFileName() + ".mtd", Expression.ValueType.DOUBLE, r43.getMatrixCharacteristics(0), OutputInfo.CSVOutputInfo, new CSVFileFormatProperties(false, transformOperands.inputCSVProperties.getDelim(), false, Double.NaN, null));
            return r43;
        }
        if (jobReturn == null) {
            return null;
        }
        jobReturn.getMatrixCharacteristics(0).setDimension(j, numColumnsTf);
        MapReduceTool.writeMetaDataFile(matrixObjectArr2[((Integer) arrayList2.get(0)).intValue()].getFileName() + ".mtd", Expression.ValueType.DOUBLE, jobReturn.getMatrixCharacteristics(0), OutputInfo.BinaryBlockOutputInfo);
        return jobReturn;
    }

    /**
     * Assembles a CSV reblock instruction string from the given CSV properties and
     * block size and hands it to {@link CSVReblockInstruction#parseInstruction}.
     *
     * <p>The fields are joined with the "°" separator; the two operand descriptors
     * ("0" and "1") use "·" between index, data type and value type.
     *
     * @param cSVFileFormatProperties CSV properties supplying header flag, delimiter, fill flag and fill value
     * @param i block size, written twice into the instruction (presumably rows and columns per block)
     * @return the parsed instruction
     */
    private static CSVReblockInstruction prepDummyReblockInstruction(CSVFileFormatProperties cSVFileFormatProperties, int i) {
        StringBuilder inst = new StringBuilder();
        inst.append(LopProperties.ExecType.MR);
        inst.append("°").append(CSVReBlock.OPCODE);
        // first operand descriptor
        inst.append("°0·").append(Expression.DataType.MATRIX).append("·").append(Expression.ValueType.DOUBLE);
        // second operand descriptor
        inst.append("°1·").append(Expression.DataType.MATRIX).append("·").append(Expression.ValueType.DOUBLE);
        inst.append("°").append(i);
        inst.append("°").append(i);
        inst.append("°").append(cSVFileFormatProperties.hasHeader());
        inst.append("°").append(cSVFileFormatProperties.getDelim());
        inst.append("°").append(cSVFileFormatProperties.isFill());
        inst.append("°").append(cSVFileFormatProperties.getFillValue());
        return CSVReblockInstruction.parseInstruction(inst.toString());
    }

    /**
     * Determines the header line to use for the transformed output.
     *
     * <ul>
     *   <li>In apply mode, the first line of the column-names file under
     *       {@code applyTxPath} is returned.</li>
     *   <li>Otherwise, if no {@code outNamesFile} is set, the supplied input header
     *       {@code str} is returned unchanged.</li>
     *   <li>Otherwise the first line of {@code outNamesFile} is returned.</li>
     * </ul>
     *
     * @param fileSystem file system used to open the header file
     * @param str header line of the input data, used as fallback
     * @param transformOperands operands carrying apply flag and the relevant paths
     * @return the header line; may be {@code null} if the file that is read is empty
     * @throws IOException if the header file cannot be opened or read
     */
    private static String getOutputHeader(FileSystem fileSystem, String str, TransformOperands transformOperands) throws IOException {
        Path headerFile;
        if (transformOperands.isApply) {
            headerFile = new Path(transformOperands.applyTxPath + Lop.FILE_SEPARATOR + TfUtils.TXMTD_COLNAMES);
        } else if (transformOperands.outNamesFile == null) {
            return str;
        } else {
            headerFile = new Path(transformOperands.outNamesFile);
        }
        // try-with-resources guarantees the stream is released even if readLine() fails
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(headerFile)))) {
            return reader.readLine();
        }
    }

    /**
     * Entry point for the single-node transform when invoked from a CP instruction.
     * Builds the {@link TransformOperands} from the instruction's parameter map and
     * the first input, then delegates to the operand-based overload.
     *
     * @param parameterizedBuiltinCPInstruction instruction whose parameter map describes the transform
     * @param cacheableDataArr inputs; element 0 is used to build the operands
     * @param matrixObjectArr outputs, forwarded unchanged
     * @return the result of the delegated transform
     */
    public static JobReturn cpDataTransform(ParameterizedBuiltinCPInstruction parameterizedBuiltinCPInstruction, CacheableData<?>[] cacheableDataArr, MatrixObject[] matrixObjectArr) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        TransformOperands operands = new TransformOperands(parameterizedBuiltinCPInstruction.getParameterMap(), cacheableDataArr[0]);
        JobReturn result = cpDataTransform(operands, cacheableDataArr, matrixObjectArr);
        return result;
    }

    /**
     * Entry point for the single-node transform when invoked with an instruction
     * string. Only the part of {@code str} before the first "‡" separator is used
     * to build the {@link TransformOperands}; the call is then delegated to the
     * operand-based overload.
     *
     * @param str instruction string; split on "‡" and the first token is consumed
     * @param cacheableDataArr inputs; element 0 is used to build the operands
     * @param matrixObjectArr outputs, forwarded unchanged
     * @return the result of the delegated transform
     */
    public static JobReturn cpDataTransform(String str, CacheableData<?>[] cacheableDataArr, MatrixObject[] matrixObjectArr) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        String[] parts = str.split("‡");
        String firstInstruction = parts[0];
        TransformOperands operands = new TransformOperands(firstInstruction, cacheableDataArr[0]);
        return cpDataTransform(operands, cacheableDataArr, matrixObjectArr);
    }

    /**
     * Runs the transform on a single node for the given operands.
     *
     * <p>Steps: read the input header, derive column names and the output header,
     * classify the outputs into CSV and non-CSV, verify that the output does not
     * overlap with the transform metadata path, prepare the specification (copying
     * existing metadata in apply mode, or processing and persisting the spec
     * otherwise) and finally call {@code performTransform}.
     *
     * @param transformOperands parsed transform operands (paths, spec, CSV properties, apply flag)
     * @param cacheableDataArr inputs; not referenced by this overload
     * @param matrixObjectArr outputs; element 0 receives the transformed data
     * @return job return produced by {@code performTransform}
     * @throws IOException on HDFS access failures
     * @throws DMLRuntimeException if the output overlaps the metadata path or the transform fails
     */
    public static JobReturn cpDataTransform(TransformOperands transformOperands, CacheableData<?>[] cacheableDataArr, MatrixObject[] matrixObjectArr) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        JobConf jobConf = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fileSystem = FileSystem.get(jobConf);
        String smallestFile = CSVReblockMR.findSmallestFile(jobConf, transformOperands.inputPath);
        String headerLine = readHeaderLine(fileSystem, transformOperands.inputCSVProperties, smallestFile);
        HashMap<String, Integer> columnNames = processColumnNames(fileSystem, transformOperands.inputCSVProperties, headerLine, smallestFile);
        String outputHeader = getOutputHeader(fileSystem, headerLine, transformOperands);

        // Typed index lists (previously raw ArrayList): positions of CSV and non-CSV outputs.
        ArrayList<Integer> csvOutputs = new ArrayList<>();
        ArrayList<Integer> nonCsvOutputs = new ArrayList<>();
        for (int idx = 0; idx < matrixObjectArr.length; idx++) {
            boolean isCsv = matrixObjectArr[idx].getFileFormatProperties() != null
                    && matrixObjectArr[idx].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV;
            if (isCsv) {
                csvOutputs.add(Integer.valueOf(idx));
            } else {
                nonCsvOutputs.add(Integer.valueOf(idx));
            }
        }
        boolean hasCsvOutput = csvOutputs.size() > 0;
        boolean hasNonCsvOutput = nonCsvOutputs.size() > 0;
        checkIfOutputOverlapsWithTxMtd(matrixObjectArr, transformOperands, hasCsvOutput, hasNonCsvOutput, csvOutputs, nonCsvOutputs, fileSystem);

        String specFileName = transformOperands.txMtdPath + Lop.FILE_SEPARATOR + "spec.json";
        String spec;
        if (transformOperands.isApply) {
            // Apply mode: replace the metadata directory with a copy of the existing one,
            // then take the spec from the operands or, failing that, from the copied spec.json.
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(transformOperands.txMtdPath), jobConf);
            MapReduceTool.copyFileOnHDFS(transformOperands.applyTxPath, transformOperands.txMtdPath);
            spec = transformOperands.spec != null ? transformOperands.spec : MapReduceTool.readStringFromHDFSFile(specFileName);
        } else {
            // Build mode: process the spec against the input and persist it next to the metadata.
            spec = processSpecFile(fileSystem, transformOperands.inputPath, smallestFile, columnNames, transformOperands.inputCSVProperties, transformOperands.spec);
            MapReduceTool.writeStringToHDFS(spec, specFileName);
        }
        return performTransform(jobConf, fileSystem, transformOperands.inputPath, columnNames.size(), transformOperands.inputCSVProperties, spec, transformOperands.txMtdPath, transformOperands.isApply, matrixObjectArr[0], outputHeader, hasNonCsvOutput, hasCsvOutput);
    }

    /**
     * Applies a transform specification to an in-memory frame.
     *
     * <p>An encoder is created from the spec stored under
     * {@code TF_FN_PARAM_SPEC} in {@code hashMap}, the column count of
     * {@code frameBlock} and {@code frameBlock2}; it is then applied to
     * {@code frameBlock}, writing into a freshly created matrix block with the
     * same number of rows and columns as the input frame.
     *
     * @param hashMap parameter map containing the transform spec
     * @param frameBlock input frame to transform
     * @param frameBlock2 passed to the encoder factory (presumably transform metadata; confirm against EncoderFactory)
     * @return the matrix block returned by the encoder
     * @throws DMLRuntimeException if encoder creation or application fails
     */
    public static MatrixBlock cpDataTransform(HashMap<String, String> hashMap, FrameBlock frameBlock, FrameBlock frameBlock2) throws DMLRuntimeException {
        String spec = hashMap.get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_SPEC);
        int numCols = frameBlock.getNumColumns();
        // the encoder is created before the output block, as in the chained form
        return EncoderFactory.createEncoder(spec, numCols, frameBlock2)
                .apply(frameBlock, new MatrixBlock(frameBlock.getNumRows(), frameBlock.getNumColumns(), false));
    }

    /**
     * Resolves an input location to the list of files to read.
     *
     * <p>If {@code str} denotes a directory, all entries accepted by
     * {@code CSVReblockMR.hiddenFileFilter} are returned in sorted order;
     * otherwise the list contains just the path itself.
     *
     * @param str input file or directory
     * @param fileSystem file system used for the lookup
     * @return list of input paths
     * @throws FileNotFoundException propagated from the directory listing
     * @throws IOException on file system access failures
     */
    private static ArrayList<Path> collectInputFiles(String str, FileSystem fileSystem) throws FileNotFoundException, IOException {
        Path location = new Path(str);
        ArrayList<Path> inputFiles = new ArrayList<>();
        if (!fileSystem.isDirectory(location)) {
            inputFiles.add(location);
            return inputFiles;
        }
        FileStatus[] entries = fileSystem.listStatus(location, CSVReblockMR.hiddenFileFilter);
        for (int k = 0; k < entries.length; k++) {
            inputFiles.add(entries[k].getPath());
        }
        Collections.sort(inputFiles);
        return inputFiles;
    }

    /**
     * Counts the data rows across all input files.
     *
     * <p>The header line is skipped only in the first file, and only when the CSV
     * properties report a header. When the omit agent is applicable each line is
     * split on the (quoted) delimiter and checked against the agent; rows it
     * rejects are counted in the total but not in the valid count. When the agent
     * is not applicable both counts are equal.
     *
     * @param arrayList input files, read in list order
     * @param cSVFileFormatProperties CSV properties (header flag, delimiter)
     * @param fileSystem file system used to open the files
     * @param tfUtils transform utilities providing the omit agent
     * @return two-element array: {@code [0]} total rows, {@code [1]} rows not omitted
     * @throws IOException if a file cannot be opened or read
     */
    private static int[] countNumRows(ArrayList<Path> arrayList, CSVFileFormatProperties cSVFileFormatProperties, FileSystem fileSystem, TfUtils tfUtils) throws IOException {
        OmitAgent omitAgent = tfUtils.getOmitAgent();
        boolean checkOmit = omitAgent.isApplicable();
        // The delimiter pattern is only needed when rows have to be inspected.
        Pattern delimPattern = checkOmit ? Pattern.compile(Pattern.quote(cSVFileFormatProperties.getDelim())) : null;

        int totalRows = 0;
        int validRows = 0;
        for (int fileIdx = 0; fileIdx < arrayList.size(); fileIdx++) {
            // try-with-resources closes the reader on both normal and exceptional exit
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(arrayList.get(fileIdx))))) {
                if (fileIdx == 0 && cSVFileFormatProperties.hasHeader()) {
                    reader.readLine();
                }
                String line;
                // stop at end of file (the previous loop had no exit condition)
                while ((line = reader.readLine()) != null) {
                    totalRows++;
                    if (!checkOmit || !omitAgent.omit(delimPattern.split(line, -1), tfUtils)) {
                        validRows++;
                    }
                }
            }
        }
        return new int[]{totalRows, validRows};
    }

    /**
     * Performs the actual single-node transform: prepares or loads the transform
     * metadata, then streams every input row through the transform agents and
     * writes the result.
     *
     * <p>In apply mode ({@code z == true}) the rows are counted and the existing
     * metadata is loaded from {@code str3}. Otherwise a first pass over the input
     * feeds every line to {@code TfUtils.prepareTfMtd}, the metadata is written
     * to {@code str3} and loaded back. A second pass then transforms each row that
     * is not omitted. Transformed rows are written as text lines to the file of
     * {@code matrixObject} when {@code z3} is set, and/or appended to an in-memory
     * matrix block that is exported through {@code matrixObject} when {@code z2}
     * is set.
     *
     * @param jobConf job configuration passed to the metadata loaders
     * @param fileSystem file system used for all reads and writes
     * @param str input file or directory
     * @param i number of input columns
     * @param cSVFileFormatProperties CSV properties of the input
     * @param str2 transform specification as a JSON string
     * @param str3 transform metadata path
     * @param z {@code true} to apply existing metadata, {@code false} to generate it
     * @param matrixObject output object; its file name is the output location
     * @param str4 output header line
     * @param z2 whether to populate and export an in-memory matrix block
     * @param z3 whether to write transformed rows as text
     * @return job return carrying the output dimensions (valid rows x transformed columns)
     * @throws IOException on file system failures
     * @throws DMLRuntimeException if no valid rows remain or the transform fails
     */
    private static JobReturn performTransform(JobConf jobConf, FileSystem fileSystem, String str, int i, CSVFileFormatProperties cSVFileFormatProperties, String str2, String str3, boolean z, MatrixObject matrixObject, String str4, boolean z2, boolean z3) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        TfUtils tfUtils = new TfUtils(str4, cSVFileFormatProperties.hasHeader(), cSVFileFormatProperties.getDelim(), TfUtils.parseNAStrings(cSVFileFormatProperties.getNAStrings()), new JSONObject(str2), i, str3, null, null);
        MVImputeAgent mvImputeAgent = tfUtils.getMVImputeAgent();
        RecodeAgent recodeAgent = tfUtils.getRecodeAgent();
        BinAgent binAgent = tfUtils.getBinAgent();
        DummycodeAgent dummycodeAgent = tfUtils.getDummycodeAgent();
        ArrayList<Path> inputFiles = collectInputFiles(str, fileSystem);

        if (z) {
            // Apply mode: only row counts are needed before loading existing metadata.
            int[] rowCounts = countNumRows(inputFiles, cSVFileFormatProperties, fileSystem, tfUtils);
            tfUtils.setTotal(rowCounts[0]);
            tfUtils.setValid(rowCounts[1]);
            if (tfUtils.getValid() == 0) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            Path txMtdDir = new Path(str3);
            mvImputeAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
            recodeAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
            binAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
            dummycodeAgent.setRecodeMaps(recodeAgent.getRecodeMaps());
            dummycodeAgent.setNumBins(binAgent.getColList(), binAgent.getNumBins());
            dummycodeAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
        } else {
            // Build mode, pass 1: feed every data line to the metadata preparation.
            for (int fileIdx = 0; fileIdx < inputFiles.size(); fileIdx++) {
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(inputFiles.get(fileIdx))))) {
                    if (fileIdx == 0 && cSVFileFormatProperties.hasHeader()) {
                        reader.readLine();
                    }
                    String line;
                    // stop at end of file (the previous loop had no exit condition)
                    while ((line = reader.readLine()) != null) {
                        tfUtils.prepareTfMtd(line);
                    }
                }
            }
            if (tfUtils.getValid() == 0) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            mvImputeAgent.outputTransformationMetadata(str3, fileSystem, tfUtils);
            binAgent.outputTransformationMetadata(str3, fileSystem, tfUtils);
            recodeAgent.outputTransformationMetadata(str3, fileSystem, tfUtils);
            Path txMtdDir = new Path(str3);
            mvImputeAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
            binAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
            dummycodeAgent.setRecodeMapsCP(recodeAgent.getCPRecodeMaps());
            dummycodeAgent.setNumBins(binAgent.getColList(), binAgent.getNumBins());
            dummycodeAgent.loadTxMtd(jobConf, fileSystem, txMtdDir, tfUtils);
        }

        int numColumnsTf = getNumColumnsTf(fileSystem, str4, cSVFileFormatProperties.getDelim(), str3);
        MapReduceTool.deleteFileIfExistOnHDFS(matrixObject.getFileName());
        MatrixBlock matrixBlock = null;

        // The writer is scoped so that it is always closed, and closed before the
        // matrix block (if any) is exported through the same matrix object below.
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(new Path(matrixObject.getFileName()), true)))) {
            StringBuilder sb = new StringBuilder();
            if (z2) {
                // NOTE(review): the third argument is an int product and may overflow for very
                // large inputs; left unchanged pending confirmation of the constructor signature.
                matrixBlock = new MatrixBlock((int) tfUtils.getValid(), numColumnsTf, ((int) tfUtils.getValid()) * i);
                if (matrixBlock.isInSparseFormat()) {
                    matrixBlock.allocateSparseRowsBlock();
                } else {
                    matrixBlock.allocateDenseBlock();
                }
            }

            int outRow = 0;
            for (int fileIdx = 0; fileIdx < inputFiles.size(); fileIdx++) {
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(inputFiles.get(fileIdx))))) {
                    if (fileIdx == 0) {
                        if (cSVFileFormatProperties.hasHeader()) {
                            reader.readLine();
                        }
                        // Header files and dummycode maps are generated once, with the first file.
                        String dcdHeader = dummycodeAgent.constructDummycodedHeader(str4, tfUtils.getDelim());
                        numColumnsTf = dummycodeAgent.genDcdMapsAndColTypes(fileSystem, str3, i, tfUtils);
                        generateHeaderFiles(fileSystem, str3, str4, dcdHeader);
                    }
                    String line;
                    // stop at end of file (the previous loop had no exit condition)
                    while ((line = reader.readLine()) != null) {
                        String[] words = tfUtils.getWords(line);
                        if (tfUtils.omit(words)) {
                            continue;
                        }
                        String[] transformed = tfUtils.apply(words);
                        if (z3) {
                            writer.write(tfUtils.checkAndPrepOutputString(transformed, sb));
                            writer.write(ProgramConverter.NEWLINE);
                        }
                        if (z2) {
                            tfUtils.check(transformed);
                            for (int col = 0; col < transformed.length; col++) {
                                if (transformed[col] != null && !transformed[col].isEmpty()) {
                                    matrixBlock.appendValue(outRow, col, UtilFunctions.parseToDouble(transformed[col]));
                                }
                            }
                        }
                        outRow++;
                    }
                }
            }
        }

        if (matrixBlock != null) {
            matrixBlock.recomputeNonZeros();
            matrixBlock.examSparsity();
            matrixObject.acquireModify(matrixBlock);
            matrixObject.release();
            matrixObject.exportData();
        }
        return new JobReturn(new MatrixCharacteristics[]{new MatrixCharacteristics(tfUtils.getValid(), numColumnsTf, (int) matrixObject.getNumRowsPerBlock(), (int) matrixObject.getNumColumnsPerBlock())}, true);
    }

    /**
     * Writes the two header files into the transform metadata directory.
     *
     * <p>{@code str2} is written (followed by a newline) to the file named
     * {@code TfUtils.TXMTD_COLNAMES} and {@code str3} to the file named
     * {@code TfUtils.TXMTD_DC_COLNAMES}, both under {@code str}. Existing files
     * are overwritten.
     *
     * @param fileSystem file system to write to
     * @param str transform metadata directory
     * @param str2 content of the column-names file
     * @param str3 content of the dummycoded column-names file
     * @throws IOException if a file cannot be created or written
     */
    public static void generateHeaderFiles(FileSystem fileSystem, String str, String str2, String str3) throws IOException {
        Path colNamesFile = new Path(str + Lop.FILE_SEPARATOR + TfUtils.TXMTD_COLNAMES);
        // try-with-resources releases the stream even if the write fails
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(colNamesFile, true)))) {
            writer.write(str2 + ProgramConverter.NEWLINE);
        }
        Path dcColNamesFile = new Path(str + Lop.FILE_SEPARATOR + TfUtils.TXMTD_DC_COLNAMES);
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(dcColNamesFile, true)))) {
            writer.write(str3 + ProgramConverter.NEWLINE);
        }
    }

    /**
     * Checks the transform metadata path against one output file: the first
     * output indexed by {@code arrayList} when {@code z} is set, otherwise the
     * first output indexed by {@code arrayList2} when {@code z2} is set. Nothing is
     * checked when both flags are false.
     *
     * @param matrixObjectArr all outputs
     * @param transformOperands operands supplying the metadata path
     * @param z whether {@code arrayList} has entries to check
     * @param z2 whether {@code arrayList2} has entries to check
     * @param arrayList indices into {@code matrixObjectArr}, consulted when {@code z}
     * @param arrayList2 indices into {@code matrixObjectArr}, consulted when {@code z2} and not {@code z}
     * @param fileSystem file system used to qualify the paths
     * @throws DMLRuntimeException if the selected output overlaps the metadata path
     */
    private static void checkIfOutputOverlapsWithTxMtd(MatrixObject[] matrixObjectArr, TransformOperands transformOperands, boolean z, boolean z2, ArrayList<Integer> arrayList, ArrayList<Integer> arrayList2, FileSystem fileSystem) throws DMLRuntimeException {
        if (!z && !z2) {
            return;
        }
        String txMtdPath = transformOperands.txMtdPath;
        // the first list wins when both flags are set
        ArrayList<Integer> selected = z ? arrayList : arrayList2;
        int outputIdx = selected.get(0).intValue();
        checkIfOutputOverlapsWithTxMtd(txMtdPath, matrixObjectArr[outputIdx].getFileName(), fileSystem);
    }

    /**
     * Rejects configurations in which the transform metadata path and the output
     * path are the same location or one lies inside the other.
     *
     * <p>Both paths are qualified against {@code fileSystem} and compared as
     * strings. Containment is tested on a path-separator boundary, so sibling
     * names that merely share a textual prefix (for example {@code /a/tf} and
     * {@code /a/tf2/out}) are not reported as overlapping.
     *
     * @param str transform metadata path
     * @param str2 output path
     * @param fileSystem file system used to qualify both paths
     * @throws DMLRuntimeException if the two paths are equal or nested
     */
    private static void checkIfOutputOverlapsWithTxMtd(String str, String str2, FileSystem fileSystem) throws DMLRuntimeException {
        String txPath = new Path(str).makeQualified(fileSystem).toString();
        String outPath = new Path(str2).makeQualified(fileSystem).toString();

        // Directory prefixes end with exactly one separator; a root path already does.
        String txDirPrefix = txPath.endsWith(Path.SEPARATOR) ? txPath : txPath + Path.SEPARATOR;
        String outDirPrefix = outPath.endsWith(Path.SEPARATOR) ? outPath : outPath + Path.SEPARATOR;

        boolean overlaps = txPath.equals(outPath)
                || txPath.startsWith(outDirPrefix)
                || outPath.startsWith(txDirPrefix);
        if (overlaps) {
            throw new DMLRuntimeException("The transform path '" + str + "' cannot overlap with the output path '" + str2 + "'");
        }
    }

    /**
     * Runs the transform through Spark.
     *
     * <p>After verifying that the output does not overlap the metadata path, the
     * input header and column names are read and the input frame is obtained as
     * an RDD. In apply mode the existing metadata is copied to
     * {@code txMtdPath} and applied; otherwise the metadata is first generated via
     * {@code GenTfMtdSPARK} (which also yields the row count), the spec is
     * persisted, and the transform is applied. Temporary files are moved into the
     * metadata path and, if a result RDD is available, it is attached to the first
     * output together with its lineage and dimensions.
     *
     * <p>Note: the row count stays 0 in apply mode because only the
     * metadata-generation job reports one.
     *
     * @param parameterizedBuiltinSPInstruction instruction supplying the transform parameters
     * @param frameObjectArr inputs; element 0 is the frame to transform
     * @param matrixObjectArr outputs; element 0 receives the result RDD
     * @param executionContext execution context; must be a {@link SparkExecutionContext}
     * @throws Exception propagated from the underlying file system and Spark calls
     */
    public static void spDataTransform(ParameterizedBuiltinSPInstruction parameterizedBuiltinSPInstruction, FrameObject[] frameObjectArr, MatrixObject[] matrixObjectArr, ExecutionContext executionContext) throws Exception {
        SparkExecutionContext sec = (SparkExecutionContext) executionContext;
        TransformOperands operands = new TransformOperands(parameterizedBuiltinSPInstruction.getParams(), frameObjectArr[0]);
        JobConf conf = new JobConf();
        FileSystem fs = FileSystem.get(conf);
        checkIfOutputOverlapsWithTxMtd(operands.txMtdPath, matrixObjectArr[0].getFileName(), fs);

        String smallestFile = CSVReblockMR.findSmallestFile(conf, operands.inputPath);
        String headerLine = readHeaderLine(fs, operands.inputCSVProperties, smallestFile);
        HashMap<String, Integer> columnNames = processColumnNames(fs, operands.inputCSVProperties, headerLine, smallestFile);
        int numInputCols = columnNames.size();
        String outHeader = getOutputHeader(fs, headerLine, operands);
        String tmpOutput = MRJobConfiguration.constructTempOutputFilename();
        JavaRDD csvLines = JavaPairRDD.toRDD(sec.getRDDHandleForFrameObject(frameObjectArr[0], InputInfo.CSVInputInfo)).toJavaRDD();

        long numRowsTf = 0;
        long numColsTf;
        JavaPairRDD<Long, String> transformed;
        if (!operands.isApply) {
            // Build mode: generate metadata first, then apply it.
            String spec = processSpecFile(fs, operands.inputPath, smallestFile, columnNames, operands.inputCSVProperties, operands.spec);
            String partOffsetsTmp = MRJobConfiguration.constructTempOutputFilename();
            numRowsTf = GenTfMtdSPARK.runSparkJob(sec, csvLines, operands.txMtdPath, spec, partOffsetsTmp, operands.inputCSVProperties, numInputCols, outHeader);
            MapReduceTool.writeStringToHDFS(spec, operands.txMtdPath + Lop.FILE_SEPARATOR + "spec.json");
            numColsTf = getNumColumnsTf(fs, outHeader, operands.inputCSVProperties.getDelim(), operands.txMtdPath);
            transformed = ApplyTfCSVSPARK.runSparkJob(sec, csvLines, operands.txMtdPath, spec, tmpOutput, operands.inputCSVProperties, numInputCols, outHeader);
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsTmp), conf);
        } else {
            // Apply mode: reuse the metadata found under applyTxPath.
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(operands.txMtdPath), conf);
            MapReduceTool.copyFileOnHDFS(operands.applyTxPath, operands.txMtdPath);
            String spec;
            if (operands.spec != null) {
                spec = operands.spec;
            } else {
                spec = MapReduceTool.readStringFromHDFSFile(operands.txMtdPath + Lop.FILE_SEPARATOR + "spec.json");
            }
            numColsTf = getNumColumnsTf(fs, outHeader, operands.inputCSVProperties.getDelim(), operands.txMtdPath);
            transformed = ApplyTfCSVSPARK.runSparkJob(sec, csvLines, operands.txMtdPath, spec, tmpOutput, operands.inputCSVProperties, numInputCols, outHeader);
        }

        moveFilesFromTmp(fs, tmpOutput, operands.txMtdPath);
        JavaPairRDD<LongWritable, Text> textRdd = RDDConverterUtils.stringToSerializableText(transformed);
        if (textRdd == null) {
            return;
        }
        MatrixObject output = matrixObjectArr[0];
        String outputVar = output.getVarName();
        output.setRDDHandle(new RDDObject(textRdd, outputVar));
        sec.addLineageRDD(outputVar, parameterizedBuiltinSPInstruction.getParams().get("target"));
        MatrixCharacteristics outputDims = sec.getMatrixCharacteristics(outputVar);
        outputDims.setDimension(numRowsTf, numColsTf);
        outputDims.setNonZeros(-1L);
    }
}
