package org.apache.sysml.runtime.matrix;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
import org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper;
import org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDReducer;
import org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper;
import org.apache.sysml.runtime.matrix.mapred.CSVReblockReducer;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.MapReduceTool;

/* loaded from: input_file:org/apache/sysml/runtime/matrix/CSVReblockMR.class */
public class CSVReblockMR {
    /** Counter group from which runAssignRowIDMRJob reads one row count per input (counter name = input index). */
    public static final String NUM_ROWS_IN_MATRIX = "num.rows.in.matrix.";
    /** Counter group from which runAssignRowIDMRJob reads one column count per input (counter name = input index). */
    public static final String NUM_COLS_IN_MATRIX = "num.cols.in.matrix.";
    /** Job configuration key under which runCSVReblockJob stores the path of the "part-00000" file of the row-ID job output. */
    public static final String ROWID_FILE_NAME = "rowid.file.name";
    /** Job configuration key under which both jobs store the per-input file names (see findSmallestFile). */
    public static final String SMALLEST_FILE_NAME_PER_INPUT = "smallest.file.name.per.input";
    /**
     * Filter that rejects "hidden" entries: a path is refused when its last component
     * starts with an underscore or with {@link Path#CUR_DIR}; everything else is accepted.
     */
    public static final PathFilter hiddenFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            final String fileName = path.getName();
            if (fileName.startsWith("_")) {
                return false;
            }
            return !fileName.startsWith(Path.CUR_DIR);
        }
    };

    /* loaded from: input_file:org/apache/sysml/runtime/matrix/CSVReblockMR$AssignRowIDMRReturn.class */
    /**
     * Plain result holder filled by {@code runAssignRowIDMRJob}: the output path of that
     * job plus the row and column counts it read back from the job counters, one entry
     * per input.
     */
    public static class AssignRowIDMRReturn {
        /** Output path of the row-ID assignment job; {@code null} until assigned. */
        public Path counterFile = null;
        /** Row count per input; {@code null} until assigned. */
        public long[] rlens = null;
        /** Column count per input; {@code null} until assigned. */
        public long[] clens = null;

        /**
         * Renders the holder as three lines: the counter file, the row counts and the
         * column counts (each count followed by {@code ", "}).
         *
         * <p>Unset fields are tolerated: a {@code null} path prints as {@code "null"} and a
         * {@code null} array prints as an empty line, so this method never throws on a
         * partially populated instance.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            // append(Object) prints "null" for a null reference instead of throwing.
            sb.append(this.counterFile);
            sb.append("\n");
            appendAll(sb, this.rlens);
            sb.append("\n");
            appendAll(sb, this.clens);
            return sb.toString();
        }

        /** Appends every value followed by {@code ", "}; a {@code null} array appends nothing. */
        private static void appendAll(StringBuilder sb, long[] values) {
            if (values == null) {
                return;
            }
            for (long value : values) {
                sb.append(value);
                sb.append(", ");
            }
        }
    }

    /* loaded from: input_file:org/apache/sysml/runtime/matrix/CSVReblockMR$BlockRow.class */
    /**
     * Writable pair of an integer index and a {@link MatrixBlock}; used as the map output
     * value type of the CSV reblock job. On the wire it is the index as an int followed by
     * the serialized matrix block.
     */
    public static class BlockRow implements Writable {
        /** Index written ahead of the block payload (presumably the row position inside a block - confirm with the mapper). */
        public int indexInBlock = 0;
        /** Block payload; created on demand by {@link #readFields(DataInput)}. */
        public MatrixBlock data = null;

        /** Reads the index, then fills {@link #data}, allocating a fresh block first if none exists yet. */
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            indexInBlock = dataInput.readInt();
            MatrixBlock target = data;
            if (target == null) {
                target = new MatrixBlock();
                data = target;
            }
            target.readFields(dataInput);
        }

        /** Writes the index followed by the block; {@link #data} must be set. */
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(indexInBlock);
            data.write(dataOutput);
        }
    }

    /* loaded from: input_file:org/apache/sysml/runtime/matrix/CSVReblockMR$OffsetCount.class */
    /**
     * Writable triple of a file name, an offset within that file and a count; used as the
     * value type of the row-ID assignment job.
     *
     * <p>Ordering and equality consider only {@link #filename} and {@link #fileOffset};
     * {@link #count} is ignored by both. {@link #hashCode()} deliberately throws, so
     * instances must not be placed in hash-based collections.
     *
     * <p>Wire format: the file name as UTF-8 bytes terminated by {@code '\n'}, followed by
     * two longs. For pure-ASCII names this is byte-identical to the previous
     * {@code writeBytes} encoding. File names containing a line break cannot be
     * round-tripped.
     */
    public static class OffsetCount implements WritableComparable {
        /** Charset used to encode the file name on the wire. */
        private static final java.nio.charset.Charset NAME_CHARSET = java.nio.charset.StandardCharsets.UTF_8;
        /** Charset that maps chars 0..255 one-to-one to bytes; undoes the zero-extension done by readLine(). */
        private static final java.nio.charset.Charset RAW_BYTES = java.nio.charset.StandardCharsets.ISO_8859_1;

        public String filename;
        public long fileOffset;
        public long count;

        /** Creates an instance with an empty file name and zero offset and count. */
        public OffsetCount() {
            this.filename = "";
            this.fileOffset = 0L;
            this.count = 0L;
        }

        /** Creates an instance from the given file name, offset and count. */
        public OffsetCount(String str, long j, long j2) {
            this.filename = str;
            this.fileOffset = j;
            this.count = j2;
        }

        /** Copy constructor. */
        public OffsetCount(OffsetCount offsetCount) {
            this.filename = offsetCount.filename;
            this.fileOffset = offsetCount.fileOffset;
            this.count = offsetCount.count;
        }

        /**
         * Reads the newline-terminated file name followed by offset and count.
         *
         * <p>{@code DataInput.readLine()} turns every byte into one char, so the line is
         * mapped back to its raw bytes and decoded as UTF-8 to recover non-ASCII names.
         */
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            String rawLine = dataInput.readLine();
            // readLine() yields null at end of stream; keep that and let readLong() report the EOF.
            this.filename = (rawLine == null) ? null : new String(rawLine.getBytes(RAW_BYTES), NAME_CHARSET);
            this.fileOffset = dataInput.readLong();
            this.count = dataInput.readLong();
        }

        /**
         * Writes the file name as UTF-8 terminated by a newline, then offset and count.
         *
         * <p>{@code DataOutput.writeBytes} is intentionally not used: it drops the high byte
         * of every char and would corrupt names outside the Latin-1 range.
         */
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.write((this.filename + '\n').getBytes(NAME_CHARSET));
            dataOutput.writeLong(this.fileOffset);
            dataOutput.writeLong(this.count);
        }

        @Override
        public String toString() {
            return this.filename + ", " + this.fileOffset + ", " + this.count;
        }

        /** Orders by file name first, then by file offset; the count does not take part. */
        public int compareTo(OffsetCount offsetCount) {
            int byName = this.filename.compareTo(offsetCount.filename);
            if (byName != 0) {
                return byName;
            }
            if (this.fileOffset < offsetCount.fileOffset) {
                return -1;
            }
            return this.fileOffset > offsetCount.fileOffset ? 1 : 0;
        }

        /** Delegates to {@link #compareTo(OffsetCount)}; any other kind of object yields -1. */
        @Override
        public int compareTo(Object obj) {
            if (obj instanceof OffsetCount) {
                return compareTo((OffsetCount) obj);
            }
            return -1;
        }

        /** Two instances are equal when file name and file offset match; the count is ignored. */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof OffsetCount)) {
                return false;
            }
            OffsetCount other = (OffsetCount) obj;
            return this.filename.equals(other.filename) && this.fileOffset == other.fileOffset;
        }

        /** Always throws: hashing instances of this class is not supported. */
        @Override
        public int hashCode() {
            throw new RuntimeException("hashCode() should never be called on instances of this class.");
        }
    }

    /** Not instantiable: the methods of this class are all static. */
    private CSVReblockMR() {
    }

    /**
     * Determines the representative file name for an input location.
     *
     * <p>If {@code str} denotes a directory, its non-hidden entries (see
     * {@link #hiddenFileFilter}) are listed and the lexicographically smallest path string
     * is returned, or the empty string when the directory has no such entry. Otherwise
     * the fully qualified form of the path itself is returned.
     *
     * @param jobConf configuration used to resolve the file system of the path
     * @param str     input file or directory
     * @return smallest contained path, the qualified path itself, or {@code ""}
     * @throws FileNotFoundException declared for callers; raised by the file system calls
     * @throws IOException           if the file system cannot be accessed
     */
    public static String findSmallestFile(JobConf jobConf, String str) throws FileNotFoundException, IOException {
        final Path location = new Path(str);
        final FileSystem fs = location.getFileSystem(jobConf);

        // A plain file stands for itself.
        if (!fs.isDirectory(location)) {
            return location.makeQualified(fs).toString();
        }

        String smallest = "";
        boolean seenAny = false;
        for (FileStatus entry : fs.listStatus(location, hiddenFileFilter)) {
            String candidate = entry.getPath().toString();
            if (!seenAny || candidate.compareTo(smallest) < 0) {
                smallest = candidate;
                seenAny = true;
            }
        }
        return smallest;
    }

    /**
     * Runs the two-phase CSV reblock: first the row-ID assignment job, then the actual
     * reblock job, after checking the discovered dimensions against the expected ones.
     *
     * @param mRJobInstruction not read by this method ({@code null} is passed on to the reblock job)
     * @param strArr           input paths, one per input matrix
     * @param inputInfoArr     input format descriptors, forwarded to the job setup
     * @param jArr             expected row counts per input; a negative value disables the check
     * @param jArr2            expected column counts per input; a negative value disables the check
     * @param iArr             first block-size array, forwarded to the job setup (presumably rows per block - confirm)
     * @param iArr2            second block-size array, forwarded to the job setup (presumably columns per block - confirm)
     * @param str              CSV reblock instructions
     * @param str2             instructions to be executed in the reducer
     * @param i                forwarded to the reblock job, which does not read it in the code visible here
     * @param i2               DFS replication factor for both jobs
     * @param bArr             result indexes of the reblock job
     * @param strArr2          output paths
     * @param outputInfoArr    output format descriptors
     * @return the result of the reblock job
     * @throws RuntimeException if a non-negative expected dimension differs from the one
     *                          reported by the row-ID assignment job
     * @throws Exception        if one of the jobs or a file system access fails
     */
    public static JobReturn runJob(MRJobInstruction mRJobInstruction, String[] strArr, InputInfo[] inputInfoArr, long[] jArr, long[] jArr2, int[] iArr, int[] iArr2, String str, String str2, int i, int i2, byte[] bArr, String[] strArr2, OutputInfo[] outputInfoArr) throws Exception {
        // Resolve the smallest file of every input exactly once via the shared helper.
        JobConf lookupConf = new JobConf();
        String[] smallestFiles = new String[strArr.length];
        for (int k = 0; k < strArr.length; k++) {
            smallestFiles[k] = findSmallestFile(lookupConf, strArr[k]);
        }

        AssignRowIDMRReturn assigned = runAssignRowIDMRJob(strArr, inputInfoArr, iArr, iArr2, str, i2, smallestFiles);

        // Only dimensions that are known up front (>= 0) are validated.
        for (int k = 0; k < jArr.length; k++) {
            boolean rowsMismatch = jArr[k] >= 0 && jArr[k] != assigned.rlens[k];
            boolean colsMismatch = jArr2[k] >= 0 && jArr2[k] != assigned.clens[k];
            if (rowsMismatch || colsMismatch) {
                throw new RuntimeException("Dimension doesn't mach for input matrix " + k + ", expected (" + jArr[k] + ", " + jArr2[k] + ") but real (" + assigned.rlens[k] + ", " + assigned.clens[k] + ")");
            }
        }

        return runCSVReblockJob(null, strArr, inputInfoArr, assigned.rlens, assigned.clens, iArr, iArr2, str, str2, i, i2, bArr, strArr2, outputInfoArr, assigned.counterFile, smallestFiles);
    }

    /**
     * Configures and runs the "Assign-RowID-MR" job and collects its results.
     *
     * <p>The job uses {@code CSVAssignRowIDMapper}, a single {@code CSVAssignRowIDReducer}
     * task and writes a sequence file of {@code ByteWritable}/{@code OffsetCount} pairs
     * to a freshly constructed temporary path. After the job returns, the per-input row
     * and column counts are read from the {@link #NUM_ROWS_IN_MATRIX} and
     * {@link #NUM_COLS_IN_MATRIX} counter groups.
     *
     * @param strArr       input paths, one per input matrix
     * @param inputInfoArr input format descriptors
     * @param iArr         first block-size array, forwarded to the input setup
     * @param iArr2        second block-size array, forwarded to the input setup
     * @param str          CSV reblock instructions
     * @param i            DFS replication factor
     * @param strArr2      per-input file names stored under {@link #SMALLEST_FILE_NAME_PER_INPUT}
     * @return the job's output path together with the discovered dimensions
     * @throws Exception if job setup or execution fails
     */
    public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] strArr, InputInfo[] inputInfoArr, int[] iArr, int[] iArr2, String str, int i, String[] strArr2) throws Exception {
        AssignRowIDMRReturn result = new AssignRowIDMRReturn();
        JobConf job = new JobConf(CSVReblockMR.class);
        job.setJobName("Assign-RowID-MR");

        // Inputs are tagged with consecutive byte indexes 0, 1, 2, ...
        byte[] inputIndexes = new byte[strArr.length];
        for (byte idx = 0; idx < inputIndexes.length; idx++) {
            inputIndexes[idx] = idx;
        }

        MRJobConfiguration.setUpMultipleInputs(job, inputIndexes, strArr, inputInfoArr, iArr, iArr2, false, MRJobConfiguration.ConvertTarget.CELL);
        job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, strArr2);
        MRJobConfiguration.setCSVReblockInstructions(job, str);
        job.setInt(MRConfigurationNames.DFS_REPLICATION, i);
        MRJobConfiguration.setupCustomMRConfigurations(job, ConfigurationManager.getDMLConfig());

        job.setNumReduceTasks(1);
        job.setMapperClass(CSVAssignRowIDMapper.class);
        job.setMapOutputKeyClass(ByteWritable.class);
        job.setMapOutputValueClass(OffsetCount.class);
        job.setReducerClass(CSVAssignRowIDReducer.class);
        job.setBoolean("adaptivemr.map.enable", false);
        MRJobConfiguration.setUniqueWorkingDir(job);

        result.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
        job.setOutputFormat(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, result.counterFile);
        job.setOutputKeyClass(ByteWritable.class);
        job.setOutputValueClass(OffsetCount.class);

        RunningJob finished = JobClient.runJob(job);

        // One counter per input in each group, named by the input's index.
        Counters.Group rowCounters = finished.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
        Counters.Group colCounters = finished.getCounters().getGroup(NUM_COLS_IN_MATRIX);
        final int inputCount = strArr.length;
        result.rlens = new long[inputCount];
        result.clens = new long[inputCount];
        for (int k = 0; k < strArr.length; k++) {
            result.rlens[k] = rowCounters.getCounter(Integer.toString(k));
            result.clens[k] = colCounters.getCounter(Integer.toString(k));
        }
        return result;
    }

    /**
     * Configures and runs the "CSV-Reblock-MR" job using the output of the row-ID
     * assignment job.
     *
     * <p>The "part-00000" file below {@code path} is added to the distributed cache and
     * its path is stored under {@link #ROWID_FILE_NAME}. Once the job has returned,
     * {@code path} is deleted and the non-zero counts of all results are read from the
     * job counters.
     *
     * @param mRJobInstruction not read by this method
     * @param strArr           input paths, one per input matrix
     * @param inputInfoArr     input format descriptors
     * @param jArr             row counts per input
     * @param jArr2            column counts per input
     * @param iArr             first block-size array, forwarded to the job setup
     * @param iArr2            second block-size array, forwarded to the job setup
     * @param str              CSV reblock instructions
     * @param str2             instructions to be executed in the reducer
     * @param i                not read by this method
     * @param i2               DFS replication factor
     * @param bArr             result indexes
     * @param strArr2          output paths
     * @param outputInfoArr    output format descriptors
     * @param path             output path of the row-ID assignment job
     * @param strArr3          per-input file names stored under {@link #SMALLEST_FILE_NAME_PER_INPUT}
     * @return matrix characteristics of the results, the output infos and the job's success flag
     * @throws Exception if job setup or execution fails
     */
    private static JobReturn runCSVReblockJob(MRJobInstruction mRJobInstruction, String[] strArr, InputInfo[] inputInfoArr, long[] jArr, long[] jArr2, int[] iArr, int[] iArr2, String str, String str2, int i, int i2, byte[] bArr, String[] strArr2, OutputInfo[] outputInfoArr, Path path, String[] strArr3) throws Exception {
        JobConf job = new JobConf(ReblockMR.class);
        job.setJobName("CSV-Reblock-MR");

        // Inputs are tagged with consecutive byte indexes 0, 1, 2, ...
        byte[] inputIndexes = new byte[strArr.length];
        for (byte idx = 0; idx < inputIndexes.length; idx++) {
            inputIndexes[idx] = idx;
        }

        MRJobConfiguration.setUpMultipleInputs(job, inputIndexes, strArr, inputInfoArr, iArr, iArr2, false, MRJobConfiguration.ConvertTarget.CELL);
        job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, strArr3);
        MRJobConfiguration.setMatricesDimensions(job, inputIndexes, jArr, jArr2);
        MRJobConfiguration.setBlocksSizes(job, inputIndexes, iArr, iArr2);
        MRJobConfiguration.setCSVReblockInstructions(job, str);
        MRJobConfiguration.setInstructionsInReducer(job, str2);
        job.setInt(MRConfigurationNames.DFS_REPLICATION, i2);
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
        DMLConfig dmlConfig = ConfigurationManager.getDMLConfig();
        MRJobConfiguration.setupCustomMRConfigurations(job, dmlConfig);

        MRJobConfiguration.MatrixChar_N_ReducerGroups characteristics = MRJobConfiguration.computeMatrixCharacteristics(job, inputIndexes, null, str, null, null, str2, bArr, MRJobConfiguration.setUpOutputIndexesForMapper(job, inputIndexes, null, str, null, str2, bArr), false);
        MatrixCharacteristics[] stats = characteristics.stats;
        job.setNumReduceTasks(WriteCSVMR.determineNumReducers(jArr, jArr2, dmlConfig.getIntValue(DMLConfig.NUM_REDUCERS), characteristics.numReducerGroups));

        // Flag every result whose row or column count is still -1.
        byte[] unknownDimFlags = new byte[bArr.length];
        for (int k = 0; k < bArr.length; k++) {
            boolean unknown = stats[k].getRows() == -1 || stats[k].getCols() == -1;
            unknownDimFlags[k] = (byte) (unknown ? 1 : 0);
        }
        MRJobConfiguration.setUpMultipleOutputs(job, bArr, unknownDimFlags, strArr2, outputInfoArr, true, true);

        job.setMapperClass(CSVReblockMapper.class);
        job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
        job.setMapOutputValueClass(BlockRow.class);
        job.setReducerClass(CSVReblockReducer.class);
        job.setBoolean("adaptivemr.map.enable", false);
        MRJobConfiguration.setUniqueWorkingDir(job);

        // Ship the single output part of the row-ID job to all tasks.
        Path rowIdPart = new Path(path, "part-00000");
        DistributedCache.addCacheFile(rowIdPart.toUri(), job);
        DistributedCache.createSymlink(job);
        job.set(ROWID_FILE_NAME, rowIdPart.toString());

        RunningJob finished = JobClient.runJob(job);
        MapReduceTool.deleteFileIfExistOnHDFS(path, job);

        Counters.Group nonZeroCounters = finished.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
        for (int k = 0; k < bArr.length; k++) {
            stats[k].setNonZeros(nonZeroCounters.getCounter(Integer.toString(k)));
        }
        return new JobReturn(stats, outputInfoArr, finished.isSuccessful());
    }
}
