package org.apache.sysml.runtime.transform;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.wink.json4j.JSONException;
import scala.Tuple2;

/* loaded from: input_file:org/apache/sysml/runtime/transform/ApplyTfCSVSPARK.class */
public class ApplyTfCSVSPARK {

    /* loaded from: input_file:org/apache/sysml/runtime/transform/ApplyTfCSVSPARK$ApplyTfCSVMap.class */
    public static class ApplyTfCSVMap implements Function2<Integer, Iterator<Tuple2<LongWritable, Text>>, Iterator<String>> {
        private static final long serialVersionUID = 1496686437276906911L;
        TfUtils _tfmapper;

        ApplyTfCSVMap(boolean z, String str, String str2, String str3, String str4, String str5, long j, String str6, Broadcast<TfUtils> broadcast) throws IllegalArgumentException, IOException, JSONException {
            this._tfmapper = null;
            this._tfmapper = (TfUtils) broadcast.getValue();
        }

        ApplyTfCSVMap(Broadcast<TfUtils> broadcast) throws IllegalArgumentException, IOException, JSONException {
            this._tfmapper = null;
            this._tfmapper = (TfUtils) broadcast.getValue();
        }

        public Iterator<String> call(Integer num, Iterator<Tuple2<LongWritable, Text>> it) throws Exception {
            boolean z = true;
            ArrayList arrayList = new ArrayList();
            while (it.hasNext()) {
                Tuple2<LongWritable, Text> next = it.next();
                if (z && num.intValue() == 0) {
                    z = false;
                    this._tfmapper.processHeaderLine();
                    if (this._tfmapper.hasHeader()) {
                        continue;
                    }
                }
                String[] words = this._tfmapper.getWords((Text) next._2());
                if (this._tfmapper.omit(words)) {
                    continue;
                } else {
                    try {
                        arrayList.add(this._tfmapper.checkAndPrepOutputString(this._tfmapper.apply(words)));
                    } catch (DMLRuntimeException e) {
                        throw new RuntimeException(e.getMessage() + ": " + ((Text) next._2()).toString());
                    }
                }
            }
            return arrayList.iterator();
        }
    }

    /**
     * Applies a transform to CSV records on Spark and returns the transformed lines as a cached
     * pair RDD in which every line is keyed with the constant {@code 1L}.
     *
     * <p>A {@link TfUtils} instance is built from the arguments, its metadata is loaded via
     * {@code loadTfMetadata()}, and it is then broadcast and applied per partition through
     * {@link ApplyTfCSVMap}. The result is cached and {@code count()} is called on it before
     * returning (a Spark action, so the transform is executed before this method returns).
     *
     * @param sparkExecutionContext    provides the Spark context used to create the broadcast
     * @param javaRDD                  input records; the {@code Text} value of each pair is one CSV line
     * @param str                      forwarded as the seventh {@code TfUtils} constructor argument
     *                                 (presumably the transform metadata location - verify against {@code TfUtils})
     * @param str2                     location of the transform specification, read with {@code TfUtils.readSpec}
     * @param str3                     forwarded as the last {@code TfUtils} constructor argument
     *                                 (presumably a temporary path - verify against {@code TfUtils})
     * @param cSVFileFormatProperties  supplies the header flag, delimiter and NA strings
     * @param i                        forwarded as the sixth {@code TfUtils} constructor argument
     *                                 (presumably the number of columns - verify against {@code TfUtils})
     * @param str4                     forwarded as the first {@code TfUtils} constructor argument
     *                                 (presumably the header line - verify against {@code TfUtils})
     * @return cached RDD of {@code (1L, transformedLine)} pairs
     */
    public static JavaPairRDD<Long, String> runSparkJob(SparkExecutionContext sparkExecutionContext, JavaRDD<Tuple2<LongWritable, Text>> javaRDD, String str, String str2, String str3, CSVFileFormatProperties cSVFileFormatProperties, int i, String str4) throws IOException, ClassNotFoundException, InterruptedException, IllegalArgumentException, JSONException {
        TfUtils tfUtils = new TfUtils(
                str4,
                cSVFileFormatProperties.hasHeader(),
                cSVFileFormatProperties.getDelim(),
                TfUtils.parseNAStrings(cSVFileFormatProperties.getNAStrings()),
                TfUtils.readSpec(FileSystem.get(new JobConf()), str2),
                i,
                str,
                null,
                str3);
        tfUtils.loadTfMetadata();

        Broadcast<TfUtils> broadcast = sparkExecutionContext.getSparkContext().broadcast(tfUtils);

        JavaPairRDD<Long, String> cache = javaRDD
                .mapPartitionsWithIndex(new ApplyTfCSVMap(broadcast), true)
                .mapToPair(new PairFunction<String, Long, String>() { // from class: org.apache.sysml.runtime.transform.ApplyTfCSVSPARK.1
                    private static final long serialVersionUID = 3868143093999082931L;

                    @Override
                    public Tuple2<Long, String> call(String str5) throws Exception {
                        // Long.valueOf avoids the deprecated Long(long) constructor and a fresh allocation per record.
                        return new Tuple2<>(Long.valueOf(1L), str5);
                    }
                })
                .cache();

        // Run an action now so the transform is evaluated (and the result cached) before returning.
        cache.count();
        return cache;
    }
}
