package ml.shifu.guagua.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import ml.shifu.guagua.GuaguaRuntimeException;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:ml/shifu/guagua/mapreduce/GuaguaInputFormat.class */
public class GuaguaInputFormat extends TextInputFormat {
    /** Logger shared by all methods of this input format. */
    private static final Logger LOG = LoggerFactory.getLogger(GuaguaInputFormat.class);

    /**
     * Orders {@code Node}s ascending by their {@code length} counter, which is
     * incremented on every split added to the node and decremented on every removal.
     */
    private static Comparator<Node> nodeComparator = new Comparator<Node>() {
        @Override
        public int compare(Node left, Node right) {
            long difference = left.length - right.length;
            if (difference < 0) {
                return -1;
            }
            return difference > 0 ? 1 : 0;
        }
    };

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ml/shifu/guagua/mapreduce/GuaguaInputFormat$ComparableSplit.class */
    /**
     * Wraps a raw {@link InputSplit} so that it can be sorted and binary-searched by size.
     *
     * <p>Natural ordering is <em>descending</em> by split length; splits of equal length are
     * ordered ascending by {@code id}. {@link #equals(Object)} is defined through
     * {@link #compareTo(ComparableSplit)}, so two instances are equal only when both the length
     * and the id match.
     */
    public static final class ComparableSplit implements Comparable<ComparableSplit> {
        private final InputSplit rawInputSplit;
        /** Nodes this split has been registered on; used to detach it once it is consumed. */
        private final Set<Node> nodes = new HashSet<Node>();
        private final long id;

        ComparableSplit(InputSplit inputSplit, long j) {
            this.rawInputSplit = inputSplit;
            this.id = j;
        }

        /** Remembers that this split is attached to {@code node}. */
        void add(Node node) {
            this.nodes.add(node);
        }

        /** Removes this split from the split list of every node it was attached to. */
        void removeFromNodes() {
            for (Node node : this.nodes) {
                node.remove(this);
            }
        }

        /** @return the wrapped split */
        public InputSplit getSplit() {
            return this.rawInputSplit;
        }

        @Override
        public boolean equals(Object obj) {
            // instanceof already rejects null.
            return obj instanceof ComparableSplit && compareTo((ComparableSplit) obj) == 0;
        }

        /**
         * Hash derived from {@code id}. Equal instances always share the same id (see
         * {@link #compareTo(ComparableSplit)}), so this honours the equals/hashCode contract
         * while avoiding the single-bucket collapse of a constant hash.
         */
        @Override
        public int hashCode() {
            return (int) (this.id ^ (this.id >>> 32));
        }

        /**
         * Compares by length descending, then by id ascending.
         *
         * @throws RuntimeException wrapping any {@link IOException} or
         *         {@link InterruptedException} raised while reading a split length
         */
        @Override // java.lang.Comparable
        public int compareTo(ComparableSplit comparableSplit) {
            try {
                long thisLength = this.rawInputSplit.getLength();
                long otherLength = comparableSplit.rawInputSplit.getLength();
                if (thisLength != otherLength) {
                    // Larger splits sort first.
                    return thisLength < otherLength ? 1 : -1;
                }
                if (this.id == comparableSplit.id) {
                    return 0;
                }
                return this.id < comparableSplit.id ? -1 : 1;
            } catch (IOException e) {
                throw new RuntimeException(e);
            } catch (InterruptedException e2) {
                // Preserve the interrupt status for callers further up the stack.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e2);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ml/shifu/guagua/mapreduce/GuaguaInputFormat$DummySplit.class */
    /**
     * Placeholder split that carries nothing but an adjustable length. Within this file it is
     * wrapped in a {@code ComparableSplit} and used as the search key for
     * {@code Collections.binarySearch} over size-ordered split lists.
     */
    public static class DummySplit extends InputSplit {
        /** The length reported by {@link #getLength()}. */
        private long length;

        private DummySplit() {
        }

        /** @param newLength the length this placeholder should report from now on */
        public void setLength(long newLength) {
            this.length = newLength;
        }

        /** @return the most recently assigned length (0 until one is set) */
        public long getLength() {
            return this.length;
        }

        /** @return always {@code null}; this placeholder has no locations */
        public String[] getLocations() {
            return null;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ml/shifu/guagua/mapreduce/GuaguaInputFormat$Node.class */
    /**
     * Bucket of the splits attached to one location, together with a running count of them.
     *
     * <p>The split list is sorted lazily: {@link #sort()} orders it by the natural ordering of
     * {@code ComparableSplit}, and {@link #remove(ComparableSplit)} relies on that order to
     * locate entries with a binary search.
     */
    public static class Node {
        /** Number of splits currently held in {@link #splits}. */
        private long length;
        private List<ComparableSplit> splits = new ArrayList<ComparableSplit>();
        /** Whether {@link #splits} is currently in sorted order. */
        private boolean sorted = false;

        Node() throws IOException, InterruptedException {
            this.length = 0L;
        }

        /**
         * Appends a split and bumps the count.
         *
         * @param comparableSplit the split to attach to this node
         */
        void add(ComparableSplit comparableSplit) throws IOException, InterruptedException {
            this.splits.add(comparableSplit);
            this.length++;
            // Appending may break the ordering; force a re-sort before the next binary search.
            this.sorted = false;
        }

        /**
         * Removes a split if it is present, sorting first when necessary so that the binary
         * search is valid. Does nothing when the split is not found.
         *
         * @param comparableSplit the split to detach from this node
         */
        void remove(ComparableSplit comparableSplit) {
            sort();
            int position = Collections.binarySearch(this.splits, comparableSplit);
            if (position >= 0) {
                // Removing an element keeps the remaining elements in order.
                this.splits.remove(position);
                this.length--;
            }
        }

        /** Sorts the split list by natural order unless it is already known to be sorted. */
        void sort() {
            if (this.sorted) {
                return;
            }
            Collections.sort(this.splits);
            this.sorted = true;
        }

        /** @return the live (not copied) list of splits held by this node */
        List<ComparableSplit> getSplits() {
            return this.splits;
        }

        /** @return the number of splits currently attached to this node */
        public long getLength() {
            return this.length;
        }
    }

    /**
     * Builds the job's input splits.
     *
     * <p>When {@code guagua.split.combinable} is true, the parent's splits are generated with a
     * minimum size of 1 and a maximum of {@code guagua.split.maxCombinedSplitSize} (falling back
     * to the file system's default block size when unset or 0) and are then merged by
     * {@link #getFinalCombineGuaguaSplits(List, long)}; otherwise
     * {@link #getGuaguaSplits(JobContext)} is used. Null entries and splits made only of
     * near-empty gzip pieces are dropped, then {@code guagua.master.number} extra splits
     * (constructed with the {@code true} flag and no file split) are appended. The number of
     * remaining non-master splits is written to {@code guagua.worker.number}.
     *
     * @param jobContext job context whose configuration is both read and updated
     * @return the retained splits followed by the appended master splits
     * @throws IOException if split generation fails
     */
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        List<InputSplit> candidates;
        if (!jobContext.getConfiguration().getBoolean("guagua.split.combinable", false)) {
            candidates = getGuaguaSplits(jobContext);
        } else {
            long blockSize = FileSystem.get(jobContext.getConfiguration()).getDefaultBlockSize();
            long combineSize = jobContext.getConfiguration().getLong("guagua.split.maxCombinedSplitSize", blockSize);
            if (combineSize == 0) {
                combineSize = blockSize;
            }
            // Set both the constant-named keys and the literal keys, exactly as before.
            jobContext.getConfiguration().setLong(GuaguaMapReduceConstants.MAPRED_MIN_SPLIT_SIZE, 1L);
            jobContext.getConfiguration().setLong(GuaguaMapReduceConstants.MAPRED_MAX_SPLIT_SIZE, combineSize);
            jobContext.getConfiguration().setLong("mapreduce.input.fileinputformat.split.minsize", 1L);
            jobContext.getConfiguration().setLong("mapreduce.input.fileinputformat.split.maxsize", combineSize);
            List<InputSplit> rawSplits = super.getSplits(jobContext);
            LOG.debug("combine size:{}, splits:{}", Long.valueOf(combineSize), rawSplits);
            candidates = getFinalCombineGuaguaSplits(rawSplits, combineSize);
        }

        List<InputSplit> retained = new ArrayList<InputSplit>();
        for (InputSplit candidate : candidates) {
            ml.shifu.guagua.hadoop.io.GuaguaInputSplit guaguaSplit = (ml.shifu.guagua.hadoop.io.GuaguaInputSplit) candidate;
            if (guaguaSplit == null || isAllFileSplitsEmptyGzip(guaguaSplit)) {
                continue;
            }
            retained.add(guaguaSplit);
        }

        int masterCount = jobContext.getConfiguration().getInt("guagua.master.number", 1);
        int added = 0;
        while (added < masterCount) {
            retained.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(true, (FileSplit) null));
            added++;
        }

        int totalCount = retained.size();
        LOG.info("Input size including master: {}", Integer.valueOf(totalCount));
        LOG.debug("input splits: {}", retained);
        jobContext.getConfiguration().set("guagua.worker.number", String.valueOf(totalCount - masterCount));
        return retained;
    }

    /**
     * Tells whether every file split inside {@code guaguaInputSplit} is a tiny gzip piece: its
     * file name ends with "gz" (case-insensitively), it starts at offset 0 and it is at most
     * 20 bytes long. A split with no file splits at all also yields {@code true}.
     *
     * <p>NOTE(review): 20 bytes presumably corresponds to the size of an empty gzip file —
     * confirm.
     *
     * @param guaguaInputSplit the split whose file splits are inspected
     * @return {@code false} as soon as one file split fails any of the three checks
     */
    private boolean isAllFileSplitsEmptyGzip(ml.shifu.guagua.hadoop.io.GuaguaInputSplit guaguaInputSplit) {
        for (FileSplit piece : guaguaInputSplit.getFileSplits()) {
            if (!piece.getPath().getName().toLowerCase().endsWith("gz")) {
                return false;
            }
            if (piece.getStart() != 0) {
                return false;
            }
            if (piece.getLength() > 20) {
                return false;
            }
        }
        return true;
    }

    /**
     * Groups the given splits with {@link #getCombineGuaguaSplits(List, long)} and turns every
     * group into one {@code GuaguaInputSplit} (constructed with the {@code false} flag) holding
     * the group's members as {@link FileSplit}s.
     *
     * @param list raw splits; every member is cast to {@link FileSplit}
     * @param j    maximum combined size handed to the grouping step
     * @return one combined split per group, in group order
     * @throws IOException            if grouping fails with an I/O error
     * @throws GuaguaRuntimeException if grouping is interrupted (the interrupt flag is restored)
     */
    protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> list, long j) throws IOException {
        try {
            List<InputSplit> combined = new ArrayList<InputSplit>();
            for (List<InputSplit> group : getCombineGuaguaSplits(list, j)) {
                FileSplit[] members = new FileSplit[group.size()];
                int slot = 0;
                for (InputSplit member : group) {
                    members[slot] = (FileSplit) member;
                    slot++;
                }
                combined.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(false, members));
            }
            return combined;
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
            throw new GuaguaRuntimeException(interrupted);
        }
    }

    /**
     * Generates one {@code GuaguaInputSplit} per file chunk without combining.
     *
     * <p>Files matched by {@link #isPigOrHadoopMetaFile(Path)} are skipped. A non-empty,
     * splittable file is cut into chunks of the computed split size while the remaining bytes
     * exceed 1.1 times that size; whatever is left becomes the final chunk. A non-empty,
     * non-splittable file becomes a single split, and an empty file becomes a zero-length split
     * with no hosts. The number of listed input files is recorded under
     * {@code GuaguaMapReduceConstants.NUM_INPUT_FILES}.
     *
     * <p>Fixes relative to the previous text of this method:
     * <ul>
     * <li>the file count was read from an undeclared identifier; it now comes from the
     * {@code listStatus} result, which is kept in a local variable;</li>
     * <li>the slop test used integer division, which truncated the ratio before comparing it
     * with 1.1 and therefore let the last chunk grow to almost twice the split size; the ratio
     * is now computed in floating point.</li>
     * </ul>
     *
     * @param jobContext job context supplying the configuration and input paths
     * @return the generated splits in file order
     * @throws IOException if listing files or fetching block locations fails
     */
    protected List<InputSplit> getGuaguaSplits(JobContext jobContext) throws IOException {
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(jobContext));
        long maxSize = getMaxSplitSize(jobContext);
        List<InputSplit> splits = new ArrayList<InputSplit>();
        List<FileStatus> files = listStatus(jobContext);
        for (FileStatus file : files) {
            Path path = file.getPath();
            if (isPigOrHadoopMetaFile(path)) {
                continue;
            }
            FileSystem fileSystem = path.getFileSystem(jobContext.getConfiguration());
            long fileLength = file.getLen();
            BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(file, 0L, fileLength);
            if (fileLength != 0 && isSplitable(jobContext, path)) {
                long splitSize = computeSplitSize(file.getBlockSize(), minSize, maxSize);
                long bytesRemaining = fileLength;
                // Floating-point ratio: keep cutting while more than 1.1 split sizes remain.
                while (((double) bytesRemaining) / splitSize > 1.1d) {
                    long offset = fileLength - bytesRemaining;
                    int blockIndex = getBlockIndex(blockLocations, offset);
                    splits.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(false, new FileSplit(path, offset, splitSize, blockLocations[blockIndex].getHosts())));
                    bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                    // The tail chunk is attributed to the hosts of the last block.
                    splits.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(false, new FileSplit(path, fileLength - bytesRemaining, bytesRemaining, blockLocations[blockLocations.length - 1].getHosts())));
                }
            } else if (fileLength != 0) {
                splits.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(false, new FileSplit(path, 0L, fileLength, blockLocations[0].getHosts())));
            } else {
                splits.add(new ml.shifu.guagua.hadoop.io.GuaguaInputSplit(false, new FileSplit(path, 0L, fileLength, new String[0])));
            }
        }
        jobContext.getConfiguration().setLong(GuaguaMapReduceConstants.NUM_INPUT_FILES, files.size());
        LOG.debug("Total # of splits: {}", Integer.valueOf(splits.size()));
        return splits;
    }

    /**
     * Groups small splits so that each group's total length approaches, but is steered by,
     * the limit {@code j}.
     *
     * <p>Outline:
     * <ol>
     * <li>Zero-length splits are counted and ignored; splits of length {@code >= j} each form
     * their own group; every other ("small") split is attached to one {@code Node} per distinct
     * location it reports.</li>
     * <li>If all splits are empty, the first one is returned alone. If there is exactly one
     * small split, it forms its own group.</li>
     * <li>Otherwise nodes are visited in ascending order of their split count. On each node the
     * largest remaining split is taken and further splits that fit the remaining space are
     * added via binary search. A group is emitted when it exceeds {@code j / 2}; otherwise the
     * node's remaining splits are left for the leftover pass.</li>
     * <li>Leftover splits (de-duplicated by raw split) are packed sequentially; the final
     * partial group is merged into the first existing group with enough room, or emitted as a
     * new group.</li>
     * </ol>
     *
     * <p>Fixes relative to the previous text of this method: the per-node loop had no exit
     * (it spun forever on an exhausted node and re-processed the same splits when the group was
     * too small), and the final "squeeze" loop neither advanced nor stopped after a successful
     * merge. Both loops now terminate: the per-node loop runs while the node still has splits
     * and stops once the remaining ones cannot fill half a group, and the squeeze loop stops at
     * the first group that accepts the tail.
     *
     * @param list splits to group
     * @param j    maximum combined size of a group
     * @return the groups; each inner list holds the raw splits of one group
     * @throws IOException          if a split's length or locations cannot be read
     * @throws InterruptedException if reading split metadata is interrupted
     * @throws AssertionError       if a too-small group does not contain every remaining split
     *                              of its node
     */
    public static List<List<InputSplit>> getCombineGuaguaSplits(List<InputSplit> list, long j) throws IOException, InterruptedException {
        ArrayList<Node> nodes = new ArrayList<Node>();
        HashMap<String, Node> nodeByLocation = new HashMap<String, Node>();
        List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
        // Parallel to result while groups are being built: total length of each group.
        List<Long> resultLengths = new ArrayList<Long>();
        long nextSplitId = 0;
        int smallSplitCount = 0;
        int totalSplitCount = list.size();
        InputSplit lastSmallSplit = null;
        int emptySplitCount = 0;

        for (InputSplit split : list) {
            if (split.getLength() == 0) {
                emptySplitCount++;
            } else if (split.getLength() >= j) {
                // Already big enough: becomes a group of its own.
                nextSplitId++;
                List<InputSplit> single = new ArrayList<InputSplit>();
                single.add(split);
                result.add(single);
                resultLengths.add(Long.valueOf(split.getLength()));
            } else {
                ComparableSplit comparable = new ComparableSplit(split, nextSplitId++);
                String[] locations = split.getLocations();
                // Sorted so that nodes are created in a deterministic order.
                Arrays.sort(locations);
                HashSet<String> seenLocations = new HashSet<String>();
                for (String location : locations) {
                    if (!seenLocations.contains(location)) {
                        Node node = nodeByLocation.get(location);
                        if (node == null) {
                            node = new Node();
                            nodes.add(node);
                            nodeByLocation.put(location, node);
                        }
                        node.add(comparable);
                        comparable.add(node);
                        seenLocations.add(location);
                    }
                }
                lastSmallSplit = split;
                smallSplitCount++;
            }
        }

        if (totalSplitCount > 0 && emptySplitCount == totalSplitCount) {
            // Everything is empty: still hand back one split.
            List<InputSplit> onlyEmpty = new ArrayList<InputSplit>();
            onlyEmpty.add(list.get(0));
            result.add(onlyEmpty);
        } else if (smallSplitCount == 1) {
            List<InputSplit> single = new ArrayList<InputSplit>();
            single.add(lastSmallSplit);
            result.add(single);
        } else if (smallSplitCount > 1) {
            Collections.sort(nodes, nodeComparator);
            // The dummy's length is set to the space left and used as the binary-search key.
            DummySplit dummy = new DummySplit();
            ComparableSplit dummyComparable = new ComparableSplit(dummy, -1L);

            for (Node node : nodes) {
                node.sort();
                List<ComparableSplit> splits = node.getSplits();
                List<InputSplit> combined = new ArrayList<InputSplit>();
                List<ComparableSplit> combinedComparable = new ArrayList<ComparableSplit>();
                while (!splits.isEmpty()) {
                    // Start each group with the first (largest) remaining split.
                    combined.add(splits.get(0).getSplit());
                    combinedComparable.add(splits.get(0));
                    int startIndex = 1;
                    int splitCount = splits.size();
                    long totalSize = splits.get(0).getSplit().getLength();
                    long spaceLeft = j - totalSize;
                    dummy.setLength(spaceLeft);
                    int searchResult = Collections.binarySearch(node.getSplits().subList(startIndex, splitCount), dummyComparable);
                    // The key is never found (id -1), so the result encodes an insertion point.
                    int index = ((-searchResult) - 1) + startIndex;
                    while (index < splitCount) {
                        long candidateLength = splits.get(index).getSplit().getLength();
                        combined.add(splits.get(index).getSplit());
                        combinedComparable.add(splits.get(index));
                        totalSize += candidateLength;
                        spaceLeft -= candidateLength;
                        if (spaceLeft <= 0) {
                            break;
                        }
                        startIndex = index + 1;
                        if (startIndex >= splitCount) {
                            break;
                        }
                        dummy.setLength(spaceLeft);
                        searchResult = Collections.binarySearch(node.getSplits().subList(startIndex, splitCount), dummyComparable);
                        index = ((-searchResult) - 1) + startIndex;
                    }
                    if (totalSize > j / 2) {
                        result.add(combined);
                        resultLengths.add(Long.valueOf(totalSize));
                        // Detach the consumed splits from every node that holds them.
                        removeSplits(combinedComparable);
                        combined = new ArrayList<InputSplit>();
                        combinedComparable.clear();
                        splits = node.getSplits();
                    } else {
                        if (combined.size() != splitCount) {
                            throw new AssertionError("Combined split logic error!");
                        }
                        // What remains on this node is too small; leave it for the leftover pass.
                        break;
                    }
                }
            }

            // Collect what is still attached to any node, once per raw split.
            List<ComparableSplit> leftovers = new ArrayList<ComparableSplit>();
            HashSet<InputSplit> seenRawSplits = new HashSet<InputSplit>();
            for (Node node : nodes) {
                for (ComparableSplit leftover : node.getSplits()) {
                    if (!seenRawSplits.contains(leftover.getSplit())) {
                        seenRawSplits.add(leftover.getSplit());
                        leftovers.add(leftover);
                    }
                }
            }

            if (!leftovers.isEmpty()) {
                long totalSize = 0;
                List<InputSplit> combined = new ArrayList<InputSplit>();
                List<ComparableSplit> combinedComparable = new ArrayList<ComparableSplit>();
                int leftoverCount = leftovers.size();
                for (int i = 0; i < leftoverCount; i++) {
                    ComparableSplit leftover = leftovers.get(i);
                    if (totalSize + leftover.getSplit().getLength() >= j) {
                        // Adding this split would reach the limit: flush the current group first.
                        removeSplits(combinedComparable);
                        result.add(combined);
                        resultLengths.add(Long.valueOf(totalSize));
                        combined = new ArrayList<InputSplit>();
                        combinedComparable.clear();
                        totalSize = 0;
                    }
                    combined.add(leftover.getSplit());
                    combinedComparable.add(leftover);
                    totalSize += leftover.getSplit().getLength();
                    if (i == leftoverCount - 1) {
                        // Last piece: try to squeeze it into the first group with enough room.
                        for (int g = 0; g < result.size(); g++) {
                            if (resultLengths.get(g).longValue() + totalSize <= j) {
                                result.get(g).addAll(combined);
                                removeSplits(combinedComparable);
                                combined.clear();
                                break;
                            }
                        }
                        if (!combined.isEmpty()) {
                            // No group had room; emit the tail as its own group.
                            removeSplits(combinedComparable);
                            result.add(combined);
                        }
                    }
                }
            }
        }
        LOG.info("Total input paths (combined) to process : {}", Integer.valueOf(result.size()));
        return result;
    }

    /**
     * Detaches every split in {@code list} from all nodes it is attached to.
     *
     * @param list the splits to detach; the list itself is not modified
     */
    private static void removeSplits(List<ComparableSplit> list) {
        for (ComparableSplit consumed : list) {
            consumed.removeFromNodes();
        }
    }

    /**
     * Checks whether the textual form of {@code path} contains any of the markers
     * {@code HADOOP_SUCCESS}, {@code PIG_HEADER} or {@code PIG_SCHEMA} defined in
     * {@code GuaguaMapReduceConstants}.
     *
     * @param path the path to test
     * @return {@code true} if at least one marker occurs anywhere in {@code path.toString()}
     */
    protected boolean isPigOrHadoopMetaFile(Path path) {
        String location = path.toString();
        if (location.indexOf(GuaguaMapReduceConstants.HADOOP_SUCCESS) >= 0) {
            return true;
        }
        if (location.indexOf(GuaguaMapReduceConstants.PIG_HEADER) >= 0) {
            return true;
        }
        return location.indexOf(GuaguaMapReduceConstants.PIG_SCHEMA) >= 0;
    }

    /**
     * Treats any file whose name ends with "parquet" as non-splittable and defers to the parent
     * class for every other file.
     *
     * @param jobContext job context forwarded to the parent check
     * @param path       file being considered for splitting
     * @return {@code false} for "parquet"-suffixed names, otherwise the parent's answer
     */
    protected boolean isSplitable(JobContext jobContext, Path path) {
        boolean parquetFile = path.getName().endsWith("parquet");
        return !parquetFile && super.isSplitable(jobContext, path);
    }

    /**
     * Creates a {@code GuaguaMRRecordReader} initialised with the value of
     * {@code guagua.iteration.count} from the task configuration ({@code -1} when unset).
     *
     * @param inputSplit         not used by this method
     * @param taskAttemptContext supplies the configuration that is read
     * @return a new record reader
     */
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
        int iterationCount = taskAttemptContext.getConfiguration().getInt("guagua.iteration.count", -1);
        return new GuaguaMRRecordReader(iterationCount);
    }
}
