package ru.ifmo.genetics.tools.cleaner;

import it.unimi.dsi.fastutil.longs.Long2LongMap;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongCollection;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import ru.ifmo.genetics.dna.DnaQ;
import ru.ifmo.genetics.io.sources.Source;
import ru.ifmo.genetics.statistics.Timer;
import ru.ifmo.genetics.tools.Util;
import ru.ifmo.genetics.tools.cleaner.task.clean.CleanDispatcher;
import ru.ifmo.genetics.tools.cleaner.task.clean.NewCleanWorker;
import ru.ifmo.genetics.tools.cleaner.task.processHalfRead.GlobalContext;
import ru.ifmo.genetics.tools.cleaner.task.processHalfRead.LocalContext;
import ru.ifmo.genetics.tools.cleaner.task.processHalfRead.Task;
import ru.ifmo.genetics.tools.executors.BlockingThreadPoolExecutor;
import ru.ifmo.genetics.utils.FileUtils;
import ru.ifmo.genetics.utils.KmerUtils;

/* loaded from: input_file:ru/ifmo/genetics/tools/cleaner/Cleaner.class */
public class Cleaner {
    // Worker-thread counts. Both are assigned by the constructor from the
    // "available_processors" configuration key.
    static int STAT_WORK_THREADS_NUMBER;
    static int CLEAN_WORK_THREADS_NUMBER;

    // Correction limits; set by the constructor from its int arguments.
    // MAXIMAL_SUBS_NUMBER and MAXIMAL_INDELS_NUMBER are handed to every NewCleanWorker in clean().
    int MAXIMAL_SUBS_NUMBER;
    int MAXIMAL_INDELS_NUMBER;
    int MAXIMAL_BAD_FREQUENCY;

    // Arguments found between the fixed parameters and the "--" separator; filled by prepare().
    static String[] filenames;

    // Counters and statistics maps; not read or written in the visible part of this class.
    int countNoway;
    int countAmb;
    int countPolymorphism;
    int countFixes;
    Map<Integer, Integer> statAmb;
    Map<Integer, Integer> statPos;
    Map<Integer, Integer> statPosPair;

    // Shared k-mer -> count map that calcPrefix() passes to every GlobalContext and returns.
    ConcurrentHashMap<Long, Integer> hm = new ConcurrentHashMap<>(42, 0.75f, STAT_WORK_THREADS_NUMBER + 1);

    // Multiplier applied to every count in calcThreshold() and stat().
    static int SCALE = 1;
    // When true, clean() prints diagnostics and forces a single clean worker.
    static boolean verbose = false;
    static int numVl = 0;
    // K-mer length; overwritten by prepare() from the command line.
    static int LEN = 30;
    // Low 2*LEN bits set. The shift must be done on a long: with an int literal the shift
    // distance is reduced modulo 32, which silently truncates the mask for LEN >= 16.
    static long MASK = (1L << (2 * LEN)) - 1;
    // Range size passed to CleanDispatcher in clean().
    static int DISPATCH_WORK_RANGE_SIZE = 32768;
    // Number of reads batched into one LocalContext in processSource().
    static int HALF_READ_TASK_SIZE = 2048;
    static int CLEAN_WORK_QUEUE_CAPACITY = 2;
    // Bucket prefixes collected by prepare() from the arguments after "--".
    static ArrayList<String> prefixes = new ArrayList<>();
    // Working directory and the paths derived from it; all assigned by prepare().
    static String DIR = null;
    static String BUCKETS = null;
    static String DATA = null;

    /**
     * Drains {@code source}, grouping its items into batches of {@code HALF_READ_TASK_SIZE}
     * and submitting one {@link Task} per (global context, batch) pair to the executor.
     * A final, smaller batch is submitted if any items remain after the source is exhausted.
     *
     * @param source                     items to read
     * @param blockingThreadPoolExecutor executor that receives the tasks via {@code blockingExecute}
     * @param collection                 global contexts; every batch is paired with each of them
     * @throws InterruptedException propagated from the executor
     */
    static void processSource(Source<DnaQ> source, BlockingThreadPoolExecutor blockingThreadPoolExecutor, Collection<GlobalContext> collection) throws InterruptedException {
        int itemsSeen = 0;
        LocalContext batch = new LocalContext(0, new ArrayList(HALF_READ_TASK_SIZE));
        for (Iterator<DnaQ> items = source.iterator(); items.hasNext(); ) {
            itemsSeen++;
            batch.dnaqs.add(items.next());
            if (batch.dnaqs.size() != HALF_READ_TASK_SIZE) {
                continue;
            }
            for (GlobalContext context : collection) {
                blockingThreadPoolExecutor.blockingExecute(new Task(context, batch));
            }
            // The next batch starts at the number of items consumed so far.
            batch = new LocalContext(itemsSeen, new ArrayList(HALF_READ_TASK_SIZE));
        }
        if (batch.dnaqs.size() == 0) {
            return;
        }
        for (GlobalContext context : collection) {
            blockingThreadPoolExecutor.blockingExecute(new Task(context, batch));
        }
    }

    /**
     * Runs {@link #processSource} over every source with one {@link GlobalContext} per prefix,
     * all sharing the {@code hm} map, then waits for the executor to finish.
     *
     * @param list      prefixes; each yields one context per source
     * @param sourceArr sources to process; the array index is passed to each context
     * @return the shared {@code hm} map
     */
    Map<Long, Integer> calcPrefix(List<String> list, Source<DnaQ>[] sourceArr) throws IOException, InterruptedException {
        // Constructed and discarded, exactly as in the original code.
        new Timer();
        BlockingThreadPoolExecutor executor = new BlockingThreadPoolExecutor(STAT_WORK_THREADS_NUMBER, STAT_WORK_THREADS_NUMBER * 2);
        for (int sourceIndex = 0; sourceIndex < sourceArr.length; sourceIndex++) {
            List<GlobalContext> contexts = new ArrayList<>(list.size());
            for (String prefix : list) {
                contexts.add(new GlobalContext(this.hm, Util.getPrefixMask(prefix, LEN), Util.getPrefixCode(prefix, LEN), sourceIndex, LEN, MASK));
            }
            processSource(sourceArr[sourceIndex], executor, contexts);
        }
        executor.shutdownAndAwaitTermination();
        return this.hm;
    }

    /**
     * Parses a text file of whitespace-separated {@code <long> <int>} pairs, one pair per line.
     * Lines containing no tokens are skipped. Later occurrences of a key replace earlier ones.
     *
     * @param file file to read
     * @return a new map with one entry per distinct key
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if a token is not a valid number
     * @throws java.util.NoSuchElementException if a non-blank line has fewer than two tokens
     */
    HashMap<Long, Integer> readHashMapLongInteger(File file) throws IOException {
        HashMap<Long, Integer> result = new HashMap<>();
        // try-with-resources: the reader used to be left open, even on the normal return path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line);
                if (!tokens.hasMoreTokens()) {
                    continue; // blank line: nothing to parse
                }
                long key = Long.parseLong(tokens.nextToken());
                int value = Integer.parseInt(tokens.nextToken());
                result.put(key, value);
            }
        }
        return result;
    }

    /**
     * Derives a threshold from the distribution of the values in {@code map}.
     *
     * <p>First builds a histogram (scaled value -> number of entries having that value) and
     * prints the sum of all scaled values to stderr. Then walks the histogram in ascending key
     * order looking for a bucket that is smaller than its successor and not larger than its
     * predecessor (or has no predecessor), and records that bucket's key; when the bucket has
     * no predecessor, 0 is recorded instead.
     *
     * @param map key -> count; only the values are used
     * @return the recorded threshold
     * @throws RuntimeException ("Bad map") if no bucket satisfied the condition
     */
    private int calcThreshold(Map<Long, Integer> map) {
        // Histogram: scaled count -> how many map entries have that count. Sorted by count.
        TreeMap treeMap = new TreeMap();
        // Running sum of all scaled counts; only printed below.
        long j = 0;
        Iterator<Map.Entry<Long, Integer>> it2 = map.entrySet().iterator();
        while (it2.hasNext()) {
            int intValue = it2.next().getValue().intValue() * SCALE;
            if (!treeMap.containsKey(Integer.valueOf(intValue))) {
                treeMap.put(Integer.valueOf(intValue), 0L);
            }
            treeMap.put(Integer.valueOf(intValue), Long.valueOf(((Long) treeMap.get(Integer.valueOf(intValue))).longValue() + 1));
            j += intValue;
        }
        System.err.println(j);
        // j2 = bucket size two steps back, j3 = previous bucket size, i = previous bucket key;
        // -1 means "not seen yet". i2 is the result, -1 until a match is found.
        long j2 = -1;
        long j3 = -1;
        int i = -1;
        int i2 = -1;
        Iterator it3 = treeMap.entrySet().iterator();
        while (true) {
            if (!it3.hasNext()) {
                break;
            }
            Map.Entry entry = (Map.Entry) it3.next();
            long longValue = ((Long) entry.getValue()).longValue();
            int intValue2 = ((Integer) entry.getKey()).intValue();
            // Match: the previous bucket is non-empty, strictly smaller than the current one,
            // and not larger than the one before it (or there is none before it).
            if ((j2 == -1 || j2 >= j3) && j3 < longValue && j3 > 0) {
                i2 = j2 == -1 ? 0 : i;
                // NOTE(review): there is no break here and the j2/j3/i window is not advanced
                // on a match, so the scan continues with stale state and a later match can
                // overwrite i2. This looks like a lost "break" (take the first match) - confirm
                // against the original sources before relying on this method.
            } else {
                j2 = j3;
                j3 = longValue;
                i = intValue2;
            }
        }
        if (i2 == -1) {
            throw new RuntimeException("Bad map");
        }
        return i2;
    }

    /**
     * Starts {@code CLEAN_WORK_THREADS_NUMBER} {@link NewCleanWorker}s on their own threads,
     * waits for all of them via a latch, and merges every worker's results into
     * {@code long2LongMap}.
     *
     * <p>In verbose mode the contents of {@code hm} and the configured threshold are printed to
     * stdout, and the worker count is forced to 1 (this overwrites the static
     * {@code CLEAN_WORK_THREADS_NUMBER}).
     *
     * <p>If the calling thread is interrupted while waiting, the workers are interrupted, the
     * caller's interrupt flag is restored, and whatever results the workers expose at that
     * moment are still merged.
     *
     * @param longSet        set handed to every worker
     * @param longCollection collection handed to the {@link CleanDispatcher}
     * @param long2LongMap   output map; receives {@code getResults()} of every worker
     */
    private void clean(LongSet longSet, LongCollection longCollection, Long2LongMap long2LongMap) {
        if (verbose) {
            System.out.println("<verbose>");
            System.out.println("infos before cleaning (" + this.hm.size() + " items):");
            for (Map.Entry<Long, Integer> entry : this.hm.entrySet()) {
                System.out.println(KmerUtils.kmer2String(entry.getKey().longValue(), LEN) + " " + entry.getValue());
            }
            System.out.println("</verbose>");
            System.out.println("<verbose>");
            System.out.println("Using threshold = " + this.MAXIMAL_BAD_FREQUENCY);
            System.out.println("</verbose>");
            // Verbose runs use a single worker.
            CLEAN_WORK_THREADS_NUMBER = 1;
        }

        CleanDispatcher dispatcher = new CleanDispatcher(longCollection, DISPATCH_WORK_RANGE_SIZE);
        NewCleanWorker[] workers = new NewCleanWorker[CLEAN_WORK_THREADS_NUMBER];
        CountDownLatch allDone = new CountDownLatch(workers.length);
        for (int i = 0; i < workers.length; i++) {
            System.err.println("Starting worker #" + i + "...");
            workers[i] = new NewCleanWorker(dispatcher, longSet, LEN, allDone, this.MAXIMAL_SUBS_NUMBER, this.MAXIMAL_INDELS_NUMBER, verbose);
            new Thread(workers[i]).start();
        }

        try {
            allDone.await();
        } catch (InterruptedException e) {
            System.err.println("Main thread interrupted");
            for (NewCleanWorker worker : workers) {
                worker.interrupt();
            }
            // Catching InterruptedException clears the flag; restore it so callers further up
            // the stack can still observe the interruption.
            Thread.currentThread().interrupt();
        }

        for (NewCleanWorker worker : workers) {
            long2LongMap.putAll(worker.getResults());
        }
    }

    /**
     * Returns the combined size of the files {@code DIR/<prefix><str>} divided by 8,
     * one file per prefix in {@code list}.
     *
     * @param str  file-name suffix appended to every prefix
     * @param list prefixes; must not be null
     * @return total size reported by {@code FileUtils.filesSizeByNames} divided by 8
     */
    public static long kmersNumberInFiles(String str, List<String> list) throws IOException {
        ArrayList<String> paths = new ArrayList<>(list.size());
        for (String prefix : list) {
            paths.add(DIR + File.separator + prefix + str);
        }
        return FileUtils.filesSizeByNames(paths) / 8;
    }

    /**
     * Reads every big-endian 64-bit value from the files {@code DIR/<prefix><str>} and adds
     * it to {@code longCollection}. Files are processed in the order of {@code list}.
     *
     * <p>Each file is opened once, read until end of file and then closed. Previously the
     * stream was re-opened on every loop iteration and the loop had no exit, so the method
     * never returned and leaked a file handle per iteration.
     *
     * @param str            file-name suffix appended to every prefix
     * @param list           prefixes; {@code null} is treated as a single empty prefix
     * @param longCollection destination for the loaded values
     * @throws IOException if a file cannot be opened or read
     */
    public static void loadKMers(String str, List<String> list, LongCollection longCollection) throws IOException {
        List<String> effectivePrefixes = list;
        if (effectivePrefixes == null) {
            effectivePrefixes = new ArrayList<>();
            effectivePrefixes.add("");
        }
        for (String prefix : effectivePrefixes) {
            String path = DIR + File.separator + prefix + str;
            System.err.println("loading from " + path);
            try (DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(path)))) {
                while (true) {
                    longCollection.add(in.readLong());
                }
            } catch (EOFException endOfFile) {
                // Expected: readLong() signals end of file this way. Continue with the next file.
            }
        }
    }

    /**
     * Writes the keys whose value is strictly greater than {@code i} to {@code str} as
     * consecutive 8-byte big-endian longs, in the map's iteration order.
     *
     * @param map key -> count
     * @param str output file path; the file is created or truncated
     * @param i   threshold; entries with {@code value > i} are written
     * @throws IOException if the file cannot be opened or written
     */
    void dumpGoodKMers(Map<Long, Integer> map, String str, int i) throws IOException {
        // try-with-resources closes (and flushes) the stream even if a write fails.
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(str)))) {
            for (Map.Entry<Long, Integer> entry : map.entrySet()) {
                if (entry.getValue() > i) {
                    out.writeLong(entry.getKey());
                }
            }
        }
    }

    /**
     * Writes the keys whose value is less than or equal to {@code i} to {@code str} as
     * consecutive 8-byte big-endian longs, in the map's iteration order.
     *
     * @param map key -> count
     * @param str output file path; the file is created or truncated
     * @param i   threshold; entries with {@code value <= i} are written
     * @throws IOException if the file cannot be opened or written
     */
    void dumpBadKMers(Map<Long, Integer> map, String str, int i) throws IOException {
        // try-with-resources closes (and flushes) the stream even if a write fails.
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(str)))) {
            for (Map.Entry<Long, Integer> entry : map.entrySet()) {
                if (entry.getValue() <= i) {
                    out.writeLong(entry.getKey());
                }
            }
        }
    }

    /**
     * Writes every entry with a positive value to {@code str} as a text line
     * {@code "<key> <value>"}, in the map's iteration order. Also prints
     * "Print Vlad Stat" to stderr before writing.
     *
     * @param map key -> count
     * @param str output file path; the file is created or truncated
     * @param i   unused; kept for signature compatibility
     * @throws IOException if the file cannot be opened
     */
    void dumpGoodKMersVlad(Map<Long, Integer> map, String str, int i) throws IOException {
        System.err.println("Print Vlad Stat");
        // try-with-resources closes the writer even if iteration over the map fails.
        try (PrintWriter out = new PrintWriter(str)) {
            for (Map.Entry<Long, Integer> entry : map.entrySet()) {
                int count = entry.getValue();
                if (count > 0) {
                    out.println(entry.getKey() + " " + count);
                }
            }
        }
    }

    /**
     * Writes a histogram of the values in {@code map} to
     * {@code BUCKETS/<prefixes joined by "_">.stat}: one line {@code "<scaled value> <entries>"}
     * per distinct value, in ascending order of the value. Values are multiplied by
     * {@code SCALE} before counting.
     *
     * @param map key -> count; only the values are used
     * @throws IOException if the output file cannot be opened
     */
    void stat(Map<Long, Integer> map) throws IOException {
        // A TreeMap keeps the histogram sorted, so no separate sorting copy is needed.
        TreeMap<Integer, Integer> histogram = new TreeMap<>();
        for (Integer count : map.values()) {
            int scaled = count * SCALE;
            Integer seen = histogram.get(scaled);
            histogram.put(scaled, seen == null ? 1 : seen + 1);
        }
        String path = BUCKETS + File.separator + Util.join(prefixes, "_") + ".stat";
        // try-with-resources closes the writer even if writing is cut short by an exception.
        try (PrintWriter out = new PrintWriter(path)) {
            for (Map.Entry<Integer, Integer> bucket : histogram.entrySet()) {
                out.println(bucket.getKey() + " " + bucket.getValue());
            }
        }
    }

    /**
     * Runs the instance-level {@link #runAndGetResults()} and discards the returned map.
     */
    private void run() throws IOException, ClassNotFoundException, InterruptedException {
        runAndGetResults();
    }

    /**
     * Loads the ".good" and ".bad" k-mer files for the configured prefixes, runs
     * {@code clean()} on them, writes the resulting fixes to
     * {@code BUCKETS<prefixes joined by "_">.fixes} and returns them. Progress is logged
     * to stderr.
     *
     * @return the map filled by {@code clean()}
     */
    public Map<Long, Long> runAndGetResults() throws IOException, ClassNotFoundException, InterruptedException {
        System.err.println("prefixes = " + prefixes);

        // Collections are pre-sized from the on-disk size of the input files.
        int goodCapacity = (int) kmersNumberInFiles(".good", prefixes);
        LongSet goodKmers = new LongOpenHashSet(goodCapacity);
        loadKMers(".good", prefixes, goodKmers);
        System.err.println("good kmers: " + goodKmers.size());

        int badCapacity = (int) kmersNumberInFiles(".bad", prefixes);
        LongArrayList badKmers = new LongArrayList(badCapacity);
        loadKMers(".bad", prefixes, badKmers);
        System.err.println("bad kmers: " + badKmers.size());

        Long2LongMap fixes = new Long2LongOpenHashMap();
        System.err.println("clean()...");
        clean(goodKmers, badKmers, fixes);
        System.err.println("allFixes.size() = " + fixes.size());

        String fixesPath = BUCKETS + Util.join(prefixes, "_") + ".fixes";
        dumpFixes(fixes, fixesPath);
        return fixes;
    }

    /**
     * Writes every entry of {@code map} to {@code str} as two consecutive 8-byte big-endian
     * longs (key, then value), in the map's iteration order. Logs the start and the end of
     * the dump to stderr.
     *
     * @param map fixes to write
     * @param str output file path; the file is created or truncated
     * @throws IOException if the file cannot be opened or written
     */
    public void dumpFixes(Map<Long, Long> map, String str) throws IOException {
        System.err.println("dumping fixes to " + str);
        // try-with-resources closes (and flushes) the stream even if a write fails.
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(str)))) {
            for (Map.Entry<Long, Long> fix : map.entrySet()) {
                out.writeLong(fix.getKey());
                out.writeLong(fix.getValue());
            }
        }
        System.err.println("done");
    }

    public Cleaner(String str, int i, int i2, int i3) throws IOException, ConfigurationException {
        PropertiesConfiguration propertiesConfiguration = new PropertiesConfiguration(str);
        STAT_WORK_THREADS_NUMBER = propertiesConfiguration.getInt("available_processors");
        CLEAN_WORK_THREADS_NUMBER = propertiesConfiguration.getInt("available_processors");
        this.MAXIMAL_SUBS_NUMBER = i;
        this.MAXIMAL_INDELS_NUMBER = i2;
        this.MAXIMAL_BAD_FREQUENCY = i3;
    }

    /**
     * Parses the command line into the static configuration of this class: {@code verbose},
     * {@code filenames}, {@code prefixes}, {@code DIR}, {@code DATA}, {@code BUCKETS},
     * {@code LEN} and {@code MASK}. Also creates the buckets directory.
     *
     * <p>Prints the usage text to stderr and exits the JVM with status 666 if fewer than
     * 8 arguments are given or no {@code "--"} separator is found at index 6 or later.
     * Arguments after the separator are appended to {@code prefixes}; if that list is still
     * empty afterwards, a single empty prefix is added.
     *
     * @param strArr command-line arguments
     * @throws NumberFormatException if {@code strArr[2]} is not an integer
     */
    public static void prepare(String[] strArr) throws Exception {
        final String usage = "Usage: FastqCleaner <config> <workdir> <LEN> <maximal_sub_number> <maximal_del_nuber> <maximal_bad_frequency> <verbose> <binq-file>+ -- <prefix>+";
        final String separator = "--";

        if (strArr.length < 8) {
            System.err.println(usage);
            System.exit(666);
        }
        // NOTE(review): the usage text lists <verbose> as the 7th argument (index 6), but the
        // flag is read from index 5 and file names start at index 6. Index 5 is also parsed as
        // an int by runAndGetResults(String[]), so verbose cannot be enabled through that entry
        // point. Left unchanged to stay compatible with existing callers - confirm the intended
        // layout.
        verbose = strArr[5].equals("true");

        int separatorIndex = 6;
        while (separatorIndex < strArr.length && !separator.equals(strArr[separatorIndex])) {
            separatorIndex++;
        }
        if (separatorIndex == strArr.length) {
            System.err.println(usage);
            System.exit(666);
        }

        filenames = Arrays.copyOfRange(strArr, 6, separatorIndex);
        for (int p = separatorIndex + 1; p < strArr.length; p++) {
            prefixes.add(strArr[p]);
        }
        if (prefixes.isEmpty()) {
            prefixes.add("");
        }

        DIR = strArr[1];
        DATA = DIR + File.separator;
        BUCKETS = DIR + File.separator + "buckets" + File.separator;
        new File(BUCKETS).mkdir();

        LEN = Integer.parseInt(strArr[2]);
        // The shift must be done on a long: an int shift reduces the distance modulo 32 and
        // produces a truncated mask for LEN >= 16.
        MASK = (1L << (2 * LEN)) - 1;
    }

    /**
     * Command-line style entry point: applies {@link #prepare(String[])}, builds a
     * {@code Cleaner} from arguments 0 (config path) and 3-5 (parsed as ints), and returns
     * the result of its {@link #runAndGetResults()}.
     *
     * @param strArr command-line arguments, in the layout expected by {@code prepare}
     * @return the fixes produced by the run
     */
    public static Map<Long, Long> runAndGetResults(String[] strArr) throws Exception {
        prepare(strArr);
        String configPath = strArr[0];
        int maxSubs = Integer.parseInt(strArr[3]);
        int maxIndels = Integer.parseInt(strArr[4]);
        int maxBadFrequency = Integer.parseInt(strArr[5]);
        Cleaner cleaner = new Cleaner(configPath, maxSubs, maxIndels, maxBadFrequency);
        return cleaner.runAndGetResults();
    }

    /**
     * Program entry point; delegates to {@link #runAndGetResults(String[])} and ignores
     * the returned map.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) throws Exception {
        runAndGetResults(strArr);
    }
}
