package ru.ifmo.genetics.tools.ec;

import it.unimi.dsi.fastutil.longs.Long2ByteMap;
import it.unimi.dsi.fastutil.longs.Long2ByteOpenHashMap;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Random;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import ru.ifmo.genetics.dna.DnaQ;
import ru.ifmo.genetics.io.readers.BinqReader;
import ru.ifmo.genetics.io.sources.Source;
import ru.ifmo.genetics.tools.Util;
import ru.ifmo.genetics.utils.Misc;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileMVParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.IntParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.LongParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.ParameterBuilder;

/* loaded from: input_file:ru/ifmo/genetics/tools/ec/KmerStatisticsGatherer.class */
/**
 * Tool that counts k-mer occurrences in the input reads and splits the k-mers into
 * "good" (count above a threshold) and "bad" (count at or below the threshold) sets.
 *
 * <p>The run has two phases:
 * <ol>
 *   <li>A sampling pass ({@link #load(File[], long)}) counts k-mers while keeping the map
 *       below {@code max-size} entries by repeatedly narrowing the accepted k-mer prefix
 *       ({@link #trim}). The resulting count histogram is used to pick the threshold when
 *       {@code maximal-bad-frequence} is not given.</li>
 *   <li>For every prefix of the length reached during sampling, all k-mers with that prefix
 *       are counted again ({@link #load(File[], long, long)}) and written to
 *       {@code <prefix>.good} / {@code <prefix>.bad} files in the output directory.</li>
 * </ol>
 *
 * <p>K-mers are packed two bits per nucleotide into a {@code long}; counts are stored in a
 * {@code byte} and saturate at {@link Byte#MAX_VALUE}.
 */
public class KmerStatisticsGatherer extends Tool {
    public static final String NAME = "kmer-statistics-gatherer";
    public static final String DESCRIPTION = "differentiates good kmers from bad ones";
    public final Parameter<Integer> maximalBadFrequence;
    public final Parameter<Integer> k;
    public final Parameter<Long> maxSize;
    public final Parameter<File[]> inputFiles;
    public final Parameter<File> outputDir;
    public final Parameter<File> prefixesFile;

    /** Bits of the currently accepted k-mer prefix, aligned to the top of the packed k-mer. */
    private long prefix;
    /** Mask selecting the prefix bits of a packed k-mer. */
    private long prefixMask;
    /** Number of nucleotides currently fixed in {@link #prefix}. */
    private int prefixLength;
    /** K-mer length (value of parameter {@code k}). */
    private int LEN;
    /** Mask with the low {@code 2 * LEN} bits set. */
    private long MASK;
    /** Map size at which the sampling pass narrows the prefix. */
    private long MAX_SIZE;
    /** Source of the random nucleotide appended to the prefix on each trim. */
    private final Random r;

    /**
     * Runs both phases described in the class comment.
     *
     * @throws ExecutionFailedException if writing the prefixes file or any dump file fails
     */
    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void runImpl() throws ExecutionFailedException {
        this.LEN = this.k.get().intValue();
        // The shift must be done on a long: with an int operand only the low 5 bits of the
        // shift distance are used, which yields a wrong mask for k >= 16.
        this.MASK = (1L << (2 * this.LEN)) - 1;
        this.MAX_SIZE = this.maxSize.get().longValue();
        this.outputDir.get().mkdir();
        info("MAXIMAL_SIZE = " + this.MAX_SIZE);

        Long2ByteMap sample = load(this.inputFiles.get(), this.MAX_SIZE);
        info("loaded " + sample.size() + " kmers");

        int threshold = chooseThreshold(sample);
        info("threshold = " + threshold);

        long totalGood = 0;
        long totalBad = 0;
        // Number of distinct prefixes of the length reached by the sampling pass.
        long prefixCount = 1L << (2 * this.prefixLength);
        String outputPath = this.outputDir.get().getAbsolutePath();
        try (PrintWriter prefixesOut = new PrintWriter(this.prefixesFile.get())) {
            for (long prefixIndex = 0; prefixIndex < prefixCount; prefixIndex++) {
                // Align the prefix with the most significant nucleotides of the packed k-mer.
                long alignedPrefix = prefixIndex << (2 * (this.LEN - this.prefixLength));
                String prefixName = Util.getString(prefixIndex, this.prefixLength);
                prefixesOut.println(prefixName);
                info("prefix = \"" + prefixName + "\"");

                Long2ByteMap counts = load(this.inputFiles.get(), alignedPrefix, this.prefixMask);
                info("loaded " + counts.size() + " kmers");

                String goodPath = outputPath + File.separator + prefixName + ".good";
                long goodCount = dumpGoodKmers(counts, threshold, goodPath);
                info("dumped " + goodCount + " kmers to " + goodPath);
                totalGood += goodCount;

                String badPath = outputPath + File.separator + prefixName + ".bad";
                long badCount = dumpBadKmers(counts, threshold, badPath);
                info("dumped " + badCount + " kmers to " + badPath);
                totalBad += badCount;
            }
        } catch (IOException e) {
            throw new ExecutionFailedException(e);
        }
        info("total good kmers: " + totalGood);
        info("total bad kmers:  " + totalBad);
    }

    /**
     * Returns the configured {@code maximal-bad-frequence}, or, when it is absent (or -1),
     * the first count {@code i} in {@code [2, 254]} at which the count histogram has a local
     * minimum ({@code h[i-1] >= h[i] < h[i+1]}). Returns -1 if no such count exists.
     */
    private int chooseThreshold(Long2ByteMap counts) {
        Integer configured = this.maximalBadFrequence.get();
        int threshold = configured == null ? -1 : configured.intValue();
        if (threshold != -1) {
            return threshold;
        }
        int[] histogram = new int[256];
        for (byte count : counts.values()) {
            histogram[count & 0xFF]++;
        }
        for (int i = 2; i < 255; i++) {
            if (histogram[i - 1] >= histogram[i] && histogram[i] < histogram[i + 1]) {
                return i;
            }
        }
        return -1;
    }

    /** Nothing to clean up. */
    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void clean() {
    }

    public static void main(String[] args) {
        new KmerStatisticsGatherer().mainImpl(args);
    }

    public KmerStatisticsGatherer() {
        super(NAME, DESCRIPTION);
        this.maximalBadFrequence = addParameter(new IntParameterBuilder("maximal-bad-frequence").optional().withShortOpt("b").withDescription("maximal frequency for a kmer to be assumed erroneous").create());
        this.k = addParameter(new IntParameterBuilder("k").mandatory().withShortOpt("k").withDescription("k").create());
        // The default is passed as a plain Long; a Long can never be cast to ParameterBuilder.
        this.maxSize = addParameter(new LongParameterBuilder("max-size").optional().withDescription("maximal hashset size").withDefaultValue(Long.valueOf(Misc.availableMemory() / 42)).create());
        this.inputFiles = addParameter(new FileMVParameterBuilder("reads").mandatory().withDescription("list of input files").create());
        this.outputDir = addParameter(new FileParameterBuilder("output-dir").withShortOpt("o").withDescription("directory to place output files").withDefaultValue(this.workDir.append("kmers")).create());
        this.prefixesFile = addParameter(new FileParameterBuilder("prefixes-file").withDescription("file with prefixes").withDefaultValue(this.workDir.append("prefixes")).create());
        this.r = new Random();
    }

    /** Wraps every input file in a {@link BinqReader}. */
    @SuppressWarnings("rawtypes")
    private static Source[] openSources(File[] files) {
        Source[] sources = new Source[files.length];
        for (int i = 0; i < files.length; i++) {
            sources[i] = new BinqReader(files[i]);
        }
        return sources;
    }

    /**
     * Sampling pass: counts k-mers of all reads (and their reverse complements) that match
     * the current prefix, calling {@link #trim} after every read so the map stays below
     * {@code maxEntries}. Updates {@link #prefix}, {@link #prefixMask} and
     * {@link #prefixLength} as a side effect of trimming.
     *
     * @param files      input read files
     * @param maxEntries map size at which the prefix is narrowed
     * @return counts of the k-mers matching the final prefix
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    Long2ByteMap load(File[] files, long maxEntries) {
        Source[] sources = openSources(files);
        Long2ByteMap counts = new Long2ByteOpenHashMap();
        int fileIndex = 0;
        for (Source<DnaQ> source : sources) {
            for (DnaQ read : source) {
                add(read, counts, this.prefix, this.prefixMask);
                add(read.reverseComplement(), counts, this.prefix, this.prefixMask);
                long sizeBefore = counts.size();
                counts = trim(counts, maxEntries);
                if (counts.size() != sizeBefore) {
                    info("trimmed from " + sizeBefore + " -> " + counts.size() + " (prefix = " + Util.getString(this.prefix >> (2 * (this.LEN - this.prefixLength)), this.prefixLength) + ")");
                }
            }
            info(files[fileIndex].getAbsolutePath() + " processed; size = " + counts.size());
            fileIndex++;
        }
        return counts;
    }

    /**
     * Counts the k-mers of all reads (and their reverse complements) whose bits selected by
     * {@code mask} equal {@code wantedPrefix}. No trimming is performed.
     *
     * @param files        input read files
     * @param wantedPrefix prefix bits a k-mer must have under {@code mask}
     * @param mask         mask selecting the prefix bits
     * @return counts of the matching k-mers
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    Long2ByteMap load(File[] files, long wantedPrefix, long mask) {
        Source[] sources = openSources(files);
        Long2ByteOpenHashMap counts = new Long2ByteOpenHashMap();
        for (Source<DnaQ> source : sources) {
            for (DnaQ read : source) {
                add(read, counts, wantedPrefix, mask);
                add(read.reverseComplement(), counts, wantedPrefix, mask);
            }
        }
        return counts;
    }

    /**
     * Writes every k-mer whose count is strictly greater than {@code threshold} to
     * {@code path}, one {@code long} per k-mer.
     *
     * @return number of k-mers written
     * @throws IOException if the file cannot be created or written
     */
    long dumpGoodKmers(Long2ByteMap counts, int threshold, String path) throws IOException {
        return dumpKmers(counts, threshold, path, true);
    }

    /**
     * Writes every k-mer whose count is less than or equal to {@code threshold} to
     * {@code path}, one {@code long} per k-mer.
     *
     * @return number of k-mers written
     * @throws IOException if the file cannot be created or written
     */
    long dumpBadKmers(Long2ByteMap counts, int threshold, String path) throws IOException {
        return dumpKmers(counts, threshold, path, false);
    }

    /**
     * Shared implementation of the two dump methods: writes the keys whose
     * "count &gt; threshold" test equals {@code aboveThreshold}. The stream is closed even
     * when writing fails.
     */
    private static long dumpKmers(Long2ByteMap counts, int threshold, String path, boolean aboveThreshold) throws IOException {
        long written = 0;
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path)))) {
            for (Long2ByteMap.Entry entry : counts.long2ByteEntrySet()) {
                if ((entry.getByteValue() > threshold) == aboveThreshold) {
                    out.writeLong(entry.getLongKey());
                    written++;
                }
            }
        }
        return written;
    }

    /**
     * Slides a window of {@link #LEN} nucleotides over {@code dnaQ} and increments the count
     * of every packed k-mer whose bits under {@code mask} equal {@code wantedPrefix}.
     * Counts stop growing at {@link Byte#MAX_VALUE}. Reads shorter than {@link #LEN} are
     * ignored.
     */
    void add(DnaQ dnaQ, Long2ByteMap counts, long wantedPrefix, long mask) {
        int readLength = dnaQ.length();
        if (readLength < this.LEN) {
            return;
        }
        // Pre-load the first LEN - 1 nucleotides; the main loop appends one more per step.
        long kmer = 0;
        for (int i = 0; i < this.LEN - 1; i++) {
            kmer = (kmer << 2) | dnaQ.nucAt(i);
        }
        for (int i = this.LEN - 1; i < readLength; i++) {
            // Drop the oldest nucleotide, shift, and append the new one.
            kmer = ((kmer & (this.MASK >> 2)) << 2) | dnaQ.nucAt(i);
            if ((kmer & mask) != wantedPrefix) {
                continue;
            }
            byte current = counts.containsKey(kmer) ? counts.get(kmer) : 0;
            if (current != Byte.MAX_VALUE) {
                counts.put(kmer, (byte) (current + 1));
            }
        }
    }

    /**
     * If {@code counts} has reached {@code maxEntries}, extends the accepted prefix by one
     * randomly chosen nucleotide and returns a new map holding only the entries that match
     * the longer prefix; otherwise returns {@code counts} unchanged.
     *
     * <p>When the prefix already covers the whole k-mer it cannot be extended, so the map
     * is returned unchanged in that case as well.
     */
    Long2ByteMap trim(Long2ByteMap counts, long maxEntries) {
        if (counts.size() < maxEntries) {
            return counts;
        }
        int freeNucleotides = this.LEN - this.prefixLength;
        if (freeNucleotides <= 0) {
            // A further extension would need a negative shift distance.
            return counts;
        }
        int shift = 2 * (freeNucleotides - 1);
        // Shift longs, not ints: the distance reaches 32 and more for k >= 17.
        this.prefix |= ((long) this.r.nextInt(4)) << shift;
        this.prefixMask |= 3L << shift;
        this.prefixLength++;

        Long2ByteOpenHashMap kept = new Long2ByteOpenHashMap();
        for (Long2ByteMap.Entry entry : counts.long2ByteEntrySet()) {
            long kmer = entry.getLongKey();
            if ((kmer & this.prefixMask) == this.prefix) {
                kept.put(kmer, entry.getByteValue());
            }
        }
        return kept;
    }
}
