package ru.ifmo.genetics.tools.transcriptome;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.lang.mutable.MutableLong;
import ru.ifmo.genetics.statistics.Timer;
import ru.ifmo.genetics.structures.set.BigLongsHashSet;
import ru.ifmo.genetics.structures.set.LongsHashSet;
import ru.ifmo.genetics.transcriptome.CompactDeBruijnGraphWF;
import ru.ifmo.genetics.utils.KmerUtils;
import ru.ifmo.genetics.utils.Misc;
import ru.ifmo.genetics.utils.iterators.IterableIterator;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.IntParameterBuilder;

/* loaded from: input_file:ru/ifmo/genetics/tools/transcriptome/SmallComponentsAssembler.class */
/**
 * Tool that processes every file found in the {@code file-prefix} directory: each file is
 * read as a list of (edge, frequency) records into a {@link CompactDeBruijnGraphWF}, short
 * dead-end chains are masked out, and the remaining paths are written as FASTA records to
 * {@code <workDir>/transcripts/<input name>tr.fasta}. A Graphviz drawing of the component is
 * written next to it (and removed again for small components, see {@link #visGraph}).
 *
 * <p>NOTE(review): the terms "vertex", "edge" and "reverse complement" below follow the names
 * of the {@code CompactDeBruijnGraphWF} API; the exact bit layout of the packed longs is
 * defined by that class and is not visible here.
 */
public class SmallComponentsAssembler extends Tool {
    public static final String NAME = "small-components-assembler";
    public static final String DESCRIPTION = "assembles obvious transcripts";

    /** Size of one record in an input file: an 8-byte long followed by a 4-byte int. */
    private static final int EDGE_RECORD_BYTES = 12;
    /** Input files holding more records than this are skipped as "big components". */
    private static final long MAX_COMPONENT_EDGES = 1048576;
    /** Per-record factor for the size argument passed to the graph constructor. */
    private static final int GRAPH_BYTES_PER_EDGE = 24;
    /** Upper bound for the graph size argument, as a fraction of {@code Misc.availableMemory()}. */
    private static final double MEMORY_FRACTION = 0.85d;
    /** {@link #dfs} gives up once its path has been rewound more than this many times. */
    private static final int MAX_PATH_RESTARTS = 30;
    /** {@link #visGraph} deletes drawings that contain fewer edges than this. */
    private static final int MIN_DRAWN_EDGES = 150;

    public final Parameter<File> filePrefix;
    public final Parameter<Integer> kParameter;
    /** Minimal allowed ratio (in either direction) between the frequencies of adjacent vertices. */
    private final float minNextFreq = 0.25f;
    private int k;
    private int minLenOfGen;
    /** Number of input files processed so far. */
    int total;
    /** Number of FASTA records written so far; used as the record id. */
    int totalTranscripts;

    public SmallComponentsAssembler() {
        super(NAME, DESCRIPTION);
        this.filePrefix = addParameter(new FileParameterBuilder("file-prefix").mandatory().withDescription("prefix of files with edges").create());
        this.kParameter = addParameter(new IntParameterBuilder("k").mandatory().withShortOpt("k").withDescription("k-mer size (vertex, not edge)").create());
        // minNextFreq is final and initialised at its declaration; total and
        // totalTranscripts start at the default value 0.
    }

    public static void main(String[] strArr) {
        new SmallComponentsAssembler().mainImpl(strArr);
    }

    /**
     * Processes every file of the {@code file-prefix} directory in turn. A file that fails
     * with an {@link IOException} is reported through {@code warn} and skipped; the remaining
     * files are still processed.
     *
     * @throws IllegalArgumentException if {@code file-prefix} cannot be listed as a directory
     */
    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void runImpl() throws ExecutionFailedException {
        this.k = this.kParameter.get().intValue();
        this.minLenOfGen = this.k + 1;
        info("Min len of gen = " + this.minLenOfGen);
        Timer timer = new Timer();
        info("Assembling transcripts...");

        File outputDir = new File(this.workDir.get().getAbsolutePath(), "transcripts");
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            warn("Can't create output directory " + outputDir.getAbsolutePath());
        }

        File inputDir = this.filePrefix.get();
        File[] inputs = inputDir.listFiles();
        if (inputs == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IllegalArgumentException("Can't list files in " + inputDir.getAbsolutePath());
        }

        for (File input : inputs) {
            try {
                assembleTranscripts(buildGraph(input), new File(outputDir, input.getName()));
                this.total++;
                if (this.total % 1000 == 0) {
                    info(this.total + " transcripts assembled");
                }
            } catch (IOException e) {
                // Best effort: one unreadable component must not stop the whole run.
                warn("Failed to process " + input.getName() + ": " + e);
            }
        }
        info("Assembling transcripts done, it took " + timer);
    }

    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void cleanImpl() {
    }

    /**
     * Masks dead-end chains of the graph, then writes the drawing and the FASTA file for it.
     * Does nothing when {@code graph} is {@code null} (the component was skipped by
     * {@link #buildGraph}).
     *
     * @param graph  component graph, may be {@code null}
     * @param output base output file; the drawing uses this path, the FASTA file this path
     *               with {@code "tr.fasta"} appended
     */
    private void assembleTranscripts(CompactDeBruijnGraphWF graph, File output) throws IOException {
        if (graph == null) {
            return;
        }
        BigLongsHashSet removedEdges = new BigLongsHashSet(graph.getMemSize());
        List<Long> starts = makeSimple(graph, removedEdges);
        if (starts.isEmpty()) {
            System.err.println("Bad = " + output.getName() + " Total = " + this.total);
            return;
        }
        String outputPath = output.getAbsolutePath();
        visGraph(starts.get(0).longValue(), outputPath, graph, removedEdges);
        getTranscripts(starts, outputPath, graph, removedEdges);
    }

    /**
     * Walks the graph depth-first from {@code j} with an explicit stack and prints one FASTA
     * record for every path end that lies at least {@code minLenOfGen} vertices after the last
     * rewind or branching point.
     *
     * @param j            start vertex
     * @param printWriter  destination of the FASTA records
     * @param longsHashSet masked edges, which are never followed
     * @param set          receives a key for every vertex taken from the stack
     * @return {@code false} if the path was rewound more than {@link #MAX_PATH_RESTARTS}
     *         times and the walk was abandoned, {@code true} otherwise
     */
    private boolean dfs(long j, PrintWriter printWriter, CompactDeBruijnGraphWF compactDeBruijnGraphWF, LongsHashSet longsHashSet, Set<Long> set) {
        Stack<Long> vertexStack = new Stack<Long>();
        // Path length at the moment the vertex at the same stack position was pushed.
        Stack<Integer> depthStack = new Stack<Integer>();
        vertexStack.push(j);
        depthStack.push(0);

        StringBuilder path = new StringBuilder();
        int pathLength = 0;
        int lengthSinceBranch = 0;
        int pass = 1;
        // Pass number in which each vertex was last appended to the path.
        HashMap<Long, Integer> lastPass = new HashMap<Long, Integer>();

        while (!vertexStack.isEmpty()) {
            long vertex = vertexStack.pop();
            int depth = depthStack.pop();
            set.add(Math.min(vertex, compactDeBruijnGraphWF.reverseComplementEdge(vertex) >> 2));

            if (pathLength != depth) {
                // The popped vertex continues an earlier prefix: rewind the path to it.
                path.delete(depth, path.length());
                lengthSinceBranch = 0;
                pathLength = depth;
                pass++;
            }

            String vertexText = KmerUtils.kmer2String(vertex, this.k);
            path.append(vertexText.charAt(0));
            pathLength++;
            lengthSinceBranch++;
            lastPass.put(vertex, pass);

            int vertexFreq = getFreq(vertex, compactDeBruijnGraphWF, longsHashSet);
            int followed = 0;
            for (long edge : compactDeBruijnGraphWF.outcomeEdges(vertex)) {
                if (longsHashSet.contains(Math.min(edge, compactDeBruijnGraphWF.reverseComplementEdge(edge)))) {
                    continue;
                }
                long target = edge & compactDeBruijnGraphWF.vertexMask;
                Integer seenInPass = lastPass.get(target);
                if (seenInPass != null && seenInPass >= pass) {
                    // Already on the path during the current pass.
                    continue;
                }
                if (isFrequencyBalanced(vertexFreq, getFreq(target, compactDeBruijnGraphWF, longsHashSet))) {
                    followed++;
                    vertexStack.push(target);
                    depthStack.push(pathLength);
                }
            }

            if (followed > 1) {
                lengthSinceBranch = 0;
            }
            if (followed == 0 && lengthSinceBranch >= this.minLenOfGen) {
                this.totalTranscripts++;
                printWriter.println(">" + this.totalTranscripts + " len=" + path.length());
                printWriter.println(path.toString() + vertexText.substring(1));
            }
            if (pass > MAX_PATH_RESTARTS) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether two vertex frequencies are within a factor of {@code 1 / minNextFreq} of
     * each other. The ratio is computed in floating point: integer division would truncate it
     * and reject every successor that is more frequent than the current vertex. A zero
     * frequency on either side yields {@code false}.
     */
    private boolean isFrequencyBalanced(int fromFreq, int toFreq) {
        float ratio = (float) fromFreq / toFreq;
        return Math.min(ratio, 1.0f / ratio) > this.minNextFreq;
    }

    /**
     * Writes {@code <str>tr.fasta} by running {@link #dfs} from every start vertex of
     * {@code list} that an earlier walk has not reached yet. If any walk is abandoned the
     * FASTA file is deleted.
     */
    private void getTranscripts(List<Long> list, String str, CompactDeBruijnGraphWF compactDeBruijnGraphWF, LongsHashSet longsHashSet) throws IOException {
        File fasta = new File(str + "tr.fasta");
        boolean abandoned = false;
        PrintWriter out = new PrintWriter(fasta);
        try {
            Set<Long> visited = new HashSet<Long>();
            for (long start : list) {
                long key = Math.min(start, compactDeBruijnGraphWF.reverseComplementEdge(start) >> 2);
                if (visited.contains(key)) {
                    continue;
                }
                if (!dfs(start, out, compactDeBruijnGraphWF, longsHashSet, visited)) {
                    abandoned = true;
                    break;
                }
            }
        } finally {
            out.close();
        }
        if (abandoned) {
            fasta.delete();
        }
    }

    /**
     * Follows the single outgoing edge of {@code j} for as long as every vertex has exactly
     * one incoming and one outgoing edge.
     *
     * @param i number of steps taken so far
     * @return {@code true} once more than {@code minLenOfGen} steps were taken; {@code false}
     *         if the chain stops earlier, in which case every edge followed is added to
     *         {@code bigLongsHashSet}
     */
    private boolean checkFrom(CompactDeBruijnGraphWF compactDeBruijnGraphWF, BigLongsHashSet bigLongsHashSet, long j, int i) {
        if (i > this.minLenOfGen) {
            return true;
        }
        long[] outgoing = compactDeBruijnGraphWF.outcomeEdges(j);
        if (compactDeBruijnGraphWF.incomeEdges(j).length != 1 || outgoing.length != 1) {
            return false;
        }
        long edge = outgoing[0];
        if (checkFrom(compactDeBruijnGraphWF, bigLongsHashSet, edge & compactDeBruijnGraphWF.vertexMask, i + 1)) {
            return true;
        }
        bigLongsHashSet.put(Math.min(edge, compactDeBruijnGraphWF.reverseComplementEdge(edge)));
        return false;
    }

    /**
     * Mirror image of {@link #checkFrom}: follows the single incoming edge of {@code j}
     * backwards under the same conditions and with the same result contract.
     */
    private boolean checkTo(CompactDeBruijnGraphWF compactDeBruijnGraphWF, BigLongsHashSet bigLongsHashSet, long j, int i) {
        if (i > this.minLenOfGen) {
            return true;
        }
        long[] incoming = compactDeBruijnGraphWF.incomeEdges(j);
        if (incoming.length != 1 || compactDeBruijnGraphWF.outcomeEdges(j).length != 1) {
            return false;
        }
        long edge = incoming[0];
        if (checkTo(compactDeBruijnGraphWF, bigLongsHashSet, edge >> 2, i + 1)) {
            return true;
        }
        bigLongsHashSet.put(Math.min(edge, compactDeBruijnGraphWF.reverseComplementEdge(edge)));
        return false;
    }

    /**
     * Scans all edges of the graph for ones that leave a vertex without incoming edges (on
     * either strand). Such a vertex is returned as a start if {@link #checkFrom} accepts the
     * chain behind the edge; otherwise the edge is added to {@code bigLongsHashSet}.
     *
     * @return start vertices, in iteration order (a vertex may occur more than once)
     */
    private List<Long> makeSimple(CompactDeBruijnGraphWF compactDeBruijnGraphWF, BigLongsHashSet bigLongsHashSet) {
        Iterator<MutableLong> edges = compactDeBruijnGraphWF.getIterator();
        List<Long> starts = new ArrayList<Long>();
        Iterator<?> it = new IterableIterator(edges).iterator();
        while (it.hasNext()) {
            long edge = ((MutableLong) it.next()).longValue();
            long reverse = compactDeBruijnGraphWF.reverseComplementEdge(edge);
            long from = edge >> 2;
            long to = edge & compactDeBruijnGraphWF.vertexMask;
            if (compactDeBruijnGraphWF.incomeEdges(reverse >>> 2).length == 0) {
                // Use the reverse-complement orientation when its source has no incoming edges.
                from = reverse >> 2;
                to = reverse & compactDeBruijnGraphWF.vertexMask;
            }
            if (compactDeBruijnGraphWF.incomeEdges(from).length != 0) {
                continue;
            }
            if (checkFrom(compactDeBruijnGraphWF, bigLongsHashSet, to, 0)) {
                starts.add(from);
            } else {
                bigLongsHashSet.put(Math.min(edge, reverse));
            }
        }
        return starts;
    }

    /**
     * Loads the records of {@code file} (a long followed by an int, repeated) into a new
     * graph. Emits warnings if the bits seen in the longs do not match the configured
     * {@code k}.
     *
     * @return the graph, or {@code null} if the file holds more than
     *         {@link #MAX_COMPONENT_EDGES} records
     * @throws IOException if the file cannot be opened or read
     */
    private CompactDeBruijnGraphWF buildGraph(File file) throws IOException {
        CompactDeBruijnGraphWF graph;
        long seenBits = 0;
        FileInputStream raw = new FileInputStream(file);
        try {
            long edgeCount = raw.getChannel().size() / EDGE_RECORD_BYTES;
            if (edgeCount > MAX_COMPONENT_EDGES) {
                System.err.println("Big component");
                return null;
            }
            long graphSize = Math.min(edgeCount * GRAPH_BYTES_PER_EDGE, (long) (Misc.availableMemory() * MEMORY_FRACTION));
            graph = new CompactDeBruijnGraphWF(this.k, graphSize);
            DataInputStream in = new DataInputStream(new BufferedInputStream(raw));
            for (long read = 0; read < edgeCount; read++) {
                long edge = in.readLong();
                seenBits |= edge;
                graph.addEdge(edge, in.readInt());
            }
        } finally {
            raw.close();
        }

        // 1L keeps the shift in 64 bits; an int shift wraps as soon as 2 * k + 2 reaches 32.
        long expectedMask = (1L << ((2 * this.k) + 2)) - 1;
        if (seenBits != expectedMask) {
            warn("k-mer size mismatch");
            warn("set: " + this.k);
            debug(String.format("kmerMask: 0x%x", seenBits));
            for (int i = 1; i < 30; i++) {
                if (seenBits == (1L << (2 * i)) - 1) {
                    warn("found: " + (i - 1));
                    break;
                }
            }
        }
        return graph;
    }

    /** Frequency of a vertex: {@link #getFreqImpl} of {@code j} plus that of its reverse complement. */
    private int getFreq(long j, CompactDeBruijnGraphWF compactDeBruijnGraphWF, LongsHashSet longsHashSet) {
        long reverse = compactDeBruijnGraphWF.reverseComplementEdge(j) >> 2;
        return getFreqImpl(j, compactDeBruijnGraphWF, longsHashSet) + getFreqImpl(reverse, compactDeBruijnGraphWF, longsHashSet);
    }

    /** Sums {@code getFreg} over the outgoing and incoming edges of {@code j} that are not masked. */
    private int getFreqImpl(long j, CompactDeBruijnGraphWF compactDeBruijnGraphWF, LongsHashSet longsHashSet) {
        return sumUnmasked(compactDeBruijnGraphWF.outcomeEdges(j), compactDeBruijnGraphWF, longsHashSet)
                + sumUnmasked(compactDeBruijnGraphWF.incomeEdges(j), compactDeBruijnGraphWF, longsHashSet);
    }

    /** Sums {@code getFreg} over the given edges, skipping those contained in {@code masked}. */
    private int sumUnmasked(long[] edges, CompactDeBruijnGraphWF graph, LongsHashSet masked) {
        int sum = 0;
        for (long edge : edges) {
            if (!masked.contains(Math.min(edge, graph.reverseComplementEdge(edge)))) {
                sum += graph.getFreg(edge);
            }
        }
        return sum;
    }

    /**
     * Writes a Graphviz {@code digraph} of everything reachable from {@code j} over unmasked
     * edges (in both directions) to the file {@code str}.
     *
     * <p>NOTE(review): the drawing is kept only if at least {@link #MIN_DRAWN_EDGES} edges
     * were written; smaller drawings are deleted again. This mirrors the existing behaviour -
     * confirm that it is intended.
     *
     * @return {@code true} if the drawing was kept, {@code false} if it was deleted
     */
    private boolean visGraph(long j, String str, CompactDeBruijnGraphWF compactDeBruijnGraphWF, LongsHashSet longsHashSet) throws IOException {
        int drawnEdges = 0;
        PrintWriter out = new PrintWriter(str);
        try {
            LinkedList<Long> queue = new LinkedList<Long>();
            queue.add(j);
            Set<Long> drawn = new HashSet<Long>();
            out.println("digraph G{");
            while (!queue.isEmpty()) {
                long vertex = queue.poll();
                for (long edge : compactDeBruijnGraphWF.outcomeEdges(vertex)) {
                    long key = Math.min(edge, compactDeBruijnGraphWF.reverseComplementEdge(edge));
                    if (longsHashSet.contains(key) || drawn.contains(key)) {
                        continue;
                    }
                    long target = edge & compactDeBruijnGraphWF.vertexMask;
                    printNodeLabels(out, vertex, target, compactDeBruijnGraphWF, longsHashSet);
                    printArc(out, vertex, target);
                    queue.add(target);
                    drawn.add(key);
                    drawnEdges++;
                }
                for (long edge : compactDeBruijnGraphWF.incomeEdges(vertex)) {
                    long key = Math.min(edge, compactDeBruijnGraphWF.reverseComplementEdge(edge));
                    if (longsHashSet.contains(key) || drawn.contains(key)) {
                        continue;
                    }
                    long source = (edge >> 2) & compactDeBruijnGraphWF.vertexMask;
                    printNodeLabels(out, vertex, source, compactDeBruijnGraphWF, longsHashSet);
                    printArc(out, source, vertex);
                    queue.add(source);
                    drawn.add(key);
                    drawnEdges++;
                }
            }
            out.println("}");
        } finally {
            out.close();
        }
        // drawnEdges < 0 can only happen after int overflow, i.e. for a huge drawing.
        if (drawnEdges < 0 || drawnEdges >= MIN_DRAWN_EDGES) {
            return true;
        }
        new File(str).delete();
        return false;
    }

    /** Prints the frequency label lines of the current vertex and of its neighbour, in that order. */
    private void printNodeLabels(PrintWriter out, long current, long neighbour, CompactDeBruijnGraphWF graph, LongsHashSet masked) {
        out.println(current + " [ label = \"" + getFreq(current, graph, masked) + "\"];");
        out.println(neighbour + " [ label = \"" + getFreq(neighbour, graph, masked) + "\"];");
    }

    /** Prints one arc, labelled with the text of {@code from} followed by the last character of {@code to}. */
    private void printArc(PrintWriter out, long from, long to) {
        String toText = KmerUtils.kmer2String(to, this.k);
        out.println(from + " -> " + to + " [ label = \"" + KmerUtils.kmer2String(from, this.k) + toText.charAt(toText.length() - 1) + "\"];");
    }
}
