/**
 * Namespace for helpers that parse TCGA barcodes and load/combine TCGA data
 * files into a single dataset.
 */
morpheus.TcgaUtil = function () {
};

/**
 * TCGA study abbreviations mapped to their full names.
 */
morpheus.TcgaUtil.DISEASE_STUDIES = {
  'LAML': 'Acute Myeloid Leukemia',
  'ACC': 'Adrenocortical carcinoma',
  'BLCA': 'Bladder Urothelial Carcinoma',
  'LGG': 'Brain Lower Grade Glioma',
  'BRCA': 'Breast invasive carcinoma',
  'CESC': 'Cervical squamous cell carcinoma and endocervical adenocarcinoma',
  'CHOL': 'Cholangiocarcinoma',
  'LCML': 'Chronic Myelogenous Leukemia',
  'COAD': 'Colon adenocarcinoma',
  'CNTL': 'Controls',
  'ESCA': 'Esophageal carcinoma',
  'FPPP': 'FFPE Pilot Phase II',
  'GBM': 'Glioblastoma multiforme',
  'HNSC': 'Head and Neck squamous cell carcinoma',
  'KICH': 'Kidney Chromophobe',
  'KIRC': 'Kidney renal clear cell carcinoma',
  'KIRP': 'Kidney renal papillary cell carcinoma',
  'LIHC': 'Liver hepatocellular carcinoma',
  'LUAD': 'Lung adenocarcinoma',
  'LUSC': 'Lung squamous cell carcinoma',
  'DLBC': 'Lymphoid Neoplasm Diffuse Large B-cell Lymphoma',
  'MESO': 'Mesothelioma',
  'MISC': 'Miscellaneous',
  'OV': 'Ovarian serous cystadenocarcinoma',
  'PAAD': 'Pancreatic adenocarcinoma',
  'PCPG': 'Pheochromocytoma and Paraganglioma',
  'PRAD': 'Prostate adenocarcinoma',
  'READ': 'Rectum adenocarcinoma',
  'SARC': 'Sarcoma',
  'SKCM': 'Skin Cutaneous Melanoma',
  'STAD': 'Stomach adenocarcinoma',
  'TGCT': 'Testicular Germ Cell Tumors',
  'THYM': 'Thymoma',
  'THCA': 'Thyroid carcinoma',
  'UCS': 'Uterine Carcinosarcoma',
  'UCEC': 'Uterine Corpus Endometrial Carcinoma',
  'UVM': 'Uveal Melanoma',
  'KIPAN': 'Pan-Kidney Cohort',
  'COADREAD': 'Colorectal adenocarcinoma',
  'GBMLGG': 'Glioma',
  'STES': 'Stomach and Esophageal Carcinoma'
};

/**
 * Two-character sample type codes (the first two characters of the fourth
 * barcode token) mapped to their descriptions. The descriptions are used to
 * build column ids, so they must stay stable.
 */
morpheus.TcgaUtil.SAMPLE_TYPES = {
  '01': 'Primary solid Tumor',
  '02': 'Recurrent Solid Tumor',
  '03': 'Primary Blood Derived Cancer - Peripheral Blood',
  '04': 'Recurrent Blood Derived Cancer - Bone Marrow',
  '05': 'Additional - New Primary',
  '06': 'Metastatic',
  '07': 'Additional Metastatic',
  '08': 'Human Tumor Original Cells',
  '09': 'Primary Blood Derived Cancer - Bone Marrow',
  '10': 'Blood Derived Normal',
  '11': 'Solid Tissue Normal',
  '12': 'Buccal Cell Normal',
  '13': 'EBV Immortalized Normal',
  '14': 'Bone Marrow Normal',
  '20': 'Control Analyte',
  '40': 'Recurrent Blood Derived Cancer - Peripheral Blood',
  '50': 'Cell Lines',
  '60': 'Primary Xenograft Tissue',
  '61': 'Cell Line Derived Xenograft Tissue'
};

/**
 * Splits a dash-separated TCGA barcode into an id and a sample type.
 *
 * @param {string} s
 *            Barcode, e.g. TCGA-AC-A23H-01A-11D-A159-09
 * @returns {{id: string, sampleType: (string|undefined)}} id is the
 *          lower-cased third token. sampleType is the SAMPLE_TYPES entry for
 *          the first two characters of the fourth token, or the entry for
 *          '01' when the barcode has no fourth token. sampleType is undefined
 *          when the code is not present in SAMPLE_TYPES.
 * @throws {TypeError} if the barcode has fewer than three tokens.
 */
morpheus.TcgaUtil.barcode = function (s) {
  var tokens = s.split('-');
  if (tokens.length < 3) {
    // previously failed with an opaque "cannot read property of undefined"
    throw new TypeError('Invalid TCGA barcode: ' + s);
  }
  var sampleTypeCode = tokens.length > 3 ? tokens[3].substring(0, 2) : '01';
  return {
    id: tokens[2].toLowerCase(),
    sampleType: morpheus.TcgaUtil.SAMPLE_TYPES[sampleTypeCode]
  };
};

/**
 * Rewrites the first column metadata vector of a dataset from raw barcodes
 * to "<id>-<sample type>" and adds 'participant_id' and 'sample_type' column
 * metadata vectors.
 *
 * @param dataset
 *            The dataset to modify in place. Its first column metadata
 *            vector must contain barcodes accepted by
 *            morpheus.TcgaUtil.barcode.
 */
morpheus.TcgaUtil.setIdAndSampleType = function (dataset) {
  var columnMetadata = dataset.getColumnMetadata();
  var idVector = columnMetadata.get(0);
  var participantId = columnMetadata.add('participant_id');
  var sampleType = columnMetadata.add('sample_type');
  for (var i = 0, size = idVector.size(); i < size; i++) {
    var barcode = morpheus.TcgaUtil.barcode(idVector.getValue(i));
    idVector.setValue(i, barcode.id + '-' + barcode.sampleType);
    sampleType.setValue(i, barcode.sampleType);
    participantId.setValue(i, barcode.id);
  }
};

/**
 * Reads the configured TCGA inputs, joins them on the column 'id' vector
 * and annotates the result.
 *
 * Individual data files are read best-effort: a file that cannot be read is
 * logged and skipped. The returned deferred is rejected when one of the
 * line-based inputs (cluster file, significant genes, column annotations)
 * fails, when no dataset could be loaded, or when combining the datasets
 * throws.
 *
 * @param options
 *            Inputs to load. Each of the following is optional and is
 *            handed unchanged to the corresponding reader:
 * @param options.mrna Read with morpheus.TxtReader.
 * @param options.mutation Read with morpheus.MafFileReader.
 * @param options.sigGenes Lines used to annotate the mutation rows with
 *            'q_value'; read whenever options.mutation is set.
 * @param options.gistic Read with morpheus.GisticReader.
 * @param options.gisticGene Read with morpheus.TxtReader
 *            (dataColumnStart: 3).
 * @param options.seg Read with morpheus.SegTabReader.
 * @param options.rppa Read with morpheus.TxtReader.
 * @param options.methylation Read with morpheus.TxtReader.
 * @param options.mrnaClust Tab-delimited cluster assignments used to fill
 *            the 'mRNAseq_cluster' column metadata vector.
 * @param options.columnAnnotations Passed to morpheus.DatasetUtil.annotate.
 * @returns A jQuery Deferred resolved with the combined dataset.
 */
morpheus.TcgaUtil.getDataset = function (options) {
  var promises = [];
  var datasets = [];
  var returnDeferred = $.Deferred();

  // Starts reading one data file. The tracking deferred is always resolved
  // so that one unreadable file does not prevent the others from loading.
  function readDataset(reader, file) {
    var deferred = $.Deferred();
    promises.push(deferred);
    reader.read(file, function (err, dataset) {
      if (err) {
        console.log('Error reading file:' + err);
      } else {
        try {
          morpheus.TcgaUtil.setIdAndSampleType(dataset);
          datasets.push(dataset);
        } catch (e) {
          // e.g. malformed barcode; skip the dataset instead of hanging
          console.log('Error reading file:' + e);
        }
      }
      deferred.resolve();
    });
  }

  if (options.mrna) {
    readDataset(new morpheus.TxtReader(), options.mrna);
  }
  var sigGenesLines;
  if (options.mutation) {
    readDataset(new morpheus.MafFileReader(), options.mutation);
    var sigGenesAnnotation = morpheus.Util.readLines(options.sigGenes);
    sigGenesAnnotation.done(function (lines) {
      sigGenesLines = lines;
    });
    promises.push(sigGenesAnnotation);
  }
  if (options.gistic) {
    readDataset(new morpheus.GisticReader(), options.gistic);
  }
  if (options.gisticGene) {
    readDataset(new morpheus.TxtReader({
      dataColumnStart: 3
    }), options.gisticGene);
  }
  if (options.seg) {
    readDataset(new morpheus.SegTabReader(), options.seg);
  }
  if (options.rppa) {
    readDataset(new morpheus.TxtReader(), options.rppa);
  }
  if (options.methylation) {
    readDataset(new morpheus.TxtReader({}), options.methylation);
  }

  // Stays undefined when no cluster file is configured.
  var sampleIdToClusterId;
  if (options.mrnaClust) {
    var mrnaClustPromise = morpheus.Util.readLines(options.mrnaClust);
    promises.push(mrnaClustPromise);
    mrnaClustPromise.done(function (lines) {
      // Expected layout (tab-delimited), header line(s) first:
      // SampleName cluster silhouetteValue
      // TCGA-OR-A5J1-01 1 0.00648776228925048
      sampleIdToClusterId = new morpheus.Map();
      var lineNumber = 0;
      while (lineNumber < lines.length
        && lines[lineNumber].indexOf('SampleName') !== -1) {
        lineNumber++;
      }
      var tab = /\t/;
      for (; lineNumber < lines.length; lineNumber++) {
        var line = lines[lineNumber];
        if (line.trim() === '') {
          continue; // e.g. trailing newline at the end of the file
        }
        var tokens = line.split(tab);
        var barcode = morpheus.TcgaUtil.barcode(tokens[0]);
        sampleIdToClusterId.set(barcode.id + '-' + barcode.sampleType,
          tokens[1]);
      }
    });
  }

  var annotationCallbacks = [];
  var annotationDef = null;
  if (options.columnAnnotations) {
    annotationDef = morpheus.DatasetUtil.annotate({
      annotations: options.columnAnnotations,
      isColumns: true
    });
    promises.push(annotationDef);
    annotationDef.done(function (array) {
      annotationCallbacks = array;
    });
  }

  // Joins the loaded datasets and applies cluster, significant gene and
  // column annotations. Throws when nothing was loaded.
  function combineDatasets() {
    if (datasets.length === 0) {
      throw new Error('No TCGA datasets could be loaded');
    }
    var i;
    var size;
    var datasetToReturn;
    if (datasets.length === 1) {
      datasetToReturn = datasets[0];
      var sourceName = datasetToReturn.getName();
      var sourceVector = datasetToReturn.getRowMetadata().add('Source');
      for (i = 0; i < sourceVector.size(); i++) {
        sourceVector.setValue(i, sourceName);
      }
    } else {
      // Use the mutation data as the reference if present, otherwise the
      // dataset with the most columns. Every index is inspected because
      // datasets are stored in the order their reads completed.
      var referenceIndex = 0;
      var maxColumns = -1;
      for (i = 0; i < datasets.length; i++) {
        if (datasets[i].getName() === 'mutations_merged.maf') {
          referenceIndex = i;
          break;
        }
        if (datasets[i].getColumnCount() > maxColumns) {
          maxColumns = datasets[i].getColumnCount();
          referenceIndex = i;
        }
      }
      datasetToReturn = datasets[referenceIndex];
      for (i = 0; i < datasets.length; i++) {
        if (i !== referenceIndex) {
          datasetToReturn = new morpheus.JoinedDataset(datasetToReturn,
            datasets[i], 'id', 'id');
        }
      }
    }

    var clusterIdVector = datasetToReturn.getColumnMetadata().add(
      'mRNAseq_cluster');
    if (sampleIdToClusterId) {
      var idVector = datasetToReturn.getColumnMetadata().getByName('id');
      for (i = 0, size = idVector.size(); i < size; i++) {
        clusterIdVector.setValue(i, sampleIdToClusterId.get(idVector
          .getValue(i)));
      }
    }

    // view in space of mutation sample ids only
    if (options.mutation) {
      var sourceToIndices = morpheus.VectorUtil
        .createValueToIndicesMap(datasetToReturn.getRowMetadata()
          .getByName('Source'));
      var mutationDataset = new morpheus.SlicedDatasetView(
        datasetToReturn, sourceToIndices.get('mutations_merged.maf'));
      new morpheus.OpenFileTool().annotate(sigGenesLines,
        mutationDataset, false, null, 'id', 'gene', ['q']);
      // copy the annotated 'q' values into 'q_value', then drop 'q'
      var rowMetadata = mutationDataset.getRowMetadata();
      var qVector = rowMetadata.getByName('q');
      var qValueVector = rowMetadata.getByName('q_value');
      if (qValueVector == null) {
        qValueVector = rowMetadata.add('q_value');
      }
      for (i = 0, size = qValueVector.size(); i < size; i++) {
        qValueVector.setValue(i, qVector.getValue(i));
      }
      rowMetadata.remove(morpheus.MetadataUtil.indexOf(rowMetadata, 'q'));
    }

    if (annotationDef) {
      annotationCallbacks.forEach(function (f) {
        f(datasetToReturn);
      });
    }
    return datasetToReturn;
  }

  $.when.apply($, promises).then(function () {
    var combined;
    try {
      combined = combineDatasets();
    } catch (e) {
      console.log('Error combining datasets:' + e);
      returnDeferred.reject(e);
      return;
    }
    // resolved outside the try block so that errors thrown by the caller's
    // own callbacks are not swallowed here
    returnDeferred.resolve(combined);
  }, function () {
    // one of the line-based inputs failed; propagate instead of hanging
    returnDeferred.reject.apply(returnDeferred, arguments);
  });
  return returnDeferred;
};