Commit 37187ac9 authored by jgould

tcga cluster ids

parent f3facf7f
This source diff could not be displayed because it is too large. You can view the blob instead.
/**
 * Container for TCGA helpers. The function body is intentionally empty; the
 * lookup tables and utility functions are attached to it as static properties.
 */
morpheus.TcgaUtil = function () {
};
/**
 * Lookup table from disease study abbreviation (e.g. 'BRCA') to the full
 * study name.
 */
morpheus.TcgaUtil.DISEASE_STUDIES = {
  'LAML': 'Acute Myeloid Leukemia',
  'ACC': 'Adrenocortical carcinoma',
  'BLCA': 'Bladder Urothelial Carcinoma',
  'LGG': 'Brain Lower Grade Glioma',
  'BRCA': 'Breast invasive carcinoma',
  'CESC': 'Cervical squamous cell carcinoma and endocervical adenocarcinoma',
  'CHOL': 'Cholangiocarcinoma',
  'LCML': 'Chronic Myelogenous Leukemia',
  'COAD': 'Colon adenocarcinoma',
  'CNTL': 'Controls',
  // Trailing space removed from the label.
  'ESCA': 'Esophageal carcinoma',
  'FPPP': 'FFPE Pilot Phase II',
  'GBM': 'Glioblastoma multiforme',
  'HNSC': 'Head and Neck squamous cell carcinoma',
  'KICH': 'Kidney Chromophobe',
  'KIRC': 'Kidney renal clear cell carcinoma',
  'KIRP': 'Kidney renal papillary cell carcinoma',
  'LIHC': 'Liver hepatocellular carcinoma',
  'LUAD': 'Lung adenocarcinoma',
  'LUSC': 'Lung squamous cell carcinoma',
  'DLBC': 'Lymphoid Neoplasm Diffuse Large B-cell Lymphoma',
  'MESO': 'Mesothelioma',
  'MISC': 'Miscellaneous',
  'OV': 'Ovarian serous cystadenocarcinoma',
  'PAAD': 'Pancreatic adenocarcinoma',
  'PCPG': 'Pheochromocytoma and Paraganglioma',
  'PRAD': 'Prostate adenocarcinoma',
  'READ': 'Rectum adenocarcinoma',
  'SARC': 'Sarcoma',
  'SKCM': 'Skin Cutaneous Melanoma',
  'STAD': 'Stomach adenocarcinoma',
  'TGCT': 'Testicular Germ Cell Tumors',
  'THYM': 'Thymoma',
  'THCA': 'Thyroid carcinoma',
  'UCS': 'Uterine Carcinosarcoma',
  'UCEC': 'Uterine Corpus Endometrial Carcinoma',
  'UVM': 'Uveal Melanoma',
  'KIPAN': 'Pan-Kidney Cohort',
  // Spelling corrected from 'Colonrectal'.
  'COADREAD': 'Colorectal adenocarcinoma',
  'GBMLGG': 'Glioma',
  'STES': 'Stomach and Esophageal Carcinoma'
};
/**
 * Lookup table from two character sample type code to its label. Keys are
 * strings (leading zeros are significant). morpheus.TcgaUtil.barcode reads
 * this table using the first two characters of a barcode's fourth token.
 */
morpheus.TcgaUtil.SAMPLE_TYPES = {
  '01': 'Primary solid Tumor',
  '02': 'Recurrent Solid Tumor',
  '03': 'Primary Blood Derived Cancer - Peripheral Blood',
  '04': 'Recurrent Blood Derived Cancer - Bone Marrow',
  '05': 'Additional - New Primary',
  '06': 'Metastatic',
  '07': 'Additional Metastatic',
  '08': 'Human Tumor Original Cells',
  '09': 'Primary Blood Derived Cancer - Bone Marrow',

  '10': 'Blood Derived Normal',
  '11': 'Solid Tissue Normal',
  '12': 'Buccal Cell Normal',
  '13': 'EBV Immortalized Normal',
  '14': 'Bone Marrow Normal',

  '20': 'Control Analyte',
  '40': 'Recurrent Blood Derived Cancer - Peripheral Blood',
  '50': 'Cell Lines',
  '60': 'Primary Xenograft Tissue',
  '61': 'Cell Line Derived Xenograft Tissue'
};
/**
 * Extracts the participant id and sample type label from a barcode.
 *
 * The barcode is split on '-'. The third token is returned lower-cased as the
 * id. The first two characters of the fourth token are looked up in
 * morpheus.TcgaUtil.SAMPLE_TYPES, e.g. TCGA-AC-A23H-01A-11D-A159-09 gives
 * {id: 'a23h', sampleType: 'Primary solid Tumor'}.
 *
 * @param {string} s The barcode text.
 * @return {{id: string, sampleType: (string|undefined)}} The parsed parts.
 *         sampleType is the '01' label when the barcode has no fourth token,
 *         and undefined when the two character code is not in SAMPLE_TYPES.
 */
morpheus.TcgaUtil.barcode = function (s) {
  var tokens = s.split('-');
  // Inputs with fewer than three tokens (e.g. a blank line) have no
  // participant token. Use the whole string as the id instead of throwing a
  // TypeError on undefined.toLowerCase().
  var id = tokens.length > 2 ? tokens[2] : s;
  // Barcodes without a sample token are treated as sample type '01'.
  var sampleTypeCode = '01';
  if (tokens.length > 3) {
    // Only the leading two characters are the type code ('01A' -> '01').
    sampleTypeCode = tokens[3].substring(0, 2);
  }
  return {
    id: id.toLowerCase(),
    sampleType: morpheus.TcgaUtil.SAMPLE_TYPES[sampleTypeCode]
  };
};
/**
 * Rewrites the column ids of a dataset from barcodes and records the parsed
 * parts as extra column metadata.
 *
 * For every column, the value in the first column metadata vector is parsed
 * with morpheus.TcgaUtil.barcode and replaced by '<id>-<sample type>'. Two
 * vectors are added to the column metadata: 'participant_id' (the parsed id)
 * and 'sample_type' (the parsed sample type label).
 *
 * @param dataset The dataset whose column metadata is updated in place.
 */
morpheus.TcgaUtil.setIdAndSampleType = function (dataset) {
  var columnIds = dataset.getColumnMetadata().get(0);
  var participantIds = dataset.getColumnMetadata().add('participant_id');
  var sampleTypes = dataset.getColumnMetadata().add('sample_type');
  var count = columnIds.size();
  for (var col = 0; col < count; col++) {
    var parsed = morpheus.TcgaUtil.barcode(columnIds.getValue(col));
    // NOTE(review): when the sample type code is unknown, parsed.sampleType is
    // undefined and the id becomes '<id>-undefined'. Confirm this is intended.
    var combinedId = parsed.id + '-' + parsed.sampleType;
    columnIds.setValue(col, combinedId);
    sampleTypes.setValue(col, parsed.sampleType);
    participantIds.setValue(col, parsed.id);
  }
};
morpheus.TcgaUtil.getDataset = function (options) {
var promises = [];
var datasets = [];
var returnDeferred = $.Deferred();
if (options.mrna) { if (options.mrna) {
// id + type // id + type
var mrna = $.Deferred(); var mrna = $.Deferred();
promises.push(mrna); promises.push(mrna);
new morpheus.TxtReader().read(options.mrna, function(err, dataset) { new morpheus.TxtReader().read(options.mrna, function (err, dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
// splitId(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
setIdAndSampleType(dataset);
} }
mrna.resolve(); mrna.resolve();
}); });
...@@ -134,18 +122,18 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -134,18 +122,18 @@ morpheus.TcgaUtil.getDataset = function(options) {
if (options.mutation) { if (options.mutation) {
var mutation = $.Deferred(); var mutation = $.Deferred();
promises.push(mutation); promises.push(mutation);
new morpheus.MafFileReader().read(options.mutation, function(err, new morpheus.MafFileReader().read(options.mutation, function (err,
dataset) { dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
mutation.resolve(); mutation.resolve();
}); });
var sigGenesAnnotation = morpheus.Util.readLines(options.sigGenes); var sigGenesAnnotation = morpheus.Util.readLines(options.sigGenes);
sigGenesAnnotation.done(function(lines) { sigGenesAnnotation.done(function (lines) {
sigGenesLines = lines; sigGenesLines = lines;
}); });
promises.push(sigGenesAnnotation); promises.push(sigGenesAnnotation);
...@@ -154,12 +142,12 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -154,12 +142,12 @@ morpheus.TcgaUtil.getDataset = function(options) {
var gistic = $.Deferred(); var gistic = $.Deferred();
promises.push(gistic); promises.push(gistic);
new morpheus.GisticReader().read(options.gistic, new morpheus.GisticReader().read(options.gistic,
function(err, dataset) { function (err, dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
gistic.resolve(); gistic.resolve();
}); });
...@@ -170,14 +158,14 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -170,14 +158,14 @@ morpheus.TcgaUtil.getDataset = function(options) {
promises.push(gisticGene); promises.push(gisticGene);
new morpheus.TxtReader({ new morpheus.TxtReader({
dataColumnStart : 3 dataColumnStart: 3
}).read(options.gisticGene, function(err, dataset) { }).read(options.gisticGene, function (err, dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
gisticGene.resolve(); gisticGene.resolve();
}); });
...@@ -186,12 +174,12 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -186,12 +174,12 @@ morpheus.TcgaUtil.getDataset = function(options) {
if (options.seg) { if (options.seg) {
var seg = $.Deferred(); var seg = $.Deferred();
promises.push(seg); promises.push(seg);
new morpheus.SegTabReader().read(options.seg, function(err, dataset) { new morpheus.SegTabReader().read(options.seg, function (err, dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
seg.resolve(); seg.resolve();
}); });
...@@ -201,14 +189,13 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -201,14 +189,13 @@ morpheus.TcgaUtil.getDataset = function(options) {
var rppa = $.Deferred(); var rppa = $.Deferred();
promises.push(rppa); promises.push(rppa);
new morpheus.TxtReader().read(options.rppa, function(err, dataset) { new morpheus.TxtReader().read(options.rppa, function (err, dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
// splitId(dataset);
rppa.resolve(); rppa.resolve();
}); });
...@@ -218,13 +205,13 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -218,13 +205,13 @@ morpheus.TcgaUtil.getDataset = function(options) {
// id + type // id + type
var methylation = $.Deferred(); var methylation = $.Deferred();
promises.push(methylation); promises.push(methylation);
new morpheus.TxtReader({}).read(options.methylation, function(err, new morpheus.TxtReader({}).read(options.methylation, function (err,
dataset) { dataset) {
if (err) { if (err) {
console.log('Error reading file:' + err); console.log('Error reading file:' + err);
} else { } else {
datasets.push(dataset); datasets.push(dataset);
setIdAndSampleType(dataset); morpheus.TcgaUtil.setIdAndSampleType(dataset);
} }
methylation.resolve(); methylation.resolve();
}); });
...@@ -233,7 +220,7 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -233,7 +220,7 @@ morpheus.TcgaUtil.getDataset = function(options) {
var mrnaClustPromise = morpheus.Util.readLines(options.mrnaClust); var mrnaClustPromise = morpheus.Util.readLines(options.mrnaClust);
promises.push(mrnaClustPromise); promises.push(mrnaClustPromise);
var sampleIdToClusterId; var sampleIdToClusterId;
mrnaClustPromise.done(function(lines) { mrnaClustPromise.done(function (lines) {
// SampleName cluster silhouetteValue // SampleName cluster silhouetteValue
// SampleName cluster silhouetteValue // SampleName cluster silhouetteValue
// TCGA-OR-A5J1-01 1 0.00648776228925048 // TCGA-OR-A5J1-01 1 0.00648776228925048
...@@ -246,23 +233,23 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -246,23 +233,23 @@ morpheus.TcgaUtil.getDataset = function(options) {
for (; lineNumber < lines.length; lineNumber++) { for (; lineNumber < lines.length; lineNumber++) {
var tokens = lines[lineNumber].split(tab); var tokens = lines[lineNumber].split(tab);
var barcode = morpheus.TcgaUtil.barcode(tokens[0]); var barcode = morpheus.TcgaUtil.barcode(tokens[0]);
sampleIdToClusterId.set(barcode.id, tokens[1]); sampleIdToClusterId.set(barcode.id + '-' + barcode.sampleType, tokens[1]);
} }
}); });
var annotationCallbacks = []; var annotationCallbacks = [];
var annotationDef = null; var annotationDef = null;
if (options.columnAnnotations) { if (options.columnAnnotations) {
annotationDef = morpheus.DatasetUtil.annotate({ annotationDef = morpheus.DatasetUtil.annotate({
annotations : options.columnAnnotations, annotations: options.columnAnnotations,
isColumns : true isColumns: true
}); });
promises.push(annotationDef); promises.push(annotationDef);
annotationDef.done(function(array) { annotationDef.done(function (array) {
annotationCallbacks = array; annotationCallbacks = array;
}); });
} }
$.when.apply($, promises).then( $.when.apply($, promises).then(
function() { function () {
var datasetToReturn = null; var datasetToReturn = null;
if (datasets.length === 1) { if (datasets.length === 1) {
var sourceName = datasets[0].getName(); var sourceName = datasets[0].getName();
...@@ -309,7 +296,7 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -309,7 +296,7 @@ morpheus.TcgaUtil.getDataset = function(options) {
var clusterIdVector = datasetToReturn.getColumnMetadata().add( var clusterIdVector = datasetToReturn.getColumnMetadata().add(
'mRNAseq_cluster'); 'mRNAseq_cluster');
var idVector = datasetToReturn.getColumnMetadata().getByName( var idVector = datasetToReturn.getColumnMetadata().getByName(
'participant_id'); 'id');
for (var j = 0, size = idVector.size(); j < size; j++) { for (var j = 0, size = idVector.size(); j < size; j++) {
clusterIdVector.setValue(j, sampleIdToClusterId clusterIdVector.setValue(j, sampleIdToClusterId
.get(idVector.getValue(j))); .get(idVector.getValue(j)));
...@@ -324,7 +311,7 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -324,7 +311,7 @@ morpheus.TcgaUtil.getDataset = function(options) {
.get('mutations_merged.maf')); .get('mutations_merged.maf'));
new morpheus.OpenFileTool() new morpheus.OpenFileTool()
.annotate(sigGenesLines, mutationDataset, false, .annotate(sigGenesLines, mutationDataset, false,
null, 'id', 'gene', [ 'q' ]); null, 'id', 'gene', ['q']);
var qVector = mutationDataset.getRowMetadata().getByName( var qVector = mutationDataset.getRowMetadata().getByName(
'q'); 'q');
var qValueVector = mutationDataset.getRowMetadata() var qValueVector = mutationDataset.getRowMetadata()
...@@ -342,7 +329,7 @@ morpheus.TcgaUtil.getDataset = function(options) { ...@@ -342,7 +329,7 @@ morpheus.TcgaUtil.getDataset = function(options) {
.getRowMetadata(), 'q')); .getRowMetadata(), 'q'));
} }
if (annotationDef) { if (annotationDef) {
annotationCallbacks.forEach(function(f) { annotationCallbacks.forEach(function (f) {
f(datasetToReturn); f(datasetToReturn);
}); });
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment