/**
 * Holder for the static dataset helper functions that are attached to it as
 * properties. The constructor body is empty.
 */
morpheus.DatasetUtil = function() {
};
/**
 * Finds the smallest value returned by dataset.getValue(row, column).
 *
 * @param dataset
 *            The dataset to scan
 * @return The minimum of all values for which isNaN is false, or
 *         Number.MAX_VALUE when there is no such value.
 */
morpheus.DatasetUtil.min = function(dataset) {
  var smallest = Number.MAX_VALUE;
  var nrows = dataset.getRowCount();
  for (var row = 0; row < nrows; row++) {
    var ncols = dataset.getColumnCount();
    for (var col = 0; col < ncols; col++) {
      var value = dataset.getValue(row, col);
      if (!isNaN(value)) {
        smallest = Math.min(smallest, value);
      }
    }
  }
  return smallest;
};
/**
 * Wraps a dataset in a new morpheus.SlicedDatasetView.
 *
 * @param dataset
 *            The dataset to wrap
 * @param rows
 *            Passed unchanged to the SlicedDatasetView constructor
 * @param columns
 *            Passed unchanged to the SlicedDatasetView constructor
 * @return The newly created view
 */
morpheus.DatasetUtil.slicedView = function(dataset, rows, columns) {
  var view = new morpheus.SlicedDatasetView(dataset, rows, columns);
  return view;
};
/**
 * Returns a transposed view of a dataset.
 *
 * @param dataset
 *            The dataset to transpose
 * @return The wrapped dataset when the argument is already a
 *         morpheus.TransposedDatasetView, otherwise a new
 *         TransposedDatasetView around the argument.
 */
morpheus.DatasetUtil.transposedView = function(dataset) {
  if (dataset instanceof morpheus.TransposedDatasetView) {
    // unwrap instead of stacking a second transposed view on top
    return dataset.getDataset();
  }
  return new morpheus.TransposedDatasetView(dataset);
};
/**
 * Finds the largest value returned by dataset.getValue(row, column).
 *
 * @param dataset
 *            The dataset to scan
 * @return The maximum of all values for which isNaN is false, or
 *         -Number.MAX_VALUE when there is no such value.
 */
morpheus.DatasetUtil.max = function(dataset) {
  var largest = -Number.MAX_VALUE;
  var nrows = dataset.getRowCount();
  for (var row = 0; row < nrows; row++) {
    var ncols = dataset.getColumnCount();
    for (var col = 0; col < ncols; col++) {
      var value = dataset.getValue(row, col);
      if (!isNaN(value)) {
        largest = Math.max(largest, value);
      }
    }
  }
  return largest;
};
/**
 * Creates the reader object for a file extension.
 *
 * @param ext
 *            The file extension, compared exactly (case-sensitive) against
 *            'maf', 'gct', 'gmt', 'xlsx', 'segtab', 'seg', 'txt', 'tsv' and
 *            'csv'
 * @param options
 *            Optional. options.mafGeneFilter is handed to the MAF reader and
 *            options.regions to the segmentation reader when set.
 * @return A new reader instance. Unrecognized extensions get a
 *         morpheus.GctReader.
 */
morpheus.DatasetUtil.getDatasetReader = function(ext, options) {
  switch (ext) {
  case 'maf':
    var mafReader = new morpheus.MafFileReader();
    if (options && options.mafGeneFilter) {
      mafReader.setGeneFilter(options.mafGeneFilter);
    }
    return mafReader;
  case 'gmt':
    return new morpheus.GmtDatasetReader();
  case 'xlsx':
    return new morpheus.XlsxDatasetReader();
  case 'segtab':
  case 'seg':
    var segReader = new morpheus.SegTabReader();
    if (options && options.regions) {
      segReader.setRegions(options.regions);
    }
    return segReader;
  case 'txt':
  case 'tsv':
  case 'csv':
    return new morpheus.TxtReader();
  case 'gct':
    // return new morpheus.StreamingGctReader();
  default:
    // 'gct' and every unknown extension share the GCT reader
    return new morpheus.GctReader();
  }
};
/**
 * Reads every dataset listed in options.dataset and combines the results.
 *
 * @param options.dataset
 *            Array of entries. For each entry, entry.dataset is either an
 *            object with file (and optionally options) properties, or a value
 *            that is passed directly to morpheus.DatasetUtil.read.
 * @return A jQuery Deferred. With two or more datasets it resolves to
 *         morpheus.JoinedDataset instances chained on the 'id' field; with
 *         exactly one dataset it resolves to that dataset after a 'Source'
 *         row metadata vector filled with the dataset name has been added.
 *         It is rejected with a message string when a dataset cannot be read
 *         or when no datasets were specified.
 */
morpheus.DatasetUtil.readDatasetArray = function(options) {
  var retDef = $.Deferred();
  var loadedDatasets = [];
  var promises = [];
  _.each(options.dataset, function(option, i) {
    var source = option.dataset.file ? option.dataset.file : option.dataset;
    var p = option.dataset.file ? morpheus.DatasetUtil.read(
        option.dataset.file, option.dataset.options)
        : morpheus.DatasetUtil.read(option.dataset);
    p.done(function(dataset) {
      // use the closure index so the slot does not depend on the callback
      // context or on a property stored on the promise
      loadedDatasets[i] = dataset;
    });
    p.fail(function(err) {
      var message = [ 'Error opening ' + morpheus.Util.getFileName(source)
          + '.' ];
      if (err && err.message) {
        // NOTE(review): a plain line break separates the cause; confirm
        // this is what the code displaying the message expects
        message.push('\nCause: ');
        message.push(err.message);
      }
      retDef.reject(message.join(''));
    });
    promises.push(p);
  });
  if (promises.length === 0) {
    retDef.reject('No datasets specified.');
    // nothing to join; continuing would dereference an undefined dataset
    return retDef;
  }
  $.when.apply($, promises).then(
      function() {
        var joined;
        if (loadedDatasets.length > 1) {
          joined = new morpheus.JoinedDataset(loadedDatasets[0],
              loadedDatasets[1], 'id', 'id');
          for (var d = 2; d < loadedDatasets.length; d++) {
            joined = new morpheus.JoinedDataset(joined,
                loadedDatasets[d], 'id', 'id');
          }
        } else {
          // add Source
          joined = loadedDatasets[0];
          var sourceVector = joined.getRowMetadata().add('Source');
          var name = joined.getName();
          for (var r = 0, nrows = sourceVector.size(); r < nrows; r++) {
            sourceVector.setValue(r, name);
          }
        }
        retDef.resolve(joined);
      });
  return retDef;
};
/**
 * Annotate a dataset from external file or text.
 *
 * Entries whose file is already an array are turned into a function
 * immediately. All other entries are read with morpheus.Util.readLines and
 * their function is appended once the read completes, so the order of the
 * returned functions can differ from the order of options.annotations.
 *
 * @param options.annotations -
 *            Array of file, datasetField, and fileField (and optionally
 *            include).
 * @param options.isColumns -
 *            Whether to annotate columns
 * @return A jQuery Deferred object that resolves to an array of functions to
 *         execute with a dataset parameter. It is rejected with the message
 *         of the first file read that fails.
 */
morpheus.DatasetUtil.annotate = function(options) {
  var retDef = $.Deferred();
  var promises = [];
  var functions = [];
  var isColumns = options.isColumns;
  _.each(options.annotations, function(ann) {
    if (morpheus.Util.isArray(ann.file)) { // already parsed text
      functions.push(function(dataset) {
        new morpheus.OpenFileTool().annotate(ann.file, dataset,
            isColumns, null, ann.datasetField, ann.fileField,
            ann.include);
      });
      return;
    }
    var result = morpheus.Util.readLines(ann.file);
    var fileName = morpheus.Util.getFileName(ann.file);
    var deferred = $.Deferred();
    promises.push(deferred);
    result.fail(function(message) {
      deferred.reject(message);
    });
    result.done(function(lines) {
      if (morpheus.Util.endsWith(fileName, '.gmt')) {
        var sets = new morpheus.GmtReader().parseLines(lines);
        functions.push(function(dataset) {
          new morpheus.OpenFileTool().annotate(null, dataset,
              isColumns, sets, ann.datasetField, ann.fileField);
        });
      } else if (morpheus.Util.endsWith(fileName, '.cls')) {
        functions.push(function(dataset) {
          new morpheus.OpenFileTool().annotateCls(null, dataset,
              fileName, isColumns, lines);
        });
      } else {
        functions.push(function(dataset) {
          new morpheus.OpenFileTool().annotate(lines, dataset,
              isColumns, null, ann.datasetField, ann.fileField,
              ann.include);
        });
      }
      deferred.resolve();
    });
  });
  $.when.apply($, promises).then(function() {
    retDef.resolve(functions);
  }, function(message) {
    // without this the returned deferred would stay pending forever when
    // a file cannot be read
    retDef.reject(message);
  });
  return retDef;
};
/**
 * Reads a dataset.
 *
 * @param fileOrUrl
 *            a File or url. An object with a done function is returned as is,
 *            and any other value is treated as an already loaded dataset.
 * @param options
 *            Optional. Passed to morpheus.DatasetUtil.getDatasetReader. When
 *            options.background is set, the file is read in a web worker.
 * @return A promise that resolves to Dataset. For a File or url the promise
 *         is rejected when reading fails, and its toString returns the
 *         string form of fileOrUrl.
 */
morpheus.DatasetUtil.read = function(fileOrUrl, options) {
  // File is not defined in every JavaScript environment
  var isFile = typeof File !== 'undefined' && fileOrUrl instanceof File;
  var isString = _.isString(fileOrUrl);
  if (isString || isFile) { // URL or file
    var deferred = $.Deferred();
    if (options && options.background) {
      var path = morpheus.Util.getScriptPath();
      // The worker reports either {error : message} or {dataset : json}
      var blob = new Blob(
          [ 'self.onmessage = function(e) {'
              + 'importScripts(e.data.path);'
              + 'var ext = morpheus.Util.getExtension(morpheus.Util'
              + '.getFileName(e.data.fileOrUrl));'
              + 'var datasetReader = morpheus.DatasetUtil.getDatasetReader(ext,'
              + ' e.data.options);'
              + 'datasetReader.read(e.data.fileOrUrl, function(err, dataset) {'
              + ' if (err) {'
              + '  self.postMessage({error : String(err.message || err)});'
              + ' } else {'
              + '  self.postMessage({dataset : dataset});'
              + ' }'
              + '});'
              + '};' ]);
      // Obtain a blob URL reference to our worker 'file'.
      var blobURL = window.URL.createObjectURL(blob);
      var worker = new Worker(blobURL);
      var releaseWorker = function() {
        // the worker handles a single request, so free it on any outcome
        worker.terminate();
        window.URL.revokeObjectURL(blobURL);
      };
      worker.addEventListener('message', function(e) {
        releaseWorker();
        if (e.data.error != null) {
          deferred.reject(new Error(e.data.error));
        } else {
          // wrap in dataset object
          deferred.resolve(morpheus.Dataset.fromJson(e.data.dataset));
        }
      }, false);
      worker.addEventListener('error', function(e) {
        // uncaught failure inside the worker, e.g. importScripts failed
        releaseWorker();
        deferred.reject(new Error(e.message));
      }, false);
      // start the worker
      worker.postMessage({
        path : path,
        fileOrUrl : fileOrUrl,
        options : options
      });
    } else {
      // the reader is only needed when reading on the calling thread
      var ext = morpheus.Util.getExtension(morpheus.Util
          .getFileName(fileOrUrl));
      var datasetReader = morpheus.DatasetUtil.getDatasetReader(ext,
          options);
      datasetReader.read(fileOrUrl, function(err, dataset) {
        if (err) {
          deferred.reject(err);
        } else {
          deferred.resolve(dataset);
        }
      });
    }
    var pr = deferred.promise();
    // override toString so can determine file name
    pr.toString = function() {
      return '' + fileOrUrl;
    };
    return pr;
  } else if (typeof fileOrUrl.done === 'function') { // assume it's a
    // deferred
    return fileOrUrl;
  } else { // it's already a dataset?
    var resolved = $.Deferred();
    resolved.resolve(fileOrUrl);
    return resolved.promise();
  }
};
/**
 * Converts the rows of a dataset to an array of plain objects, one object
 * per row.
 *
 * @param dataset
 *            The dataset to convert to an array
 * @param options
 *            Optional. When omitted, the defaults listed below are used.
 * @param options.columns
 *            An array of column indices to include from the dataset.
 *            Defaults to [ 0 ].
 * @param options.columnFields
 *            An array of field names to use in the returned objects that
 *            correspond to the column indices in the dataset. Defaults to
 *            [ 'value' ].
 * @param options.metadataFields
 *            An array of row metadata fields to include from the dataset.
 *            Defaults to all row metadata names.
 * @return Array of objects holding the selected matrix values keyed by
 *         column field, plus one property per metadata vector keyed by the
 *         vector name.
 * @throws The string 'columns.length !== columnFields.length' when the two
 *             arrays differ in length.
 */
morpheus.DatasetUtil.toObjectArray = function(dataset, options) {
  // every option has a default, so tolerate a missing options argument
  var opts = options || {};
  var columns = opts.columns || [ 0 ];
  var columnFields = opts.columnFields || [ 'value' ];
  if (columnFields.length !== columns.length) {
    throw 'columns.length !== columnFields.length';
  }
  var metadataFields = opts.metadataFields;
  // Restrict the row metadata vectors to the requested fields. If no fields
  // were requested, take all metadata.
  var rowMetadata = dataset.getRowMetadata();
  if (!metadataFields) {
    metadataFields = morpheus.MetadataUtil.getMetadataNames(rowMetadata);
  }
  var vectors = morpheus.MetadataUtil.getVectors(rowMetadata, metadataFields);
  // build an object that contains the matrix values for the given columns
  // along with any metadata
  var array = [];
  for (var i = 0, nrows = dataset.getRowCount(); i < nrows; i++) {
    var obj = {};
    for (var j = 0; j < columns.length; j++) {
      obj[columnFields[j]] = dataset.getValue(i, columns[j]);
    }
    // metadata is written last, so a vector name that equals a column
    // field overwrites the matrix value
    for (var k = 0; k < vectors.length; k++) {
      obj[vectors[k].getName()] = vectors[k].getValue(i);
    }
    array.push(obj);
  }
  return array;
};
/**
 * Cleans up the row and column metadata of a dataset in place, in three
 * passes:
 * <ol>
 * <li>Renames the vectors listed in the rename table below.</li>
 * <li>Parses the values of the dose vector as floats, then replaces every
 * value equal to '-666' with 0 (dose vector, or vectors whose first non-null
 * value is a number) or with '' (all other vectors).</li>
 * <li>Inserts a space after each comma that is not already followed by one
 * in the 'Lineage' and 'Histology' vectors.</li>
 * </ol>
 *
 * @param dataset
 *            The dataset whose metadata is modified
 */
morpheus.DatasetUtil.fixL1K = function(dataset) {
  var DOSE_NAME = 'Dose (\u00B5M)';
  var renames = {
    'cell_id' : 'Cell Line',
    'pert_idose' : 'Dose (\u00B5M)',
    'pert_iname' : 'Name',
    'pert_itime' : 'Time (hr)',
    'distil_ss' : 'Signature Strength',
    'pert_type' : 'Type',
    'cell_lineage' : 'Lineage',
    'cell_histology' : 'Histology',
    'cell_type' : 'Cell Type'
  };

  // runs one pass on the row metadata and then on the column metadata
  var applyToBoth = function(pass) {
    pass(dataset.getRowMetadata());
    pass(dataset.getColumnMetadata());
  };

  var renameVectors = function(metadata) {
    var count = metadata.getMetadataCount();
    for (var i = 0; i < count; i++) {
      var vector = metadata.get(i);
      var replacement = renames[vector.getName()];
      if (replacement) {
        vector.setName(replacement);
      }
    }
  };

  // true when the first non-null value of the vector is a number
  var firstValueIsNumber = function(vector) {
    for (var j = 0, size = vector.size(); j < size; j++) {
      var value = vector.getValue(j);
      if (value != null) {
        return _.isNumber(value);
      }
    }
    return false;
  };

  var replaceMissing = function(metadata) {
    var count = metadata.getMetadataCount();
    for (var i = 0; i < count; i++) {
      var vector = metadata.get(i);
      var j;
      var size;
      var value;
      if (vector.getName() == DOSE_NAME) { // convert to number
        for (j = 0, size = vector.size(); j < size; j++) {
          value = vector.getValue(j);
          if (value != null) {
            vector.setValue(j, parseFloat(value));
          }
        }
      }
      var placeholder = '';
      if (firstValueIsNumber(vector) || vector.getName() == DOSE_NAME) {
        placeholder = 0;
      }
      for (j = 0, size = vector.size(); j < size; j++) {
        value = vector.getValue(j);
        // loose comparison matches both the string and the number form
        if (value != null && value == '-666') {
          vector.setValue(j, placeholder);
        }
      }
    }
  };

  var spaceAfterCommas = function(metadata) {
    var commaWithoutSpace = /(,)([^ ])/g;
    var vectorNames = [ 'Lineage', 'Histology' ];
    for (var n = 0; n < vectorNames.length; n++) {
      var vector = metadata.getByName(vectorNames[n]);
      if (vector == null) {
        continue;
      }
      for (var i = 0, size = vector.size(); i < size; i++) {
        var text = vector.getValue(i);
        if (text) {
          vector.setValue(i, text.replace(commaWithoutSpace, ', $2'));
        }
      }
    }
  };

  applyToBoth(renameVectors);
  applyToBoth(replaceMissing);
  applyToBoth(spaceAfterCommas);
};
/**
 * Builds a membership dataset from a list of sets: one row per distinct id,
 * one column per set, and the value 1 stored wherever an id belongs to a set.
 *
 * @param name
 *            Name given to the new dataset
 * @param sets
 *            Array of objects with a name and an ids array
 * @return The new morpheus.Dataset. Its row 'id' vector holds the distinct
 *         ids and its column 'id' vector holds the set names.
 */
morpheus.DatasetUtil.geneSetsToDataset = function(name, sets) {
  var nsets = sets.length;
  var s;
  var m;
  var members;

  // collect the distinct ids across all sets
  var distinct = new morpheus.Map();
  for (s = 0; s < nsets; s++) {
    members = sets[s].ids;
    for (m = 0; m < members.length; m++) {
      distinct.set(members[m], 1);
    }
  }
  var rowNames = distinct.keys();

  var dataset = new morpheus.Dataset({
    name : name,
    rows : rowNames.length,
    columns : nsets
  });

  var columnIds = dataset.getColumnMetadata().add('id');
  for (s = 0; s < nsets; s++) {
    columnIds.setValue(s, sets[s].name);
  }

  var rowIds = dataset.getRowMetadata().add('id');
  for (var r = 0; r < rowNames.length; r++) {
    rowIds.setValue(r, rowNames[r]);
  }

  // mark each (id, set) membership with 1
  var rowIndexById = morpheus.VectorUtil.createValueToIndexMap(rowIds);
  for (s = 0; s < nsets; s++) {
    members = sets[s].ids;
    for (m = 0; m < members.length; m++) {
      dataset.setValue(rowIndexById.get(members[m]), s, 1);
    }
  }
  return dataset;
};
// Text fragments that list file formats. The last element of each joined
// list deliberately keeps its leading space so the resulting strings are
// unchanged.
morpheus.DatasetUtil.DATASET_FILE_FORMATS = [ 'GCT 1.3', 'GCT 1.2', 'MAF',
    'GMT', ' or a tab-delimited text file' ].join(', ');
morpheus.DatasetUtil.BASIC_DATASET_FILE_FORMATS = [ 'GCT 1.3', 'GCT 1.2',
    ' or a tab-delimited text file' ].join(', ');
morpheus.DatasetUtil.GCT_FILE_FORMAT = 'GCT 1.3';
morpheus.DatasetUtil.ANNOTATION_FILE_FORMATS = 'an xlsx file, tab-delimited text file, or a GMT file';
morpheus.DatasetUtil.DENDROGRAM_FILE_FORMATS = 'a Newick file';
morpheus.DatasetUtil.OPEN_FILE_FORMATS = [ 'GCT 1.3', 'GCT 1.2', 'MAF',
    'GMT', ' a tab-delimited text file, or a Newick file' ].join(', ');
/**
 * Follows getDataset() links until reaching an object that has no getDataset
 * member.
 *
 * @param dataset
 *            The dataset (or view) to start from
 * @return The innermost object reached
 */
morpheus.DatasetUtil.getRootDataset = function(dataset) {
  var current = dataset;
  for (; current.getDataset;) {
    current = current.getDataset();
  }
  return current;
};
/**
 * Looks up a series by name.
 *
 * @param dataset
 *            The dataset to search
 * @param name
 *            The series name, compared with strict equality
 * @return The index of the first series whose name matches, or -1 when no
 *         series matches
 */
morpheus.DatasetUtil.getSeriesIndex = function(dataset, name) {
  var found = -1;
  var total = dataset.getSeriesCount();
  for (var index = 0; index < total && found === -1; index++) {
    if (dataset.getName(index) === name) {
      found = index;
    }
  }
  return found;
};
/**
 * Lists the series names of a dataset.
 *
 * @param dataset
 *            The dataset to read the series names from
 * @return A new array of all series names, sorted by their lower-cased form
 */
morpheus.DatasetUtil.getSeriesNames = function(dataset) {
  var total = dataset.getSeriesCount();
  var result = [];
  for (var index = 0; index < total; index++) {
    result.push(dataset.getName(index));
  }
  result.sort(function(left, right) {
    var x = left.toLowerCase();
    var y = right.toLowerCase();
    if (x < y) {
      return -1;
    }
    return x === y ? 0 : 1;
  });
  return result;
};
/**
 * Search dataset values.
 *
 * Only (row, series) pairs whose data type is 'object' are searched, and
 * only elements that provide a toObject function. For each such element the
 * search predicates are tried in order and cb is invoked for the first value
 * that a predicate accepts, so cb runs at most once per element.
 *
 * @param dataset
 *            The dataset to search
 * @param text
 *            The search text. Nothing happens when it is empty or yields no
 *            tokens.
 * @param cb
 *            Called as cb(value, rowIndex, columnIndex) for each match.
 *            Returning false stops the search.
 */
morpheus.DatasetUtil.searchValues = function(dataset, text, cb) {
  if (text === '') {
    return;
  }
  var tokens = morpheus.Util.getAutocompleteTokens(text);
  if (tokens.length == 0) {
    return;
  }

  // collect the [row, series] pairs that hold objects
  var objectCells = [];
  var nrows = dataset.getRowCount();
  for (var row = 0; row < nrows; row++) {
    var seriesCount = dataset.getSeriesCount();
    for (var series = 0; series < seriesCount; series++) {
      if (dataset.getDataType(row, series) === 'object') {
        objectCells.push([ row, series ]);
      }
    }
  }
  var ncells = objectCells.length;
  if (ncells === 0) {
    return;
  }

  // the first convertible element supplies the searchable field names
  var findSampleObject = function() {
    for (var c = 0; c < ncells; c++) {
      var cell = objectCells[c];
      var ncols = dataset.getColumnCount();
      for (var col = 0; col < ncols; col++) {
        var candidate = dataset.getValue(cell[0], col, cell[1]);
        if (candidate != null && candidate.toObject) {
          return candidate.toObject();
        }
      }
    }
    return undefined;
  };
  var sample = findSampleObject();
  var predicates = morpheus.Util.createSearchPredicates({
    tokens : tokens,
    fields : sample == null ? [] : _.keys(sample)
  });
  var npredicates = predicates.length;

  // returns the first value of object accepted by predicate, or NO_MATCH
  var NO_MATCH = {};
  var firstAccepted = function(predicate, object) {
    var field = predicate.getField();
    if (field != null) {
      var fieldValue = object[field];
      if (fieldValue != null && predicate.accept(fieldValue)) {
        return fieldValue;
      }
      return NO_MATCH;
    }
    // the predicate is not tied to a field: try all fields
    for ( var key in object) {
      var anyValue = object[key];
      if (anyValue != null && predicate.accept(anyValue)) {
        return anyValue;
      }
    }
    return NO_MATCH;
  };

  for (var k = 0; k < ncells; k++) {
    var pair = objectCells[k];
    for (var j = 0, ncols = dataset.getColumnCount(); j < ncols; j++) {
      var element = dataset.getValue(pair[0], j, pair[1]);
      if (!element || !element.toObject) {
        continue;
      }
      var object = element.toObject();
      for (var p = 0; p < npredicates; p++) {
        var hit = firstAccepted(predicates[p], object);
        if (hit !== NO_MATCH) {
          if (cb(hit, pair[0], j) === false) {
            return;
          }
          break; // one callback per element
        }
      }
    }
  }
};
// morpheus.DatasetUtil.toJSON = function(dataset) {
// var json = [];
// json.push('{');
// json.push('"name":"' + dataset.getName() + '", ');
// json.push('"v":['); // row major 2d array
// for (var i = 0, nrows = dataset.getRowCount(); i < nrows; i++) {
// if (i > 0) {
// json.push(',\n');
// }
// json.push('[');
// for (var j = 0, ncols = dataset.getColumnCount(); j < ncols; j++) {
// if (j > 0) {
// json.push(',');
// }
// json.push(JSON.stringify(dataset.getValue(i, j)));
// }
// json.push(']');
// }
// json.push(']'); // end v
// var metadataToJson = function(model) {
// json.push('[');
// for (var i = 0, count = model.getMetadataCount(); i < count; i++) {
// var v = model.get(i);
// if (i > 0) {
// json.push(',\n');
// }
// json.push('{');
// json.push('"id":"' + v.getName() + '"');
// json.push(', "v":[');
// for (var j = 0, nitems = v.size(); j < nitems; j++) {
// if (j > 0) {
// json.push(',');
// }
// json.push(JSON.stringify(v.getValue(j)));
// }
// json.push(']'); // end v array
// json.push('}');
// }
// json.push(']');
// };
// json.push(', "cols":');
// metadataToJson(dataset.getColumnMetadata());
// json.push(', "rows":');
// metadataToJson(dataset.getRowMetadata());
// json.push('}'); // end json object
// return json.join('');
// };
/**
 * Sets every cell of one series to the same value.
 *
 * @param dataset
 *            The dataset to modify
 * @param value
 *            The value to store in each cell
 * @param seriesIndex
 *            The series to fill; a falsy value selects series 0
 */
morpheus.DatasetUtil.fill = function(dataset, value, seriesIndex) {
  var series = seriesIndex || 0;
  var nrows = dataset.getRowCount();
  var ncols = dataset.getColumnCount();
  for (var row = 0; row < nrows; row++) {
    for (var col = 0; col < ncols; col++) {
      dataset.setValue(row, col, value, series);
    }
  }
};
/**
 * Replaces the metadata accessors of a dataset with ones that return shallow
 * copies (made with morpheus.MetadataUtil.shallowCopy) of its row and column
 * metadata. Metadata is immutable via the UI.
 *
 * Note that no new dataset object is created: getRowMetadata and
 * getColumnMetadata are overridden on the dataset that was passed in.
 *
 * @param dataset
 *            The dataset to modify
 * @return The same dataset instance
 */
morpheus.DatasetUtil.shallowCopy = function(dataset) {
  // builds an accessor that always hands back the given model
  var accessorFor = function(model) {
    return function() {
      return model;
    };
  };
  var rowCopy = morpheus.MetadataUtil.shallowCopy(dataset.getRowMetadata());
  var columnCopy = morpheus.MetadataUtil.shallowCopy(dataset
      .getColumnMetadata());
  dataset.getRowMetadata = accessorFor(rowCopy);
  dataset.getColumnMetadata = accessorFor(columnCopy);
  return dataset;
};
/**
 * Creates a new morpheus.Dataset containing the values of every series of
 * the given dataset. All series of the new dataset are created with data
 * type 'object'. Row and column metadata are shallow copies made with
 * morpheus.MetadataUtil.shallowCopy.
 *
 * @param dataset
 *            The dataset to copy
 * @return The new dataset
 */
morpheus.DatasetUtil.copy = function(dataset) {
  var result = new morpheus.Dataset({
    name : dataset.getName(),
    rows : dataset.getRowCount(),
    columns : dataset.getColumnCount(),
    dataType : 'object'
  });
  var seriesCount = dataset.getSeriesCount();
  for (var series = 0; series < seriesCount; series++) {
    // series 0 already exists in the new dataset
    if (series > 0) {
      result.addSeries({
        name : dataset.getName(series),
        rows : dataset.getRowCount(),
        columns : dataset.getColumnCount(),
        dataType : 'object'
      });
    }
    var nrows = dataset.getRowCount();
    var ncols = dataset.getColumnCount();
    for (var row = 0; row < nrows; row++) {
      for (var col = 0; col < ncols; col++) {
        var cell = dataset.getValue(row, col, series);
        result.setValue(row, col, cell, series);
      }
    }
  }
  // builds an accessor that always hands back the given model
  var accessorFor = function(model) {
    return function() {
      return model;
    };
  };
  var rowCopy = morpheus.MetadataUtil.shallowCopy(dataset.getRowMetadata());
  var columnCopy = morpheus.MetadataUtil.shallowCopy(dataset
      .getColumnMetadata());
  result.getRowMetadata = accessorFor(rowCopy);
  result.getColumnMetadata = accessorFor(columnCopy);
  return result;
};
/**
 * Formats one series of a dataset as text: one line per row, values
 * formatted with morpheus.Util.nf and separated by ', ', each line ending
 * with a newline.
 *
 * @param dataset
 *            The dataset to format
 * @param value
 *            Not used
 * @param seriesIndex
 *            The series to format; a falsy value selects series 0
 * @return The formatted text
 */
morpheus.DatasetUtil.toString = function(dataset, value, seriesIndex) {
  var series = seriesIndex || 0;
  var nrows = dataset.getRowCount();
  var ncols = dataset.getColumnCount();
  var lines = [];
  for (var row = 0; row < nrows; row++) {
    var cells = [];
    for (var col = 0; col < ncols; col++) {
      cells.push(morpheus.Util.nf(dataset.getValue(row, col, series)));
    }
    lines.push(cells.join(', ') + '\n');
  }
  return lines.join('');
};
/**
 * Finds the rows that contain at least one value for which isNaN is false.
 *
 * @param dataset
 *            The dataset to inspect
 * @return Array of the matching row indices in ascending order
 */
morpheus.DatasetUtil.getNonEmptyRows = function(dataset) {
  // true as soon as one value in the row is not NaN
  var rowHasValue = function(row) {
    var ncols = dataset.getColumnCount();
    for (var col = 0; col < ncols; col++) {
      if (!isNaN(dataset.getValue(row, col))) {
        return true;
      }
    }
    return false;
  };
  var kept = [];
  var nrows = dataset.getRowCount();
  for (var row = 0; row < nrows; row++) {
    if (rowHasValue(row)) {
      kept.push(row);
    }
  }
  return kept;
};