Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
Package: VISION
Title: Functional interpretation of single cell RNA-seq latent manifolds
Version: 2.1.0
Version: 3.0.0
Authors@R: c(person("Matt", "Jones", email = "[email protected]", role = c("aut", "cre")),
person("David", "Detomaso", email = "[email protected]", role = c("aut", "cre")),
person("Tal", "Ashuach", email = "[email protected]", role = c("aut")),
person("Yanay", "Rosen", email = "[email protected]", role = c("aut")),
person("Nir", "Yosef", email = "[email protected]", role = c("ctb", "cph")))
Author: Matt Jones [aut, cre], David Detomaso [aut, cre], Tal Ashuach [aut], Yanay Rosen [aut], Nir Yosef [ctb, cph]
Maintainer: Matt Jones <[email protected]>
Expand Down Expand Up @@ -40,7 +41,7 @@ Encoding: UTF-8
LazyData: true
URL: https://yoseflab.github.io/VISION, https://github.com/yoseflab/VISION
BugReports: https://github.com/YosefLab/VISION/issues
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Suggests:
Biobase,
BiocStyle,
Expand Down
26 changes: 23 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,28 +1,49 @@
# Generated by roxygen2: do not edit by hand

export(Vision)
export(PhyloVision)
export(Vision)
export(addHotspotToVision)
export(addSignatures)
export(addTSNE)
export(addUMAP)
export(analyzeHotspotObjectVision)
export(analyzeLocalCorrelations)
export(analyzeLocalCorrelationsModules)
export(annotateLatentComponents)
export(applyMicroClustering)
export(calcModuleScores)
export(calcSignatureScores)
export(calcHotspotModules)
export(calc_mod_sig_enrichment)
export(calc_set_enrichment)
export(clusterModScores)
export(clusterSigScores)
export(computeLatentSpace)
export(convertGeneIds)
export(createGeneSignature)
export(depthBasedCladewiseTreeCluster)
export(depthBasedTreeCluster)
export(draw_hotspot_heatmap)
export(generateOverlapSignatures)
export(group_modules_enrichment)
export(hsCalculateModuleScores)
export(hsComputeAutoCorrelations)
export(hsComputeLocalCorrelations)
export(hsCreateKnnGraph)
export(hsInit)
export(lca_based_depth)
export(loadHotspotObject)
export(poolMatrixCols)
export(poolMatrixRows)
export(poolMetaData)
export(read_10x)
export(read_10x_h5)
export(read_10x_h5_v2)
export(read_10x_h5_v3)
export(runHotspot)
export(saveHSBytestToPickle)
export(treeClusterMinCladeSize)
export(trivial_dist)
exportMethods(PhyloVision)
exportMethods(Vision)
exportMethods(addProjection)
exportMethods(analyze)
Expand All @@ -39,7 +60,6 @@ exportMethods(phyloAnalyze)
exportMethods(saveAndViewResults)
exportMethods(viewResults)
import(Matrix)
import(dplyr)
import(Rcpp)
import(ape)
import(loe)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# VISION 3.0.0

Added support for Phylogenies as latent spaces in core VISION.

Integrated [Hotspot](https://yoseflab.github.io/Hotspot/index.html) into VISION analysis and report UI.

Deprecated support for trajectories and LC Annotator.

# VISION 2.1.0

Added parameter `sig_gene_threshold` with **changed default behavior**
Expand Down
6 changes: 4 additions & 2 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ setClassUnion("numericORNULL", members = c("numeric", "NULL"))
setClassUnion("matrixORSparse", members = c("matrix", "dgCMatrix"))
setClassUnion("matrixORNULL", members = c("matrix", "NULL"))
setClassUnion("dataframeORNULL", members = c("data.frame", "NULL"))
setClassUnion("rawORNULL", members = c("raw", "NULL"))


# setClassUnion("treeorNull", members=c("phylo", "NULL"))
# setClassUnion("pythonorNull", members = c("python.builtin.object", "NULL"))
Expand Down Expand Up @@ -121,7 +123,7 @@ Vision <- setClass("Vision",
Pools = "list",
LatentSpace = "matrix",
LatentTrajectory = "trajectoryORNULL",
Hotspot = "list",
Hotspot = "rawORNULL",
ModuleSignatureEnrichment = "list",
ModuleHotspotScores = "data.frame",
Viewer = "list",
Expand All @@ -148,7 +150,7 @@ Vision <- setClass("Vision",
Pools = list(),
LatentSpace = matrix(NA, 1, 1),
LatentTrajectory = NULL,
Hotspot = list(),
Hotspot = NULL,
ModuleSignatureEnrichment = list(),
ModuleHotspotScores = data.frame(),
Viewer = list(),
Expand Down
2 changes: 2 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ setGeneric("Vision", function(data, ...) {
standardGeneric("Vision")
})

# S4 generic `PhyloVision`: the body only calls standardGeneric(), so all
# behavior comes from the methods registered for this generic. `tree` is the
# first formal argument; any further arguments are forwarded through `...`.
#' @rdname PhyloVision-class
#' @export
setGeneric("PhyloVision", function(tree, ...) {
standardGeneric("PhyloVision")
})
Expand Down
2 changes: 1 addition & 1 deletion R/AnalysisFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ clusterCells <- function(object, tree=FALSE) {
} else {
message("Using Tree to compute clusters...\n")
# Get the MRCA matrix and convert the node indexes to depths
cl <- treeCluster3(object@tree)
cl <- maxSizeCladewiseTreeCluster(object@tree)
}

names(cl) <- paste('Cluster', seq(length(cl)))
Expand Down
79 changes: 43 additions & 36 deletions R/Microclusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ readjust_clusters <- function(clusters, data, cellsPerPartition=100) {
return(clusters)
}


#' Pools columns of a numeric matrix
#'
#' Uses the provided pools to merge columns of the supplied data matrix
Expand Down Expand Up @@ -383,6 +384,7 @@ poolMatrixRows <- function(data, pools) {
return(pooled_data)
}


#' create "super-cells" by pooling together single cells
#' @param expr expression data (genes x cells matrix)
#' @param pools cluster association of each cell
Expand Down Expand Up @@ -418,13 +420,15 @@ poolMatrixCols_Inner <- function(expr, pools) {


#' Performs a binary search on a depth d such that
#' if depth(u, v) <= d then u and v are in the same cluster
#' if depth(LCA(u, v)) <= d then u and v are in the same cluster
#'
#' @param tree object of class phylo
#' @param reach number of clusters to attempt to generate
#' @param target number of clusters to attempt to generate
#' @return List of clusters, each entry being a vector of indices representing
#' samples in the cluster.
treeCluster <- function(tree, reach=10) {
#'
#' @export
depthBasedTreeCluster <- function(tree, target=10) {
high <- length(tree$tip.label)
low <- 0
while (T) {
Expand Down Expand Up @@ -453,12 +457,12 @@ treeCluster <- function(tree, reach=10) {
}
}

if (num_clusters >= reach) {
if (num_clusters >= target) {
if(low == d) {
break
}
low <- d
} else if (num_clusters < reach) {
} else if (num_clusters < target) {
if(high == d) {
break
}
Expand All @@ -470,15 +474,17 @@ treeCluster <- function(tree, reach=10) {
}


#' Performs a breadth first search to create a specific number of clusters
#' Clusters are split based on depth
#' Performs a breadth first search to create a specific number of clusters.
#' Clusters are split based on depth.
#'
#' @param tree object of class phylo
#' @param reach number of clusters to attempt to generate
#' @param target number of clusters to attempt to generate
#' @return List of clusters, each entry being a vector of indices representing
#' samples in the cluster.
treeCluster2 <- function(tree, reach=10) {
if (reach > length(tree$tip.label)) {
#'
#' @export
depthBasedCladewiseTreeCluster <- function(tree, target=10) {
if (target > length(tree$tip.label)) {
stop("Number of clusters is too high.")
}

Expand All @@ -498,7 +504,7 @@ treeCluster2 <- function(tree, reach=10) {
cluster_parents[[as.name(child)]] <- node_depths[child]
}

if (length(cluster_parents) >= reach) {
if (length(cluster_parents) >= target) {
break
}
}
Expand All @@ -516,18 +522,17 @@ treeCluster2 <- function(tree, reach=10) {


#' Performs a breadth first search to create a specific number of clusters
#' Clusters are split to prioritize cluster size
#' Clusters are split to prioritize max cluster size
#'
#' @param tree object of class phylo
#' @param reach number of clusters to attempt to generate
#' @param target number of clusters to attempt to generate
#' @return List of clusters, each entry being a vector of tips representing
#' samples in the cluster.
treeCluster3 <- function(tree, reach=10) {
if (reach > length(tree$tip.label)) {
maxSizeCladewiseTreeCluster <- function(tree, target=10) {
if (target > length(tree$tip.label)) {
stop("Number of clusters is too high.")
}

# node_depths <- node.depth(tree)
root <- find_root(tree)
cluster_parents <- c()
cluster_parents[[as.name(root)]] <- get_max_cluster_size(tree, root)
Expand Down Expand Up @@ -556,36 +561,38 @@ treeCluster3 <- function(tree, reach=10) {
cl[[cluster]] <- all_children
}

while (length(cl) > reach) {
cs <- c()
for (c in cl) {
cs <- append(cs, length(c))
}
smallest_i <- which.min(cs)
tip1 <- which(tree$tip.label == cl[[smallest_i]][1])
dists <- c()
for (i in 1:length(cl)) {
tip2 <- which(tree$tip.label == cl[[i]][1])
dists <- append(dists, trivial_dist(tree, tip1, tip2))
}
dists[smallest_i] <- dists[smallest_i] + max(dists)
closest_cluster_i <- which.min(dists)
cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]])
cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL
while (length(cl) > target) {
cs <- c()
for (c in cl) {
cs <- append(cs, length(c))
}

smallest_i <- which.min(cs)
tip1 <- which(tree$tip.label == cl[[smallest_i]][1])
dists <- c()
for (i in 1:length(cl)) {
tip2 <- which(tree$tip.label == cl[[i]][1])
dists <- append(dists, trivial_dist(tree, tip1, tip2))
}

dists[smallest_i] <- dists[smallest_i] + max(dists)
closest_cluster_i <- which.min(dists)
cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]])
cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL
}

return(cl)
}



#' Generate clusters for a tree of minimum size (unless children of root)
#' Generate clade-clusters for a tree of minimum size (unless children of root)
#'
#'
#' @param tree object of class phylo
#' @param minSize minimum clade size for a clade to be expanded
#' @return List of clusters, each entry being a vector of tips representing
#' samples in the cluster.
#' WARNING: This won't work well for trees with broad multifurcations.
#' @export
treeClusterMinCladeSize <- function(tree, minSize=30) {
nodeLabels <- tree$node.label
numC <- length(tree$tip.label)
Expand Down
14 changes: 11 additions & 3 deletions R/Projections.R
Original file line number Diff line number Diff line change
Expand Up @@ -378,20 +378,28 @@ setMethod("computeKNNWeights", signature(object = "matrix"),
)


#' compute for each vector the weights to apply to it's K nearest neighbors
#' Compute for each vector the weights to apply to its K nearest neighbors
#'
#' @importFrom Matrix rowSums
#' @importFrom Matrix sparseMatrix
#' @importFrom matrixStats rowMaxs
#' @param object tree to use for KNN
#' @param K Number of neighbors to consider.
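#' @param lcaKNN Whether to use LCA-based KNN (clusters by minimum clade size). If FALSE, neighbors are found by cophenetic distance with random tie breaking.
#' WARNING: lcaKNN doesn't perform well with broad multifurcations.
#' @param minSize Minimum clade size passed to the LCA-based KNN; only used when lcaKNN is TRUE.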
#' @return a list of two items:
#' indices: matrix, cells X neighbors
#' Each row specifies indices of nearest neighbors
#' weights: matrix, cells X neighbors
#' Corresponding weights to nearest neighbors
setMethod("computeKNNWeights", signature(object = "phylo"),
function(object, K = round(sqrt(length(object$tip.label)))) {
k <- find_knn_parallel_tree(object, K)
function(object, K = round(sqrt(length(object$tip.label))), lcaKNN=FALSE, minSize=20) {
if (lcaKNN) {
k <- lcaBasedTreeKNN(object, minSize = minSize)
} else {
k <- find_knn_parallel_tree(object, K)
}
Comment on lines +396 to +401
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unclear -- why do we have two separate options here?

I propose we populate the KNN before this step and pass it in with the object (either as a slot or as an extra argument). It makes less sense to me to have a boolean argument here specifying the approach because it can cause downstream inconsistencies if another function uses a different approach for computing the KNN graph.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updating docstring to:
#' @param lcaKNN whether to use LCA based KNN (cluster by minimum size), if false defaults to cophenetic distance (random tie breaking).
#' WARNING: lcaKNN doesn't perform well with broad multifurcations

This option is for when you want the LCA-based KNN, which uses all cells in a clade as neighbors if the clade size exceeds the minimum size. This function is the first place the KNN is calculated for the object.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This docstring is a lot more clear now!

I wonder whether we should add a new step in the pipeline that just calculates and populates the KNN graph. That way we could simply access it again whenever we need the KNN graph.



nn <- k[[1]]
d <- k[[2]]
Expand Down
Loading