YosefLab · Yanay1 · Jun 15, 2021 · Mar 24, 2021 · Mar 30, 2021 · Mar 30, 2021
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,9 +1,10 @@
 Package: VISION
 Title: Functional interpretation of single cell RNA-seq latent manifolds
-Version: 2.1.0
+Version: 3.0.0
 Authors@R: c(person("Matt", "Jones", email = "[email protected]", role = c("aut", "cre")),
 			 person("David", "Detomaso", email = "[email protected]", role = c("aut", "cre")),
 			 person("Tal", "Ashuach", email = "[email protected]", role = c("aut")),
+			 person("Yanay", "Rosen", email = "[email protected]", role = c("aut")),
 			 person("Nir", "Yosef", email = "[email protected]", role = c("ctb", "cph")))
 Author: Matt Jones [aut, cre], David Detomaso [aut, cre], Tal Ashuach [aut], Nir Yosef [ctb]
 Maintainer: Matt Jones <[email protected]>
@@ -40,7 +41,7 @@ Encoding: UTF-8
 LazyData: true
 URL: https://yoseflab.github.io/VISION, https://github.com/yoseflab/VISION
 BugReports: https://github.com/YosefLab/VISION/issues
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 Suggests:
 	Biobase,
 	BiocStyle,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,28 +1,49 @@
 # Generated by roxygen2: do not edit by hand
 
-export(Vision)
 export(PhyloVision)
+export(Vision)
+export(addHotspotToVision)
 export(addSignatures)
 export(addTSNE)
 export(addUMAP)
+export(analyzeHotspotObjectVision)
 export(analyzeLocalCorrelations)
 export(analyzeLocalCorrelationsModules)
 export(annotateLatentComponents)
 export(applyMicroClustering)
 export(calcModuleScores)
 export(calcSignatureScores)
-export(calcHotspotModules)
+export(calc_mod_sig_enrichment)
+export(calc_set_enrichment)
+export(clusterModScores)
 export(clusterSigScores)
 export(computeLatentSpace)
 export(convertGeneIds)
 export(createGeneSignature)
+export(depthBasedCladewiseTreeCluster)
+export(depthBasedTreeCluster)
+export(draw_hotspot_heatmap)
+export(generateOverlapSignatures)
+export(group_modules_enrichment)
+export(hsCalculateModuleScores)
+export(hsComputeAutoCorrelations)
+export(hsComputeLocalCorrelations)
+export(hsCreateKnnGraph)
+export(hsInit)
+export(lca_based_depth)
+export(loadHotspotObject)
 export(poolMatrixCols)
 export(poolMatrixRows)
 export(poolMetaData)
 export(read_10x)
 export(read_10x_h5)
 export(read_10x_h5_v2)
 export(read_10x_h5_v3)
+export(runHotspot)
+export(saveHSBytestToPickle)
+export(treeClusterMinCladeSize)
+export(trivial_dist)
+exportMethods(PhyloVision)
 exportMethods(Vision)
 exportMethods(addProjection)
 exportMethods(analyze)
@@ -39,7 +60,6 @@ exportMethods(phyloAnalyze)
 exportMethods(saveAndViewResults)
 exportMethods(viewResults)
 import(Matrix)
-import(dplyr)
 import(Rcpp)
 import(ape)
 import(loe)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# VISION 3.0.0
+
+Added support for phylogenies as latent spaces in core VISION.
+
+Integrated [Hotspot](https://yoseflab.github.io/Hotspot/index.html) into VISION analysis and report UI.
+
+Deprecated support for trajectories and LC Annotator.
+
 # VISION 2.1.0
 
 Added parameter `sig_gene_threshold` with **changed default behavior**

diff --git a/R/AllClasses.R b/R/AllClasses.R
@@ -10,6 +10,8 @@ setClassUnion("numericORNULL", members = c("numeric", "NULL"))
 setClassUnion("matrixORSparse", members = c("matrix", "dgCMatrix"))
 setClassUnion("matrixORNULL", members = c("matrix", "NULL"))
 setClassUnion("dataframeORNULL", members = c("data.frame", "NULL"))
+setClassUnion("rawORNULL", members = c("raw", "NULL"))
+
 
 # setClassUnion("treeorNull", members=c("phylo", "NULL"))
 # setClassUnion("pythonorNull", members = c("python.builtin.object", "NULL"))
@@ -121,7 +123,7 @@ Vision <- setClass("Vision",
         Pools = "list",
         LatentSpace = "matrix",
         LatentTrajectory = "trajectoryORNULL",
-        Hotspot = "list",
+        Hotspot = "rawORNULL",
         ModuleSignatureEnrichment = "list",
         ModuleHotspotScores = "data.frame",
         Viewer = "list",
@@ -148,7 +150,7 @@ Vision <- setClass("Vision",
         Pools = list(),
         LatentSpace = matrix(NA, 1, 1),
         LatentTrajectory = NULL,
-        Hotspot = list(),
+        Hotspot = NULL,
         ModuleSignatureEnrichment = list(),
         ModuleHotspotScores = data.frame(),
         Viewer = list(),

diff --git a/R/AllGenerics.R b/R/AllGenerics.R
@@ -4,6 +4,8 @@ setGeneric("Vision", function(data, ...) {
     standardGeneric("Vision")
 })
 
+#' @rdname PhyloVision-class
+#' @export
 setGeneric("PhyloVision", function(tree, ...) {
   standardGeneric("PhyloVision")
 })

diff --git a/R/AnalysisFunctions.R b/R/AnalysisFunctions.R
@@ -18,7 +18,7 @@ clusterCells <- function(object, tree=FALSE) {
     } else {
         message("Using Tree to compute clusters...\n")
         # Get the MRCA matrix and convert the node indexes to depths
-        cl <- treeCluster3(object@tree)
+        cl <- maxSizeCladewiseTreeCluster(object@tree)
     }
 
     names(cl) <- paste('Cluster', seq(length(cl)))

diff --git a/R/Microclusters.R b/R/Microclusters.R
@@ -326,6 +326,7 @@ readjust_clusters <- function(clusters, data, cellsPerPartition=100) {
     return(clusters)
 }
 
+
 #' Pools columns of a numeric matrix
 #'
 #' Uses the provided pools to merge columns of the supplied data matrix
@@ -383,6 +384,7 @@ poolMatrixRows <- function(data, pools) {
     return(pooled_data)
 }
 
+
 #' create "super-cells" by pooling together single cells
 #' @param expr expression data (genes x cells matrix)
 #' @param pools cluster association of each cell
@@ -418,13 +420,15 @@ poolMatrixCols_Inner <- function(expr, pools) {
 
 
 #' Performs a binary search on a depth d such that 
-#' if depth(u, v) <= d then u and v are in the same cluster
+#' if depth(LCA(u, v)) <= d then u and v are in the same cluster
 #'
 #' @param tree object of class phylo
-#' @param reach number of clusters to attempt to generate
+#' @param target number of clusters to attempt to generate
 #' @return List of clusters, each entry being a vector of indices representing
 #' samples in the cluster.
-treeCluster <- function(tree, reach=10) {
+#' 
+#' @export
+depthBasedTreeCluster <- function(tree, target=10) {
     high <- length(tree$tip.label)
     low <- 0
     while (T) {
@@ -453,12 +457,12 @@ treeCluster <- function(tree, reach=10) {
             }
         }
 
-        if (num_clusters >= reach) {
+        if (num_clusters >= target) {
             if(low == d) {
                 break
             }
             low <- d
-        } else if (num_clusters < reach) {
+        } else if (num_clusters < target) {
             if(high == d) {
                 break
             }
@@ -470,15 +474,17 @@ treeCluster <- function(tree, reach=10) {
 }
 
 
-#' Performs a breadth first search to create a specific number of clusters
-#' Clusters are split based on depth
+#' Performs a breadth first search to create a specific number of clusters.
+#' Clusters are split based on depth.
 #'
 #' @param tree object of class phylo
-#' @param reach number of clusters to attempt to generate
+#' @param target number of clusters to attempt to generate
 #' @return List of clusters, each entry being a vector of indices representing
 #' samples in the cluster.
-treeCluster2 <- function(tree, reach=10) {
-    if (reach > length(tree$tip.label)) {
+#'
+#' @export
+depthBasedCladewiseTreeCluster <- function(tree, target=10) {
+    if (target > length(tree$tip.label)) {
         stop("Number of clusters is too high.")
     }
 
@@ -498,7 +504,7 @@ treeCluster2 <- function(tree, reach=10) {
           cluster_parents[[as.name(child)]] <- node_depths[child]
       }
 
-      if (length(cluster_parents) >= reach) {
+      if (length(cluster_parents) >= target) {
           break
       }
     }
@@ -516,18 +522,17 @@ treeCluster2 <- function(tree, reach=10) {
 
 
 #' Performs a breadth first search to create a specific number of clusters
-#' Clusters are split to prioritize cluster size
+#' Clusters are split to prioritize max cluster size
 #'
 #' @param tree object of class phylo
-#' @param reach number of clusters to attempt to generate
+#' @param target number of clusters to attempt to generate
 #' @return List of clusters, each entry being a vector of tips representing
 #' samples in the cluster.
-treeCluster3 <- function(tree, reach=10) {
-  if (reach > length(tree$tip.label)) {
+maxSizeCladewiseTreeCluster <- function(tree, target=10) {
+  if (target > length(tree$tip.label)) {
     stop("Number of clusters is too high.")
   }
 
-  # node_depths <- node.depth(tree)
   root <- find_root(tree)
   cluster_parents <- c()
   cluster_parents[[as.name(root)]] <- get_max_cluster_size(tree, root)
@@ -556,36 +561,38 @@ treeCluster3 <- function(tree, reach=10) {
     cl[[cluster]] <- all_children
   }
 
-  while (length(cl) > reach) {
-      cs <- c()
-      for (c in cl) {
-          cs <- append(cs, length(c))
-      }
-      
-      smallest_i <- which.min(cs)
-      tip1 <- which(tree$tip.label == cl[[smallest_i]][1])
-      dists <- c()
-      for (i in 1:length(cl)) {
-          tip2 <- which(tree$tip.label == cl[[i]][1])
-          dists <- append(dists, trivial_dist(tree, tip1, tip2))
-      }
-      
-      dists[smallest_i] <- dists[smallest_i] + max(dists)
-      closest_cluster_i <- which.min(dists)
-      cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]])
-      cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL
+  while (length(cl) > target) {
+    cs <- c()
+    for (c in cl) {
+      cs <- append(cs, length(c))
+    }
+
+    smallest_i <- which.min(cs)
+    tip1 <- which(tree$tip.label == cl[[smallest_i]][1])
+    dists <- c()
+    for (i in 1:length(cl)) {
+      tip2 <- which(tree$tip.label == cl[[i]][1])
+      dists <- append(dists, trivial_dist(tree, tip1, tip2))
+    }
+
+    dists[smallest_i] <- dists[smallest_i] + max(dists)
+    closest_cluster_i <- which.min(dists)
+    cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]])
+    cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL
   }
 
   return(cl)
 }
 
 
-
-#' Generate clusters for a tree of minimum size (unless children of root)
+#' Generate clade-clusters for a tree of minimum size (unless children of root)
+#' 
 #'
 #' @param tree object of class phylo
 #' @param minSize minimum clade size for a clade to be expanded
 #' @return List of clusters, each entry being a vector of tips representing
+#' samples in the cluster. WARNING: This won't work well for trees with broad multifurcations.
+#' @export
 treeClusterMinCladeSize <- function(tree, minSize=30) {
   nodeLabels <- tree$node.label
   numC <- length(tree$tip.label)

diff --git a/R/Projections.R b/R/Projections.R
@@ -378,20 +378,28 @@ setMethod("computeKNNWeights", signature(object = "matrix"),
 )
 
 
-#' compute for each vector the weights to apply to it's K nearest neighbors
+#' Compute for each vector the weights to apply to its K nearest neighbors
+#' 
 #' @importFrom Matrix rowSums
 #' @importFrom Matrix sparseMatrix
 #' @importFrom matrixStats rowMaxs
 #' @param object tree to use for KNN
 #' @param K Number of neighbors to consider.
+#' @param lcaKNN whether to use LCA-based KNN (cluster by minimum size); if FALSE, cophenetic distance is used (random tie breaking). WARNING: lcaKNN doesn't perform well with broad multifurcations.
+#' @param minSize passed to lcaBasedTreeKNN as its minSize argument; only used when lcaKNN is TRUE.
 #' @return a list of two items:
 #'          indices: matrix, cells X neighbors
 #'              Each row specifies indices of nearest neighbors
 #'          weights: matrix, cells X neighbors
 #'              Corresponding weights to nearest neighbors
 setMethod("computeKNNWeights", signature(object = "phylo"),
-    function(object, K = round(sqrt(length(object$tip.label)))) {
-        k <- find_knn_parallel_tree(object, K)
+    function(object, K = round(sqrt(length(object$tip.label))), lcaKNN=FALSE, minSize=20) {
+        if (lcaKNN) {
+          k <- lcaBasedTreeKNN(object, minSize = minSize)
+        } else {
+          k <- find_knn_parallel_tree(object, K)
+        }
+
 
         nn <- k[[1]]
         d <- k[[2]]