dmcable · jpromeror · Mar 8, 2024 · Mar 8, 2024 · Mar 8, 2024 · Mar 8, 2024
diff --git a/R/IRWLS.R b/R/IRWLS.R
@@ -34,13 +34,11 @@ solveIRWLS.weights <-function(S,B,nUMI, OLS=FALSE, constrain = TRUE, verbose = F
   #solution <- runif(length(solution))*2 / length(solution) # random initialization
   names(solution) <- colnames(S)
 
-  S_mat <<- matrix(0,nrow = dim(S)[1],ncol = dim(S)[2]*(dim(S)[2] + 1)/2)
-  counter = 1
-  for(i in 1:dim(S)[2])
-    for(j in i:dim(S)[2]) {
-      S_mat[,counter] <<- S[,i] * S[,j] # depends on n^2
-      counter <- counter + 1
-    }
+  numCols <- ncol(S)
+  Index <- which(upper.tri(matrix(0, ncol = numCols, nrow = numCols), diag = TRUE), arr.ind = TRUE)
+  Index <- Index[order(Index[, 1], Index[, 2]), ,drop=F]
+  S_mat <<- S[, Index[, 1]] * S[, Index[, 2]]
+
 
   iterations<-0 #now use dampened WLS, iterate weights until convergence
   changes<-c()
@@ -83,7 +81,7 @@ solveWLS<-function(S,B,initialSol, nUMI, bulk_mode = F, constrain = F){
   threshold = max(1e-4, nUMI * 1e-7)
   prediction[prediction < threshold] <- threshold
   gene_list = rownames(S)
-  derivatives <- get_der_fast(S, B, gene_list, prediction, bulk_mode = bulk_mode)
+  derivatives <- get_der_fast(S, B, gene_list, prediction[,1], bulk_mode = bulk_mode)
   d_vec <- -derivatives$grad
   D_mat <- psd(derivatives$hess)
   norm_factor <- norm(D_mat,"2")

diff --git a/R/RCTD_helper.R b/R/RCTD_helper.R
@@ -64,80 +64,133 @@ check_pairs_type <- function(cell_type_profiles, bead, UMI_tot, score_mat, min_s
 }
 
 #Decomposing a single bead via doublet search
-process_bead_doublet <- function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
-                                 MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) {
+process_bead_doublet <-function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
+                                MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25, OLS = FALSE,
+                                n.iter = 50, bulk_mode = FALSE)
+{
+
   cell_type_profiles <- cell_type_info[[1]][gene_list,]
   cell_type_profiles = cell_type_profiles * UMI_tot
   cell_type_profiles = data.matrix(cell_type_profiles)
-  QL_score_cutoff = CONFIDENCE_THRESHOLD; doublet_like_cutoff = DOUBLET_THRESHOLD
-  results_all = decompose_full(cell_type_profiles, UMI_tot, bead, constrain = constrain, verbose = verbose, MIN_CHANGE = MIN.CHANGE)
+  QL_score_cutoff = CONFIDENCE_THRESHOLD
+  doublet_like_cutoff = DOUBLET_THRESHOLD
+
+  results_all <- solveIRWLS.weights(cell_type_profiles,bead,UMI_tot,OLS = OLS, constrain = constrain,
+                                    verbose = verbose, n.iter = n.iter, MIN_CHANGE = MIN.CHANGE, bulk_mode = bulk_mode)
+
   all_weights <- results_all$weights
   conv_all <- results_all$converged
-  initial_weight_thresh = 0.01; cell_type_names = cell_type_info[[2]]
+  initial_weight_thresh = 0.01
+  cell_type_names = cell_type_info[[2]]
   candidates <- names(which(all_weights > initial_weight_thresh))
+
   if(length(candidates) == 0)
+  {
     candidates = cell_type_info[[2]][1:min(3,cell_type_info[[3]])]
-  if(length(candidates) == 1)
+
+  }else if(length(candidates) == 1)
+  {
     if(candidates[1] == cell_type_info[[2]][1])
+    {
       candidates = c(candidates, cell_type_info[[2]][2])
-    else
+
+    }else{
+
       candidates = c(candidates, cell_type_info[[2]][1])
+    }
+
+  }
+
   score_mat = Matrix(0, nrow = length(candidates), ncol = length(candidates))
-  rownames(score_mat) = candidates; colnames(score_mat) = candidates
+  rownames(score_mat) = candidates
+  colnames(score_mat) = candidates
   singlet_scores <- numeric(length(candidates))
   names(singlet_scores) <- candidates
-  for(type in candidates) {
+
+  for(type in candidates)
+  {
+
     singlet_scores[type] <- get_singlet_score(cell_type_profiles, bead, UMI_tot,
                                               type, constrain, MIN.CHANGE = MIN.CHANGE)
   }
+
+
   min_score = 0
   first_type = NULL; second_type = NULL
-  first_class = F; second_class = F #indicates whether the first (resp second) refers to a class rather than a type
-  for(i in 1:(length(candidates)-1)) {
-    type1 = candidates[i]
-    for(j in (i+1):length(candidates)) {
-      type2 = candidates[j]
-      score = decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
-      score_mat[i,j] = score; score_mat[j,i] = score
-      if(is.null(second_type) || score < min_score) {
-        first_type <- type1; second_type <- type2
-        min_score = score
+  first_class = F
+  second_class = F #indicates whether the first (resp second) refers to a class rather than a type
+
+  for(i in 1:(length(candidates)-1))
+  {
+    type1 <- candidates[i]
+
+    for(j in (i+1):length(candidates))
+    {
+      type2 <- candidates[j]
+      score <- decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
+      score_mat[i,j] <- score
+      score_mat[j,i]<- score
+
+      if(is.null(second_type) || score < min_score)
+      {
+        first_type <- type1
+        second_type <- type2
+        min_score <- score
       }
     }
+
   }
-  type1_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
-  type2_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
-  if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {
+
+  type1_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
+  type2_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
+  if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
+  {
     spot_class <- "reject"
     singlet_score = min_score + 2 * doublet_like_cutoff #arbitrary
-  }
-  else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {
+
+  }else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
+  {
     first_class <- !type1_pres$all_pairs
     singlet_score = type1_pres$singlet_score
     spot_class = "doublet_uncertain"
-  } else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class) {
+
+  }else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class)
+  {
     first_class <- !type2_pres$all_pairs
     singlet_score = type2_pres$singlet_score
     temp = first_type; first_type = second_type; second_type = temp
     spot_class = "doublet_uncertain"
-  } else {
+  }else{
     spot_class = "doublet_certain"
     singlet_score = min(type1_pres$singlet_score, type2_pres$singlet_score)
     first_class <- !type1_pres$all_pairs; second_class <- !type2_pres$all_pairs
-    if(type2_pres$singlet_score < type1_pres$singlet_score) {
-      temp = first_type; first_type = second_type; second_type = temp
-      first_class <- !type2_pres$all_pairs; second_class <- !type1_pres$all_pairs
+
+    if(type2_pres$singlet_score < type1_pres$singlet_score)
+    {
+      temp = first_type; first_type = second_type
+      second_type = temp
+      first_class <- !type2_pres$all_pairs
+      second_class <- !type1_pres$all_pairs
     }
+
   }
+
   if(singlet_score - min_score < doublet_like_cutoff)
-    spot_class = "singlet"
+  {
+    spot_class <- "singlet"
+  }
+
   doublet_results = decompose_sparse(cell_type_profiles, UMI_tot, bead, first_type, second_type, constrain = constrain, MIN.CHANGE = MIN.CHANGE)
   doublet_weights = doublet_results$weights; conv_doublet = doublet_results$converged
   spot_class <- factor(spot_class, c("reject", "singlet", "doublet_certain", "doublet_uncertain"))
-  return(list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
-              doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
-              conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
-              first_class = first_class, second_class = second_class))
+
+  Rx<-list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
+           doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
+           conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
+           first_class = first_class, second_class = second_class)
+
+  return(Rx)
+
 }
 
 #Decomposing a single bead via doublet search

diff --git a/R/platform_effect_normalization.R b/R/platform_effect_normalization.R
@@ -67,43 +67,121 @@ chooseSigma <- function(prediction, counts, Q_mat_all, X_vals, sigma) {
 #' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}.
 #' @export
 choose_sigma_c <- function(RCTD) {
-  puck = RCTD@spatialRNA; MIN_UMI = RCTD@config$UMI_min_sigma; sigma = 100
-  #Q_mat_all <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/Q_mat_c.rds')
+
+  message('Step 2/4: Choose Sigma')
+  puck <- RCTD@spatialRNA
+  MIN_UMI <- RCTD@config$UMI_min_sigma
+  sigma <- 100
+
   Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexr"))
   Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexr"))
   Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexr"))
   Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexr"))
   Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexr"))
+
   Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5)
   sigma_vals <- names(Q_mat_all)
-  #X_vals <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/X_vals.rds')
+
   X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexr"))
+
   #get initial classification
   N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI))
   if(N_fit == 0) {
-      stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
+    stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
   }
+
   fit_ind = sample(names(puck@nUMI[puck@nUMI > MIN_UMI]), N_fit)
-  beads = t(as.matrix(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind]))
-  message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
-  for(iter in 1:RCTD@config$N_epoch) {
+  beads = t(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind])
+
+  #message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
+  #print(paste0("N_epoch: ",RCTD@config$N_epoch))
+
+  nUMI <- puck@nUMI[fit_ind]
+  cell_type_means <- RCTD@cell_type_info$renorm[[1]]
+  gene_list <- RCTD@internal_vars$gene_list_reg
+  constrain <- FALSE
+  max_cores <- RCTD@config$max_cores
+
+  if(max_cores > 1)
+  {
+    message(paste0("Multicore enabled using ", max_cores," cores"))
+    registerDoParallel(cores=max_cores)
+  }
+
+  NN<-nrow(beads)
+  pb <- txtProgressBar(min = 0, max = RCTD@config$N_epoch, style = 3)
+
+  for(iter in 1:RCTD@config$N_epoch)
+  {
     set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
-    #message(paste('chooseSigma: getting initial weights for #samples: ',N_fit))
-    results = decompose_batch(puck@nUMI[fit_ind], RCTD@cell_type_info$renorm[[1]], beads, RCTD@internal_vars$gene_list_reg, constrain = F, max_cores = RCTD@config$max_cores)
-    weights = Matrix(0, nrow = N_fit, ncol = RCTD@cell_type_info$renorm[[3]])
-    rownames(weights) = fit_ind; colnames(weights) = RCTD@cell_type_info$renorm[[2]];
-    for(i in 1:N_fit)
-      weights[i,] = results[[i]]$weights
+
+    if(max_cores>1)
+    {
+
+      results<- foreach(i = 1:NN) %dopar% {
+
+        #set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
+        weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
+                                      beads[i,],
+                                      nUMI[i],
+                                      OLS = FALSE,
+                                      constrain = FALSE,
+                                      verbose = FALSE,
+                                      n.iter = 50,
+                                      MIN_CHANGE = 0.001,
+                                      bulk_mode = FALSE)
+
+        return(weights)
+      }
+
+
+    }else{
+
+      results<-vector("list",length=nrow(beads))
+
+      for(i in 1:nrow(beads))
+      {
+        set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
+        weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
+                                      beads[i,],
+                                      nUMI[i],
+                                      OLS = FALSE,
+                                      constrain = FALSE,
+                                      verbose = FALSE,
+                                      n.iter = 50,
+                                      MIN_CHANGE = 0.001,
+                                      bulk_mode = FALSE)
+        results[[i]]<-weights
+
+      }
+
+
+    }
+
+    weights<- do.call(rbind,lapply(results,function(X){return(X$weights)}))
+    weights<-as(weights,"dgCMatrix")
+    rownames(weights) <- fit_ind
+    colnames(weights) <- RCTD@cell_type_info$renorm[[2]]
     prediction <- sweep(as.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]) %*% t(as.matrix(weights)), 2, puck@nUMI[fit_ind], '*')
-    message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
+    #message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
     sigma_prev <- sigma
     sigma <- chooseSigma(prediction, t(beads), Q_mat_all, X_vals, sigma)
-    message(paste('Sigma value: ', sigma/100))
+
     if(sigma == sigma_prev)
+    {
+      message(paste0(RCTD@config$N_epoch,"/",RCTD@config$N_epoch))
       break
+    }
+
+    setTxtProgressBar(pb, iter)
   }
+
+  setTxtProgressBar(pb, iter)
+
+  close(pb)
   RCTD@internal_vars$sigma <- sigma/100
   RCTD@internal_vars$Q_mat <- Q_mat_all[[as.character(sigma)]]
   RCTD@internal_vars$X_vals <- X_vals
+
   return(RCTD)
 }