Skip to content
14 changes: 6 additions & 8 deletions R/IRWLS.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,11 @@ solveIRWLS.weights <-function(S,B,nUMI, OLS=FALSE, constrain = TRUE, verbose = F
#solution <- runif(length(solution))*2 / length(solution) # random initialization
names(solution) <- colnames(S)

S_mat <<- matrix(0,nrow = dim(S)[1],ncol = dim(S)[2]*(dim(S)[2] + 1)/2)
counter = 1
for(i in 1:dim(S)[2])
for(j in i:dim(S)[2]) {
S_mat[,counter] <<- S[,i] * S[,j] # depends on n^2
counter <- counter + 1
}
numCols <- ncol(S)
Index <- which(upper.tri(matrix(0, ncol = numCols, nrow = numCols), diag = TRUE), arr.ind = TRUE)
Index <- Index[order(Index[, 1], Index[, 2]), ,drop=F]
S_mat <<- S[, Index[, 1]] * S[, Index[, 2]]


iterations<-0 #now use dampened WLS, iterate weights until convergence
changes<-c()
Expand Down Expand Up @@ -83,7 +81,7 @@ solveWLS<-function(S,B,initialSol, nUMI, bulk_mode = F, constrain = F){
threshold = max(1e-4, nUMI * 1e-7)
prediction[prediction < threshold] <- threshold
gene_list = rownames(S)
derivatives <- get_der_fast(S, B, gene_list, prediction, bulk_mode = bulk_mode)
derivatives <- get_der_fast(S, B, gene_list, prediction[,1], bulk_mode = bulk_mode)
d_vec <- -derivatives$grad
D_mat <- psd(derivatives$hess)
norm_factor <- norm(D_mat,"2")
Expand Down
121 changes: 87 additions & 34 deletions R/RCTD_helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,80 +64,133 @@ check_pairs_type <- function(cell_type_profiles, bead, UMI_tot, score_mat, min_s
}

#Decomposing a single bead via doublet search
process_bead_doublet <- function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) {
process_bead_doublet <-function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25, OLS = FALSE,
n.iter = 50, bulk_mode = FALSE)
{

cell_type_profiles <- cell_type_info[[1]][gene_list,]
cell_type_profiles = cell_type_profiles * UMI_tot
cell_type_profiles = data.matrix(cell_type_profiles)
QL_score_cutoff = CONFIDENCE_THRESHOLD; doublet_like_cutoff = DOUBLET_THRESHOLD
results_all = decompose_full(cell_type_profiles, UMI_tot, bead, constrain = constrain, verbose = verbose, MIN_CHANGE = MIN.CHANGE)
QL_score_cutoff = CONFIDENCE_THRESHOLD
doublet_like_cutoff = DOUBLET_THRESHOLD

results_all <- solveIRWLS.weights(cell_type_profiles,bead,UMI_tot,OLS = OLS, constrain = constrain,
verbose = verbose, n.iter = n.iter, MIN_CHANGE = MIN.CHANGE, bulk_mode = bulk_mode)

all_weights <- results_all$weights
conv_all <- results_all$converged
initial_weight_thresh = 0.01; cell_type_names = cell_type_info[[2]]
initial_weight_thresh = 0.01
cell_type_names = cell_type_info[[2]]
candidates <- names(which(all_weights > initial_weight_thresh))

if(length(candidates) == 0)
{
candidates = cell_type_info[[2]][1:min(3,cell_type_info[[3]])]
if(length(candidates) == 1)

}else if(length(candidates) == 1)
{
if(candidates[1] == cell_type_info[[2]][1])
{
candidates = c(candidates, cell_type_info[[2]][2])
else

}else{

candidates = c(candidates, cell_type_info[[2]][1])
}

}

score_mat = Matrix(0, nrow = length(candidates), ncol = length(candidates))
rownames(score_mat) = candidates; colnames(score_mat) = candidates
rownames(score_mat) = candidates
colnames(score_mat) = candidates
singlet_scores <- numeric(length(candidates))
names(singlet_scores) <- candidates
for(type in candidates) {

for(type in candidates)
{

singlet_scores[type] <- get_singlet_score(cell_type_profiles, bead, UMI_tot,
type, constrain, MIN.CHANGE = MIN.CHANGE)
}


min_score = 0
first_type = NULL; second_type = NULL
first_class = F; second_class = F #indicates whether the first (resp second) refers to a class rather than a type
for(i in 1:(length(candidates)-1)) {
type1 = candidates[i]
for(j in (i+1):length(candidates)) {
type2 = candidates[j]
score = decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
score_mat[i,j] = score; score_mat[j,i] = score
if(is.null(second_type) || score < min_score) {
first_type <- type1; second_type <- type2
min_score = score
first_class = F
second_class = F #indicates whether the first (resp second) refers to a class rather than a type

for(i in 1:(length(candidates)-1))
{
type1 <- candidates[i]

for(j in (i+1):length(candidates))
{
type2 <- candidates[j]
score <- decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
score_mat[i,j] <- score
score_mat[j,i]<- score

if(is.null(second_type) || score < min_score)
{
first_type <- type1
second_type <- type2
min_score <- score
}
}

}
type1_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
type2_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {

type1_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
type2_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
{
spot_class <- "reject"
singlet_score = min_score + 2 * doublet_like_cutoff #arbitrary
}
else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {

}else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
{
first_class <- !type1_pres$all_pairs
singlet_score = type1_pres$singlet_score
spot_class = "doublet_uncertain"
} else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class) {

}else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class)
{
first_class <- !type2_pres$all_pairs
singlet_score = type2_pres$singlet_score
temp = first_type; first_type = second_type; second_type = temp
spot_class = "doublet_uncertain"
} else {
}else{
spot_class = "doublet_certain"
singlet_score = min(type1_pres$singlet_score, type2_pres$singlet_score)
first_class <- !type1_pres$all_pairs; second_class <- !type2_pres$all_pairs
if(type2_pres$singlet_score < type1_pres$singlet_score) {
temp = first_type; first_type = second_type; second_type = temp
first_class <- !type2_pres$all_pairs; second_class <- !type1_pres$all_pairs

if(type2_pres$singlet_score < type1_pres$singlet_score)
{
temp = first_type; first_type = second_type
second_type = temp
first_class <- !type2_pres$all_pairs
second_class <- !type1_pres$all_pairs
}

}

if(singlet_score - min_score < doublet_like_cutoff)
spot_class = "singlet"
{
spot_class <- "singlet"
}

doublet_results = decompose_sparse(cell_type_profiles, UMI_tot, bead, first_type, second_type, constrain = constrain, MIN.CHANGE = MIN.CHANGE)
doublet_weights = doublet_results$weights; conv_doublet = doublet_results$converged
spot_class <- factor(spot_class, c("reject", "singlet", "doublet_certain", "doublet_uncertain"))
return(list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
first_class = first_class, second_class = second_class))

Rx<-list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
first_class = first_class, second_class = second_class)

return(Rx)

}

#Decomposing a single bead via doublet search
Expand Down
108 changes: 93 additions & 15 deletions R/platform_effect_normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,43 +67,121 @@ chooseSigma <- function(prediction, counts, Q_mat_all, X_vals, sigma) {
#' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}.
#' @export
choose_sigma_c <- function(RCTD) {
puck = RCTD@spatialRNA; MIN_UMI = RCTD@config$UMI_min_sigma; sigma = 100
#Q_mat_all <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/Q_mat_c.rds')

message('Step 2/4: Choose Sigma')
puck <- RCTD@spatialRNA
MIN_UMI <- RCTD@config$UMI_min_sigma
sigma <- 100

Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexr"))
Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexr"))
Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexr"))
Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexr"))
Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexr"))

Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5)
sigma_vals <- names(Q_mat_all)
#X_vals <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/X_vals.rds')

X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexr"))

#get initial classification
N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI))
if(N_fit == 0) {
stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
}

fit_ind = sample(names(puck@nUMI[puck@nUMI > MIN_UMI]), N_fit)
beads = t(as.matrix(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind]))
message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
for(iter in 1:RCTD@config$N_epoch) {
beads = t(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind])

#message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
#print(paste0("N_epoch: ",RCTD@config$N_epoch))

nUMI <- puck@nUMI[fit_ind]
cell_type_means <- RCTD@cell_type_info$renorm[[1]]
gene_list <- RCTD@internal_vars$gene_list_reg
constrain <- FALSE
max_cores <- RCTD@config$max_cores

if(max_cores > 1)
{
message(paste0("Multicore enabled using ", max_cores," cores"))
registerDoParallel(cores=max_cores)
}

NN<-nrow(beads)
pb <- txtProgressBar(min = 0, max = RCTD@config$N_epoch, style = 3)

for(iter in 1:RCTD@config$N_epoch)
{
set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
#message(paste('chooseSigma: getting initial weights for #samples: ',N_fit))
results = decompose_batch(puck@nUMI[fit_ind], RCTD@cell_type_info$renorm[[1]], beads, RCTD@internal_vars$gene_list_reg, constrain = F, max_cores = RCTD@config$max_cores)
weights = Matrix(0, nrow = N_fit, ncol = RCTD@cell_type_info$renorm[[3]])
rownames(weights) = fit_ind; colnames(weights) = RCTD@cell_type_info$renorm[[2]];
for(i in 1:N_fit)
weights[i,] = results[[i]]$weights

if(max_cores>1)
{

results<- foreach(i = 1:NN) %dopar% {

#set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
beads[i,],
nUMI[i],
OLS = FALSE,
constrain = FALSE,
verbose = FALSE,
n.iter = 50,
MIN_CHANGE = 0.001,
bulk_mode = FALSE)

return(weights)
}


}else{

results<-vector("list",length=nrow(beads))

for(i in 1:nrow(beads))
{
set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
beads[i,],
nUMI[i],
OLS = FALSE,
constrain = FALSE,
verbose = FALSE,
n.iter = 50,
MIN_CHANGE = 0.001,
bulk_mode = FALSE)
results[[i]]<-weights

}


}

weights<- do.call(rbind,lapply(results,function(X){return(X$weights)}))
weights<-as(weights,"dgCMatrix")
rownames(weights) <- fit_ind
colnames(weights) <- RCTD@cell_type_info$renorm[[2]]
prediction <- sweep(as.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]) %*% t(as.matrix(weights)), 2, puck@nUMI[fit_ind], '*')
message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
#message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
sigma_prev <- sigma
sigma <- chooseSigma(prediction, t(beads), Q_mat_all, X_vals, sigma)
message(paste('Sigma value: ', sigma/100))

if(sigma == sigma_prev)
{
message(paste0(RCTD@config$N_epoch,"/",RCTD@config$N_epoch))
break
}

setTxtProgressBar(pb, iter)
}

setTxtProgressBar(pb, iter)

close(pb)
RCTD@internal_vars$sigma <- sigma/100
RCTD@internal_vars$Q_mat <- Q_mat_all[[as.character(sigma)]]
RCTD@internal_vars$X_vals <- X_vals

return(RCTD)
}
Loading