44import numpy as np
55import pandas as pd
66from .timer import Timer
7+ from .onlineCoexpression import onlineCoexpression
78from joblib .externals .loky import set_loky_pickler
89from joblib import parallel_backend
910from joblib import Parallel , delayed
@@ -67,6 +68,8 @@ class Lioness(Panda):
6768 samples as column name
6869 ignore_final: bool
6970 if True, no lioness network is kept in memory. This requires saving single networks at each step
71+ online_coexpression: bool
72+ if True, each LIONESS correlation is computed using the online coexpression method.
7073 Returns
7174 --------
7275 export_lioness_results : _
@@ -127,6 +130,7 @@ def __init__(
127130 save_single = False ,
128131 export_filename = None ,
129132 ignore_final = False ,
133+ online_coexpression = False ,
130134 ):
131135 """ Initialize instance of Lioness class and load data.
132136 """
@@ -168,6 +172,7 @@ def __init__(
168172 self .gene_names = gene_names
169173 self .tf_names = tf_names
170174 del obj
175+ self .online_coexpression = online_coexpression
171176
172177 # Get sample range to iterate
173178 # the number of conditions is the N parameter used for the number of samples in the whole background
@@ -217,9 +222,33 @@ def __init__(
217222 # We create the folder
218223 if not os .path .exists (save_dir ):
219224 os .makedirs (save_dir )
225+
226+ # if we are using online_coexpression
227+ if self .online_coexpression :
228+ self .mi_all = np .mean (self .expression_matrix , axis = 1 )
229+ # Here we need to specify the ddof=1 to get the estimator that matches corrcoef
230+ self .std_all = np .std (self .expression_matrix , axis = 1 , ddof = 1 )
231+ self .cov_all = np .cov (self .expression_matrix )
232+ # We need to set the diagonal to 1 if na and 0 elsewhere
233+ # Get indices of the matrix
234+ rows , cols = np .indices (self .cov_all .shape )
235+ # Create mask for NaNs
236+ nan_mask = np .isnan (self .cov_all )
237+ # Replace NaNs based on diagonal condition
238+ self .cov_all [nan_mask & (rows == cols )] = 1 # Diagonal NaNs → 1
239+ self .cov_all [nan_mask & (rows != cols )] = 0 # Off-diagonal NaNs → 0
240+
241+ # we need n_conditions as number of samples
242+ assert (self .n_conditions > 0 )
243+
244+
245+
246+
220247 #############
221248 # Run LIONESS
222249 #############
250+
251+
223252 if int (self .n_conditions ) >= int (self .n_cores ) and self .computing == "cpu" :
224253 # the total_lioness_network here is a list of 1d
225254 # arrays (network:(tfXgene,1),gene_targeting:(gene,1),tf_targeting:(tf,1))
@@ -231,6 +260,11 @@ def __init__(
231260 for i in self .indexes :
232261 self .total_lioness_network = self .__lioness_loop (i )
233262 self .total_lioness_network = self .total_lioness_network .T
263+
264+ #############
265+ # OUTPUT ####
266+ #############
267+
234268 # create result data frame
235269 if self .ignore_final :
236270 print ('WARNING: we do not keep all lionesses in memory. They have been saved singularly.' )
@@ -279,34 +313,47 @@ def __init__(
279313 else :
280314 self .save_lioness_results ()
281315
282- def __lioness_loop (self , i ):
283- #TODO: this is now for GPU only in practice
284- """ Initialize instance of Lioness class and load data.
316+ def __compute_subset_panda (self ,i ):
317+ """ Compute the subset panda network using the correlation matrix and the motif matrix.
285318
319+ Parameters
320+ ----------
321+ correlation_matrix: array
322+ The coexpression network to be used for computing the subset panda network.
323+
286324 Returns
287- --------
288- self.total_lioness_network : array
289- An edge-by-sample matrix containing sample-specific networks .
325+ -------
326+ subset_panda_network : array
327+ The subset panda network .
290328 """
291- # for i in self.indexes:
292- print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
293329 idx = [x for x in range (self .n_conditions ) if x != i ] # all samples except i
294- with Timer ("Computing coexpression network:" ):
295- if self .computing == "gpu" :
296- import cupy as cp
297-
298- correlation_matrix_cp = cp .corrcoef (self .expression_matrix [:, idx ].astype (self .np_dtype )).astype (self .np_dtype )
299- if cp .isnan (correlation_matrix_cp ).any ():
300- cp .fill_diagonal (correlation_matrix_cp , 1 )
301- correlation_matrix_cp = cp .nan_to_num (correlation_matrix_cp )
302- correlation_matrix = cp .asnumpy (correlation_matrix_cp )
303- del correlation_matrix_cp
304- cp ._default_memory_pool .free_all_blocks ()
305- else :
306- correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
307- if np .isnan (correlation_matrix ).any ():
308- np .fill_diagonal (correlation_matrix , 1 )
309- correlation_matrix = np .nan_to_num (correlation_matrix )
330+
331+ if self .online_coexpression :
332+ print ("Computing online coexpression" )
333+ correlation_matrix = onlineCoexpression (
334+ self .expression_matrix [:, i ],
335+ self .n_conditions ,
336+ self .mi_all ,
337+ self .std_all ,
338+ self .cov_all ,
339+ )
340+ else :
341+ with Timer ("Computing coexpression network:" ):
342+ if self .computing == "gpu" :
343+ import cupy as cp
344+
345+ correlation_matrix_cp = cp .corrcoef (self .expression_matrix [:, idx ].astype (self .np_dtype )).astype (self .np_dtype )
346+ if cp .isnan (correlation_matrix_cp ).any ():
347+ cp .fill_diagonal (correlation_matrix_cp , 1 )
348+ correlation_matrix_cp = cp .nan_to_num (correlation_matrix_cp )
349+ correlation_matrix = cp .asnumpy (correlation_matrix_cp )
350+ del correlation_matrix_cp
351+ cp ._default_memory_pool .free_all_blocks ()
352+ else :
353+ correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
354+ if np .isnan (correlation_matrix ).any ():
355+ np .fill_diagonal (correlation_matrix , 1 )
356+ correlation_matrix = np .nan_to_num (correlation_matrix )
310357
311358 with Timer ("Normalizing networks:" ):
312359 correlation_matrix_orig = (
@@ -328,6 +375,23 @@ def __lioness_loop(self, i):
328375 del correlation_matrix
329376 subset_panda_network = correlation_matrix_orig
330377
378+ return subset_panda_network
379+
380+ def __lioness_loop (self , i ):
381+ #TODO: this is now for GPU only in practice
382+ """ Initialize instance of Lioness class and load data.
383+
384+ Returns
385+ --------
386+ self.total_lioness_network: array
387+ An edge-by-sample matrix containing sample-specific networks.
388+ """
389+ # for i in self.indexes:
390+ print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
391+
392+ # get subset panda network
393+ subset_panda_network = self .__compute_subset_panda (i )
394+
331395 # For consistency with R, we are using the N panda_all - (N-1) panda_all_but_q
332396 lioness_network = (self .n_conditions * self .network ) - (
333397 (self .n_conditions - 1 ) * subset_panda_network
@@ -367,7 +431,7 @@ def __lioness_loop(self, i):
367431
368432 @delayed
369433 @wrap_non_picklable_objects
370- def __par_lioness_loop (self , i , output ):
434+ def __par_lioness_loop (self , i , output , online_coexpression = None ):
371435 """ Initialize instance of Lioness class and load data.
372436
373437 Returns
@@ -376,44 +440,10 @@ def __par_lioness_loop(self, i, output):
376440 An edge-by-sample matrix containing sample-specific networks.
377441 """
378442 # for i in self.indexes:
379- print ("Running LIONESS for sample %d:" % (i + 1 ))
380- idx = [x for x in range (self .n_conditions ) if x != i ] # all samples except i
381- with Timer ("Computing coexpression network:" ):
382- if self .computing == "gpu" :
383- import cupy as cp
384-
385- correlation_matrix = cp .corrcoef (self .expression_matrix [:, idx ])
386- if cp .isnan (correlation_matrix ).any ():
387- cp .fill_diagonal (correlation_matrix , 1 )
388- correlation_matrix = cp .nan_to_num (correlation_matrix )
389- correlation_matrix = cp .asnumpy (correlation_matrix )
390- else :
391- # run on CPU with numpy
392- correlation_matrix = np .corrcoef (self .expression_matrix [:, idx ])
393- if np .isnan (correlation_matrix ).any ():
394- np .fill_diagonal (correlation_matrix , 1 )
395- correlation_matrix = np .nan_to_num (correlation_matrix )
396-
397- with Timer ("Normalizing networks:" ):
398- correlation_matrix_orig = (
399- correlation_matrix # save matrix before normalization
400- )
401- correlation_matrix = self ._normalize_network (correlation_matrix )
443+ print ("Running LIONESS for sample %d/%d:" % ((i ),(self .n_conditions )))
402444
403- with Timer ("Inferring LIONESS network:" ):
404- # TODO: fix this correlation matrix+delete
405- if self .motif_matrix is not None :
406- del correlation_matrix_orig
407- subset_panda_network = compute_panda (
408- correlation_matrix ,
409- np .copy (self .ppi_matrix ),
410- np .copy (self .motif_matrix ),
411- computing = self .computing ,
412- alpha = self .alpha ,
413- )
414- else :
415- del correlation_matrix
416- subset_panda_network = correlation_matrix_orig
445+ # get subset panda network
446+ subset_panda_network = self .__compute_subset_panda (i )
417447
418448 # For consistency with R, we are using the N panda_all - (N-1) panda_all_but_q
419449 #lioness_network = self.n_conditions * (self.network - subset_panda_network) + subset_panda_network
0 commit comments