diff --git a/ms2pip/core.py b/ms2pip/core.py index 3ffd82b..fe50112 100644 --- a/ms2pip/core.py +++ b/ms2pip/core.py @@ -189,18 +189,34 @@ def predict_library( raise ValueError("Either `fasta_file` or `config` must be provided.") search_space = ProteomeSearchSpace.from_any(config) - search_space.build() + search_space.build(processes=processes) + + # Convert to PSMList + psm_list = search_space.to_psm_list() + + # Filter PSMs by mz + # TODO: Parallelize this step? + psm_list_filtered = search_space.filter_psms_by_mz(psm_list) + + # Add retention time and ion mobility predictions + if add_retention_time: + logger.info("Adding retention time predictions...") + rt_predictor = RetentionTime(processes=processes) + rt_predictor.add_rt_predictions(psm_list_filtered) + if add_ion_mobility: + logger.info("Adding ion mobility predictions...") + im_predictor = IonMobility(processes=processes) + im_predictor.add_im_predictions(psm_list_filtered) for batch in track( - _into_batches(search_space, batch_size=batch_size), + _into_batches(psm_list_filtered, batch_size=batch_size), description="Predicting spectra...", total=ceil(len(search_space) / batch_size), ): + logging.disable(logging.CRITICAL) yield predict_batch( - search_space.filter_psms_by_mz(PSMList(psm_list=list(batch))), - add_retention_time=add_retention_time, - add_ion_mobility=add_ion_mobility, + batch, model=model, model_dir=model_dir, processes=processes, diff --git a/ms2pip/search_space.py b/ms2pip/search_space.py index 2ac3a87..e969c53 100644 --- a/ms2pip/search_space.py +++ b/ms2pip/search_space.py @@ -265,7 +265,9 @@ def build(self, processes: int = 1): Number of processes to use for parallelization. """ - processes = processes if processes else multiprocessing.cpu_count() + processes = ( + processes if processes else multiprocessing.cpu_count() + ) # Always ignored because of the default value self._digest_fasta(processes) self._remove_redundancy() self._add_modifications(processes) @@ -308,6 +310,10 @@ def filter_psms_by_mz(self, psms: PSMList) -> PSMList: ] ) + def to_psm_list(self) -> PSMList: + """Convert search space to PSMList.""" + return PSMList(psm_list=list(self)) + def _digest_fasta(self, processes: int = 1): """Digest FASTA file to peptides and populate search space.""" # Convert to string to avoid issues with Path objects