Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions ms2pip/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,18 +189,34 @@ def predict_library(
raise ValueError("Either `fasta_file` or `config` must be provided.")

search_space = ProteomeSearchSpace.from_any(config)
search_space.build()
search_space.build(processes=processes)

# Convert to PSMList
psm_list = search_space.to_psm_list()

# Filter PSMs by mz
# TODO: Parallelize this step?
psm_list_filtered = search_space.filter_psms_by_mz(psm_list)

# Add retention time and ion mobility predictions
if add_retention_time:
logger.info("Adding retention time predictions...")
rt_predictor = RetentionTime(processes=processes)
rt_predictor.add_rt_predictions(psm_list_filtered)
if add_ion_mobility:
logger.info("Adding ion mobility predictions...")
im_predictor = IonMobility(processes=processes)
im_predictor.add_im_predictions(psm_list_filtered)

for batch in track(
_into_batches(search_space, batch_size=batch_size),
_into_batches(psm_list_filtered, batch_size=batch_size),
description="Predicting spectra...",
total=ceil(len(search_space) / batch_size),
):

logging.disable(logging.CRITICAL)
yield predict_batch(
search_space.filter_psms_by_mz(PSMList(psm_list=list(batch))),
add_retention_time=add_retention_time,
add_ion_mobility=add_ion_mobility,
batch,
model=model,
model_dir=model_dir,
processes=processes,
Expand Down
8 changes: 7 additions & 1 deletion ms2pip/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ def build(self, processes: int = 1):
Number of processes to use for parallelization.

"""
processes = processes if processes else multiprocessing.cpu_count()
processes = (
processes if processes else multiprocessing.cpu_count()
) # NOTE: the cpu_count() fallback is only reached when a falsy value (e.g. None or 0) is passed explicitly; the default of 1 bypasses it
self._digest_fasta(processes)
self._remove_redundancy()
self._add_modifications(processes)
Expand Down Expand Up @@ -308,6 +310,10 @@ def filter_psms_by_mz(self, psms: PSMList) -> PSMList:
]
)

def to_psm_list(self) -> PSMList:
    """
    Return the contents of this search space as a single PSMList.

    Iterates over this object once, collects every yielded item into a
    list, and hands that list to ``PSMList`` via its ``psm_list`` argument.
    """
    entries = list(self)
    return PSMList(psm_list=entries)

def _digest_fasta(self, processes: int = 1):
"""Digest FASTA file to peptides and populate search space."""
# Convert to string to avoid issues with Path objects
Expand Down