@@ -80,6 +80,7 @@ class SraRunMetadata:
8080
8181def read_xml_metadata (path , include_invalid_runs = False ) -> List [SraRunMetadata ]:
8282 """
83+ Extract transcriptomic RNA-Seq runs from the given SRA XML metadata file.
8384 :param path: Path to the XML file containing SRA run metadata.
8485 :param include_invalid_runs: If True, include runs that do not have any suitable metadata that can be used to
8586 determine the layout.
@@ -92,6 +93,20 @@ def read_xml_metadata(path, include_invalid_runs=False) -> List[SraRunMetadata]:
9293 srr = run .attrib ['accession' ]
9394
9495 srx = run .find ('EXPERIMENT_REF' ).attrib ['accession' ]
96+
97+ library_strategy = root .find (
98+ 'EXPERIMENT_PACKAGE/EXPERIMENT[@accession=\' ' + srx + '\' ]/DESIGN/LIBRARY_DESCRIPTOR/LIBRARY_STRATEGY' )
99+ library_source = root .find (
100+ 'EXPERIMENT_PACKAGE/EXPERIMENT[@accession=\' ' + srx + '\' ]/DESIGN/LIBRARY_DESCRIPTOR/LIBRARY_SOURCE' )
101+
102+ if library_strategy is not None and library_strategy .text not in ['RNA-Seq' ]:
103+ logger .warning ('%s Ignoring run with %s library strategy.' , srr , library_strategy .text )
104+ continue
105+
106+ if library_source is not None and library_source .text not in ['TRANSCRIPTOMIC' , 'TRANSCRIPTOMIC SINGLE CELL' ]:
107+ logger .warning ('%s: Ignoring run with %s library source.' , srr , library_source .text )
108+ continue
109+
95110 is_single_end = root .find (
96111 'EXPERIMENT_PACKAGE/EXPERIMENT[@accession=\' ' + srx + '\' ]/DESIGN/LIBRARY_DESCRIPTOR/LIBRARY_LAYOUT/SINGLE' ) is not None
97112 is_paired = root .find (
@@ -389,7 +404,7 @@ def run(self):
389404 meta = [r for r in meta if r .srr in self .srr ]
390405
391406 if not meta :
392- raise ValueError (f'No SRA runs found for { self .srx } .' )
407+ raise ValueError (f'No valid SRA runs found for { self .srx } . Valid runs must be transcriptomic RNA-Seq .' )
393408
394409 metadata = dict (self .metadata )
395410 # do not override the sample_id when invoked from DownloadGeoSample or DownloadGemmaExperiment
0 commit comments