Skip to content

Commit f250d94

Browse files
committed
fixup! fixup! WIP
1 parent 91df897 commit f250d94

File tree

8 files changed

+52
-25
lines changed

8 files changed

+52
-25
lines changed

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,15 @@ The output is organized as follows:
102102

103103
```
104104
pipeline-output/
105-
genomes/<reference_id>/ # Genomic references
106-
references/<reference_id>/ # RSEM/STAR indexes
107-
data/<source> # FASTQs (note that GEO source uses SRA)
108-
data-qc/<experiment_id>/<sample_id>/ # FastQC reports
109-
aligned/<reference_id>/<experiment_id>/ # alignments and quantification results
110-
quantified/<reference_id> # quantification matrices for isoforms and genes
111-
report/<reference_id>/<experiment_id>/ # MultiQC reports for reads and alignments
105+
genomes/<reference_id>/ # Genomic references
106+
references/<reference_id>/ # RSEM/STAR indexes
107+
data/<source>/ # FASTQs (organization is source-specific; note that GEO source uses SRA)
108+
data-qc/<experiment_id>/<sample_id>/ # FastQC reports
109+
data-single-cell/<experiment_id>/<sample_id>/ # Single-cell data (hard links to files from data/)
110+
aligned/<reference_id>/<experiment_id>/ # alignments and quantification results
111+
quantified/<reference_id>/ # quantification matrices for isoforms and genes
112+
quantified-single-cell/<reference_id>/<experiment_id>/<sample_id>/ # quantified single-cell data (Cell Ranger outputs)
113+
report/<reference_id>/<experiment_id>/ # MultiQC reports for reads and alignments
112114
```
113115

114116
You can adjust the pipeline output directory by setting `OUTPUT_DIR` under

example.luigi.cfg

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ submit_batch_info_jobs=2
3030
scheduler=local
3131
scheduler_partition=
3232
scheduler_extra_args=[]
33+
# Default tool executables; uncomment and override as needed
34+
#cutadapt_bin=cutadapt
35+
#cell_ranger_bin=cellranger
3336

3437
#
3538
# This section contains the necessary variables for the pipeline execution
@@ -40,7 +43,7 @@ scheduler_extra_args=[]
4043
OUTPUT_DIR=pipeline-output
4144
GENOMES=genomes
4245
REFERENCES=references
43-
REFERENCES_CELL_RANGER=references-cell-ranger
46+
SINGLE_CELL_REFERENCES=references-single-cell
4447
METADATA=metadata
4548
DATA=data
4649
DATAQCDIR=data-qc
@@ -52,9 +55,6 @@ BATCHINFODIR=batch-info
5255
# RSEM
5356
RSEM_DIR=contrib/RSEM
5457

55-
# Cell Ranger
56-
cell_ranger_bin=cellranger
57-
5858
SLACK_WEBHOOK_URL=
5959

6060
[rnaseq_pipeline.sources.sra]
@@ -72,3 +72,6 @@ appdata_dir=/space/gemmaData
7272
human_reference_id=hg38_ncbi
7373
mouse_reference_id=mm10_ncbi
7474
rat_reference_id=rn7_ncbi
75+
human_single_cell_reference_id=refdata-gex-GRCh38-2024-A
76+
mouse_single_cell_reference_id=refdata-gex-GRCm39-2024-A
77+
rat_single_cell_reference_id=refdata-gex-mRatBN7-2-2024-A

rnaseq_pipeline/gemma.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ class gemma(luigi.Config):
1818
human_reference_id: str = luigi.Parameter()
1919
mouse_reference_id: str = luigi.Parameter()
2020
rat_reference_id: str = luigi.Parameter()
21+
human_single_cell_reference_id: str = luigi.Parameter()
22+
mouse_single_cell_reference_id: str = luigi.Parameter()
23+
rat_single_cell_reference_id: str = luigi.Parameter()
2124

2225
cfg = gemma()
2326

@@ -98,6 +101,13 @@ def reference_id(self):
98101
except KeyError:
99102
raise ValueError('Unsupported Gemma taxon {}.'.format(self.taxon))
100103

104+
def single_cell_reference_id(self):
105+
try:
106+
return {'human': cfg.human_single_cell_reference_id, 'mouse': cfg.mouse_single_cell_reference_id, 'rat': cfg.rat_single_cell_reference_id}[
107+
self.taxon]
108+
except KeyError:
109+
raise ValueError('Unsupported Gemma taxon {}.'.format(self.taxon))
110+
101111
@property
102112
def platform_short_name(self):
103113
return f'Generic_{self.taxon}_ncbiIds'

rnaseq_pipeline/sources/arrayexpress.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class DownloadArrayExpressRun(luigi.Task):
5050

5151
@property
5252
def platform(self):
53+
# TODO: detect platforms from ArrayExpress metadata
5354
return IlluminaPlatform('HiSeq 2500')
5455

5556
def run(self):

rnaseq_pipeline/targets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def is_stale(self):
8585
def exists(self):
8686
return super().exists() and not self.is_stale()
8787

88-
class DownloadRunTarget(luigi.LocalTarget):
88+
class DownloadRunTarget(luigi.Target):
8989
run_id: str
9090
files: list[str]
9191
layout: list[str]

rnaseq_pipeline/tasks.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import uuid
55
from glob import glob
66
from os.path import join, dirname
7-
from typing import Optional
87

98
import luigi
109
import luigi.task
@@ -456,8 +455,8 @@ class OrganizeSingleCellSample(luigi.Task):
456455
def run(self):
457456
runs = self.input()
458457
os.makedirs(self.output().path)
459-
for run in runs:
460-
for lane, (f, read_type) in enumerate(zip(run.files, run.layout)):
458+
for lane, run in enumerate(runs):
459+
for f, read_type in zip(run.files, run.layout):
461460
dest = join(self.output().path, f'{self.sample_id}_S1_L{lane + 1:03}_{read_type}_001.fastq.gz')
462461
if os.path.exists(dest):
463462
os.unlink(dest)
@@ -470,10 +469,6 @@ def output(self):
470469
class AlignSingleCellSample(DynamicWrapperTask):
471470
experiment_id: str
472471
sample_id: str
473-
expect_cells: Optional[int] = luigi.OptionalIntParameter(default=None, positional=False)
474-
force_cells: Optional[int] = luigi.OptionalIntParameter(default=None, positional=False)
475-
chemistry: Optional[str] = luigi.OptionalParameter(default=None, positional=False,
476-
description='Chemistry to use for Cell Ranger (default is to auto-detect)')
477472

478473
def run(self):
479474
fastqs_dir, transcriptome_dir = self.input()
@@ -482,15 +477,13 @@ def run(self):
482477
transcriptome_dir=transcriptome_dir,
483478
fastqs_dir=fastqs_dir,
484479
output_dir=self.output().path,
485-
expect_cells=self.expect_cells,
486-
force_cells=self.force_cells,
487-
chemistry=self.chemistry,
488-
# FIXME: request a node with AVX512
480+
# TODO: add an avx feature on slurm
489481
scheduler_extra_args=['--constraint', 'thrd64']
490482
)
491483

492484
def output(self):
493-
return luigi.LocalTarget(join(cfg.OUTPUT_DIR, 'quantified-single-cell', self.experiment_id, self.sample_id))
485+
return luigi.LocalTarget(
486+
join(cfg.OUTPUT_DIR, 'quantified-single-cell', self.reference_id, self.experiment_id, self.sample_id))
494487

495488
class AlignSingleCellExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
496489
experiment_id: str = luigi.Parameter()
@@ -591,6 +584,24 @@ def output(self):
591584
return luigi.LocalTarget(
592585
join(gemma_cfg.appdata_dir, 'metadata', self.experiment_id, 'MultiQCReports/multiqc_report.html'))
593586

587+
class SubmitSingleCellExperimentDataToGemma(RerunnableTaskMixin, GemmaCliTask):
588+
experiment_id: str = luigi.Parameter()
589+
subcommand = 'loadSingleCellData'
590+
591+
def requires(self):
592+
return AlignSingleCellExperiment(experiment_id=self.experiment_id,
593+
reference_id=self.single_cell_reference_id(),
594+
source='gemma')
595+
596+
def subcommand_args(self):
597+
return ['-e', self.experiment_id, '-a', self.platform_short_name,
598+
'--data-path', self.input().path,
599+
'--quantitation-type-recomputed-from-raw-data',
600+
'--preferred-quantitation-type',
601+
# TODO: add sequencing metadata
602+
# FIXME: add --replace
603+
]
604+
594605
@requires(SubmitExperimentDataToGemma, SubmitExperimentBatchInfoToGemma, SubmitExperimentReportToGemma)
595606
class SubmitExperimentToGemma(TaskWithOutputMixin, WrapperTask):
596607
"""

rnaseq_pipeline/webviewer/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from os.path import basename, getctime, join, dirname
21
import datetime
32
from glob import glob
43
from os.path import basename, getctime, join, dirname

tests/test_webviewer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ def test_experiment_summary(client):
1111
res = client.get('/experiment/GSE87750')
1212
assert res.status == '200 OK'
1313

14+
@pytest.mark.skip()
1415
def test_experiment_batch_info(client):
1516
res = client.get('/experiment/GSE87750/batch-info')
1617
assert res.status == '200 OK'

0 commit comments

Comments
 (0)