Skip to content

Commit 2a6e306

Browse files
author
Pablo Riesgo Ferreiro
committed
Merge branch 'develop' into 'master'
Release v1.4.1 See merge request tron/tron-bam-preprocessing!18
2 parents 8ce785f + 9fe9862 commit 2a6e306

File tree

8 files changed

+61
-23
lines changed

8 files changed

+61
-23
lines changed

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ stages:
1818
test:
1919
stage: test
2020
script:
21-
- make clean test
21+
- make

Makefile

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,51 @@
1+
# Default goal: wipe previous outputs, run every pipeline test, then verify
# that the expected output files exist.
# These targets are commands, not files; declaring them phony keeps a stray
# file named e.g. "clean" or "test" from silently disabling the target.
.PHONY: all clean test check
# The prerequisites of `all` must run strictly left to right (clean must not
# race with test, and check must run after test), so disable parallel
# execution for this makefile even when invoked with `make -j`.
.NOTPARALLEL:
all : clean test check
2+
13
clean:
24
rm -rf output
3-
rm -f report.html*
4-
rm -f timeline.html*
5-
rm -f trace.txt*
6-
rm -f dag.dot*
75
rm -f .nextflow.log*
86
rm -rf .nextflow*
97

108
test:
9+
nextflow main.nf --help
1110
nextflow main.nf -profile test,conda --output output/test1
1211
nextflow main.nf -profile test,conda --skip_bqsr --output output/test2
1312
nextflow main.nf -profile test,conda --skip_realignment --output output/test3
1413
nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
15-
nextflow main.nf -profile test,conda --output output/test5 --skip_metrics
14+
nextflow main.nf -profile test,conda --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false
1615
nextflow main.nf -profile test,conda --output output/test6 --intervals false
1716
nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt
1817
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false
18+
19+
# Verify the outputs of the `test` target: for each of the eight test runs
# (output/test1 .. output/test8) both samples must have a non-empty
# preprocessed BAM and BAI file. Stops at the first missing or empty file and
# reports the number of the run it belongs to (previously every message said
# "test 1" regardless of the run being checked).
.PHONY: check
check:
	@for run in 1 2 3 4 5 6 7 8; do \
		for prefix in sample1/TESTX_S1_L001 sample2/TESTX_S1_L002; do \
			for ext in bam bai; do \
				test -s "output/test$$run/$$prefix.preprocessed.$$ext" || { echo "Missing test $$run output file!"; exit 1; }; \
			done; \
		done; \
	done

README.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# TRONflow BAM preprocessing pipeline
1+
# TronFlow BAM preprocessing pipeline
22

33
[![DOI](https://zenodo.org/badge/358400957.svg)](https://zenodo.org/badge/latestdoi/358400957)
44

5-
Nextflow pipeline for the preprocessing of BAM files based on Picard and GATK.
5+
Nextflow (Di Tommaso, 2017) pipeline for the preprocessing of BAM files based on Picard and GATK (DePristo, 2011).
66

77

88
## Background
@@ -30,7 +30,7 @@ Steps:
3030
* **Base Quality Score Recalibration (BQSR)** (optional). It aims at correcting systematic errors made by the sequencer when assigning base call quality scores. Because these scores are used by variant callers, recalibration improves variant calling in some situations. Implemented in GATK4.
3131
* **Metrics** (optional). A number of metrics are obtained over the BAM file with Picard's CollectMetrics (e.g. duplication, insert size, alignment, etc.).
3232

33-
![Pipeline](bam_preprocessing2.png)
33+
![Pipeline](figures/bam_preprocessing2.png)
3434

3535
## References
3636

@@ -45,7 +45,8 @@ This can be built from a BED file using Picard's BedToIntervalList (https://gatk
4545
## How to run it
4646

4747
```
48-
$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.3.1 --help
48+
$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing --help
49+
4950
N E X T F L O W ~ version 19.07.0
5051
Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b
5152
@@ -100,3 +101,9 @@ Optional output:
100101
* Realignment intervals
101102
* Metrics
102103
```
104+
105+
106+
## References
107+
108+
* DePristo M, Banks E, Poplin R, Garimella K, Maguire J, Hartl C, Philippakis A, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell T, Kernytsky A, Sivachenko A, Cibulskis K, Gabriel S, Altshuler D, Daly M. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet, 43:491-498. DOI: 10.1038/ng.806.
109+
* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316–319. DOI: 10.1038/nbt.3820.

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# You can use this file to create a conda environment for this pipeline:
22
# conda env create -f environment.yml
3-
name: tronflow-bam-preprocessing-1.4.0
3+
name: tronflow-bam-preprocessing
44
channels:
55
- conda-forge
66
- bioconda
File renamed without changes.

main.nf

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ params.prepare_bam_memory = "8g"
2626
params.mark_duplicates_cpus = 16
2727
params.mark_duplicates_memory = "64g"
2828
params.realignment_around_indels_cpus = 2
29-
params.realignment_around_indels_memory = "32g"
29+
params.realignment_around_indels_memory = "31g"
3030
params.bqsr_cpus = 3
3131
params.bqsr_memory = "4g"
3232
params.metrics_cpus = 1
@@ -270,18 +270,17 @@ if (!params.skip_realignment) {
270270
file("${bam.baseName}.RA.intervals") into realignment_intervals
271271

272272
script:
273-
known_indels = "" + params.known_indels1 ? " --known ${params.known_indels1}" : "" +
274-
params.known_indels2 ? " --known ${params.known_indels2}" : ""
275-
known_alleles = "" + params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : "" +
276-
params.known_indels2 ? " --knownAlleles ${params.known_indels2}" : ""
273+
known_indels1 = params.known_indels1 ? " --known ${params.known_indels1}" : ""
274+
known_indels2 = params.known_indels2 ? " --known ${params.known_indels2}" : ""
275+
known_alleles1 = params.known_indels1 ? " --knownAlleles ${params.known_indels1}" : ""
276+
known_alleles2 = params.known_indels2 ? " --knownAlleles ${params.known_indels2}" : ""
277277
"""
278278
mkdir tmp
279279
280280
gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T RealignerTargetCreator \
281281
--input_file ${bam} \
282282
--out ${bam.baseName}.RA.intervals \
283-
--reference_sequence ${params.reference} \
284-
${known_indels}
283+
--reference_sequence ${params.reference} ${known_indels1} ${known_indels2}
285284
286285
gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=tmp -T IndelRealigner \
287286
--input_file ${bam} \
@@ -290,8 +289,7 @@ if (!params.skip_realignment) {
290289
--targetIntervals ${bam.baseName}.RA.intervals \
291290
--consensusDeterminationModel USE_SW \
292291
--LODThresholdForCleaning 0.4 \
293-
--maxReadsInMemory 600000 \
294-
${known_alleles}
292+
--maxReadsInMemory 600000 ${known_alleles1} ${known_alleles2}
295293
"""
296294
}
297295
}

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
4646

4747
cleanup = true
4848

49-
VERSION = '1.4.0'
49+
VERSION = '1.4.1'
5050
DOI = 'https://zenodo.org/badge/latestdoi/358400957'
5151

5252
manifest {

test_data/test_input.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
TESTX_S1_L001 tumor test_data/TESTX_S1_L001.bam
2-
TESTX_S1_L002 normal test_data/TESTX_S1_L002.bam
1+
sample1 tumor test_data/TESTX_S1_L001.bam
2+
sample2 normal test_data/TESTX_S1_L002.bam

0 commit comments

Comments
 (0)