Skip to content

Commit a11d921

Browse files
authored
Merge pull request #5 from TRON-Bioinformatics/upgrade-gatk
upgrade GATK to 4.2.5.0
2 parents 6aaa617 + a238487 commit a11d921

File tree

7 files changed

+28
-18
lines changed

7 files changed

+28
-18
lines changed

main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ params.collect_hs_metrics_min_mapping_quality = false
3030
// computational resources
3131
params.prepare_bam_cpus = 3
3232
params.prepare_bam_memory = "8g"
33-
params.mark_duplicates_cpus = 16
34-
params.mark_duplicates_memory = "64g"
33+
params.mark_duplicates_cpus = 2
34+
params.mark_duplicates_memory = "16g"
3535
params.realignment_around_indels_cpus = 2
3636
params.realignment_around_indels_memory = "31g"
3737
params.bqsr_cpus = 3

modules/01_prepare_bam.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ process PREPARE_BAM {
1717
memory "${params.prepare_bam_memory}"
1818
tag "${name}"
1919

20-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
20+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
2121

2222
input:
2323
tuple val(name), val(type), file(bam)
@@ -58,7 +58,7 @@ process INDEX_BAM {
5858
memory "${params.index_memory}"
5959
tag "${name}"
6060

61-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
61+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
6262

6363
input:
6464
tuple val(name), val(type), file(bam)

modules/02_mark_duplicates.nf

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
params.mark_duplicates_cpus = 16
2-
params.mark_duplicates_memory = "64g"
1+
params.mark_duplicates_cpus = 2
2+
params.mark_duplicates_memory = "16g"
33
params.remove_duplicates = true
44
params.skip_metrics = false
55
params.output = 'output'
@@ -11,7 +11,7 @@ process MARK_DUPLICATES {
1111
tag "${name}"
1212
publishDir "${params.output}/${name}/metrics/mark_duplicates", mode: "copy", pattern: "*.dedup_metrics.txt"
1313

14-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
14+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
1515

1616
input:
1717
tuple val(name), val(type), file(bam)
@@ -21,15 +21,25 @@ process MARK_DUPLICATES {
2121
file("${name}.dedup_metrics.txt") optional true
2222

2323
script:
24-
dedup_metrics = params.skip_metrics ? "": "--metrics-file ${name}.dedup_metrics.txt"
25-
remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false"
24+
dedup_metrics = params.skip_metrics ? "": "--METRICS_FILE ${name}.dedup_metrics.txt"
25+
remove_duplicates = params.remove_duplicates ? "--REMOVE_DUPLICATES true" : "--REMOVE_DUPLICATES false"
2626
"""
2727
mkdir tmp
2828
29-
gatk MarkDuplicatesSpark \
29+
gatk SortSam \
30+
--INPUT ${bam} \
31+
--OUTPUT ${name}.sorted.bam \
32+
--SORT_ORDER coordinate
33+
34+
gatk MarkDuplicates \
3035
--java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
31-
--input ${bam} \
32-
--output ${name}.dedup.bam \
33-
--conf 'spark.executor.cores=${task.cpus}' ${remove_duplicates} ${dedup_metrics}
36+
--INPUT ${name}.sorted.bam \
37+
--OUTPUT ${name}.dedup.bam \
38+
--ASSUME_SORT_ORDER coordinate \
39+
--CREATE_INDEX true ${remove_duplicates} ${dedup_metrics}
40+
41+
cp ${name}.dedup.bai ${name}.dedup.bam.bai
42+
43+
rm -f ${name}.sorted.bam
3444
"""
3545
}

modules/03_metrics.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ process HS_METRICS {
1313
tag "${name}"
1414
publishDir "${params.output}/${name}/metrics/hs_metrics", mode: "copy"
1515

16-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
16+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
1717

1818
input:
1919
tuple val(name), val(type), file(bam), file(bai)
@@ -53,7 +53,7 @@ process METRICS {
5353
publishDir "${params.output}/${name}/metrics/gatk_multiple_metrics", mode: "copy"
5454

5555
// NOTE: the tool CollectMultipleMetrics has a hidden dependency on R for making plots
56-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0 r::r=3.6.0" : null)
56+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0 r::r=3.6.0" : null)
5757

5858
input:
5959
tuple val(name), val(type), file(bam), file(bai)

modules/04_realignment_around_indels.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ process REALIGNMENT_AROUND_INDELS {
1313
publishDir "${params.output}/${name}/metrics/realignment", mode: "copy", pattern: "*.RA.intervals"
1414

1515
// NOTE: this dependency is fixed to GATK 3 as the realignment around indels is no longer maintained in GATK 4
16-
// but still for some reason for GATK 3 to work the dependency to GATK 4 is needed
16+
// but, for some reason, GATK 3 still requires the GATK 4.2.0.0 dependency in order to work
1717
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0 bioconda::gatk=3.8" : null)
1818

1919
input:

modules/05_bqsr.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ process BQSR {
1111
publishDir "${params.output}/${name}", mode: "copy"
1212
tag "${name}"
1313

14-
conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
14+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
1515

1616
input:
1717
tuple val(name), val(type), file(bam), file(bai)

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
4646

4747
cleanup = true
4848

49-
VERSION = '1.7.3'
49+
VERSION = '1.8.0'
5050
DOI = 'https://zenodo.org/badge/latestdoi/358400957'
5151

5252
manifest {

0 commit comments

Comments
 (0)