1
- params. mark_duplicates_cpus = 16
2
- params. mark_duplicates_memory = " 64g "
1
+ params. mark_duplicates_cpus = 2
2
+ params. mark_duplicates_memory = " 16g "
3
3
params. remove_duplicates = true
4
4
params. skip_metrics = false
5
5
params. output = ' output'
@@ -21,15 +21,25 @@ process MARK_DUPLICATES {
21
21
file(" ${ name} .dedup_metrics.txt" ) optional true
22
22
23
23
script:
24
- dedup_metrics = params. skip_metrics ? " " : " --metrics-file ${ name} .dedup_metrics.txt"
25
- remove_duplicates = params. remove_duplicates ? " --remove-all-duplicates true" : " --remove-all-duplicates false"
24
+ dedup_metrics = params. skip_metrics ? " " : " --METRICS_FILE ${ name} .dedup_metrics.txt"
25
+ remove_duplicates = params. remove_duplicates ? " --REMOVE_DUPLICATES true" : " --REMOVE_DUPLICATES false"
26
26
"""
27
27
mkdir tmp
28
28
29
- gatk MarkDuplicatesSpark \
29
+ gatk SortSam \
30
+ --INPUT ${ bam} \
31
+ --OUTPUT ${ name} .sorted.bam \
32
+ --SORT_ORDER coordinate
33
+
34
+ gatk MarkDuplicates \
30
35
--java-options '-Xmx${ params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
31
- --input ${ bam} \
32
- --output ${ name} .dedup.bam \
33
- --conf 'spark.executor.cores=${ task.cpus} ' ${ remove_duplicates} ${ dedup_metrics}
36
+ --INPUT ${ name} .sorted.bam \
37
+ --OUTPUT ${ name} .dedup.bam \
38
+ --ASSUME_SORT_ORDER coordinate \
39
+ --CREATE_INDEX true ${ remove_duplicates} ${ dedup_metrics}
40
+
41
+ cp ${ name} .dedup.bai ${ name} .dedup.bam.bai
42
+
43
+ rm -f ${ name} .sorted.bam
34
44
"""
35
45
}
0 commit comments