
Commit feb57ee

Pablo Riesgo Ferreiro committed
Merge branch 'develop' into 'master'
Release 1.6.0

See merge request tron/tron-bam-preprocessing!21
2 parents 33d7f66 + 6be9be4 commit feb57ee

5 files changed: +15 -28 lines changed


LICENSE

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2019 TRON
+Copyright (c) 2019-2021 TRON
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

Makefile

Lines changed: 4 additions & 2 deletions
@@ -13,8 +13,8 @@ test:
 nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
 nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
 nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment
-nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment
-nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
+nextflow main.nf -profile test,conda --output output/test7 --skip_bqsr --skip_realignment
+nextflow main.nf -profile test,conda --output output/test8 --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
 nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment --input_files false --input_bam test_data/TESTX_S1_L001.bam
 
 check:
@@ -48,6 +48,8 @@ check:
 test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
 test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
 test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
+test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup.hs_metrics.txt || { echo "Missing test 8 output file!"; exit 1; }
+test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup_metrics.txt || { echo "Missing test 8 output file!"; exit 1; }
 test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
 test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
 test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
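
The test invocations and the output checks above sit under the Makefile targets named in the hunk headers (test: and check:), so a full local verification of this change would presumably just be:

# assumes the repository root as working directory and a working conda install
make test
make check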

README.md

Lines changed: 0 additions & 2 deletions
@@ -70,8 +70,6 @@ Optional input:
 * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
 * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
 * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
-* --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
-* --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
 * --collect_hs_minimum_base_quality: minimum base quality for a base to contribute coverage (default: 20).
 * --collect_hs_minimum_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
 * --skip_bqsr: optionally skip BQSR (default: false)
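
With --hs_metrics_target_coverage and --hs_metrics_per_base_coverage removed, HS metrics collection is driven only by --intervals plus the two quality thresholds. A sketch of a 1.6.0 invocation follows; the file names and the plain conda profile are placeholders, not part of this repository, while the flag names are the ones used in main.nf and the Makefile tests:

# hypothetical inputs and profile; only the flags come from the docs and tests above
nextflow main.nf -profile conda \
  --input_files my_input_files.tsv \
  --reference hg19.fasta \
  --intervals targets.interval_list \
  --collect_hs_metrics_min_base_quality 10 \
  --collect_hs_metrics_min_mapping_quality 10 \
  --output output/my_run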

main.nf

Lines changed: 9 additions & 20 deletions
@@ -10,8 +10,6 @@ params.dbsnp = false
 params.known_indels1 = false
 params.known_indels2 = false
 params.intervals = false
-params.hs_metrics_target_coverage = false
-params.hs_metrics_per_base_coverage = false
 params.skip_bqsr = false
 params.skip_realignment = false
 params.skip_deduplication = false
@@ -136,7 +134,7 @@ if (!params.skip_deduplication) {
 cpus "${params.mark_duplicates_cpus}"
 memory "${params.mark_duplicates_memory}"
 tag "${name}"
-publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics"
+publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics.txt"
 
 input:
 set name, bam_name, type, file(bam) from prepared_bams
@@ -145,10 +143,10 @@ if (!params.skip_deduplication) {
 set val(name), val(bam_name), val(type),
 file("${bam.baseName}.dedup.bam"), file("${bam.baseName}.dedup.bam.bai") into deduplicated_bams,
 deduplicated_bams_for_metrics, deduplicated_bams_for_hs_metrics
-file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics
+file("${bam.baseName}.dedup_metrics.txt") optional true
 
 script:
-dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics"
+dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics.txt"
 remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false"
 """
 mkdir tmp
@@ -157,9 +155,7 @@ if (!params.skip_deduplication) {
 --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
 --input ${bam} \
 --output ${bam.baseName}.dedup.bam \
---conf 'spark.executor.cores=${task.cpus}' \
-${remove_duplicates} \
-${dedup_metrics}
+--conf 'spark.executor.cores=${task.cpus}' ${remove_duplicates} ${dedup_metrics}
 """
 }
 }
@@ -202,18 +198,11 @@ if (! params.skip_metrics) {
 set name, bam_name, type, file(bam), file(bai) from deduplicated_bams_for_hs_metrics
 
 output:
-file("*_metrics") optional true into txt_hs_metrics
-file("*.pdf") optional true into pdf_hs_metrics
-file(params.hs_metrics_target_coverage) optional true into target_hs_metrics
-file(params.hs_metrics_per_base_coverage) optional true into per_base_hs_metrics
+file("*_metrics") optional true
+file("*.pdf") optional true
+file("${bam.baseName}.hs_metrics.txt")
 
 script:
-hs_metrics_target_coverage= params.hs_metrics_target_coverage ?
-"--PER_TARGET_COVERAGE ${params.hs_metrics_target_coverage} --REFERENCE_SEQUENCE ${params.reference}" :
-""
-hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ?
-"--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" :
-""
 minimum_base_quality = params.collect_hs_metrics_min_base_quality ?
 "--MINIMUM_BASE_QUALITY ${params.collect_hs_metrics_min_base_quality}" : ""
 minimum_mapping_quality = params.collect_hs_metrics_min_mapping_quality ?
@@ -224,10 +213,10 @@ if (! params.skip_metrics) {
 gatk CollectHsMetrics \
 --java-options '-Xmx${params.metrics_memory} -Djava.io.tmpdir=tmp' \
 --INPUT ${bam} \
---OUTPUT ${bam.baseName} \
+--OUTPUT ${bam.baseName}.hs_metrics.txt \
 --TARGET_INTERVALS ${params.intervals} \
 --BAIT_INTERVALS ${params.intervals} \
-${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} ${minimum_base_quality} ${minimum_mapping_quality}
+${minimum_base_quality} ${minimum_mapping_quality}
 """
 }
 }
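
After these edits the CollectHsMetrics step always writes a single ${bam.baseName}.hs_metrics.txt alongside the optional *_metrics and *.pdf outputs. Purely as an illustration of what the template above renders to (the memory value and intervals path are placeholders, the thresholds are the documented defaults of 20, and the BAM name matches the test 8 check in the Makefile), the command would look roughly like:

# illustrative expansion only, not a command taken from the repository
gatk CollectHsMetrics \
  --java-options '-Xmx4g -Djava.io.tmpdir=tmp' \
  --INPUT TESTX_S1_L001.prepared.dedup.bam \
  --OUTPUT TESTX_S1_L001.prepared.dedup.hs_metrics.txt \
  --TARGET_INTERVALS targets.interval_list \
  --BAIT_INTERVALS targets.interval_list \
  --MINIMUM_BASE_QUALITY 20 --MINIMUM_MAPPING_QUALITY 20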

nextflow.config

Lines changed: 1 addition & 3 deletions
@@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 
 cleanup = true
 
-VERSION = '1.5.0'
+VERSION = '1.6.0'
 DOI = 'https://zenodo.org/badge/latestdoi/358400957'
 
 manifest {
@@ -83,8 +83,6 @@ Optional input:
 * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
 * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
 * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None)
-* --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
-* --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
 * --collect_hs_minimum_base_quality: minimum base quality for a base to contribute coverage (default: 20).
 * --collect_hs_minimum_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
 * --skip_bqsr: optionally skip BQSR (default: false)
