Skip to content

Commit 266c18b

Browse files
authored
Merge pull request #11 from TRON-Bioinformatics/create-indices
Create indices
2 parents 5faa1bd + 0c151ff commit 266c18b

File tree

8 files changed

+66753
-10
lines changed

8 files changed

+66753
-10
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@ report.html*
66
timeline.html*
77
trace.txt*
88
dag.dot*
9-
*.swp
9+
*.swp
10+
/test_data/ucsc.hg19.minimal.without_indices.dict
11+
/test_data/ucsc.hg19.minimal.without_indices.fasta.fai

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ test:
1717
bash tests/test_08.sh
1818
bash tests/test_09.sh
1919
bash tests/test_10.sh
20+
bash tests/test_11.sh

main.nf

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ include { MARK_DUPLICATES; SPLIT_CIGAR_N_READS } from './modules/02_mark_duplica
77
include { METRICS; HS_METRICS; COVERAGE_ANALYSIS; FLAGSTAT } from './modules/03_metrics'
88
include { REALIGNMENT_AROUND_INDELS } from './modules/04_realignment_around_indels'
99
include { BQSR; CREATE_OUTPUT } from './modules/05_bqsr'
10+
include { CREATE_FAIDX; CREATE_DICT } from './modules/00_reference_indices'
1011

1112
params.help= false
1213
params.input_files = false
@@ -82,10 +83,36 @@ else if (params.input_files) {
8283
.set { input_files }
8384
}
8485

86+
/*
 * Validates the reference FASTA and triggers creation of its indexes when they are missing.
 *
 * take:
 *   reference          path to the reference FASTA (the entry workflow passes params.reference)
 * emit:
 *   checked_reference  the very same value received in `reference`, unchanged
 *
 * The pipeline aborts with exit code 1 when the reference file is missing or empty.
 *
 * NOTE(review): `checked_reference` is emitted without depending on CREATE_FAIDX / CREATE_DICT
 * (neither call's result is used here), so downstream processes can presumably start before the
 * indexes exist on the first run -- confirm and chain on the process outputs if that is the case.
 */
workflow CHECK_REFERENCE {
    take:
    reference

    main:
    // checks the reference and its indexes, if the indexes are not there creates them
    reference_file = file(reference)
    if (reference_file.isEmpty()) {
        log.error "--reference points to a non existing file"
        exit 1
    }

    // FASTA index is looked up as <reference>.fai
    faidx = file("${reference}.fai")
    if (faidx.isEmpty()) {
        CREATE_FAIDX(reference)
    }

    // sequence dictionary is looked up by glob in the reference folder: <baseName>*.dict
    dict = file("${reference_file.getParent() }/${reference_file.baseName }*.dict")
    if (dict.isEmpty()) {
        CREATE_DICT(reference)
    }

    // sections follow the documented order take -> main -> emit
    emit:
    checked_reference = reference
}
109+
85110

86111
workflow {
87112

88-
PREPARE_BAM(input_files, params.reference)
113+
CHECK_REFERENCE(params.reference)
114+
115+
PREPARE_BAM(input_files, CHECK_REFERENCE.out.checked_reference)
89116

90117
if (!params.skip_deduplication) {
91118
MARK_DUPLICATES(PREPARE_BAM.out.prepared_bams)
@@ -97,29 +124,29 @@ workflow {
97124
}
98125

99126
if (params.split_cigarn) {
100-
SPLIT_CIGAR_N_READS(deduplicated_bams, params.reference)
127+
SPLIT_CIGAR_N_READS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
101128
deduplicated_bams = SPLIT_CIGAR_N_READS.out.split_cigarn_bams
102129
}
103130

104131
if (! params.skip_metrics) {
105132
if (params.intervals) {
106133
HS_METRICS(deduplicated_bams)
107134
}
108-
METRICS(deduplicated_bams, params.reference)
135+
METRICS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
109136
COVERAGE_ANALYSIS(deduplicated_bams)
110137
FLAGSTAT(deduplicated_bams)
111138
}
112139

113140
if (!params.skip_realignment) {
114-
REALIGNMENT_AROUND_INDELS(deduplicated_bams, params.reference)
141+
REALIGNMENT_AROUND_INDELS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
115142
realigned_bams = REALIGNMENT_AROUND_INDELS.out.realigned_bams
116143
}
117144
else {
118145
realigned_bams = deduplicated_bams
119146
}
120147

121148
if (!params.skip_bqsr) {
122-
BQSR(realigned_bams, params.reference)
149+
BQSR(realigned_bams, CHECK_REFERENCE.out.checked_reference)
123150
preprocessed_bams = BQSR.out.recalibrated_bams
124151
}
125152
else {

modules/00_reference_indices.nf

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
/*
 * Builds the FASTA index of the reference by running `samtools faidx` on it.
 *
 * input:
 *   reference  path to the reference FASTA, received as a plain value (`val`), so the command
 *              runs against the original path rather than a staged copy
 *
 * The process declares no outputs.
 */
process CREATE_FAIDX {
    cpus "1"
    memory "4g"
    // `reference` is the only variable declared by this process, so it is used as the task tag
    // (the previous "${name}" referred to a variable that is not defined here)
    tag "${reference}"

    conda (params.enable_conda ? "bioconda::samtools=1.12" : null)

    input:
    val(reference)

    """
    samtools faidx ${reference}
    """
}
16+
17+
/*
 * Builds the sequence dictionary of the reference by running `gatk CreateSequenceDictionary`.
 *
 * input:
 *   reference  path to the reference FASTA, received as a plain value (`val`), so the command
 *              runs against the original path rather than a staged copy
 *
 * The process declares no outputs.
 */
process CREATE_DICT {
    cpus "1"
    memory "4g"
    // `reference` is the only variable declared by this process, so it is used as the task tag
    // (the previous "${name}" referred to a variable that is not defined here)
    tag "${reference}"

    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)

    input:
    val(reference)

    """
    gatk CreateSequenceDictionary --REFERENCE ${reference}
    """
}

modules/02_mark_duplicates.nf

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
params.mark_duplicates_cpus = 2
22
params.mark_duplicates_memory = "16g"
3+
params.split_reads_cpus = 2
4+
params.split_reads_memory = "4g"
35
params.remove_duplicates = true
46
params.output = 'output'
57

@@ -50,8 +52,8 @@ process MARK_DUPLICATES {
5052
}
5153

5254
process SPLIT_CIGAR_N_READS {
53-
cpus "${params.prepare_bam_cpus}"
54-
memory "${params.prepare_bam_memory}"
55+
cpus "${params.split_reads_cpus}"
56+
memory "${params.split_reads_memory}"
5557
tag "${name}"
5658
publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*"
5759

@@ -70,7 +72,7 @@ process SPLIT_CIGAR_N_READS {
7072
mkdir tmp
7173
7274
gatk SplitNCigarReads \
73-
--java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \
75+
--java-options '-Xmx${params.split_reads_memory} -Djava.io.tmpdir=./tmp' \
7476
--input ${bam} \
7577
--output ${name}.split_cigarn.bam \
7678
--create-output-bam-index true \

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
4444

4545
cleanup = true
4646

47-
VERSION = '2.0.1'
47+
VERSION = '2.1.0'
4848
DOI = 'https://zenodo.org/badge/latestdoi/358400957'
4949

5050
manifest {

0 commit comments

Comments
 (0)