diff --git a/CHANGELOG.md b/CHANGELOG.md index a428d5f1a..df2fc83d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ Special thanks to the following for their contributions to the release: ### Enhancements & fixes - [PR #1597](https://github.com/nf-core/rnaseq/pull/1597) - Bump version after release 3.20.0 +- **BREAKING:** Optimize default QC steps for performance - `skip_dupradar`, `skip_qualimap`, `skip_rseqc`, `skip_stringtie`, and `skip_bigwig` now default to `true` to improve pipeline runtime and reduce compute costs. Use `--skip_[tool] false` to restore previous behavior +- Optimize RSEM performance for Fusion filesystem by using local temporary storage and scratch directive +- Add fast mode optimization to dupRadar module for improved performance ## [[3.20.0](https://github.com/nf-core/rnaseq/releases/tag/3.20.0)] - 2025-08-18 diff --git a/docs/output.md b/docs/output.md index 2fec35e59..4f0bd5cf1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -381,6 +381,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ### StringTie +:::note +StringTie is disabled by default starting in v4.0 to improve pipeline performance. Enable with `--skip_stringtie false`. +::: +
Output files @@ -396,6 +400,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ### BEDTools and bedGraphToBigWig +:::note +BigWig generation is disabled by default starting in v4.0 to reduce output file sizes and improve performance. Enable with `--skip_bigwig false`. +::: +
Output files @@ -411,6 +419,10 @@ The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is an i ### RSeQC +:::note +RSeQC analysis is disabled by default starting in v4.0 to improve pipeline performance. Enable with `--skip_rseqc false`. +::: + [RSeQC](<(http://rseqc.sourceforge.net/)>) is a package of scripts designed to evaluate the quality of RNA-seq data. This pipeline runs several, but not all RSeQC scripts. You can tweak the supported scripts you would like to run by adjusting the `--rseqc_modules` parameter which by default will run all of the following: `bam_stat.py`, `inner_distance.py`, `infer_experiment.py`, `junction_annotation.py`, `junction_saturation.py`,`read_distribution.py` and `read_duplication.py`. The majority of RSeQC scripts generate output files which can be plotted and summarised in the MultiQC report. @@ -591,6 +603,10 @@ RSeQC documentation: [tin.py](http://rseqc.sourceforge.net/#tin-py) ### Qualimap +:::note +Qualimap analysis is disabled by default starting in v4.0 due to its resource-intensive nature. Enable with `--skip_qualimap false`. +::: +
Output files @@ -616,6 +632,10 @@ The [Qualimap RNA-seq QC module](http://qualimap.bioinfo.cipf.es/doc_html/analys ### dupRadar +:::note +dupRadar analysis is disabled by default starting in v4.0 as it provides limited utility for bulk RNA-seq experiments. Enable with `--skip_dupradar false`. +::: +
Output files diff --git a/docs/usage.md b/docs/usage.md index 0dbf743c0..5cb9c251a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,16 +46,19 @@ If you set the strandedness value to `auto`, the pipeline will sub-sample the in #### Usage Examples 1. **Forward Stranded Sample:** + - Forward fraction: 0.85 - Reverse fraction: 0.15 - **Classification:** Forward stranded 2. **Reverse Stranded Sample:** + - Forward fraction: 0.1 - Reverse fraction: 0.9 - **Classification:** Reverse stranded 3. **Unstranded Sample:** + - Forward fraction: 0.45 - Reverse fraction: 0.55 - **Classification:** Unstranded @@ -139,6 +142,54 @@ You can use `--skip_alignment --skip_pseudo_alignment` if you only want to run t Note that `--skip_alignment` and `--skip_pseudo_alignment` prevent both the execution of alignment/pseudoalignment steps and the building of their corresponding indices. For example, using `--skip_alignment` with `--aligner star_salmon` will skip both STAR alignment and index building. +## Quality Control Configuration + +### Performance-Optimized Defaults (v4.0+) + +Starting in version 4.0, several QC steps are disabled by default to improve pipeline performance and reduce compute costs for typical bulk RNA-seq analysis: + +- **`--skip_dupradar true`** - Disable dupRadar analysis (limited utility for bulk RNA-seq experiments) +- **`--skip_qualimap true`** - Disable Qualimap alignment QC (resource-intensive step) +- **`--skip_rseqc true`** - Disable RSeQC analysis suite (7 comprehensive RNA-seq modules) +- **`--skip_stringtie true`** - Disable StringTie transcriptome assembly (additional processing overhead) +- **`--skip_bigwig true`** - Disable BigWig coverage track generation (large output files) + +These changes can significantly reduce pipeline runtime and computational requirements while preserving essential QC metrics through FastQC, MultiQC, and basic alignment statistics. 
+ +### Enabling Comprehensive QC + +For detailed quality control analysis, you can re-enable any or all QC steps: + +```bash +# Enable all QC steps (restore pre-v4.0 behavior) +nextflow run nf-core/rnaseq \ + --skip_dupradar false \ + --skip_qualimap false \ + --skip_rseqc false \ + --skip_stringtie false \ + --skip_bigwig false + +# Enable specific QC modules only +nextflow run nf-core/rnaseq --skip_qualimap false --skip_rseqc false + +# Note: the master toggle `--skip_qc` already defaults to `false`, so passing +# `--skip_qc false` does not re-enable the steps above; set each `--skip_<tool> false` explicitly +``` + +### When to Enable Extended QC + +Consider enabling additional QC steps when: + +- Working with novel samples, non-standard protocols, or unusual experimental designs +- Troubleshooting alignment, quantification, or data quality issues +- Publishing datasets that require comprehensive QC documentation +- Optimizing library preparation or sequencing protocols +- Performing method comparisons or validation studies + +:::note +Essential QC steps like FastQC, MultiQC, and alignment metrics remain enabled by default and provide sufficient quality assessment for most RNA-seq analyses. +::: + ### Sentieon acceleration for STAR The STAR aligner can be accelerated through its Sentieon implemention using the parameter `--use_sentieon_star`. 
diff --git a/nextflow.config b/nextflow.config index 1e1a5ed7f..be7fd2243 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,18 +89,18 @@ params { // QC skip_qc = false - skip_bigwig = false - skip_stringtie = false + skip_bigwig = true + skip_stringtie = true skip_fastqc = false skip_preseq = true - skip_dupradar = false - skip_qualimap = false + skip_dupradar = true + skip_qualimap = true contaminant_screening = null kraken_db = null save_kraken_assignments = false save_kraken_unassigned = false bracken_precision = "S" - skip_rseqc = false + skip_rseqc = true skip_biotype_qc = false skip_deseq2_qc = false skip_multiqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index d2bc59295..ed6948c1a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -677,12 +677,14 @@ "skip_bigwig": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip bigWig file creation." + "description": "Skip bigWig file creation.", + "default": true }, "skip_stringtie": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip StringTie." + "description": "Skip StringTie.", + "default": true }, "skip_fastqc": { "type": "boolean", @@ -698,17 +700,20 @@ "skip_dupradar": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip dupRadar." + "description": "Skip dupRadar.", + "default": true }, "skip_qualimap": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip Qualimap." + "description": "Skip Qualimap.", + "default": true }, "skip_rseqc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip RSeQC." + "description": "Skip RSeQC.", + "default": true }, "skip_biotype_qc": { "type": "boolean",