Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a6eaf38
feat: allow for multiple gene_lists, one heatmap per list, bootstrap …
dlaehnemann Oct 8, 2025
896e4bc
fix: lookup syntax
dlaehnemann Oct 8, 2025
c48cb17
chore: snakefmt
dlaehnemann Oct 8, 2025
1a67b6a
fix: typo
dlaehnemann Oct 8, 2025
dca8aef
fix: restore proper handling of topn in heatmap plotting rule
dlaehnemann Oct 9, 2025
6043b72
docs: add linkout to ensembl blog release category to determine lates…
dlaehnemann Oct 9, 2025
c9b1afb
fix: handle topn case in lookup
dlaehnemann Oct 9, 2025
5594d41
fix: provide dummy file path for topn case in heatmap rule
dlaehnemann Oct 9, 2025
b4f88d1
fix: use correct wildcard in heatmap plotting script
dlaehnemann Oct 9, 2025
fa97e67
docs: better explain diffexp/exclude in config.yaml
dlaehnemann Oct 9, 2025
3e97347
fix: correctly parse params in plot_bootstrap
dlaehnemann Oct 9, 2025
c4fb68b
fix: also test diffexp/exclude in config.yaml of CI cases
dlaehnemann Oct 9, 2025
206d244
fix: more defensive coding strategy, as suggested by @coderabbitai
dlaehnemann Oct 9, 2025
09914fb
dummy commit to trigger GitHub
dlaehnemann Oct 9, 2025
84aca9e
another dummy commit to trigger GitHub Actions
dlaehnemann Oct 9, 2025
f6709e7
docs: update comment in plot-bootstrap.R
dlaehnemann Oct 10, 2025
511cae8
fix: switch back to line-separated gene set files for plots
dlaehnemann Oct 13, 2025
e036131
fix: make bootstrap plotting more modern
dlaehnemann Oct 13, 2025
107d503
fix: clean up and overhaul heatmap plotting, switch to ggalign
dlaehnemann Oct 13, 2025
a4b56c6
fix: main test config
dlaehnemann Oct 13, 2025
778f099
chore: update release versions
dlaehnemann Oct 13, 2025
b338f0e
docs: adjust wording to not use "infix"
dlaehnemann Oct 13, 2025
4c4536c
docs: write a proper sentence
dlaehnemann Oct 13, 2025
5f1c7fa
fix: length test for transcripts of interest
dlaehnemann Oct 13, 2025
626f306
fix: syntax
dlaehnemann Oct 13, 2025
4e015b3
fix: r-base to >=4.1 for scripts using native R pipe
dlaehnemann Oct 13, 2025
846b420
docs: update config/config.yaml to ensembl release 114
dlaehnemann Oct 14, 2025
8dbfca8
docs: update more 114 -> 115
dlaehnemann Oct 14, 2025
f4b0e57
docs: comment on exceedingly high testing FDR in .test/config/config.yaml
dlaehnemann Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .test/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ experiment:
resources:
ref:
species: homo_sapiens
release: "114"
release: "115"
build: GRCh38
pfam: "37.0"
pfam: "37.1"
representative_transcripts: canonical
ontology:
gene_ontology: "https://release.geneontology.org/2025-07-22/ontology/go-basic.obo"
Expand All @@ -44,7 +44,9 @@ diffexp:
qq-plot: 0.05
genes_of_interest:
activate: true
genelist: "resources/gene_list.tsv"
gene_lists:
gene_list_1: "resources/gene_list.tsv"
gene_list_2: "resources/gene_list_2.tsv"

diffsplice:
activate: false
Expand Down Expand Up @@ -81,7 +83,7 @@ report:
offer_excel: true

bootstrap_plots:
FDR: 0.01
FDR: 0.5 # Intentionally high for testing.
top_n: 3
color_by: condition

Expand Down
9 changes: 1 addition & 8 deletions .test/resources/gene_list.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,4 @@ DCT
MLANA
MITF
CDK2
SOX10
ERBB3
LEF1
CTNNB1
CDH1
FN1
NGFR
AXL
SOX10
11 changes: 11 additions & 0 deletions .test/resources/gene_list_2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
MLANA
MITF
CDK2
SOX10
ERBB3
LEF1
CTNNB1
CDH1
FN1
NGFR
AXL
4 changes: 3 additions & 1 deletion .test/three_prime/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ scatter:

diffexp:
exclude:
- SRR8309099_2
models:
model_X:
full: ~condition
Expand All @@ -44,7 +45,8 @@ diffexp:
qq-plot: 0.05
genes_of_interest:
activate: false
genelist: "resources/gene_list.tsv"
gene_lists:
gene_list_1: "resources/gene_list.tsv"

diffsplice:
activate: false
Expand Down
3 changes: 2 additions & 1 deletion .test/three_prime/config/samples.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ SRR8309094 Control
SRR8309095 Treated
SRR8309097 Treated
SRR8309098 Control
SRR8309099 Treated
SRR8309099 Treated
SRR8309099_2 Treated
3 changes: 2 additions & 1 deletion .test/three_prime/config/units.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ SRR8309094 u1 430 43 quant_seq_test_data/SRR8309094.fastq.gz
SRR8309095 u1 430 43 quant_seq_test_data/SRR8309095.fastq.gz
SRR8309097 u1 430 43 quant_seq_test_data/SRR8309097.fastq.gz
SRR8309098 u1 430 43 quant_seq_test_data/SRR8309098.fastq.gz
SRR8309099 u1 430 43 quant_seq_test_data/SRR8309099.fastq.gz
SRR8309099 u1 430 43 quant_seq_test_data/SRR8309099.fastq.gz
SRR8309099_2 u1 430 43 quant_seq_test_data/SRR8309099.fastq.gz
47 changes: 29 additions & 18 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,30 @@ resources:
# For a quick check, see the Ensembl species list:
# https://www.ensembl.org/info/about/species.html
# For full valid species names, consult the respective table for the release
# you specify, for example for ‘114’ this is at:
# https://ftp.ensembl.org/pub/release-114/species_EnsemblVertebrates.txt
# you specify, for example for ‘115’ this is at:
# https://ftp.ensembl.org/pub/release-115/species_EnsemblVertebrates.txt
# And to browse available downloads in more detail, see the FTP server:
# https://ftp.ensembl.org/pub/
species: homo_sapiens
# ensembl release version:
# Update this to the latest working version, when you first set up a new
# analysis on a dataset. Later, it only makes sense to update (or downgrade)
# the release versions if either (a) the version you are using consistently
# fails to download (some Ensembl release versions are just broken) or
# (b) you know that a newer version will include changes that will fix some
# error or add transcripts that will be relevant to your analysis.
release: "114"
# analysis on a dataset. You can usually find the latest release in the
# Ensembl blog, by looking at the latest posts of the release category:
# https://www.ensembl.info/category/01-release/
# Later, it only makes sense to update (or downgrade) the release versions
# if either (a) the version you are using consistently fails to download
# (some Ensembl release versions are just broken) or (b) you know that a
# newer version will include changes that will fix some error or add
# transcripts that will be relevant to your analysis.
release: "115"
# genome build:
# Usually, this should just be the main build listed in:
# https://ftp.ensembl.org/pub/release-114/species_EnsemblVertebrates.txt
# https://ftp.ensembl.org/pub/release-115/species_EnsemblVertebrates.txt
# For example, for homo_sapiens, you strip the assembly column entry
# "GRCh38.p12" down to "GRCh38". If in doubt, navigate to the respective
# cdna folder on the FTP server, and look for the correct build in the
# file names there. For example "GRCh38" in:
# https://ftp.ensembl.org/pub/release-114/fasta/homo_sapiens/cdna/
# https://ftp.ensembl.org/pub/release-115/fasta/homo_sapiens/cdna/
build: GRCh38
# pfam release:
# This is used for annotation of domains in differential splicing analysis.
Expand All @@ -54,7 +57,7 @@ resources:
# https://xfam.wordpress.com/
# For debugging file downloads, you can browse the FTP download server:
# https://ftp.ebi.ac.uk/pub/databases/Pfam/releases/
pfam: "37.0"
pfam: "37.1"
# representative transcripts:
# Strategy for selecting a representative transcript for each gene.
# kallisto quantifies expression on the transcript level. For datasets
Expand Down Expand Up @@ -91,8 +94,11 @@ scatter:

diffexp:
# Samples to exclude from differential expression modeling (for example,
# outliers due to technical problems).
# outliers due to technical problems). List their `sample_name` column
# entries as a YAML list.
exclude:
# - sample_X
# - sample_Y
# model for sleuth differential expression analysis
# For an introduction to sleuth, see its online manual:
# https://pachterlab.github.io/sleuth/about
Expand Down Expand Up @@ -135,14 +141,19 @@ diffexp:
volcano-plot: 0.05
ma-plot: 0.05
qq-plot: 0.05
# heatmap and bootstrap plots for given set of genes:
# If you want a heatmap and bootstrap plots for a particular set of genes,
# you can set `activate: true` and provide a genelist file. In this file,
# list all your HGNC gene symbols of interest in one line, separated by
# tabulators (tabs).
# heatmap and bootstrap plots for given sets of genes:
# If you want a heatmap and bootstrap plots for particular sets of genes,
# you can set `activate: true` and provide one or more gene list files. In
# those files, list all your HGNC gene symbols of interest, one gene per line.
# The workflow will generate one heatmap plot per gene set file, and a
# bootstrap plot for each of the genes contained in any of the provided files.
genes_of_interest:
activate: false
genelist: "config/gene_list.tsv"
gene_lists:
# Use a descriptive gene list name, as this will be part of
# the filename of the resulting heatmap plot.
gene_list_1: "config/gene_list.tsv"


diffsplice:
# isoformSwitchAnalyzer
Expand Down
6 changes: 3 additions & 3 deletions workflow/envs/heatmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ channels:
- bioconda
- nodefaults
dependencies:
- r-pheatmap =1.0.12
- r-dplyr =1.0.9
- r-tidyr =1.2.0
- r-base >=4.1
- r-tidyverse =2.0
- r-ggalign =1.1
2 changes: 1 addition & 1 deletion workflow/envs/sleuth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ dependencies:
- r-pheatmap =1.0.12
- r-tidyverse =2.0
- r-ggpubr =0.6
- r-base =4
- r-base >=4.1
- bioconductor-limma =3.56
19 changes: 6 additions & 13 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,6 @@ def kallisto_params(wildcards, input):
return extra


def input_genelist(predef_genelist):
if config["diffexp"]["genes_of_interest"]["activate"] == True:
predef_genelist = config["diffexp"]["genes_of_interest"]["genelist"]
else:
predef_genelist = []

return predef_genelist


def all_input(wildcards):
"""
Function defining all requested inputs for the rule all (below).
Expand Down Expand Up @@ -329,20 +320,22 @@ def all_input(wildcards):
"results/tables/tpm-matrix/{model}.tpm-matrix.tsv",
"results/sleuth/{model}.samples.tsv",
"results/datavzrd-reports/diffexp-{model}",
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{mode}.pdf",
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{gene_list}.pdf",
],
model=config["diffexp"]["models"],
mode=["topn"],
gene_list=["topn"],
)
)
if config["diffexp"]["genes_of_interest"]["activate"]:
wanted_input.extend(
expand(
[
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{mode}.pdf",
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{gene_list}.pdf",
],
model=config["diffexp"]["models"],
mode=["predefined"],
gene_list=lookup(
within=config, dpath="diffexp/genes_of_interest/gene_lists"
),
)
)

Expand Down
19 changes: 14 additions & 5 deletions workflow/rules/diffexp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ rule plot_bootstrap:
color_by=config["bootstrap_plots"]["color_by"],
fdr=config["bootstrap_plots"]["FDR"],
top_n=config["bootstrap_plots"]["top_n"],
genes=config["diffexp"]["genes_of_interest"],
genes_of_interest=lookup(within=config, dpath="diffexp/genes_of_interest"),
log:
"logs/plots/bootstrap/{model}/{model}.plot_bootstrap.log",
script:
Expand Down Expand Up @@ -258,18 +258,27 @@ rule tpm_matrix:
rule plot_diffexp_heatmap:
input:
logcountmatrix_file="results/tables/logcount-matrix/{model}.logcount-matrix.tsv",
predef_genelist=input_genelist,
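# `default` provides a dummy file path for the "topn" case, where the
# `gene_list` wildcard does not match any entry under
# `diffexp/genes_of_interest/gene_lists`. It reuses the `samples` entry of
# the config (a file path that is always valid, but never loaded).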
predef_gene_list=lookup(
within=config,
dpath="diffexp/genes_of_interest/gene_lists/{gene_list}",
default=lookup(within=config, dpath="samples"),
),
output:
diffexp_heatmap=report(
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{mode}.pdf",
"results/plots/diffexp-heatmap/{model}.diffexp-heatmap.{gene_list}.pdf",
caption="../report/plot-heatmap.rst",
category="Heatmaps",
labels={"model": "{model}-{mode}"},
labels={
"model": "{model}",
"gene list": "{gene_list}",
},
),
params:
model=get_model,
log:
"logs/plots/diffexp-heatmap/{model}.diffexp-heatmap.{mode}.log",
"logs/plots/diffexp-heatmap/{model}.diffexp-heatmap.{gene_list}.log",
conda:
"../envs/heatmap.yaml"
script:
Expand Down
9 changes: 6 additions & 3 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,13 @@ properties:
properties:
activate:
type: boolean
genelist:
type: string
gene_lists:
type: object
patternProperties:
"^.+$":
type: string
required:
- genelist
- gene_lists
required:
- models
- genes_of_interest
Expand Down
Loading
Loading