From 43774f0a1eff06ef7c8f9f477c175f04258cb325 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 10:32:55 -0400 Subject: [PATCH 01/49] Add initial IGV_HaplotypeViz WDL workflow for generating IGV screenshots --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl new file mode 100644 index 000000000..aeb0d6eea --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -0,0 +1,129 @@ +version 1.0 + +import "../../../structs/Structs.wdl" + +workflow IGV_HaplotypeViz { + input { + # BED files containing regions to screenshot; 4th column can optionally be SVID + Array[File] beds + Array[String] run_names + + # BAM and BAI files from align_asm workflow for alignment visualization + File bam_hap1 + File bai_hap1 + File bam_hap2 + File bai_hap2 + + # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization + File haplotig_fasta_hap1 + File haplotig_fasta_hap2 + + # Reference corresponding to read alignments for BAM files + File ref_fasta + File ref_fai + + # Sample id and prefix for output filenames + String sample_id + + # Number of records per shard for parallelization + Int? records_per_shard + + # Docker images for Linux and IGV headless tasks + String linux_docker + String igv_docker + } + + scatter (i in range(length(beds))) { + String sample_w_hap1 = sample_id + "_hap1" + String sample_w_hap2 = sample_id + "_hap2" + + # Run IGV for BAM alignments Haplotype 1 (H1) + call RunIGVHeadless as IGV_Hap1 { + input: + bam_or_cram=bam_hap1, + bam_or_cram_index=bai_hap1, + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # Run IGV for BAM alignments Haplotype 2 (H2) + call RunIGVHeadless as IGV_Hap2 { + input: + bam_or_cram=bam_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) + call RunIGVHeadless as IGV_Seq_Hap1 { + input: + bam_or_cram=haplotig_fasta_hap1, + bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) + call RunIGVHeadless as IGV_Seq_Hap2 { + input: + bam_or_cram=haplotig_fasta_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + } + + output { + Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot + Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot + Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot + } +} + +task RunIGVHeadless { + input { + File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram_index # Index file for BAM/CRAM + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for 
running IGV headless + Int? records_per_shard # Optional: Parallelization parameter for large datasets + } + + command <<< + # Running IGV headless mode to take screenshots for each region in the BED file + igv.sh \ + -b ~{bam_or_cram} \ + -i ~{bam_or_cram_index} \ + -g ~{ref_fasta} \ + -bed ~{bed} \ + -o ~{sample_id}.igv_screenshot.png + >>> + + output { + File igv_screenshot = "~{sample_id}.igv_screenshot.png" + } + + runtime { + docker: "~{igv_docker}" + memory: "8G" + cpu: "2" + disks: "local-disk 10 HDD" + } +} From 7cb3c7eee6771fbd620a85ce160e82994e5538e1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 11:22:07 -0400 Subject: [PATCH 02/49] Add new workflow for parallel processing of large BED files --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index aeb0d6eea..fe72a73aa 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz { +workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl new file mode 100644 index 000000000..aeb0d6eea --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -0,0 +1,129 @@ +version 1.0 + +import "../../../structs/Structs.wdl" + +workflow IGV_HaplotypeViz { + input { + # BED files containing regions to screenshot; 4th column can optionally be SVID + Array[File] beds + Array[String] run_names + + # BAM and BAI files from align_asm workflow for alignment visualization + File bam_hap1 + File bai_hap1 + File bam_hap2 + File bai_hap2 + + # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization + File haplotig_fasta_hap1 + File haplotig_fasta_hap2 + + # Reference corresponding to read alignments for BAM files + File ref_fasta + File ref_fai + + # Sample id and prefix for output filenames + String sample_id + + # Number of records per shard for parallelization + Int? 
records_per_shard + + # Docker images for Linux and IGV headless tasks + String linux_docker + String igv_docker + } + + scatter (i in range(length(beds))) { + String sample_w_hap1 = sample_id + "_hap1" + String sample_w_hap2 = sample_id + "_hap2" + + # Run IGV for BAM alignments Haplotype 1 (H1) + call RunIGVHeadless as IGV_Hap1 { + input: + bam_or_cram=bam_hap1, + bam_or_cram_index=bai_hap1, + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # Run IGV for BAM alignments Haplotype 2 (H2) + call RunIGVHeadless as IGV_Hap2 { + input: + bam_or_cram=bam_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) + call RunIGVHeadless as IGV_Seq_Hap1 { + input: + bam_or_cram=haplotig_fasta_hap1, + bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) + call RunIGVHeadless as IGV_Seq_Hap2 { + input: + bam_or_cram=haplotig_fasta_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + } + + output { + Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot + Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot + Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot + } +} + +task RunIGVHeadless { + input { + File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram_index # Index file for BAM/CRAM + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for running IGV headless + Int? 
records_per_shard # Optional: Parallelization parameter for large datasets + } + + command <<< + # Running IGV headless mode to take screenshots for each region in the BED file + igv.sh \ + -b ~{bam_or_cram} \ + -i ~{bam_or_cram_index} \ + -g ~{ref_fasta} \ + -bed ~{bed} \ + -o ~{sample_id}.igv_screenshot.png + >>> + + output { + File igv_screenshot = "~{sample_id}.igv_screenshot.png" + } + + runtime { + docker: "~{igv_docker}" + memory: "8G" + cpu: "2" + disks: "local-disk 10 HDD" + } +} From 27f85d6736c3a26e064be92744c4a20c17cae6a8 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 11:27:52 -0400 Subject: [PATCH 03/49] fix the workflow name --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index fe72a73aa..aeb0d6eea 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz_Scatter { +workflow IGV_HaplotypeViz { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index aeb0d6eea..fe72a73aa 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz { +workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds From 6249a7c0123b4f52da71a91f0ad07bbd1f9b86a7 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 20:48:09 -0400 Subject: [PATCH 04/49] update the docker --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 46 +++++-------------- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 46 +++++-------------- 2 files changed, 24 insertions(+), 68 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index aeb0d6eea..36baf264b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -37,11 +37,12 @@ workflow IGV_HaplotypeViz { String sample_w_hap1 = sample_id + "_hap1" String sample_w_hap2 = sample_id + "_hap2" - # Run IGV for BAM alignments Haplotype 1 (H1) - call RunIGVHeadless as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) + call RunIGVHeadlessCombined as IGV_Hap1 { input: bam_or_cram=bam_hap1, bam_or_cram_index=bai_hap1, + fasta=haplotig_fasta_hap1, bed=beds[i], sample_id=sample_w_hap1, ref_fasta=ref_fasta, @@ -49,35 +50,12 @@ workflow IGV_HaplotypeViz { igv_docker=igv_docker } - # Run IGV for BAM alignments Haplotype 2 (H2) - call RunIGVHeadless as IGV_Hap2 { + # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) + call RunIGVHeadlessCombined as IGV_Hap2 { input: bam_or_cram=bam_hap2, bam_or_cram_index=bai_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) - call RunIGVHeadless as IGV_Seq_Hap1 { - input: - 
bam_or_cram=haplotig_fasta_hap1, - bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA - bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) - call RunIGVHeadless as IGV_Seq_Hap2 { - input: - bam_or_cram=haplotig_fasta_hap2, - bam_or_cram_index=bai_hap2, + fasta=haplotig_fasta_hap2, bed=beds[i], sample_id=sample_w_hap2, ref_fasta=ref_fasta, @@ -89,15 +67,14 @@ workflow IGV_HaplotypeViz { output { Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot - Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot - Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot } } -task RunIGVHeadless { +task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram # BAM/CRAM file for visualization File bam_or_cram_index # Index file for BAM/CRAM + File fasta # FASTA file for haplotype visualization File bed # BED file containing regions to visualize (3 or 4 columns allowed) String sample_id # Sample ID for naming outputs File ref_fasta # Reference genome used for alignment @@ -107,12 +84,13 @@ task RunIGVHeadless { } command <<< - # Running IGV headless mode to take screenshots for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file igv.sh \ -b ~{bam_or_cram} \ -i ~{bam_or_cram_index} \ - -g ~{ref_fasta} \ + -g ~{fasta} \ -bed ~{bed} \ + -name bam,fasta \ -o ~{sample_id}.igv_screenshot.png >>> diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index fe72a73aa..29aec1df6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -37,11 +37,12 @@ workflow IGV_HaplotypeViz_Scatter { String sample_w_hap1 = sample_id + "_hap1" String sample_w_hap2 = sample_id + "_hap2" - # Run IGV for BAM alignments Haplotype 1 (H1) - call RunIGVHeadless as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) + call RunIGVHeadlessCombined as IGV_Hap1 { input: bam_or_cram=bam_hap1, bam_or_cram_index=bai_hap1, + fasta=haplotig_fasta_hap1, bed=beds[i], sample_id=sample_w_hap1, ref_fasta=ref_fasta, @@ -49,35 +50,12 @@ workflow IGV_HaplotypeViz_Scatter { igv_docker=igv_docker } - # Run IGV for BAM alignments Haplotype 2 (H2) - call RunIGVHeadless as IGV_Hap2 { + # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) + call RunIGVHeadlessCombined as IGV_Hap2 { input: bam_or_cram=bam_hap2, bam_or_cram_index=bai_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) - call RunIGVHeadless as IGV_Seq_Hap1 { - input: - bam_or_cram=haplotig_fasta_hap1, - bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA - bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) - call RunIGVHeadless as IGV_Seq_Hap2 { - input: - bam_or_cram=haplotig_fasta_hap2, - bam_or_cram_index=bai_hap2, + 
fasta=haplotig_fasta_hap2, bed=beds[i], sample_id=sample_w_hap2, ref_fasta=ref_fasta, @@ -89,15 +67,14 @@ workflow IGV_HaplotypeViz_Scatter { output { Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot - Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot - Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot } } -task RunIGVHeadless { +task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram # BAM/CRAM file for visualization File bam_or_cram_index # Index file for BAM/CRAM + File fasta # FASTA file for haplotype visualization File bed # BED file containing regions to visualize (3 or 4 columns allowed) String sample_id # Sample ID for naming outputs File ref_fasta # Reference genome used for alignment @@ -107,12 +84,13 @@ task RunIGVHeadless { } command <<< - # Running IGV headless mode to take screenshots for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file igv.sh \ -b ~{bam_or_cram} \ -i ~{bam_or_cram_index} \ - -g ~{ref_fasta} \ + -g ~{fasta} \ -bed ~{bed} \ + -name bam,fasta \ -o ~{sample_id}.igv_screenshot.png >>> From c2ac959ba53d10852a57632ba81f7e4bfcc7ba73 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 11:08:39 -0400 Subject: [PATCH 05/49] updating docker and fixing the igv command --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 36baf264b..7132f4274 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -6,7 +6,7 @@ workflow IGV_HaplotypeViz { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds - Array[String] run_names + Array[String]? run_names # BAM and BAI files from align_asm workflow for alignment visualization File bam_hap1 @@ -28,69 +28,69 @@ workflow IGV_HaplotypeViz { # Number of records per shard for parallelization Int? records_per_shard - # Docker images for Linux and IGV headless tasks - String linux_docker - String igv_docker + # Configurable CPU, memory, and disk + Int? cpu = 2 # Default is 2 CPUs + String? memory = "8G" # Default memory is 8 GB + String? 
disk_size = "10G" # Default disk size is 10 GB } + Array[String] default_run_names = range(length(beds)) + Array[String] used_run_names = select_first([run_names, default_run_names]) scatter (i in range(length(beds))) { - String sample_w_hap1 = sample_id + "_hap1" - String sample_w_hap2 = sample_id + "_hap2" + String run_name = used_run_names[i] + String sample_combined = sample_id + "_combined_" + run_name - # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) - call RunIGVHeadlessCombined as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 and 2 combined + call RunIGVHeadlessCombined { input: - bam_or_cram=bam_hap1, - bam_or_cram_index=bai_hap1, - fasta=haplotig_fasta_hap1, + bam_hap1=bam_hap1, + bai_hap1=bai_hap1, + bam_hap2=bam_hap2, + bai_hap2=bai_hap2, + fasta_hap1=haplotig_fasta_hap1, + fasta_hap2=haplotig_fasta_hap2, bed=beds[i], - sample_id=sample_w_hap1, + sample_id=sample_combined, ref_fasta=ref_fasta, ref_fai=ref_fai, - igv_docker=igv_docker - } - - # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) - call RunIGVHeadlessCombined as IGV_Hap2 { - input: - bam_or_cram=bam_hap2, - bam_or_cram_index=bai_hap2, - fasta=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker + cpu=cpu, + memory=memory, + disk_size=disk_size } } output { - Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot - Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot } } task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM file for visualization - File bam_or_cram_index # Index file for BAM/CRAM - File fasta # FASTA file for haplotype visualization - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless - Int? records_per_shard # Optional: Parallelization parameter for large datasets + File bam_hap1 # BAM file for Haplotype 1 + File bai_hap1 # BAI file for Haplotype 1 + File bam_hap2 # BAM file for Haplotype 2 + File bai_hap2 # BAI file for Haplotype 2 + File fasta_hap1 # FASTA file for Haplotype 1 + File fasta_hap2 # FASTA file for Haplotype 2 + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + + # Configurable resources + Int? cpu # CPUs to use + String? memory # Memory to allocate + String? 
disk_size # Disk size } command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes igv.sh \ - -b ~{bam_or_cram} \ - -i ~{bam_or_cram_index} \ - -g ~{fasta} \ + -b ~{bam_hap1},~{bam_hap2} \ + -i ~{bai_hap1},~{bai_hap2} \ + -g ~{fasta_hap1},~{fasta_hap2} \ -bed ~{bed} \ - -name bam,fasta \ + -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ -o ~{sample_id}.igv_screenshot.png >>> @@ -99,9 +99,9 @@ task RunIGVHeadlessCombined { } runtime { - docker: "~{igv_docker}" - memory: "8G" - cpu: "2" - disks: "local-disk 10 HDD" + docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" + memory: "~{memory}" + cpu: "~{cpu}" + disks: "local-disk ~{disk_size} HDD" } } From dd2dfc930b08a4b2b411c86e61f7493a54390364 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 11:27:00 -0400 Subject: [PATCH 06/49] fixing the docker and igv command --- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index 29aec1df6..671316c92 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -6,7 +6,7 @@ workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds - Array[String] run_names + Array[String]? run_names # BAM and BAI files from align_asm workflow for alignment visualization File bam_hap1 @@ -28,36 +28,24 @@ workflow IGV_HaplotypeViz_Scatter { # Number of records per shard for parallelization Int? 
records_per_shard - # Docker images for Linux and IGV headless tasks - String linux_docker + # Docker image for IGV headless tasks String igv_docker } scatter (i in range(length(beds))) { - String sample_w_hap1 = sample_id + "_hap1" - String sample_w_hap2 = sample_id + "_hap2" + String sample_combined = sample_id + "_combined" - # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) - call RunIGVHeadlessCombined as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for both haplotypes (Hap1 and Hap2) + call RunIGVHeadlessCombined { input: - bam_or_cram=bam_hap1, - bam_or_cram_index=bai_hap1, - fasta=haplotig_fasta_hap1, + bam_hap1=bam_hap1, + bai_hap1=bai_hap1, + bam_hap2=bam_hap2, + bai_hap2=bai_hap2, + fasta_hap1=haplotig_fasta_hap1, + fasta_hap2=haplotig_fasta_hap2, bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) - call RunIGVHeadlessCombined as IGV_Hap2 { - input: - bam_or_cram=bam_hap2, - bam_or_cram_index=bai_hap2, - fasta=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_w_hap2, + sample_id=sample_combined, ref_fasta=ref_fasta, ref_fai=ref_fai, igv_docker=igv_docker @@ -65,32 +53,34 @@ workflow IGV_HaplotypeViz_Scatter { } output { - Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot - Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot } } task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM file for visualization - File bam_or_cram_index # Index file for BAM/CRAM - File fasta # FASTA file for haplotype visualization - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless + File bam_hap1 # BAM file for Haplotype 1 + File bai_hap1 # BAI file for Haplotype 1 + File bam_hap2 # BAM file for Haplotype 2 + File bai_hap2 # BAI file for Haplotype 2 + File fasta_hap1 # FASTA file for Haplotype 1 + File fasta_hap2 # FASTA file for Haplotype 2 + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for running IGV headless Int? 
records_per_shard # Optional: Parallelization parameter for large datasets } command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes igv.sh \ - -b ~{bam_or_cram} \ - -i ~{bam_or_cram_index} \ - -g ~{fasta} \ + -b ~{bam_hap1},~{bam_hap2} \ + -i ~{bai_hap1},~{bai_hap2} \ + -g ~{fasta_hap1},~{fasta_hap2} \ -bed ~{bed} \ - -name bam,fasta \ + -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ -o ~{sample_id}.igv_screenshot.png >>> @@ -99,7 +89,7 @@ task RunIGVHeadlessCombined { } runtime { - docker: "~{igv_docker}" + docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" # Updated IGV docker image memory: "8G" cpu: "2" disks: "local-disk 10 HDD" From c0fa633f4d377c9c3043aa142e2a271b26007913 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 13:44:40 -0400 Subject: [PATCH 07/49] fixing the runtime --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 7132f4274..ae4b2937f 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -102,6 +102,6 @@ task RunIGVHeadlessCombined { docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" memory: "~{memory}" cpu: "~{cpu}" - disks: "local-disk ~{disk_size} HDD" + disks: "local-disk ~{disk_size}" } } From fafa429ec7f005369b52c31e89ad6b273150ac79 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 13:55:36 -0400 Subject: [PATCH 08/49] fixing the runtime --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index ae4b2937f..128b19833 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -102,6 +102,6 @@ task RunIGVHeadlessCombined { docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" memory: "~{memory}" cpu: "~{cpu}" - disks: "local-disk ~{disk_size}" + disks: "local-disk ~{disk_size} SSD" } } From 9f0d53bf21f1d651122d6f862554c35e191750b5 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Sun, 8 Sep 2024 12:10:51 -0400 Subject: [PATCH 09/49] new wdl for taking the igv screenshots --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 127 ++++++------------ 1 file changed, 42 insertions(+), 85 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 128b19833..521dfbd88 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,107 +1,64 @@ version 1.0 -import "../../../structs/Structs.wdl" +workflow igv_screenshot_automation { -workflow IGV_HaplotypeViz { input { - # BED files containing regions to screenshot; 4th column can optionally be SVID - Array[File] beds - Array[String]? 
run_names - - # BAM and BAI files from align_asm workflow for alignment visualization - File bam_hap1 - File bai_hap1 - File bam_hap2 - File bai_hap2 - - # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization - File haplotig_fasta_hap1 - File haplotig_fasta_hap2 - - # Reference corresponding to read alignments for BAM files - File ref_fasta - File ref_fai - - # Sample id and prefix for output filenames - String sample_id - - # Number of records per shard for parallelization - Int? records_per_shard - - # Configurable CPU, memory, and disk - Int? cpu = 2 # Default is 2 CPUs - String? memory = "8G" # Default memory is 8 GB - String? disk_size = "10G" # Default disk size is 10 GB + File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap2_bam # BAM file for asm haplotype 2 + File hap1_bam # BAM file for haplotype 1 + File hap2_bam # BAM file for haplotype 2 + File reference_fasta # Reference FASTA file + File regions_bed # Path to the BED file with regions of interest + String genome # Reference genome version (e.g., "hg38") + Int image_height = 500 # Height for the IGV tracks } - Array[String] default_run_names = range(length(beds)) - Array[String] used_run_names = select_first([run_names, default_run_names]) - - scatter (i in range(length(beds))) { - String run_name = used_run_names[i] - String sample_combined = sample_id + "_combined_" + run_name - # Run IGV for both BAM and FASTA visualization for Haplotype 1 and 2 combined - call RunIGVHeadlessCombined { - input: - bam_hap1=bam_hap1, - bai_hap1=bai_hap1, - bam_hap2=bam_hap2, - bai_hap2=bai_hap2, - fasta_hap1=haplotig_fasta_hap1, - fasta_hap2=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_combined, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - cpu=cpu, - memory=memory, - disk_size=disk_size - } + call IGVScreenshotTask { + input: + asm_hap1_bam = asm_hap1_bam, + asm_hap2_bam = asm_hap2_bam, + hap1_bam = hap1_bam, + hap2_bam = hap2_bam, + reference_fasta = reference_fasta, + regions_bed = regions_bed, + genome = genome, + image_height = image_height } output { - Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot + Array[File] snapshots = IGVScreenshotTask.snapshots } } -task RunIGVHeadlessCombined { +task IGVScreenshotTask { input { - File bam_hap1 # BAM file for Haplotype 1 - File bai_hap1 # BAI file for Haplotype 1 - File bam_hap2 # BAM file for Haplotype 2 - File bai_hap2 # BAI file for Haplotype 2 - File fasta_hap1 # FASTA file for Haplotype 1 - File fasta_hap2 # FASTA file for Haplotype 2 - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - - # Configurable resources - Int? cpu # CPUs to use - String? memory # Memory to allocate - String? 
disk_size # Disk size + File asm_hap1_bam + File asm_hap2_bam + File hap1_bam + File hap2_bam + File reference_fasta + File regions_bed + String genome + Int image_height } - command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes - igv.sh \ - -b ~{bam_hap1},~{bam_hap2} \ - -i ~{bai_hap1},~{bai_hap2} \ - -g ~{fasta_hap1},~{fasta_hap2} \ - -bed ~{bed} \ - -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ - -o ~{sample_id}.igv_screenshot.png - >>> + command { + # Run the Python script with inputs for hap1 and hap2 BAM files + python3 /opt/make_igv_screenshot.py \ + ${asm_hap1_bam} ${asm_hap2_bam} ${hap1_bam} ${hap2_bam} \ + -r ${regions_bed} -g ${genome} -ht ${image_height} \ + -ref_fasta ${reference_fasta} + } output { - File igv_screenshot = "~{sample_id}.igv_screenshot.png" + # Capture all the snapshot files generated by the script + Array[File] snapshots = glob("IGV_Snapshots/*.png") } runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" - memory: "~{memory}" - cpu: "~{cpu}" - disks: "local-disk ~{disk_size} SSD" + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" + memory: "8G" + cpu: 2 + disks: "local-disk 100 HDD" } } From 9682125b992b943289eb1acb87e988adfb35482d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 08:07:45 -0400 Subject: [PATCH 10/49] update docker and the script to use only one bam --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 521dfbd88..032f6f87c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,10 +3,9 @@ version 1.0 workflow igv_screenshot_automation { input { - File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap2_bam # BAM file for asm haplotype 2 - File hap1_bam # BAM file for haplotype 1 - File hap2_bam # BAM file for haplotype 2 + File asm_hap1_bam # BAM file for assembly haplotype 1 + File asm_hap2_bam # BAM file for assembly haplotype 2 + File bam # A single BAM file for the sample File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") @@ -17,8 +16,7 @@ workflow igv_screenshot_automation { input: asm_hap1_bam = asm_hap1_bam, asm_hap2_bam = asm_hap2_bam, - hap1_bam = hap1_bam, - hap2_bam = hap2_bam, + bam = bam, reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, @@ -34,8 +32,7 @@ task IGVScreenshotTask { input { File asm_hap1_bam File asm_hap2_bam - File hap1_bam - File hap2_bam + File bam File reference_fasta File regions_bed String genome @@ -43,9 +40,9 @@ task IGVScreenshotTask { } command { - # Run the Python script with inputs for hap1 and hap2 BAM files + # Run the Python script with inputs for asm_hap1, asm_hap2, and bam python3 /opt/make_igv_screenshot.py \ - ${asm_hap1_bam} ${asm_hap2_bam} ${hap1_bam} ${hap2_bam} \ + ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} } @@ -59,6 +56,6 @@ task IGVScreenshotTask { docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" memory: "8G" cpu: 2 - disks: "local-disk 100 HDD" + disks: "local-disk 100 HDD" # Adjust this based on file size needs } } From f5749369e2ecc89f8911dae7c7a5253c996f9843 Mon Sep 17 00:00:00 2001 From: 
Shadi Zaheri Date: Mon, 9 Sep 2024 10:07:53 -0400 Subject: [PATCH 11/49] Updated Python script and WDL for IGV snapshot automation --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 032f6f87c..659ef7af4 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,8 +3,8 @@ version 1.0 workflow igv_screenshot_automation { input { - File asm_hap1_bam # BAM file for assembly haplotype 1 - File asm_hap2_bam # BAM file for assembly haplotype 2 + File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap2_bam # BAM file for asm haplotype 2 File bam # A single BAM file for the sample File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest @@ -48,7 +48,7 @@ task IGVScreenshotTask { } output { - # Capture all the snapshot files generated by the script + # Capture all the snapshot files generated by the script from the 'IGV_Snapshots' directory Array[File] snapshots = glob("IGV_Snapshots/*.png") } From 1c119149f5af24bee900329ff4c2bee635aa7907 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 13:33:08 -0400 Subject: [PATCH 12/49] Update WDL and Python script to support IGV 2.18.2 with igv.sh --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 659ef7af4..6151d2e7d 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -44,7 +44,8 @@ task IGVScreenshotTask { python3 /opt/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ - -ref_fasta ${reference_fasta} + -ref_fasta ${reference_fasta} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh # Explicitly passing the igv.sh path } output { From d10a14004841a7408bf759568fcc99e0651a1ea4 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 14:55:34 -0400 Subject: [PATCH 13/49] update the directory to the python script --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 6151d2e7d..af96bf84c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -41,7 +41,7 @@ task IGVScreenshotTask { command { # Run the Python script with inputs for asm_hap1, asm_hap2, and bam - python3 /opt/make_igv_screenshot.py \ + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} \ From 09154eda459b1f8be17006645b6a40c57a01f6a1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 17:48:25 -0400 Subject: [PATCH 14/49] update bai --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index af96bf84c..697a3b799 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ 
b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow igv_screenshot_automation { input { File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap1_bai # BAI index file for asm haplotype 1 File asm_hap2_bam # BAM file for asm haplotype 2 + File asm_hap2_bai # BAI index file for asm haplotype 2 File bam # A single BAM file for the sample + File bam_bai # BAI index file for the single BAM file File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") @@ -15,8 +18,11 @@ workflow igv_screenshot_automation { call IGVScreenshotTask { input: asm_hap1_bam = asm_hap1_bam, + asm_hap1_bai = asm_hap1_bai, asm_hap2_bam = asm_hap2_bam, + asm_hap2_bai = asm_hap2_bai, bam = bam, + bam_bai = bam_bai, reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, @@ -31,8 +37,11 @@ workflow igv_screenshot_automation { task IGVScreenshotTask { input { File asm_hap1_bam + File asm_hap1_bai File asm_hap2_bam + File asm_hap2_bai File bam + File bam_bai File reference_fasta File regions_bed String genome @@ -40,12 +49,20 @@ task IGVScreenshotTask { } command { + # Localize the BAM and BAI files to ensure IGV can use them + ln -s ${asm_hap1_bam} . + ln -s ${asm_hap1_bai} . + ln -s ${asm_hap2_bam} . + ln -s ${asm_hap2_bai} . + ln -s ${bam} . + ln -s ${bam_bai} . + # Run the Python script with inputs for asm_hap1, asm_hap2, and bam python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh # Explicitly passing the igv.sh path + -bin /opt/IGV_Linux_2.18.2/igv.sh } output { From 4aedff1391b20f21c1299e58432df76701c7905f Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 18:12:42 -0400 Subject: [PATCH 15/49] checking for bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 697a3b799..1202411f3 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,25 +4,23 @@ workflow igv_screenshot_automation { input { File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap1_bai # BAI index file for asm haplotype 1 File asm_hap2_bam # BAM file for asm haplotype 2 - File asm_hap2_bai # BAI index file for asm haplotype 2 File bam # A single BAM file for the sample - File bam_bai # BAI index file for the single BAM file File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") Int image_height = 500 # Height for the IGV tracks } + # Directly use .bam.bai files co-located with the BAM files call IGVScreenshotTask { input: asm_hap1_bam = asm_hap1_bam, - asm_hap1_bai = asm_hap1_bai, + asm_hap1_bai = asm_hap1_bam + ".bai", asm_hap2_bam = asm_hap2_bam, - asm_hap2_bai = asm_hap2_bai, + asm_hap2_bai = asm_hap2_bam + ".bai", bam = bam, - bam_bai = bam_bai, + bam_bai = bam + ".bai", reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, From bd53b9cce97594c29ac314d72e9f4a77ace081b1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 13:16:51 -0400 Subject: [PATCH 16/49] new wdl-the script has been tested on the vm --- 
.../PacBio/Utility/IGV_HaplotypeViz.wdl | 130 +++++++++--------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 1202411f3..6b3342843 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,77 +1,77 @@ version 1.0 -workflow igv_screenshot_automation { +workflow IGVScreenshotWorkflow { - input { - File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap2_bam # BAM file for asm haplotype 2 - File bam # A single BAM file for the sample - File reference_fasta # Reference FASTA file - File regions_bed # Path to the BED file with regions of interest - String genome # Reference genome version (e.g., "hg38") - Int image_height = 500 # Height for the IGV tracks - } + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments # BAM file for total alignments + File bed_file # BED file with regions + File fasta_file # Reference FASTA file + String sample_name # Sample name to use in filenames + Int image_height = 500 + Int memory_mb = 4000 + Int disk_gb = 100 # Disk size in GB, default to 100 GB + String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + } - # Directly use .bam.bai files co-located with the BAM files - call IGVScreenshotTask { - input: - asm_hap1_bam = asm_hap1_bam, - asm_hap1_bai = asm_hap1_bam + ".bai", - asm_hap2_bam = asm_hap2_bam, - asm_hap2_bai = asm_hap2_bam + ".bai", - bam = bam, - bam_bai = bam + ".bai", - reference_fasta = reference_fasta, - regions_bed = regions_bed, - genome = genome, - image_height = image_height - } + call RunIGVScreenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap2 = aligned_bam_hap2, + alignments = alignments, + bed_file = bed_file, + fasta_file = fasta_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image + } - output { - Array[File] snapshots = IGVScreenshotTask.snapshots - } + output { + Array[File] snapshots = RunIGVScreenshot.snapshots + } } -task IGVScreenshotTask { - input { - File asm_hap1_bam - File asm_hap1_bai - File asm_hap2_bam - File asm_hap2_bai - File bam - File bam_bai - File reference_fasta - File regions_bed - String genome - Int image_height - } +task RunIGVScreenshot { + input { + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments + File bed_file + File fasta_file + String sample_name + Int image_height + Int memory_mb + Int disk_gb + String docker_image + } - command { - # Localize the BAM and BAI files to ensure IGV can use them - ln -s ${asm_hap1_bam} . - ln -s ${asm_hap1_bai} . - ln -s ${asm_hap2_bam} . - ln -s ${asm_hap2_bai} . - ln -s ${bam} . - ln -s ${bam_bai} . 
+ command { + mkdir -p IGV_Snapshots + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 - # Run the Python script with inputs for asm_hap1, asm_hap2, and bam - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ - -r ${regions_bed} -g ${genome} -ht ${image_height} \ - -ref_fasta ${reference_fasta} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh - } + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} + } - output { - # Capture all the snapshot files generated by the script from the 'IGV_Snapshots' directory - Array[File] snapshots = glob("IGV_Snapshots/*.png") - } + runtime { + docker: docker_image + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk ~{disk_gb} HDD" + } - runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - memory: "8G" - cpu: 2 - disks: "local-disk 100 HDD" # Adjust this based on file size needs - } + output { + Array[File] snapshots = glob("IGV_Snapshots/*.png") + } } From d533e408d1ac1af75b14460886a9941f859a0117 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 14:09:33 -0400 Subject: [PATCH 17/49] technically the same script as IGV_HaplotypeViz but will locate the input bai as well --- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 97 ------------------- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 74 ++++++++++++++ 2 files changed, 74 insertions(+), 97 deletions(-) delete mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl deleted file mode 100644 index 671316c92..000000000 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ /dev/null @@ -1,97 +0,0 @@ -version 1.0 - -import "../../../structs/Structs.wdl" - -workflow IGV_HaplotypeViz_Scatter { - input { - # BED files containing regions to screenshot; 4th column can optionally be SVID - Array[File] beds - Array[String]? run_names - - # BAM and BAI files from align_asm workflow for alignment visualization - File bam_hap1 - File bai_hap1 - File bam_hap2 - File bai_hap2 - - # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization - File haplotig_fasta_hap1 - File haplotig_fasta_hap2 - - # Reference corresponding to read alignments for BAM files - File ref_fasta - File ref_fai - - # Sample id and prefix for output filenames - String sample_id - - # Number of records per shard for parallelization - Int? 
records_per_shard - - # Docker image for IGV headless tasks - String igv_docker - } - - scatter (i in range(length(beds))) { - String sample_combined = sample_id + "_combined" - - # Run IGV for both BAM and FASTA visualization for both haplotypes (Hap1 and Hap2) - call RunIGVHeadlessCombined { - input: - bam_hap1=bam_hap1, - bai_hap1=bai_hap1, - bam_hap2=bam_hap2, - bai_hap2=bai_hap2, - fasta_hap1=haplotig_fasta_hap1, - fasta_hap2=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_combined, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - } - - output { - Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot - } -} - -task RunIGVHeadlessCombined { - input { - File bam_hap1 # BAM file for Haplotype 1 - File bai_hap1 # BAI file for Haplotype 1 - File bam_hap2 # BAM file for Haplotype 2 - File bai_hap2 # BAI file for Haplotype 2 - File fasta_hap1 # FASTA file for Haplotype 1 - File fasta_hap2 # FASTA file for Haplotype 2 - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless - Int? records_per_shard # Optional: Parallelization parameter for large datasets - } - - command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes - igv.sh \ - -b ~{bam_hap1},~{bam_hap2} \ - -i ~{bai_hap1},~{bai_hap2} \ - -g ~{fasta_hap1},~{fasta_hap2} \ - -bed ~{bed} \ - -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ - -o ~{sample_id}.igv_screenshot.png - >>> - - output { - File igv_screenshot = "~{sample_id}.igv_screenshot.png" - } - - runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" # Updated IGV docker image - memory: "8G" - cpu: "2" - disks: "local-disk 10 HDD" - } -} diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl new file mode 100644 index 000000000..1848be1fa --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -0,0 +1,74 @@ +version 1.0 + +workflow igv_screenshot_workflow { + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments_bam # Total alignments BAM file + File ref_fasta # Reference FASTA file + File targeted_bed_file # BED file with regions of interest + String sample_name # Sample name for naming convention + Int image_height = 500 # Height of IGV track, default to 500 + Int memory_mb = 4000 # Memory for IGV, default to 4000MB + } + + call make_igv_screenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap2 = aligned_bam_hap2, + alignments_bam = alignments_bam, + ref_fasta = ref_fasta, + targeted_bed_file = targeted_bed_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb + } + + output { + Array[File] pngs = make_igv_screenshot.pngs # Collect all generated PNG files + } +} + +task make_igv_screenshot { + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments_bam # Total alignments BAM file + File ref_fasta # Reference FASTA file + File targeted_bed_file # BED file with regions of interest + String sample_name # Sample name for naming convention + Int image_height # Height of IGV track + Int memory_mb # Memory for IGV + } + + command { + # 
Create output directory for snapshots + mkdir -p IGV_Snapshots + + # Start a virtual framebuffer (Xvfb) to allow IGV to render without display + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the Python script to generate IGV screenshots + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ + -r ~{targeted_bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{ref_fasta} \ + --sample_name ~{sample_name} + } + + output { + # Capture all generated PNG snapshot files + Array[File] pngs = glob("IGV_Snapshots/*.png") + } + + runtime { + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk 50 HDD" # Specify disk size if needed + } +} From 76009fecb8c66d56e02b7d81a33bdd6914c31dc2 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 14:27:15 -0400 Subject: [PATCH 18/49] modify the runtime attributes --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 103 ++++++++++++------ 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 1848be1fa..cf5ce5ab6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -7,12 +7,12 @@ workflow igv_screenshot_workflow { File alignments_bam # Total alignments BAM file File ref_fasta # Reference FASTA file File targeted_bed_file # BED file with regions of interest - String sample_name # Sample name for naming convention - Int image_height = 500 # Height of IGV track, default to 500 - Int memory_mb = 4000 # Memory for IGV, default to 4000MB + String sample_name # Name for the sample (used in output naming) + String disk_type = "SSD" # Default disk type + String gcs_output_dir # GCS directory to copy outputs } - call make_igv_screenshot { + call GenerateIgvScreenshots { input: aligned_bam_hap1 = aligned_bam_hap1, aligned_bam_hap2 = aligned_bam_hap2, @@ -20,55 +20,90 @@ workflow igv_screenshot_workflow { ref_fasta = ref_fasta, targeted_bed_file = targeted_bed_file, sample_name = sample_name, - image_height = image_height, - memory_mb = memory_mb + disk_type = disk_type + } + + call FinalizeToGCS { + input: + screenshots = GenerateIgvScreenshots.screenshots, + output_dir = gcs_output_dir } output { - Array[File] pngs = make_igv_screenshot.pngs # Collect all generated PNG files + Array[File] screenshot_files = GenerateIgvScreenshots.screenshots } } -task make_igv_screenshot { +task GenerateIgvScreenshots { input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments_bam # Total alignments BAM file - File ref_fasta # Reference FASTA file - File targeted_bed_file # BED file with regions of interest - String sample_name # Sample name for naming convention - Int image_height # Height of IGV track - Int memory_mb # Memory for IGV + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments_bam + File ref_fasta + File targeted_bed_file + String sample_name + String disk_type } - command { - # Create output directory for snapshots - mkdir -p IGV_Snapshots - - # Start a virtual framebuffer (Xvfb) to allow IGV to render without display - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 + command <<< + # Ensure the snapshots directory exists and set permissions + 
mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 - # Run the Python script to generate IGV screenshots + # Run the Python script to generate IGV snapshots python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ + --fasta_file ~{ref_fasta} \ + --sample_name ~{sample_name} \ -r ~{targeted_bed_file} \ - -ht ~{image_height} \ + -ht 500 \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{ref_fasta} \ - --sample_name ~{sample_name} + -mem 4000 + + >>> + + output { + Array[File] screenshots = glob("/output/IGV_Snapshots/*.png") + } + + # Calculate dynamic disk size based on the size of BAM files + Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 + + runtime { + cpu: 4 + memory: "8 GiB" + disks: "local-disk " + disk_size + " " + disk_type + preemptible: 2 + maxRetries: 1 + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" } +} + +task FinalizeToGCS { + input { + Array[File] screenshots + String output_dir + } + + command <<< + # Copy the output PNG files to Google Cloud Storage + for file in ~{sep=' ' screenshots}; do + gsutil cp "$file" "~{output_dir}/" + done + >>> output { - # Capture all generated PNG snapshot files - Array[File] pngs = glob("IGV_Snapshots/*.png") + Array[File] uploaded_files = screenshots } runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - memory: "~{memory_mb} MB" - cpu: 2 - disks: "local-disk 50 HDD" # Specify disk size if needed + cpu: 1 + memory: "2 GiB" + disks: "local-disk 10 HDD" + preemptible: 2 + maxRetries: 1 + docker: "gcr.io/google-containers/toolbox:latest" } } From 11f78eed07401c66b650d905de62942e179dc1c8 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:29:28 -0400 Subject: [PATCH 19/49] fixing the issue with bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 6b3342843..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -18,8 +21,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, @@ -37,8 +43,11 @@ workflow IGVScreenshotWorkflow { task RunIGVScreenshot { input { File aligned_bam_hap1 + File aligned_bam_hap1_bai File aligned_bam_hap2 + File aligned_bam_hap2_bai File alignments + File alignments_bai File bed_file File fasta_file String sample_name @@ 
-56,6 +65,7 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ From 0b708963c226429b5cdd5911dbbb3c25152d6def Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:34:48 -0400 Subject: [PATCH 20/49] fix the issue with output directory --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index cf5ce5ab6..04a5e48f2 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,15 +1,15 @@ version 1.0 -workflow igv_screenshot_workflow { +workflow IGV_HaplotypeViz { input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments_bam # Total alignments BAM file - File ref_fasta # Reference FASTA file - File targeted_bed_file # BED file with regions of interest - String sample_name # Name for the sample (used in output naming) - String disk_type = "SSD" # Default disk type - String gcs_output_dir # GCS directory to copy outputs + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments_bam + File ref_fasta + File targeted_bed_file + String sample_name + String disk_type = "SSD" + String output_dir } call GenerateIgvScreenshots { @@ -26,11 +26,11 @@ workflow igv_screenshot_workflow { call FinalizeToGCS { input: screenshots = GenerateIgvScreenshots.screenshots, - output_dir = gcs_output_dir + output_dir = output_dir } output { - Array[File] screenshot_files = GenerateIgvScreenshots.screenshots + Array[File] final_screenshots = FinalizeToGCS.uploaded_files } } @@ -47,7 +47,7 @@ task GenerateIgvScreenshots { command <<< # Ensure the snapshots directory exists and set permissions - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + mkdir -p /cromwell_root/output/IGV_Snapshots && chmod 777 /cromwell_root/output/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 @@ -65,10 +65,9 @@ task GenerateIgvScreenshots { >>> output { - Array[File] screenshots = glob("/output/IGV_Snapshots/*.png") + Array[File] screenshots = glob("/cromwell_root/output/IGV_Snapshots/*.png") } - # Calculate dynamic disk size based on the size of BAM files Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 runtime { @@ -88,22 +87,27 @@ task FinalizeToGCS { } command <<< - # Copy the output PNG files to Google Cloud Storage + set -euxo pipefail + + # Ensure the output directory exists and is properly formatted + gcs_output_dir=$(echo ~{output_dir} | sed 's:/*$::') + + # Copy all screenshots to Google Cloud Storage for file in ~{sep=' ' screenshots}; do - gsutil cp "$file" "~{output_dir}/" + gsutil cp $file $gcs_output_dir/ done >>> output { - Array[File] uploaded_files = screenshots + Array[File] uploaded_files = glob("~{output_dir}/*.png") } runtime { cpu: 1 - memory: "2 GiB" - disks: "local-disk 10 HDD" - preemptible: 2 - maxRetries: 1 - docker: "gcr.io/google-containers/toolbox:latest" + memory: "4 GiB" + disks: "local-disk 10 SSD" 
+ preemptible: 1 + maxRetries: 2 + docker: "google/cloud-sdk:slim" } } From 7ac3c5461710e62dea5eb2aaa424e71384b666e7 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:51:08 -0400 Subject: [PATCH 21/49] fix bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index d9558132b..9913b4581 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,11 +4,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -21,11 +18,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, + aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location alignments = alignments, - alignments_bai = alignments_bai, + alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From 3f7005d30c30fe3fffca92118e6b07151001ccff Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:11:34 -0400 Subject: [PATCH 22/49] updates for bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 9913b4581..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -18,11 +21,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, - alignments_bai = alignments + ".bai", # Automatically infer BAI location + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From c36a89991491c98471fbd13baef604c6c8f005fd Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:20:54 -0400 
Subject: [PATCH 23/49] reverting back --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index d9558132b..9913b4581 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,11 +4,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -21,11 +18,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, + aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location alignments = alignments, - alignments_bai = alignments_bai, + alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From 6b9baf765c9f39db5a0dae132997ece1ce379b94 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:23:02 -0400 Subject: [PATCH 24/49] playing with bai definition --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 190 ++++++++---------- 1 file changed, 82 insertions(+), 108 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 04a5e48f2..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,113 +1,87 @@ version 1.0 -workflow IGV_HaplotypeViz { - input { - File aligned_bam_hap1 - File aligned_bam_hap2 - File alignments_bam - File ref_fasta - File targeted_bed_file - String sample_name - String disk_type = "SSD" - String output_dir - } - - call GenerateIgvScreenshots { - input: - aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap2 = aligned_bam_hap2, - alignments_bam = alignments_bam, - ref_fasta = ref_fasta, - targeted_bed_file = targeted_bed_file, - sample_name = sample_name, - disk_type = disk_type - } - - call FinalizeToGCS { - input: - screenshots = GenerateIgvScreenshots.screenshots, - output_dir = output_dir - } - - output { - Array[File] final_screenshots = FinalizeToGCS.uploaded_files - } +workflow IGVScreenshotWorkflow { + + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 + File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments + File bed_file # BED file with regions + File fasta_file # Reference FASTA file + String sample_name # Sample name to use in filenames + Int image_height = 500 + Int memory_mb = 4000 + Int disk_gb = 100 # Disk size in GB, default to 100 GB + String docker_image =
"us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + } + + call RunIGVScreenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap2 = aligned_bam_hap2, + aligned_bam_hap2_bai = aligned_bam_hap2_bai, + alignments = alignments, + alignments_bai = alignments_bai, + bed_file = bed_file, + fasta_file = fasta_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image + } + + output { + Array[File] snapshots = RunIGVScreenshot.snapshots + } } -task GenerateIgvScreenshots { - input { - File aligned_bam_hap1 - File aligned_bam_hap2 - File alignments_bam - File ref_fasta - File targeted_bed_file - String sample_name - String disk_type - } - - command <<< - # Ensure the snapshots directory exists and set permissions - mkdir -p /cromwell_root/output/IGV_Snapshots && chmod 777 /cromwell_root/output/IGV_Snapshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 - - # Run the Python script to generate IGV snapshots - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ - --fasta_file ~{ref_fasta} \ - --sample_name ~{sample_name} \ - -r ~{targeted_bed_file} \ - -ht 500 \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem 4000 - - >>> - - output { - Array[File] screenshots = glob("/cromwell_root/output/IGV_Snapshots/*.png") - } - - Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 - - runtime { - cpu: 4 - memory: "8 GiB" - disks: "local-disk " + disk_size + " " + disk_type - preemptible: 2 - maxRetries: 1 - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - } -} - -task FinalizeToGCS { - input { - Array[File] screenshots - String output_dir - } - - command <<< - set -euxo pipefail - - # Ensure the output directory exists and is properly formatted - gcs_output_dir=$(echo ~{output_dir} | sed 's:/*$::') - - # Copy all screenshots to Google Cloud Storage - for file in ~{sep=' ' screenshots}; do - gsutil cp $file $gcs_output_dir/ - done - >>> - - output { - Array[File] uploaded_files = glob("~{output_dir}/*.png") - } - - runtime { - cpu: 1 - memory: "4 GiB" - disks: "local-disk 10 SSD" - preemptible: 1 - maxRetries: 2 - docker: "google/cloud-sdk:slim" - } +task RunIGVScreenshot { + input { + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + String sample_name + Int image_height + Int memory_mb + Int disk_gb + String docker_image + } + + command { + mkdir -p IGV_Snapshots + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} + } + + runtime { + docker: docker_image + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk ~{disk_gb} HDD" + } + + output { + Array[File] snapshots = glob("IGV_Snapshots/*.png") + } } From 889c1fdfa7b6ee312f82bac28592ce3f51695123 Mon Sep 17 00:00:00 2001 
From: Shadi Zaheri Date: Tue, 10 Sep 2024 17:03:01 -0400 Subject: [PATCH 25/49] fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 9913b4581..a33397a79 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -8,6 +8,7 @@ workflow IGVScreenshotWorkflow { File alignments # BAM file for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file + File fasta_fai # FASTA index (.fai) file String sample_name # Sample name to use in filenames Int image_height = 500 Int memory_mb = 4000 @@ -25,6 +26,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, + fasta_fai = fasta_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -47,6 +49,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file + File fasta_fai String sample_name Int image_height Int memory_mb @@ -68,6 +71,7 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ + --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } From b2c08364fc9b092ea30d9a4f5409448266a3c916 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 17:05:35 -0400 Subject: [PATCH 26/49] fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index d9558132b..7490c60f0 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -11,6 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file + File fasta_fai # FAI index for the FASTA file String sample_name # Sample name to use in filenames Int image_height = 500 Int memory_mb = 4000 @@ -28,6 +29,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, + fasta_fai = fasta_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -50,6 +52,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file + File fasta_fai # FAI index for the FASTA file String sample_name Int image_height Int memory_mb @@ -71,6 +74,7 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ + --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } From 814c24e18bcaae98bc243295c6235e8decab6261 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:07:53 -0400 Subject: [PATCH 27/49] updated the command section --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 7490c60f0..5fc32bdb7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,18 +1,18 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 #
BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 - File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments - File bed_file # BED file with regions - File fasta_file # Reference FASTA file - File fasta_fai # FAI index for the FASTA file - String sample_name # Sample name to use in filenames + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + File fasta_file_fai # Include the .fai file + String sample_name Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB @@ -29,7 +29,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, - fasta_fai = fasta_fai, + fasta_file_fai = fasta_file_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -43,6 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -52,7 +53,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file - File fasta_fai # FAI index for the FASTA file + File fasta_file_fai String sample_name Int image_height Int memory_mb @@ -60,23 +61,24 @@ task RunIGVScreenshot { String docker_image } - command { - mkdir -p IGV_Snapshots + command <<< + # Ensure the snapshots directory exists + mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} - } + >>> runtime { docker: docker_image @@ -86,6 +88,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 1fa09470262f518c6ab57671919621cf06838c93 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:15:30 -0400 Subject: [PATCH 28/49] modify fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index a33397a79..337c797ea 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -58,7 +58,10 @@ task RunIGVScreenshot { } command { - mkdir -p IGV_Snapshots + # Ensure the snapshots directory exists + mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 @@ -71,7 +74,6 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } @@ -83,6 +85,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = 
glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 965bf77e92499df8c40875ecaff046baf228a701 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:39:30 -0400 Subject: [PATCH 29/49] fix --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 5fc32bdb7..73384ecc7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -62,8 +62,8 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /cromwell_root/IGV_Snapshots && chmod 777 /cromwell_root/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -88,6 +88,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") } } From 4712c35f27898ede8ae36e628868c95295e30de2 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:47:29 -0400 Subject: [PATCH 30/49] gs input --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 337c797ea..05f1d8cb2 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -13,6 +13,7 @@ workflow IGVScreenshotWorkflow { Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB + String gs_bucket_path # Google Storage bucket path String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use } @@ -31,6 +32,7 @@ workflow IGVScreenshotWorkflow { image_height = image_height, memory_mb = memory_mb, disk_gb = disk_gb, + gs_bucket_path = gs_bucket_path, docker_image = docker_image } @@ -54,12 +56,13 @@ task RunIGVScreenshot { Int image_height Int memory_mb Int disk_gb + String gs_bucket_path String docker_image } command { - # Ensure the snapshots directory exists - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + # Ensure the snapshots directory exists under the current working directory + mkdir -p IGV_Snapshots && chmod 777 IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -75,6 +78,12 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} + + # Move the generated snapshots to the output directory + mv *.png IGV_Snapshots/ + + # Copy the results to the Google Storage bucket + gsutil -m cp IGV_Snapshots/*.png ~{gs_bucket_path} } runtime { @@ -85,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("IGV_Snapshots/*.png") } } From ce84cab7141d9dccf6a920343b2fe4f92ce569ce Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 20:07:20 -0400 Subject: [PATCH 31/49] fix output --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 76 ++++++++++++++----- 1 file changed, 58 insertions(+), 18 
deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 73384ecc7..7f5fa2843 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -17,6 +17,7 @@ workflow IGVScreenshotWorkflow { Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + String output_gcs_path # GCS path where the screenshots will be uploaded } call RunIGVScreenshot { @@ -37,8 +38,15 @@ workflow IGVScreenshotWorkflow { docker_image = docker_image } + # Finalize the output by uploading it to the Google Cloud bucket + call FinalizeScreenshots { + input: + screenshots = RunIGVScreenshot.screenshots, + outdir = output_gcs_path + } + output { - Array[File] snapshots = RunIGVScreenshot.snapshots + Array[String] screenshots_gcs_paths = FinalizeScreenshots.gcs_paths } } @@ -62,22 +70,25 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /cromwell_root/IGV_Snapshots && chmod 777 /cromwell_root/IGV_Snapshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 - - # Run the IGV screenshot script with the provided inputs - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} + set -euo pipefail + + # Create a directory for screenshots in the working directory + mkdir screenshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} \ + --snapshot-dir "screenshots" >>> runtime { @@ -88,6 +99,35 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") + Array[File] screenshots = glob("screenshots/*.png") + } +} + +task FinalizeScreenshots { + + input { + Array[File] screenshots # Array of screenshot files to finalize + String outdir # Google Cloud Storage directory to upload files to + } + + command <<< + set -euxo pipefail + + gcs_output_dir=$(echo "~{outdir}" | sed 's:/*$::') + + for f in ~{sep=' ' screenshots}; do + gcloud storage cp "$f" "${gcs_output_dir}/$(basename $f)" + done + >>> + + output { + Array[String] gcs_paths = read_lines("gcs_output_files.txt") + } + + runtime { + cpu: 1 + memory: "1 GiB" + disks: "local-disk 10 HDD" + docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3" } } From 37fb974ab7127fb10fe47f5378c82991e63aa206 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 21:39:46 -0400 Subject: [PATCH 32/49] fixing the output-the screenshots were created --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 77 +++++-------------- 1 file changed, 19 insertions(+), 58 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl 
b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 7f5fa2843..929aa28a8 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -17,7 +17,6 @@ workflow IGVScreenshotWorkflow { Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use - String output_gcs_path # GCS path where the screenshots will be uploaded } call RunIGVScreenshot { @@ -38,15 +37,8 @@ workflow IGVScreenshotWorkflow { docker_image = docker_image } - # Finalize the output by uploading it to the Google Cloud bucket - call FinalizeScreenshots { - input: - screenshots = RunIGVScreenshot.screenshots, - outdir = output_gcs_path - } - output { - Array[String] screenshots_gcs_paths = FinalizeScreenshots.gcs_paths + Array[File] snapshots = RunIGVScreenshot.snapshots } } @@ -70,25 +62,23 @@ task RunIGVScreenshot { } command <<< - set -euo pipefail - - # Create a directory for screenshots in the working directory - mkdir screenshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 - - # Run the IGV screenshot script with the provided inputs - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} \ - --snapshot-dir "screenshots" + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} \ + --snapshot-dir "/output/IGV_Snapshots" >>> runtime { @@ -99,35 +89,6 @@ task RunIGVScreenshot { } output { - Array[File] screenshots = glob("screenshots/*.png") - } -} - -task FinalizeScreenshots { - - input { - Array[File] screenshots # Array of screenshot files to finalize - String outdir # Google Cloud Storage directory to upload files to - } - - command <<< - set -euxo pipefail - - gcs_output_dir=$(echo "~{outdir}" | sed 's:/*$::') - - for f in ~{sep=' ' screenshots}; do - gcloud storage cp "$f" "${gcs_output_dir}/$(basename $f)" - done - >>> - - output { - Array[String] gcs_paths = read_lines("gcs_output_files.txt") - } - - runtime { - cpu: 1 - memory: "1 GiB" - disks: "local-disk 10 HDD" - docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3" + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From f8a18c10cf96ce083e39e55a10ee6e4d22c8655f Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 21:59:48 -0400 Subject: [PATCH 33/49] fixing the output-the screenshots were created --- .../PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 929aa28a8..a9a05ad4d 100644 --- 
a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -62,8 +62,10 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /output/IGV_Snapshots + set -euo pipefail + + # Ensure the snapshots directory exists under the local disk path + mkdir -p /mnt/local-disk/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -78,7 +80,7 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} \ - --snapshot-dir "/output/IGV_Snapshots" + --snapshot-dir "/mnt/local-disk/IGV_Snapshots" >>> runtime { @@ -89,6 +91,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/mnt/local-disk/IGV_Snapshots/*.png") } } From cd7c7692b01d60a7c5416aee542eff298f490ef9 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 22:18:51 -0400 Subject: [PATCH 34/49] fixing the output-the screenshots were created --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index a9a05ad4d..21220cdf6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -64,8 +64,8 @@ task RunIGVScreenshot { command <<< set -euo pipefail - # Ensure the snapshots directory exists under the local disk path - mkdir -p /mnt/local-disk/IGV_Snapshots + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /cromwell_root/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -80,7 +80,7 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} \ - --snapshot-dir "/mnt/local-disk/IGV_Snapshots" + --snapshot-dir "/cromwell_root/IGV_Snapshots" >>> runtime { @@ -91,6 +91,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/mnt/local-disk/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") } } From 2b238381f2173cf9efecbae7597d2efa5c9b264d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 23:21:32 -0400 Subject: [PATCH 35/49] fixing ... 
--- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 21220cdf6..96ca42fe3 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -71,7 +71,7 @@ task RunIGVScreenshot { Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs + # Run the IGV screenshot script with the provided inputs, no --snapshot-dir python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -79,8 +79,10 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} \ - --snapshot-dir "/cromwell_root/IGV_Snapshots" + --sample_name ~{sample_name} + + # Move the screenshots to the output directory + mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { From 5d8fb8dbc5e12cb8b3fc9cd724d99f56a8518117 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 23:37:11 -0400 Subject: [PATCH 36/49] output directory --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 05f1d8cb2..bcb13bca6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,38 +1,39 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments # BAM file for total alignments - File bed_file # BED file with regions - File fasta_file # Reference FASTA file - File fasta_fai # FASTA index (.fai) file - String sample_name # Sample name to use in filenames + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + File fasta_file_fai + String sample_name Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB - String gs_bucket_path # Google Storage bucket path String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use } call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, - alignments_bai = alignments + ".bai", # Automatically infer BAI location + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, - fasta_fai = fasta_fai, + fasta_file_fai = fasta_file_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, disk_gb = disk_gb, - gs_bucket_path = gs_bucket_path, docker_image = docker_image } @@ -42,6 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -51,18 +53,19 @@ task RunIGVScreenshot { File alignments_bai File bed_file 
File fasta_file - File fasta_fai + File fasta_file_fai String sample_name Int image_height Int memory_mb Int disk_gb - String gs_bucket_path String docker_image } - command { - # Ensure the snapshots directory exists under the current working directory - mkdir -p IGV_Snapshots && chmod 777 IGV_Snapshots + command <<< + set -euo pipefail + + # Ensure the snapshots directory exists + mkdir -p IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -71,7 +74,6 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ @@ -79,12 +81,9 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - # Move the generated snapshots to the output directory + # Move the screenshots to the IGV_Snapshots directory mv *.png IGV_Snapshots/ - - # Copy the results to the Google Storage bucket - gsutil -m cp IGV_Snapshots/*.png ~{gs_bucket_path} - } + >>> runtime { docker: docker_image From 793fddff9dbbd089481a99cc08e86a4c9c255250 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:32:49 -0400 Subject: [PATCH 37/49] again --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 96ca42fe3..2a0fa22a7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -65,7 +65,8 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /cromwell_root/IGV_Snapshots + #mkdir -p /cromwell_root/IGV_Snapshots + mkdir snap_out # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,7 +83,7 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} # Move the screenshots to the output directory - mv *.png /cromwell_root/IGV_Snapshots/ + #mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { @@ -93,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") + Array[File] snapshots = glob("snap_out/*.png") } } From d98e0fa2ce5514c85c429c2b4d5b8e2bec71e4e0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:46:56 -0400 Subject: [PATCH 38/49] fixing the output --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 2a0fa22a7..a95712bd1 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -65,8 +65,8 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists under the mounted disk path - #mkdir -p /cromwell_root/IGV_Snapshots - mkdir snap_out + mkdir -p /output/IGV_Snapshots + #mkdir snap_out # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,8 +82,6 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - #
Move the screenshots to the output directory - #mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { @@ -94,6 +92,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("snap_out/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From f2001921ad1b5c476d3c78edc6063abe6e74a4e0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:56:14 -0400 Subject: [PATCH 39/49] removed mkdir --- .../PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index a95712bd1..d5cc19d17 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,7 +1,7 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -11,7 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai File bed_file File fasta_file - File fasta_file_fai # Include the .fai file + File fasta_file_fai # Include the .fai file for localization String sample_name Int image_height = 500 Int memory_mb = 4000 @@ -43,7 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -64,15 +64,11 @@ task RunIGVScreenshot { command <<< set -euo pipefail - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /output/IGV_Snapshots - #mkdir snap_out - # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs, no --snapshot-dir + # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -82,6 +78,7 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} + # No need to create the snapshot directory, it's handled in the Python script >>> runtime { @@ -92,6 +89,7 @@ task RunIGVScreenshot { } output { + # Collect the output from the Python script's default snapshot directory Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 7e692912d443c519810765160b26573166fa7b72 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:11:43 -0400 Subject: [PATCH 40/49] updated the docker and output directory in the docker /cromwell_root/output/IGV_Snapshots --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index bcb13bca6..e3dca975c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -65,7 +65,7 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists - mkdir -p IGV_Snapshots + mkdir -p 'output/IGV_Snapshots' # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,14 +82,14 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv *.png IGV_Snapshots/ + mv *.png 'output/IGV_Snapshots/' >>> runtime { docker: docker_image memory: "~{memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} HDD" + disks: "local-disk ~{disk_gb} SSD" } output { 
From bfd534b0121c1040a9706d0e782bc0c5d2e16963 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:14:49 -0400 Subject: [PATCH 41/49] mv -- *.png output/IGV_Snapshots/ --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index e3dca975c..85232a22e 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -82,7 +82,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv *.png 'output/IGV_Snapshots/' + mv -- *.png 'output/IGV_Snapshots/' >>> runtime { diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index d5cc19d17..cc579a562 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,7 +1,7 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -11,7 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai File bed_file File fasta_file - File fasta_file_fai # Include the .fai file for localization + File fasta_file_fai # Include the .fai file String sample_name Int image_height = 500 Int memory_mb = 4000 @@ -43,7 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -64,11 +64,15 @@ task RunIGVScreenshot { command <<< set -euo pipefail + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p 'output/IGV_Snapshots/' + #mkdir snap_out + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs + # Run the IGV screenshot script with the provided inputs, no --snapshot-dir python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -78,7 +82,8 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - # No need to create the snapshot directory, it's handled in the Python script + # Move the screenshots to the output directory + mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -89,7 +94,6 @@ task RunIGVScreenshot { } output { - # Collect the output from the Python script's default snapshot directory - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("snap_out/*.png") } } From d60a7169026444cb1cfbfb1b2169309ebfc076c0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:21:41 -0400 Subject: [PATCH 42/49] SSD --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index cc579a562..6e2db08a0 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -90,7 +90,7 @@ task RunIGVScreenshot { docker: docker_image memory: "~{memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} HDD" + disks: "local-disk ~{disk_gb} SSD" } output { From 28a6185783b9211dde843b89bd3f5f5713b73354 Mon Sep 17 
00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 13:31:52 -0400 Subject: [PATCH 43/49] output fix --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 4 ++-- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 85232a22e..84a877e53 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -82,7 +82,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv -- *.png 'output/IGV_Snapshots/' + #mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -93,6 +93,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("output/IGV_Snapshots/*.png") } } diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 6e2db08a0..968dec404 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -83,7 +83,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the output directory - mv -- *.png 'output/IGV_Snapshots/' + # mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -94,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("snap_out/*.png") + Array[File] snapshots = glob("output/IGV_Snapshots/*.png") } } From de9a7e8cebc6015e7ac2dec360af36bd2a8acb7d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 17 Sep 2024 10:23:36 -0400 Subject: [PATCH 44/49] making the folder for the script --- scripts/igv/make_igv_screenshot.py | 251 +++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 scripts/igv/make_igv_screenshot.py diff --git a/scripts/igv/make_igv_screenshot.py b/scripts/igv/make_igv_screenshot.py new file mode 100644 index 000000000..3fa3348fc --- /dev/null +++ b/scripts/igv/make_igv_screenshot.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python + +''' +This script will load IGV in a virtual X window, load all supplied input files +as tracks, and take snapshots at the coordinates listed in the BED formatted +region file. +''' + +# ~~~~ LOAD PACKAGES ~~~~~~ # +import sys +import os +import subprocess as sp +import argparse +import datetime +from pathlib import Path + +# ~~~~ DIRECTORY AND DEFAULTS ~~~~~~ # +THIS_DIR = os.path.dirname(os.path.realpath(__file__)) +SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Snapshot output directory +default_igv_sh = os.path.join(THIS_DIR, 'igv.sh') +default_regions_bed = os.path.join(THIS_DIR, 'regions.bed') + +# ~~~~ CUSTOM FUNCTIONS ~~~~~~ # +def file_exists(myfile, kill=False): + ''' + Checks to make sure a file exists, optionally kills the script if file is missing. + ''' + if not os.path.isfile(myfile): + print(f"ERROR: File '{myfile}' does not exist!") + if kill: + print("Exiting...") + sys.exit() + +def check_for_fai(fasta_file): + ''' + Check to make sure a .fai index file exists for the FASTA file. + If not, alert the user. 
+ ''' + fai_file = fasta_file + ".fai" + if not os.path.isfile(fai_file): + print(f"ERROR: FASTA index file '{fai_file}' is missing!") + print("Please generate it with 'samtools faidx' before running the script.") + sys.exit(1) + +def check_for_bai(bam_file): + ''' + Check to make sure a .bam.bai file is present in the same directory as the .bam file. + ''' + bai_file = bam_file + ".bai" + if not os.path.isfile(bai_file): + print(f"ERROR: BAM index file '{bai_file}' is missing!") + sys.exit(1) + +def verify_input_files_list(files_list): + ''' + Check to make sure input files meet criteria. + Add more criteria as issues are found. + ''' + for file in files_list: + # Check if the file exists + if not os.path.isfile(file): + print(f"ERROR: Input file '{file}' does not exist!") + sys.exit(1) + + # For BAM files, ensure the corresponding BAI file exists + if file.endswith(".bam"): + check_for_bai(file) + +def subprocess_cmd(command): + ''' + Runs a terminal command with stdout piping enabled. + ''' + process = sp.Popen(command, stdout=sp.PIPE, shell=True) + proc_stdout = process.communicate()[0].strip() + print(proc_stdout) + +def make_chrom_region_list(region_file): + ''' + Creates a list of tuples representing the regions from the BED file [(chrom, start, stop), ...]. + ''' + region_list = [] + with open(region_file) as f: + for line in f: + chrom, start, stop = line.split()[0:3] + region_list.append((chrom, start, stop)) + return region_list + +def make_IGV_chrom_loc(region): + ''' + Return a chrom location string in IGV format. + ''' + chrom, start, stop = region[0:3] + return f'{chrom}:{start}-{stop}' + +def make_snapshot_filename(region, height, sample_name=None): + ''' + Formats a filename for the IGV snapshot. + Adds more useful context to filenames by including region information. + ''' + chrom, start, stop = region[0:3] + if not chrom.startswith("chr"): + chrom = f"chr{chrom}" + if sample_name: + return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.png") + return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.png") + +def mkdir_p(path): + ''' + Recursively create a directory using pathlib. + ''' + Path(path).mkdir(parents=True, exist_ok=True) + +def get_open_X_server(): + ''' + Search for an open Xvfb port to render into. + ''' + x_serv_command = ''' + for serv_num in $(seq 1 1000); do + if ! (xdpyinfo -display :${serv_num}) &>/dev/null; then + echo "$serv_num" && break + fi + done + ''' + process = sp.Popen(x_serv_command, stdout=sp.PIPE, shell=True) + output = process.communicate()[0].strip().decode('utf-8') + + # Handle if xdpyinfo returns unexpected output + try: + x_serv_port = int(output.split('\n')[0].strip()) # Take only the port number + except ValueError: + print(f"Unexpected xdpyinfo output: {output}") + sys.exit(1) + + return x_serv_port + +def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name): + ''' + Write out a batchscript for IGV. 
+ ''' + with open(IGV_batchscript_file, "w") as f: + # Initialize IGV + f.write("new\n") + f.write(f"genome {fasta_file}\n") + f.write(f"snapshotDirectory {IGV_snapshot_dir}\n") + f.write(f"maxPanelHeight {image_height}\n") + + # Load BAM files + for file in input_files: + f.write(f"load {file}\n") + + # Write regions and snapshots + region_list = make_chrom_region_list(region_file) + for region in region_list: + chrom_loc = make_IGV_chrom_loc(region) + svsize = int(region[2]) - int(region[1]) + + # For large regions, split snapshots into two: start and end + if svsize > 10000: + f.write(f"goto {region[0]}:{int(region[1]) - 1000}-{int(region[1]) + 500}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.png\n") + f.write(f"goto {region[0]}:{int(region[2]) - 500}-{int(region[2]) + 1000}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.png\n") + else: + f.write(f"goto {chrom_loc}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.png\n") + + f.write("exit\n") + +def run_IGV_script(igv_script, igv_sh, memMB): + ''' + Run an IGV batch script and ensure snapshots are generated. + ''' + # Ensure the output directory exists + mkdir_p(SNAPSHOT_DIR) + + # Get an open Xvfb port + x_serv_port = get_open_X_server() + print(f"Open Xvfb port found on: {x_serv_port}") + + # Build and run IGV command using igv.sh + igv_command = f"xvfb-run --auto-servernum --server-num=1 bash {igv_sh} -b {igv_script}" + print(f"IGV command: {igv_command}") + + # Record start time + startTime = datetime.datetime.now() + print(f"Started at: {startTime}") + + # Run the IGV command + subprocess_cmd(igv_command) + + # Check if snapshots were generated + snapshot_files = os.listdir(SNAPSHOT_DIR) + if len(snapshot_files) == 0: + print("ERROR: No snapshot files were generated.") + else: + print(f"Generated {len(snapshot_files)} snapshot files.") + + elapsed_time = datetime.datetime.now() - startTime + print(f"Elapsed time: {elapsed_time}") + +def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name): + ''' + Main control function for the script. + ''' + batchscript_file = os.path.join(SNAPSHOT_DIR, "IGV_snapshots.bat") + + # Check if input files, regions, and IGV script exist + file_exists(region_file, kill=True) + file_exists(igv_sh_bin, kill=True) + verify_input_files_list(input_files) + + # Check if the reference FASTA file and its index exist + file_exists(fasta_file, kill=True) + check_for_fai(fasta_file) + + print(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") + print(f"Reference FASTA: {fasta_file}") + print(f"Track height: {image_height}") + print(f"IGV script file: {igv_sh_bin}") + print(f"Batchscript file: {batchscript_file}") + print(f"Region file: {region_file}") + + # Create output directory + mkdir_p(SNAPSHOT_DIR) + + # Write the IGV batch script + write_IGV_script(input_files=input_files, region_file=region_file, IGV_batchscript_file=batchscript_file, IGV_snapshot_dir=SNAPSHOT_DIR, fasta_file=fasta_file, image_height=image_height, sample_name=sample_name) + + # Run the IGV batch script + run_IGV_script(igv_script=batchscript_file, igv_sh=igv_sh_bin, memMB=igv_mem) + +def run(): + ''' + Parse script args to run the script. 
+ ''' + parser = argparse.ArgumentParser(description='IGV snapshot automator') + parser.add_argument("input_files", nargs='+', help="Paths to the files to create snapshots from (e.g., .bam files).") + parser.add_argument("-r", default=default_regions_bed, type=str, dest='region_file', help="BED file with regions to create snapshots over.") + parser.add_argument("-f", "--fasta_file", required=True, help="Reference FASTA file to use.") + parser.add_argument("-ht", default='500', type=str, dest='image_height', help="Height for the IGV tracks.") + parser.add_argument("-bin", default=default_igv_sh, type=str, dest='igv_sh_bin', help="Path to the IGV sh binary to run.") + parser.add_argument("-mem", default="4000", type=str, dest='igv_mem', help="Amount of memory to allocate to IGV, in Megabytes (MB).") + parser.add_argument("--sample_name", required=True, help="Sample name to include in snapshot filenames.") + + args = parser.parse_args() + + main(input_files=args.input_files, region_file=args.region_file, fasta_file=args.fasta_file, image_height=args.image_height, igv_sh_bin=args.igv_sh_bin, igv_mem=args.igv_mem, sample_name=args.sample_name) + +if __name__ == "__main__": + run() + From c5e49fb52575312906a73c8bbf226ece53fd2687 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 12:42:57 -0400 Subject: [PATCH 45/49] script for more igv inputs --- scripts/igv/make_igv_screenshot.py | 124 +++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/scripts/igv/make_igv_screenshot.py b/scripts/igv/make_igv_screenshot.py index 3fa3348fc..e154bd796 100644 --- a/scripts/igv/make_igv_screenshot.py +++ b/scripts/igv/make_igv_screenshot.py @@ -12,13 +12,18 @@ import subprocess as sp import argparse import datetime +import logging from pathlib import Path # ~~~~ DIRECTORY AND DEFAULTS ~~~~~~ # THIS_DIR = os.path.dirname(os.path.realpath(__file__)) -SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Snapshot output directory +SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Default snapshot output directory default_igv_sh = os.path.join(THIS_DIR, 'igv.sh') default_regions_bed = os.path.join(THIS_DIR, 'regions.bed') +default_snapshot_format = 'png' + +# ~~~~ SET UP LOGGING ~~~~~~ # +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # ~~~~ CUSTOM FUNCTIONS ~~~~~~ # def file_exists(myfile, kill=False): @@ -26,10 +31,10 @@ def file_exists(myfile, kill=False): Checks to make sure a file exists, optionally kills the script if file is missing. ''' if not os.path.isfile(myfile): - print(f"ERROR: File '{myfile}' does not exist!") + logging.error(f"File '{myfile}' does not exist!") if kill: - print("Exiting...") - sys.exit() + logging.info("Exiting...") + sys.exit(1) def check_for_fai(fasta_file): ''' @@ -38,8 +43,7 @@ def check_for_fai(fasta_file): ''' fai_file = fasta_file + ".fai" if not os.path.isfile(fai_file): - print(f"ERROR: FASTA index file '{fai_file}' is missing!") - print("Please generate it with 'samtools faidx' before running the script.") + logging.error(f"FASTA index file '{fai_file}' is missing! 
Please generate it with 'samtools faidx' before running the script.") sys.exit(1) def check_for_bai(bam_file): @@ -48,7 +52,7 @@ def check_for_bai(bam_file): ''' bai_file = bam_file + ".bai" if not os.path.isfile(bai_file): - print(f"ERROR: BAM index file '{bai_file}' is missing!") + logging.error(f"BAM index file '{bai_file}' is missing!") sys.exit(1) def verify_input_files_list(files_list): @@ -59,7 +63,7 @@ def verify_input_files_list(files_list): for file in files_list: # Check if the file exists if not os.path.isfile(file): - print(f"ERROR: Input file '{file}' does not exist!") + logging.error(f"Input file '{file}' does not exist!") sys.exit(1) # For BAM files, ensure the corresponding BAI file exists @@ -72,7 +76,7 @@ def subprocess_cmd(command): ''' process = sp.Popen(command, stdout=sp.PIPE, shell=True) proc_stdout = process.communicate()[0].strip() - print(proc_stdout) + logging.info(proc_stdout) def make_chrom_region_list(region_file): ''' @@ -92,7 +96,7 @@ def make_IGV_chrom_loc(region): chrom, start, stop = region[0:3] return f'{chrom}:{start}-{stop}' -def make_snapshot_filename(region, height, sample_name=None): +def make_snapshot_filename(region, height, sample_name=None, snapshot_format='png'): ''' Formats a filename for the IGV snapshot. Adds more useful context to filenames by including region information. @@ -101,8 +105,8 @@ def make_snapshot_filename(region, height, sample_name=None): if not chrom.startswith("chr"): chrom = f"chr{chrom}" if sample_name: - return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.png") - return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.png") + return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.{snapshot_format}") + return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.{snapshot_format}") def mkdir_p(path): ''' @@ -128,12 +132,12 @@ def get_open_X_server(): try: x_serv_port = int(output.split('\n')[0].strip()) # Take only the port number except ValueError: - print(f"Unexpected xdpyinfo output: {output}") + logging.error(f"Unexpected xdpyinfo output: {output}") sys.exit(1) return x_serv_port -def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name): +def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name, snapshot_format, optional_inputs): ''' Write out a batchscript for IGV. 
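+    snapshot_format is the image extension used for the snapshot commands
+    ('png' or 'jpg'); optional_inputs is a list of extra track paths (entries
+    that are None are skipped when the load commands are written).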
''' @@ -148,6 +152,11 @@ def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapsho for file in input_files: f.write(f"load {file}\n") + # Load optional inputs + for opt_input in optional_inputs: + if opt_input: + f.write(f"load {opt_input}\n") + # Write regions and snapshots region_list = make_chrom_region_list(region_file) for region in region_list: @@ -157,12 +166,12 @@ def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapsho # For large regions, split snapshots into two: start and end if svsize > 10000: f.write(f"goto {region[0]}:{int(region[1]) - 1000}-{int(region[1]) + 500}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.{snapshot_format}\n") f.write(f"goto {region[0]}:{int(region[2]) - 500}-{int(region[2]) + 1000}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.{snapshot_format}\n") else: f.write(f"goto {chrom_loc}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.{snapshot_format}\n") f.write("exit\n") @@ -175,15 +184,15 @@ def run_IGV_script(igv_script, igv_sh, memMB): # Get an open Xvfb port x_serv_port = get_open_X_server() - print(f"Open Xvfb port found on: {x_serv_port}") + logging.info(f"Open Xvfb port found on: {x_serv_port}") # Build and run IGV command using igv.sh igv_command = f"xvfb-run --auto-servernum --server-num=1 bash {igv_sh} -b {igv_script}" - print(f"IGV command: {igv_command}") + logging.info(f"IGV command: {igv_command}") # Record start time startTime = datetime.datetime.now() - print(f"Started at: {startTime}") + logging.info(f"Started at: {startTime}") # Run the IGV command subprocess_cmd(igv_command) @@ -191,17 +200,20 @@ def run_IGV_script(igv_script, igv_sh, memMB): # Check if snapshots were generated snapshot_files = os.listdir(SNAPSHOT_DIR) if len(snapshot_files) == 0: - print("ERROR: No snapshot files were generated.") + logging.error("No snapshot files were generated.") else: - print(f"Generated {len(snapshot_files)} snapshot files.") + logging.info(f"Generated {len(snapshot_files)} snapshot files.") elapsed_time = datetime.datetime.now() - startTime - print(f"Elapsed time: {elapsed_time}") + logging.info(f"Elapsed time: {elapsed_time}") -def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name): +def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name, snapshot_format, output_dir, optional_inputs): ''' Main control function for the script. 
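+    output_dir overrides the default snapshot directory, and optional_inputs is
+    the list assembled in run() from --truth_haplotype_1, --truth_haplotype_2,
+    --targeted_vcf and --second_alignment_reads; entries left unset are passed
+    through as None and skipped downstream.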
''' + global SNAPSHOT_DIR + SNAPSHOT_DIR = output_dir + batchscript_file = os.path.join(SNAPSHOT_DIR, "IGV_snapshots.bat") # Check if input files, regions, and IGV script exist @@ -213,18 +225,34 @@ def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem file_exists(fasta_file, kill=True) check_for_fai(fasta_file) - print(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") - print(f"Reference FASTA: {fasta_file}") - print(f"Track height: {image_height}") - print(f"IGV script file: {igv_sh_bin}") - print(f"Batchscript file: {batchscript_file}") - print(f"Region file: {region_file}") + # Verify optional input files if they are provided + for opt_input in optional_inputs: + if opt_input: + file_exists(opt_input) + + logging.info(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") + logging.info(f"Reference FASTA: {fasta_file}") + logging.info(f"Track height: {image_height}") + logging.info(f"IGV script file: {igv_sh_bin}") + logging.info(f"Batchscript file: {batchscript_file}") + logging.info(f"Region file: {region_file}") + logging.info(f"Snapshot format: {snapshot_format}") # Create output directory mkdir_p(SNAPSHOT_DIR) # Write the IGV batch script - write_IGV_script(input_files=input_files, region_file=region_file, IGV_batchscript_file=batchscript_file, IGV_snapshot_dir=SNAPSHOT_DIR, fasta_file=fasta_file, image_height=image_height, sample_name=sample_name) + write_IGV_script( + input_files=input_files, + region_file=region_file, + IGV_batchscript_file=batchscript_file, + IGV_snapshot_dir=SNAPSHOT_DIR, + fasta_file=fasta_file, + image_height=image_height, + sample_name=sample_name, + snapshot_format=snapshot_format, + optional_inputs=optional_inputs + ) # Run the IGV batch script run_IGV_script(igv_script=batchscript_file, igv_sh=igv_sh_bin, memMB=igv_mem) @@ -241,11 +269,39 @@ def run(): parser.add_argument("-bin", default=default_igv_sh, type=str, dest='igv_sh_bin', help="Path to the IGV sh binary to run.") parser.add_argument("-mem", default="4000", type=str, dest='igv_mem', help="Amount of memory to allocate to IGV, in Megabytes (MB).") parser.add_argument("--sample_name", required=True, help="Sample name to include in snapshot filenames.") + parser.add_argument("--snapshot_format", default=default_snapshot_format, choices=['png', 'jpg'], help="Output format for snapshots (png or jpg).") + parser.add_argument("--output_dir", default=SNAPSHOT_DIR, help="Custom output directory for snapshots.") + parser.add_argument("--truth_haplotype_1", help="Optional path to truth haplotype 1 file.") + parser.add_argument("--truth_haplotype_2", help="Optional path to truth haplotype 2 file.") + parser.add_argument("--targeted_vcf", help="Optional path to targeted VCF file.") + parser.add_argument("--second_alignment_reads", help="Optional path to second alignment reads file.") args = parser.parse_args() - main(input_files=args.input_files, region_file=args.region_file, fasta_file=args.fasta_file, image_height=args.image_height, igv_sh_bin=args.igv_sh_bin, igv_mem=args.igv_mem, sample_name=args.sample_name) + # Validate memory input + try: + memMB = int(args.igv_mem) + if memMB <= 0: + raise ValueError + except ValueError: + logging.error("Memory allocation must be a positive integer.") + sys.exit(1) -if __name__ == "__main__": - run() + # Collect optional inputs into a list + optional_inputs = [args.truth_haplotype_1, args.truth_haplotype_2, args.targeted_vcf, args.second_alignment_reads] + + main( + input_files=args.input_files, + region_file=args.region_file, + fasta_file=args.fasta_file, + 
image_height=args.image_height, + igv_sh_bin=args.igv_sh_bin, + igv_mem=memMB, + sample_name=args.sample_name, + snapshot_format=args.snapshot_format, + output_dir=args.output_dir, + optional_inputs=optional_inputs + ) +if __name__ == "__main__": + run() \ No newline at end of file From 0c7e8f2c5e421498738472263f0aafb50231e1f0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 12:47:07 -0400 Subject: [PATCH 46/49] first try --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 120 ++++++++++-------- 1 file changed, 68 insertions(+), 52 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 84a877e53..19bbe0492 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,71 +1,83 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 - File aligned_bam_hap1_bai - File aligned_bam_hap2 - File aligned_bam_hap2_bai - File alignments - File alignments_bai - File bed_file - File fasta_file - File fasta_file_fai + File bam_file + File bam_file_bai + File regions_bed + File reference_fasta + File reference_fasta_fai String sample_name - Int image_height = 500 + Int image_height = 1000 Int memory_mb = 4000 - Int disk_gb = 100 # Disk size in GB, default to 100 GB - String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + Int disk_gb = 100 + String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v9172024" + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? targeted_vcf + File? targeted_vcf_tbi + File? second_alignment_reads + File? second_alignment_reads_bai } call RunIGVScreenshot { input: - aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, - aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, - alignments = alignments, - alignments_bai = alignments_bai, - bed_file = bed_file, - fasta_file = fasta_file, - fasta_file_fai = fasta_file_fai, - sample_name = sample_name, - image_height = image_height, - memory_mb = memory_mb, - disk_gb = disk_gb, - docker_image = docker_image + bam_file=bam_file, + bam_file_bai=bam_file_bai, + regions_bed=regions_bed, + reference_fasta=reference_fasta, + reference_fasta_fai=reference_fasta_fai, + sample_name=sample_name, + image_height=image_height, + memory_mb=memory_mb, + disk_gb=disk_gb, + docker_image=docker_image, + truth_haplotype_1=truth_haplotype_1, + truth_haplotype_1_bai=truth_haplotype_1_bai, + truth_haplotype_2=truth_haplotype_2, + truth_haplotype_2_bai=truth_haplotype_2_bai, + targeted_vcf=targeted_vcf, + targeted_vcf_tbi=targeted_vcf_tbi, + second_alignment_reads=second_alignment_reads, + second_alignment_reads_bai=second_alignment_reads_bai } output { - Array[File] snapshots = RunIGVScreenshot.snapshots + File igv_output_zip = RunIGVScreenshot.igv_output_zip } } task RunIGVScreenshot { input { - File aligned_bam_hap1 - File aligned_bam_hap1_bai - File aligned_bam_hap2 - File aligned_bam_hap2_bai - File alignments - File alignments_bai - File bed_file - File fasta_file - File fasta_file_fai + File bam_file + File bam_file_bai + File regions_bed + File reference_fasta + File reference_fasta_fai String sample_name Int image_height Int memory_mb Int disk_gb String docker_image + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? 
truth_haplotype_2_bai + File? targeted_vcf + File? targeted_vcf_tbi + File? second_alignment_reads + File? second_alignment_reads_bai } command <<< set -euo pipefail - # Ensure the snapshots directory exists - mkdir -p 'output/IGV_Snapshots' + # Ensure the output directory exists + mkdir -p igv_output # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -73,26 +85,30 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} - - # Move the screenshots to the IGV_Snapshots directory - #mv -- *.png 'output/IGV_Snapshots/' + ${bam_file} \ + -r ${regions_bed} \ + -f ${reference_fasta} \ + --sample_name ${sample_name} \ + --snapshot_format png \ + --output_dir igv_output \ + -ht ${image_height} \ + ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ + ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ + ~{if defined(targeted_vcf) then "--targeted_vcf " + targeted_vcf else ""} \ + ~{if defined(second_alignment_reads) then "--second_alignment_reads " + second_alignment_reads else ""} + + # Zip the output directory + zip -r igv_output.zip igv_output/ >>> runtime { docker: docker_image - memory: "~{memory_mb} MB" + memory: "${memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} SSD" + disks: "local-disk ${disk_gb} HDD" } output { - Array[File] snapshots = glob("output/IGV_Snapshots/*.png") + File igv_output_zip = "igv_output.zip" } } From 89fe93f2d2dce93f5d3934fdb91f6a16cef18e4b Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 14:46:01 -0400 Subject: [PATCH 47/49] optional inputs --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 111 ++++++++++-------- 1 file changed, 61 insertions(+), 50 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 19bbe0492..5c1eb4e00 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,74 +3,86 @@ version 1.0 workflow IGVScreenshotWorkflow { input { - File bam_file - File bam_file_bai + File aligned_bam1 + File aligned_bam1_bai + File aligned_bam2 + File aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? haplotype_8x_hap1 + File? haplotype_8x_hap1_bai + File? haplotype_8x_hap2 + File? haplotype_8x_hap2_bai + File? TRGT_VCF + File? TRGT_VCF_tbi String sample_name Int image_height = 1000 Int memory_mb = 4000 Int disk_gb = 100 String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v9172024" - File? truth_haplotype_1 - File? truth_haplotype_1_bai - File? truth_haplotype_2 - File? truth_haplotype_2_bai - File? targeted_vcf - File? targeted_vcf_tbi - File? second_alignment_reads - File? 
second_alignment_reads_bai } call RunIGVScreenshot { input: - bam_file=bam_file, - bam_file_bai=bam_file_bai, - regions_bed=regions_bed, - reference_fasta=reference_fasta, - reference_fasta_fai=reference_fasta_fai, - sample_name=sample_name, - image_height=image_height, - memory_mb=memory_mb, - disk_gb=disk_gb, - docker_image=docker_image, - truth_haplotype_1=truth_haplotype_1, - truth_haplotype_1_bai=truth_haplotype_1_bai, - truth_haplotype_2=truth_haplotype_2, - truth_haplotype_2_bai=truth_haplotype_2_bai, - targeted_vcf=targeted_vcf, - targeted_vcf_tbi=targeted_vcf_tbi, - second_alignment_reads=second_alignment_reads, - second_alignment_reads_bai=second_alignment_reads_bai + aligned_bam1 = aligned_bam1, + aligned_bam1_bai = aligned_bam1_bai, + aligned_bam2 = aligned_bam2, + aligned_bam2_bai = aligned_bam2_bai, + regions_bed = regions_bed, + reference_fasta = reference_fasta, + reference_fasta_fai = reference_fasta_fai, + truth_haplotype_1 = truth_haplotype_1, + truth_haplotype_1_bai = truth_haplotype_1_bai, + truth_haplotype_2 = truth_haplotype_2, + truth_haplotype_2_bai = truth_haplotype_2_bai, + haplotype_8x_hap1 = haplotype_8x_hap1, + haplotype_8x_hap1_bai = haplotype_8x_hap1_bai, + haplotype_8x_hap2 = haplotype_8x_hap2, + haplotype_8x_hap2_bai = haplotype_8x_hap2_bai, + TRGT_VCF = TRGT_VCF, + TRGT_VCF_tbi = TRGT_VCF_tbi, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image } output { - File igv_output_zip = RunIGVScreenshot.igv_output_zip + Array[File] screenshots = RunIGVScreenshot.screenshots } } task RunIGVScreenshot { - + input { - File bam_file - File bam_file_bai + File aligned_bam1 + File aligned_bam1_bai + File aligned_bam2 + File aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? haplotype_8x_hap1 + File? haplotype_8x_hap1_bai + File? haplotype_8x_hap2 + File? haplotype_8x_hap2_bai + File? TRGT_VCF + File? TRGT_VCF_tbi String sample_name Int image_height Int memory_mb Int disk_gb String docker_image - File? truth_haplotype_1 - File? truth_haplotype_1_bai - File? truth_haplotype_2 - File? truth_haplotype_2_bai - File? targeted_vcf - File? targeted_vcf_tbi - File? second_alignment_reads - File? 
second_alignment_reads_bai } command <<< @@ -85,20 +97,19 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ${bam_file} \ + ${aligned_bam1} \ + --second_alignment_reads ${aligned_bam2} \ + ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ + ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ + ~{if defined(TRGT_VCF) then "--targeted_vcf " + TRGT_VCF else ""} \ + ~{if defined(haplotype_8x_hap1) then "--second_alignment_reads " + haplotype_8x_hap1 else ""} \ + ~{if defined(haplotype_8x_hap2) then "--second_alignment_reads " + haplotype_8x_hap2 else ""} \ -r ${regions_bed} \ -f ${reference_fasta} \ --sample_name ${sample_name} \ --snapshot_format png \ --output_dir igv_output \ - -ht ${image_height} \ - ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ - ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ - ~{if defined(targeted_vcf) then "--targeted_vcf " + targeted_vcf else ""} \ - ~{if defined(second_alignment_reads) then "--second_alignment_reads " + second_alignment_reads else ""} - - # Zip the output directory - zip -r igv_output.zip igv_output/ + -ht ${image_height} >>> runtime { @@ -109,6 +120,6 @@ task RunIGVScreenshot { } output { - File igv_output_zip = "igv_output.zip" + Array[File] screenshots = glob("igv_output/*.png") } } From 15545f96168c9187cfac740c442c0108ec06d18c Mon Sep 17 00:00:00 2001 From: Shadi Zaheri <74751641+shadizaheri@users.noreply.github.com> Date: Wed, 18 Sep 2024 20:36:30 -0400 Subject: [PATCH 48/49] make one of the bam inputs optional --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 5c1eb4e00..727eeb81d 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -5,8 +5,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam1 File aligned_bam1_bai - File aligned_bam2 - File aligned_bam2_bai + File? aligned_bam2 + File? aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai @@ -63,8 +63,8 @@ task RunIGVScreenshot { input { File aligned_bam1 File aligned_bam1_bai - File aligned_bam2 - File aligned_bam2_bai + File? aligned_bam2 + File? 
aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai @@ -98,7 +98,7 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${aligned_bam1} \ - --second_alignment_reads ${aligned_bam2} \ + ~{if defined(aligned_bam2) then "--second_alignment_reads " + aligned_bam2 else ""} \ ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ ~{if defined(TRGT_VCF) then "--targeted_vcf " + TRGT_VCF else ""} \ From 5e59d9d941ac761c33ed702f3f36e961726a1b2e Mon Sep 17 00:00:00 2001 From: Shadi Zaheri <74751641+shadizaheri@users.noreply.github.com> Date: Wed, 18 Sep 2024 20:53:02 -0400 Subject: [PATCH 49/49] SSD --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 727eeb81d..1f23fbbdb 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -116,7 +116,7 @@ task RunIGVScreenshot { docker: docker_image memory: "${memory_mb} MB" cpu: 2 - disks: "local-disk ${disk_gb} HDD" + disks: "local-disk ${disk_gb} SSD" } output {
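A minimal Cromwell/Terra-style inputs JSON for the final IGVScreenshotWorkflow
above might look like the following sketch (the gs:// paths and the HG002
sample name are placeholders); the optional tracks (aligned_bam2,
truth_haplotype_1/2, haplotype_8x_hap1/2, TRGT_VCF and their indexes) take the
same "IGVScreenshotWorkflow.<input_name>" key pattern:

    {
      "IGVScreenshotWorkflow.aligned_bam1": "gs://example-bucket/HG002.hap1.bam",
      "IGVScreenshotWorkflow.aligned_bam1_bai": "gs://example-bucket/HG002.hap1.bam.bai",
      "IGVScreenshotWorkflow.regions_bed": "gs://example-bucket/regions.bed",
      "IGVScreenshotWorkflow.reference_fasta": "gs://example-bucket/GRCh38.fa",
      "IGVScreenshotWorkflow.reference_fasta_fai": "gs://example-bucket/GRCh38.fa.fai",
      "IGVScreenshotWorkflow.sample_name": "HG002"
    }

One syntax note on the task's command block: the WDL 1.0 spec only defines
~{...} placeholders inside <<< ... >>> heredoc commands (the patch above already
rewrites the aligned_bam2 reference that way), so the remaining ${aligned_bam1},
${regions_bed}, ${reference_fasta}, ${sample_name} and ${image_height}
references may not be interpolated by every engine and are likely safer written
with ~{...} as well.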