From 43774f0a1eff06ef7c8f9f477c175f04258cb325 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 10:32:55 -0400 Subject: [PATCH 01/49] Add initial IGV_HaplotypeViz WDL workflow for generating IGV screenshots --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl new file mode 100644 index 000000000..aeb0d6eea --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -0,0 +1,129 @@ +version 1.0 + +import "../../../structs/Structs.wdl" + +workflow IGV_HaplotypeViz { + input { + # BED files containing regions to screenshot; 4th column can optionally be SVID + Array[File] beds + Array[String] run_names + + # BAM and BAI files from align_asm workflow for alignment visualization + File bam_hap1 + File bai_hap1 + File bam_hap2 + File bai_hap2 + + # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization + File haplotig_fasta_hap1 + File haplotig_fasta_hap2 + + # Reference corresponding to read alignments for BAM files + File ref_fasta + File ref_fai + + # Sample id and prefix for output filenames + String sample_id + + # Number of records per shard for parallelization + Int? records_per_shard + + # Docker images for Linux and IGV headless tasks + String linux_docker + String igv_docker + } + + scatter (i in range(length(beds))) { + String sample_w_hap1 = sample_id + "_hap1" + String sample_w_hap2 = sample_id + "_hap2" + + # Run IGV for BAM alignments Haplotype 1 (H1) + call RunIGVHeadless as IGV_Hap1 { + input: + bam_or_cram=bam_hap1, + bam_or_cram_index=bai_hap1, + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # Run IGV for BAM alignments Haplotype 2 (H2) + call RunIGVHeadless as IGV_Hap2 { + input: + bam_or_cram=bam_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) + call RunIGVHeadless as IGV_Seq_Hap1 { + input: + bam_or_cram=haplotig_fasta_hap1, + bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) + call RunIGVHeadless as IGV_Seq_Hap2 { + input: + bam_or_cram=haplotig_fasta_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + } + + output { + Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot + Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot + Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot + } +} + +task RunIGVHeadless { + input { + File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram_index # Index file for BAM/CRAM + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for 
running IGV headless + Int? records_per_shard # Optional: Parallelization parameter for large datasets + } + + command <<< + # Running IGV headless mode to take screenshots for each region in the BED file + igv.sh \ + -b ~{bam_or_cram} \ + -i ~{bam_or_cram_index} \ + -g ~{ref_fasta} \ + -bed ~{bed} \ + -o ~{sample_id}.igv_screenshot.png + >>> + + output { + File igv_screenshot = "~{sample_id}.igv_screenshot.png" + } + + runtime { + docker: "~{igv_docker}" + memory: "8G" + cpu: "2" + disks: "local-disk 10 HDD" + } +} From 7cb3c7eee6771fbd620a85ce160e82994e5538e1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 11:22:07 -0400 Subject: [PATCH 02/49] Add new workflow for parallel processing of large BED files --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index aeb0d6eea..fe72a73aa 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz { +workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl new file mode 100644 index 000000000..aeb0d6eea --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -0,0 +1,129 @@ +version 1.0 + +import "../../../structs/Structs.wdl" + +workflow IGV_HaplotypeViz { + input { + # BED files containing regions to screenshot; 4th column can optionally be SVID + Array[File] beds + Array[String] run_names + + # BAM and BAI files from align_asm workflow for alignment visualization + File bam_hap1 + File bai_hap1 + File bam_hap2 + File bai_hap2 + + # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization + File haplotig_fasta_hap1 + File haplotig_fasta_hap2 + + # Reference corresponding to read alignments for BAM files + File ref_fasta + File ref_fai + + # Sample id and prefix for output filenames + String sample_id + + # Number of records per shard for parallelization + Int? 
records_per_shard + + # Docker images for Linux and IGV headless tasks + String linux_docker + String igv_docker + } + + scatter (i in range(length(beds))) { + String sample_w_hap1 = sample_id + "_hap1" + String sample_w_hap2 = sample_id + "_hap2" + + # Run IGV for BAM alignments Haplotype 1 (H1) + call RunIGVHeadless as IGV_Hap1 { + input: + bam_or_cram=bam_hap1, + bam_or_cram_index=bai_hap1, + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # Run IGV for BAM alignments Haplotype 2 (H2) + call RunIGVHeadless as IGV_Hap2 { + input: + bam_or_cram=bam_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) + call RunIGVHeadless as IGV_Seq_Hap1 { + input: + bam_or_cram=haplotig_fasta_hap1, + bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA + bed=beds[i], + sample_id=sample_w_hap1, + ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization + ref_fai=ref_fai, + igv_docker=igv_docker + } + + # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) + call RunIGVHeadless as IGV_Seq_Hap2 { + input: + bam_or_cram=haplotig_fasta_hap2, + bam_or_cram_index=bai_hap2, + bed=beds[i], + sample_id=sample_w_hap2, + ref_fasta=ref_fasta, + ref_fai=ref_fai, + igv_docker=igv_docker + } + } + + output { + Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot + Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot + Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot + } +} + +task RunIGVHeadless { + input { + File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram_index # Index file for BAM/CRAM + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for running IGV headless + Int? 
records_per_shard # Optional: Parallelization parameter for large datasets + } + + command <<< + # Running IGV headless mode to take screenshots for each region in the BED file + igv.sh \ + -b ~{bam_or_cram} \ + -i ~{bam_or_cram_index} \ + -g ~{ref_fasta} \ + -bed ~{bed} \ + -o ~{sample_id}.igv_screenshot.png + >>> + + output { + File igv_screenshot = "~{sample_id}.igv_screenshot.png" + } + + runtime { + docker: "~{igv_docker}" + memory: "8G" + cpu: "2" + disks: "local-disk 10 HDD" + } +} From 27f85d6736c3a26e064be92744c4a20c17cae6a8 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 11:27:52 -0400 Subject: [PATCH 03/49] fix the workflow name --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index fe72a73aa..aeb0d6eea 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz_Scatter { +workflow IGV_HaplotypeViz { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index aeb0d6eea..fe72a73aa 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../../structs/Structs.wdl" -workflow IGV_HaplotypeViz { +workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds From 6249a7c0123b4f52da71a91f0ad07bbd1f9b86a7 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Thu, 5 Sep 2024 20:48:09 -0400 Subject: [PATCH 04/49] update the docker --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 46 +++++-------------- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 46 +++++-------------- 2 files changed, 24 insertions(+), 68 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index aeb0d6eea..36baf264b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -37,11 +37,12 @@ workflow IGV_HaplotypeViz { String sample_w_hap1 = sample_id + "_hap1" String sample_w_hap2 = sample_id + "_hap2" - # Run IGV for BAM alignments Haplotype 1 (H1) - call RunIGVHeadless as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) + call RunIGVHeadlessCombined as IGV_Hap1 { input: bam_or_cram=bam_hap1, bam_or_cram_index=bai_hap1, + fasta=haplotig_fasta_hap1, bed=beds[i], sample_id=sample_w_hap1, ref_fasta=ref_fasta, @@ -49,35 +50,12 @@ workflow IGV_HaplotypeViz { igv_docker=igv_docker } - # Run IGV for BAM alignments Haplotype 2 (H2) - call RunIGVHeadless as IGV_Hap2 { + # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) + call RunIGVHeadlessCombined as IGV_Hap2 { input: bam_or_cram=bam_hap2, bam_or_cram_index=bai_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) - call RunIGVHeadless as IGV_Seq_Hap1 { - input: - 
bam_or_cram=haplotig_fasta_hap1, - bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA - bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) - call RunIGVHeadless as IGV_Seq_Hap2 { - input: - bam_or_cram=haplotig_fasta_hap2, - bam_or_cram_index=bai_hap2, + fasta=haplotig_fasta_hap2, bed=beds[i], sample_id=sample_w_hap2, ref_fasta=ref_fasta, @@ -89,15 +67,14 @@ workflow IGV_HaplotypeViz { output { Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot - Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot - Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot } } -task RunIGVHeadless { +task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram # BAM/CRAM file for visualization File bam_or_cram_index # Index file for BAM/CRAM + File fasta # FASTA file for haplotype visualization File bed # BED file containing regions to visualize (3 or 4 columns allowed) String sample_id # Sample ID for naming outputs File ref_fasta # Reference genome used for alignment @@ -107,12 +84,13 @@ task RunIGVHeadless { } command <<< - # Running IGV headless mode to take screenshots for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file igv.sh \ -b ~{bam_or_cram} \ -i ~{bam_or_cram_index} \ - -g ~{ref_fasta} \ + -g ~{fasta} \ -bed ~{bed} \ + -name bam,fasta \ -o ~{sample_id}.igv_screenshot.png >>> diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index fe72a73aa..29aec1df6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -37,11 +37,12 @@ workflow IGV_HaplotypeViz_Scatter { String sample_w_hap1 = sample_id + "_hap1" String sample_w_hap2 = sample_id + "_hap2" - # Run IGV for BAM alignments Haplotype 1 (H1) - call RunIGVHeadless as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) + call RunIGVHeadlessCombined as IGV_Hap1 { input: bam_or_cram=bam_hap1, bam_or_cram_index=bai_hap1, + fasta=haplotig_fasta_hap1, bed=beds[i], sample_id=sample_w_hap1, ref_fasta=ref_fasta, @@ -49,35 +50,12 @@ workflow IGV_HaplotypeViz_Scatter { igv_docker=igv_docker } - # Run IGV for BAM alignments Haplotype 2 (H2) - call RunIGVHeadless as IGV_Hap2 { + # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) + call RunIGVHeadlessCombined as IGV_Hap2 { input: bam_or_cram=bam_hap2, bam_or_cram_index=bai_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 1 (H1) - call RunIGVHeadless as IGV_Seq_Hap1 { - input: - bam_or_cram=haplotig_fasta_hap1, - bam_or_cram_index=bai_hap1, # Index may not be necessary for FASTA - bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, # Reference may not be needed for FASTA visualization - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # For sequence visualization, use FASTA for haplotigs Haplotype 2 (H2) - call RunIGVHeadless as IGV_Seq_Hap2 { - input: - bam_or_cram=haplotig_fasta_hap2, - bam_or_cram_index=bai_hap2, + 
fasta=haplotig_fasta_hap2, bed=beds[i], sample_id=sample_w_hap2, ref_fasta=ref_fasta, @@ -89,15 +67,14 @@ workflow IGV_HaplotypeViz_Scatter { output { Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot - Array[File] igv_screenshots_seq_hap1 = IGV_Seq_Hap1.igv_screenshot - Array[File] igv_screenshots_seq_hap2 = IGV_Seq_Hap2.igv_screenshot } } -task RunIGVHeadless { +task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM or FASTA file for visualization + File bam_or_cram # BAM/CRAM file for visualization File bam_or_cram_index # Index file for BAM/CRAM + File fasta # FASTA file for haplotype visualization File bed # BED file containing regions to visualize (3 or 4 columns allowed) String sample_id # Sample ID for naming outputs File ref_fasta # Reference genome used for alignment @@ -107,12 +84,13 @@ task RunIGVHeadless { } command <<< - # Running IGV headless mode to take screenshots for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file igv.sh \ -b ~{bam_or_cram} \ -i ~{bam_or_cram_index} \ - -g ~{ref_fasta} \ + -g ~{fasta} \ -bed ~{bed} \ + -name bam,fasta \ -o ~{sample_id}.igv_screenshot.png >>> From c2ac959ba53d10852a57632ba81f7e4bfcc7ba73 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 11:08:39 -0400 Subject: [PATCH 05/49] updating docker and fixing the igv command --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 36baf264b..7132f4274 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -6,7 +6,7 @@ workflow IGV_HaplotypeViz { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds - Array[String] run_names + Array[String]? run_names # BAM and BAI files from align_asm workflow for alignment visualization File bam_hap1 @@ -28,69 +28,69 @@ workflow IGV_HaplotypeViz { # Number of records per shard for parallelization Int? records_per_shard - # Docker images for Linux and IGV headless tasks - String linux_docker - String igv_docker + # Configurable CPU, memory, and disk + Int? cpu = 2 # Default is 2 CPUs + String? memory = "8G" # Default memory is 8 GB + String? 
disk_size = "10G" # Default disk size is 10 GB } + Array[String] default_run_names = range(length(beds)) + Array[String] used_run_names = select_first([run_names, default_run_names]) scatter (i in range(length(beds))) { - String sample_w_hap1 = sample_id + "_hap1" - String sample_w_hap2 = sample_id + "_hap2" + String run_name = used_run_names[i] + String sample_combined = sample_id + "_combined_" + run_name - # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) - call RunIGVHeadlessCombined as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for Haplotype 1 and 2 combined + call RunIGVHeadlessCombined { input: - bam_or_cram=bam_hap1, - bam_or_cram_index=bai_hap1, - fasta=haplotig_fasta_hap1, + bam_hap1=bam_hap1, + bai_hap1=bai_hap1, + bam_hap2=bam_hap2, + bai_hap2=bai_hap2, + fasta_hap1=haplotig_fasta_hap1, + fasta_hap2=haplotig_fasta_hap2, bed=beds[i], - sample_id=sample_w_hap1, + sample_id=sample_combined, ref_fasta=ref_fasta, ref_fai=ref_fai, - igv_docker=igv_docker - } - - # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) - call RunIGVHeadlessCombined as IGV_Hap2 { - input: - bam_or_cram=bam_hap2, - bam_or_cram_index=bai_hap2, - fasta=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_w_hap2, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker + cpu=cpu, + memory=memory, + disk_size=disk_size } } output { - Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot - Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot } } task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM file for visualization - File bam_or_cram_index # Index file for BAM/CRAM - File fasta # FASTA file for haplotype visualization - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless - Int? records_per_shard # Optional: Parallelization parameter for large datasets + File bam_hap1 # BAM file for Haplotype 1 + File bai_hap1 # BAI file for Haplotype 1 + File bam_hap2 # BAM file for Haplotype 2 + File bai_hap2 # BAI file for Haplotype 2 + File fasta_hap1 # FASTA file for Haplotype 1 + File fasta_hap2 # FASTA file for Haplotype 2 + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + + # Configurable resources + Int? cpu # CPUs to use + String? memory # Memory to allocate + String? 
disk_size # Disk size } command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes igv.sh \ - -b ~{bam_or_cram} \ - -i ~{bam_or_cram_index} \ - -g ~{fasta} \ + -b ~{bam_hap1},~{bam_hap2} \ + -i ~{bai_hap1},~{bai_hap2} \ + -g ~{fasta_hap1},~{fasta_hap2} \ -bed ~{bed} \ - -name bam,fasta \ + -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ -o ~{sample_id}.igv_screenshot.png >>> @@ -99,9 +99,9 @@ task RunIGVHeadlessCombined { } runtime { - docker: "~{igv_docker}" - memory: "8G" - cpu: "2" - disks: "local-disk 10 HDD" + docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" + memory: "~{memory}" + cpu: "~{cpu}" + disks: "local-disk ~{disk_size} HDD" } } From dd2dfc930b08a4b2b411c86e61f7493a54390364 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 11:27:00 -0400 Subject: [PATCH 06/49] fixing the docker and igv command --- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl index 29aec1df6..671316c92 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl @@ -6,7 +6,7 @@ workflow IGV_HaplotypeViz_Scatter { input { # BED files containing regions to screenshot; 4th column can optionally be SVID Array[File] beds - Array[String] run_names + Array[String]? run_names # BAM and BAI files from align_asm workflow for alignment visualization File bam_hap1 @@ -28,36 +28,24 @@ workflow IGV_HaplotypeViz_Scatter { # Number of records per shard for parallelization Int? 
records_per_shard - # Docker images for Linux and IGV headless tasks - String linux_docker + # Docker image for IGV headless tasks String igv_docker } scatter (i in range(length(beds))) { - String sample_w_hap1 = sample_id + "_hap1" - String sample_w_hap2 = sample_id + "_hap2" + String sample_combined = sample_id + "_combined" - # Run IGV for both BAM and FASTA visualization for Haplotype 1 (H1) - call RunIGVHeadlessCombined as IGV_Hap1 { + # Run IGV for both BAM and FASTA visualization for both haplotypes (Hap1 and Hap2) + call RunIGVHeadlessCombined { input: - bam_or_cram=bam_hap1, - bam_or_cram_index=bai_hap1, - fasta=haplotig_fasta_hap1, + bam_hap1=bam_hap1, + bai_hap1=bai_hap1, + bam_hap2=bam_hap2, + bai_hap2=bai_hap2, + fasta_hap1=haplotig_fasta_hap1, + fasta_hap2=haplotig_fasta_hap2, bed=beds[i], - sample_id=sample_w_hap1, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - - # Run IGV for both BAM and FASTA visualization for Haplotype 2 (H2) - call RunIGVHeadlessCombined as IGV_Hap2 { - input: - bam_or_cram=bam_hap2, - bam_or_cram_index=bai_hap2, - fasta=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_w_hap2, + sample_id=sample_combined, ref_fasta=ref_fasta, ref_fai=ref_fai, igv_docker=igv_docker @@ -65,32 +53,34 @@ workflow IGV_HaplotypeViz_Scatter { } output { - Array[File] igv_screenshots_hap1 = IGV_Hap1.igv_screenshot - Array[File] igv_screenshots_hap2 = IGV_Hap2.igv_screenshot + Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot } } task RunIGVHeadlessCombined { input { - File bam_or_cram # BAM/CRAM file for visualization - File bam_or_cram_index # Index file for BAM/CRAM - File fasta # FASTA file for haplotype visualization - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless + File bam_hap1 # BAM file for Haplotype 1 + File bai_hap1 # BAI file for Haplotype 1 + File bam_hap2 # BAM file for Haplotype 2 + File bai_hap2 # BAI file for Haplotype 2 + File fasta_hap1 # FASTA file for Haplotype 1 + File fasta_hap2 # FASTA file for Haplotype 2 + File bed # BED file containing regions to visualize (3 or 4 columns allowed) + String sample_id # Sample ID for naming outputs + File ref_fasta # Reference genome used for alignment + File ref_fai # Index for the reference genome + String igv_docker # Docker image for running IGV headless Int? 
records_per_shard # Optional: Parallelization parameter for large datasets } command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for each region in the BED file + # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes igv.sh \ - -b ~{bam_or_cram} \ - -i ~{bam_or_cram_index} \ - -g ~{fasta} \ + -b ~{bam_hap1},~{bam_hap2} \ + -i ~{bai_hap1},~{bai_hap2} \ + -g ~{fasta_hap1},~{fasta_hap2} \ -bed ~{bed} \ - -name bam,fasta \ + -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ -o ~{sample_id}.igv_screenshot.png >>> @@ -99,7 +89,7 @@ task RunIGVHeadlessCombined { } runtime { - docker: "~{igv_docker}" + docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" # Updated IGV docker image memory: "8G" cpu: "2" disks: "local-disk 10 HDD" From c0fa633f4d377c9c3043aa142e2a271b26007913 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 13:44:40 -0400 Subject: [PATCH 07/49] fixing the runtime --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 7132f4274..ae4b2937f 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -102,6 +102,6 @@ task RunIGVHeadlessCombined { docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" memory: "~{memory}" cpu: "~{cpu}" - disks: "local-disk ~{disk_size} HDD" + disks: "local-disk ~{disk_size}" } } From fafa429ec7f005369b52c31e89ad6b273150ac79 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Fri, 6 Sep 2024 13:55:36 -0400 Subject: [PATCH 08/49] fixing the runtime --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index ae4b2937f..128b19833 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -102,6 +102,6 @@ task RunIGVHeadlessCombined { docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" memory: "~{memory}" cpu: "~{cpu}" - disks: "local-disk ~{disk_size}" + disks: "local-disk ~{disk_size} SSD" } } From 9f0d53bf21f1d651122d6f862554c35e191750b5 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Sun, 8 Sep 2024 12:10:51 -0400 Subject: [PATCH 09/49] new wdl for taking the igv screenshots --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 127 ++++++------------ 1 file changed, 42 insertions(+), 85 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 128b19833..521dfbd88 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,107 +1,64 @@ version 1.0 -import "../../../structs/Structs.wdl" +workflow igv_screenshot_automation { -workflow IGV_HaplotypeViz { input { - # BED files containing regions to screenshot; 4th column can optionally be SVID - Array[File] beds - Array[String]? 
run_names - - # BAM and BAI files from align_asm workflow for alignment visualization - File bam_hap1 - File bai_hap1 - File bam_hap2 - File bai_hap2 - - # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization - File haplotig_fasta_hap1 - File haplotig_fasta_hap2 - - # Reference corresponding to read alignments for BAM files - File ref_fasta - File ref_fai - - # Sample id and prefix for output filenames - String sample_id - - # Number of records per shard for parallelization - Int? records_per_shard - - # Configurable CPU, memory, and disk - Int? cpu = 2 # Default is 2 CPUs - String? memory = "8G" # Default memory is 8 GB - String? disk_size = "10G" # Default disk size is 10 GB + File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap2_bam # BAM file for asm haplotype 2 + File hap1_bam # BAM file for haplotype 1 + File hap2_bam # BAM file for haplotype 2 + File reference_fasta # Reference FASTA file + File regions_bed # Path to the BED file with regions of interest + String genome # Reference genome version (e.g., "hg38") + Int image_height = 500 # Height for the IGV tracks } - Array[String] default_run_names = range(length(beds)) - Array[String] used_run_names = select_first([run_names, default_run_names]) - - scatter (i in range(length(beds))) { - String run_name = used_run_names[i] - String sample_combined = sample_id + "_combined_" + run_name - # Run IGV for both BAM and FASTA visualization for Haplotype 1 and 2 combined - call RunIGVHeadlessCombined { - input: - bam_hap1=bam_hap1, - bai_hap1=bai_hap1, - bam_hap2=bam_hap2, - bai_hap2=bai_hap2, - fasta_hap1=haplotig_fasta_hap1, - fasta_hap2=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_combined, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - cpu=cpu, - memory=memory, - disk_size=disk_size - } + call IGVScreenshotTask { + input: + asm_hap1_bam = asm_hap1_bam, + asm_hap2_bam = asm_hap2_bam, + hap1_bam = hap1_bam, + hap2_bam = hap2_bam, + reference_fasta = reference_fasta, + regions_bed = regions_bed, + genome = genome, + image_height = image_height } output { - Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot + Array[File] snapshots = IGVScreenshotTask.snapshots } } -task RunIGVHeadlessCombined { +task IGVScreenshotTask { input { - File bam_hap1 # BAM file for Haplotype 1 - File bai_hap1 # BAI file for Haplotype 1 - File bam_hap2 # BAM file for Haplotype 2 - File bai_hap2 # BAI file for Haplotype 2 - File fasta_hap1 # FASTA file for Haplotype 1 - File fasta_hap2 # FASTA file for Haplotype 2 - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - - # Configurable resources - Int? cpu # CPUs to use - String? memory # Memory to allocate - String? 
disk_size # Disk size + File asm_hap1_bam + File asm_hap2_bam + File hap1_bam + File hap2_bam + File reference_fasta + File regions_bed + String genome + Int image_height } - command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes - igv.sh \ - -b ~{bam_hap1},~{bam_hap2} \ - -i ~{bai_hap1},~{bai_hap2} \ - -g ~{fasta_hap1},~{fasta_hap2} \ - -bed ~{bed} \ - -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ - -o ~{sample_id}.igv_screenshot.png - >>> + command { + # Run the Python script with inputs for hap1 and hap2 BAM files + python3 /opt/make_igv_screenshot.py \ + ${asm_hap1_bam} ${asm_hap2_bam} ${hap1_bam} ${hap2_bam} \ + -r ${regions_bed} -g ${genome} -ht ${image_height} \ + -ref_fasta ${reference_fasta} + } output { - File igv_screenshot = "~{sample_id}.igv_screenshot.png" + # Capture all the snapshot files generated by the script + Array[File] snapshots = glob("IGV_Snapshots/*.png") } runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" - memory: "~{memory}" - cpu: "~{cpu}" - disks: "local-disk ~{disk_size} SSD" + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" + memory: "8G" + cpu: 2 + disks: "local-disk 100 HDD" } } From 9682125b992b943289eb1acb87e988adfb35482d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 08:07:45 -0400 Subject: [PATCH 10/49] update docker and the script to use only one bam --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 521dfbd88..032f6f87c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,10 +3,9 @@ version 1.0 workflow igv_screenshot_automation { input { - File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap2_bam # BAM file for asm haplotype 2 - File hap1_bam # BAM file for haplotype 1 - File hap2_bam # BAM file for haplotype 2 + File asm_hap1_bam # BAM file for assembly haplotype 1 + File asm_hap2_bam # BAM file for assembly haplotype 2 + File bam # A single BAM file for the sample File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") @@ -17,8 +16,7 @@ workflow igv_screenshot_automation { input: asm_hap1_bam = asm_hap1_bam, asm_hap2_bam = asm_hap2_bam, - hap1_bam = hap1_bam, - hap2_bam = hap2_bam, + bam = bam, reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, @@ -34,8 +32,7 @@ task IGVScreenshotTask { input { File asm_hap1_bam File asm_hap2_bam - File hap1_bam - File hap2_bam + File bam File reference_fasta File regions_bed String genome @@ -43,9 +40,9 @@ task IGVScreenshotTask { } command { - # Run the Python script with inputs for hap1 and hap2 BAM files + # Run the Python script with inputs for asm_hap1, asm_hap2, and bam python3 /opt/make_igv_screenshot.py \ - ${asm_hap1_bam} ${asm_hap2_bam} ${hap1_bam} ${hap2_bam} \ + ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} } @@ -59,6 +56,6 @@ task IGVScreenshotTask { docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" memory: "8G" cpu: 2 - disks: "local-disk 100 HDD" + disks: "local-disk 100 HDD" # Adjust this based on file size needs } } From f5749369e2ecc89f8911dae7c7a5253c996f9843 Mon Sep 17 00:00:00 2001 From: 
Shadi Zaheri Date: Mon, 9 Sep 2024 10:07:53 -0400 Subject: [PATCH 11/49] Updated Python script and WDL for IGV snapshot automation --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 032f6f87c..659ef7af4 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,8 +3,8 @@ version 1.0 workflow igv_screenshot_automation { input { - File asm_hap1_bam # BAM file for assembly haplotype 1 - File asm_hap2_bam # BAM file for assembly haplotype 2 + File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap2_bam # BAM file for asm haplotype 2 File bam # A single BAM file for the sample File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest @@ -48,7 +48,7 @@ task IGVScreenshotTask { } output { - # Capture all the snapshot files generated by the script + # Capture all the snapshot files generated by the script from the 'IGV_Snapshots' directory Array[File] snapshots = glob("IGV_Snapshots/*.png") } From 1c119149f5af24bee900329ff4c2bee635aa7907 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 13:33:08 -0400 Subject: [PATCH 12/49] Update WDL and Python script to support IGV 2.18.2 with igv.sh --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 659ef7af4..6151d2e7d 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -44,7 +44,8 @@ task IGVScreenshotTask { python3 /opt/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ - -ref_fasta ${reference_fasta} + -ref_fasta ${reference_fasta} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh # Explicitly passing the igv.sh path } output { From d10a14004841a7408bf759568fcc99e0651a1ea4 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 14:55:34 -0400 Subject: [PATCH 13/49] update the directory to the python script --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 6151d2e7d..af96bf84c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -41,7 +41,7 @@ task IGVScreenshotTask { command { # Run the Python script with inputs for asm_hap1, asm_hap2, and bam - python3 /opt/make_igv_screenshot.py \ + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} \ From 09154eda459b1f8be17006645b6a40c57a01f6a1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 17:48:25 -0400 Subject: [PATCH 14/49] update bai --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index af96bf84c..697a3b799 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ 
b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow igv_screenshot_automation { input { File asm_hap1_bam # BAM file for asm haplotype 1 + File asm_hap1_bai # BAI index file for asm haplotype 1 File asm_hap2_bam # BAM file for asm haplotype 2 + File asm_hap2_bai # BAI index file for asm haplotype 2 File bam # A single BAM file for the sample + File bam_bai # BAI index file for the single BAM file File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") @@ -15,8 +18,11 @@ workflow igv_screenshot_automation { call IGVScreenshotTask { input: asm_hap1_bam = asm_hap1_bam, + asm_hap1_bai = asm_hap1_bai, asm_hap2_bam = asm_hap2_bam, + asm_hap2_bai = asm_hap2_bai, bam = bam, + bam_bai = bam_bai, reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, @@ -31,8 +37,11 @@ workflow igv_screenshot_automation { task IGVScreenshotTask { input { File asm_hap1_bam + File asm_hap1_bai File asm_hap2_bam + File asm_hap2_bai File bam + File bam_bai File reference_fasta File regions_bed String genome @@ -40,12 +49,20 @@ task IGVScreenshotTask { } command { + # Localize the BAM and BAI files to ensure IGV can use them + ln -s ${asm_hap1_bam} . + ln -s ${asm_hap1_bai} . + ln -s ${asm_hap2_bam} . + ln -s ${asm_hap2_bai} . + ln -s ${bam} . + ln -s ${bam_bai} . + # Run the Python script with inputs for asm_hap1, asm_hap2, and bam python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ -r ${regions_bed} -g ${genome} -ht ${image_height} \ -ref_fasta ${reference_fasta} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh # Explicitly passing the igv.sh path + -bin /opt/IGV_Linux_2.18.2/igv.sh } output { From 4aedff1391b20f21c1299e58432df76701c7905f Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Mon, 9 Sep 2024 18:12:42 -0400 Subject: [PATCH 15/49] checking for bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 697a3b799..1202411f3 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,25 +4,23 @@ workflow igv_screenshot_automation { input { File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap1_bai # BAI index file for asm haplotype 1 File asm_hap2_bam # BAM file for asm haplotype 2 - File asm_hap2_bai # BAI index file for asm haplotype 2 File bam # A single BAM file for the sample - File bam_bai # BAI index file for the single BAM file File reference_fasta # Reference FASTA file File regions_bed # Path to the BED file with regions of interest String genome # Reference genome version (e.g., "hg38") Int image_height = 500 # Height for the IGV tracks } + # Directly use .bam.bai files co-located with the BAM files call IGVScreenshotTask { input: asm_hap1_bam = asm_hap1_bam, - asm_hap1_bai = asm_hap1_bai, + asm_hap1_bai = asm_hap1_bam + ".bai", asm_hap2_bam = asm_hap2_bam, - asm_hap2_bai = asm_hap2_bai, + asm_hap2_bai = asm_hap2_bam + ".bai", bam = bam, - bam_bai = bam_bai, + bam_bai = bam + ".bai", reference_fasta = reference_fasta, regions_bed = regions_bed, genome = genome, From bd53b9cce97594c29ac314d72e9f4a77ace081b1 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 13:16:51 -0400 Subject: [PATCH 16/49] new wdl-the script has been tested on the vm --- 
.../PacBio/Utility/IGV_HaplotypeViz.wdl | 130 +++++++++--------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 1202411f3..6b3342843 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,77 +1,77 @@ version 1.0 -workflow igv_screenshot_automation { +workflow IGVScreenshotWorkflow { - input { - File asm_hap1_bam # BAM file for asm haplotype 1 - File asm_hap2_bam # BAM file for asm haplotype 2 - File bam # A single BAM file for the sample - File reference_fasta # Reference FASTA file - File regions_bed # Path to the BED file with regions of interest - String genome # Reference genome version (e.g., "hg38") - Int image_height = 500 # Height for the IGV tracks - } + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments # BAM file for total alignments + File bed_file # BED file with regions + File fasta_file # Reference FASTA file + String sample_name # Sample name to use in filenames + Int image_height = 500 + Int memory_mb = 4000 + Int disk_gb = 100 # Disk size in GB, default to 100 GB + String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + } - # Directly use .bam.bai files co-located with the BAM files - call IGVScreenshotTask { - input: - asm_hap1_bam = asm_hap1_bam, - asm_hap1_bai = asm_hap1_bam + ".bai", - asm_hap2_bam = asm_hap2_bam, - asm_hap2_bai = asm_hap2_bam + ".bai", - bam = bam, - bam_bai = bam + ".bai", - reference_fasta = reference_fasta, - regions_bed = regions_bed, - genome = genome, - image_height = image_height - } + call RunIGVScreenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap2 = aligned_bam_hap2, + alignments = alignments, + bed_file = bed_file, + fasta_file = fasta_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image + } - output { - Array[File] snapshots = IGVScreenshotTask.snapshots - } + output { + Array[File] snapshots = RunIGVScreenshot.snapshots + } } -task IGVScreenshotTask { - input { - File asm_hap1_bam - File asm_hap1_bai - File asm_hap2_bam - File asm_hap2_bai - File bam - File bam_bai - File reference_fasta - File regions_bed - String genome - Int image_height - } +task RunIGVScreenshot { + input { + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments + File bed_file + File fasta_file + String sample_name + Int image_height + Int memory_mb + Int disk_gb + String docker_image + } - command { - # Localize the BAM and BAI files to ensure IGV can use them - ln -s ${asm_hap1_bam} . - ln -s ${asm_hap1_bai} . - ln -s ${asm_hap2_bam} . - ln -s ${asm_hap2_bai} . - ln -s ${bam} . - ln -s ${bam_bai} . 
+ command { + mkdir -p IGV_Snapshots + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 - # Run the Python script with inputs for asm_hap1, asm_hap2, and bam - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ${asm_hap1_bam} ${asm_hap2_bam} ${bam} \ - -r ${regions_bed} -g ${genome} -ht ${image_height} \ - -ref_fasta ${reference_fasta} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh - } + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} + } - output { - # Capture all the snapshot files generated by the script from the 'IGV_Snapshots' directory - Array[File] snapshots = glob("IGV_Snapshots/*.png") - } + runtime { + docker: docker_image + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk ~{disk_gb} HDD" + } - runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - memory: "8G" - cpu: 2 - disks: "local-disk 100 HDD" # Adjust this based on file size needs - } + output { + Array[File] snapshots = glob("IGV_Snapshots/*.png") + } } From d533e408d1ac1af75b14460886a9941f859a0117 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 14:09:33 -0400 Subject: [PATCH 17/49] technically the same script as IGV_HaplotypeViz but will locate the input bai as well --- .../Utility/IGV_HaplotypeViz_Scatter.wdl | 97 ------------------- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 74 ++++++++++++++ 2 files changed, 74 insertions(+), 97 deletions(-) delete mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl create mode 100644 wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl deleted file mode 100644 index 671316c92..000000000 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_Scatter.wdl +++ /dev/null @@ -1,97 +0,0 @@ -version 1.0 - -import "../../../structs/Structs.wdl" - -workflow IGV_HaplotypeViz_Scatter { - input { - # BED files containing regions to screenshot; 4th column can optionally be SVID - Array[File] beds - Array[String]? run_names - - # BAM and BAI files from align_asm workflow for alignment visualization - File bam_hap1 - File bai_hap1 - File bam_hap2 - File bai_hap2 - - # FASTA files from PBAssembleWithHifiasm or bam_to_contig workflow for sequence visualization - File haplotig_fasta_hap1 - File haplotig_fasta_hap2 - - # Reference corresponding to read alignments for BAM files - File ref_fasta - File ref_fai - - # Sample id and prefix for output filenames - String sample_id - - # Number of records per shard for parallelization - Int? 
records_per_shard - - # Docker image for IGV headless tasks - String igv_docker - } - - scatter (i in range(length(beds))) { - String sample_combined = sample_id + "_combined" - - # Run IGV for both BAM and FASTA visualization for both haplotypes (Hap1 and Hap2) - call RunIGVHeadlessCombined { - input: - bam_hap1=bam_hap1, - bai_hap1=bai_hap1, - bam_hap2=bam_hap2, - bai_hap2=bai_hap2, - fasta_hap1=haplotig_fasta_hap1, - fasta_hap2=haplotig_fasta_hap2, - bed=beds[i], - sample_id=sample_combined, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - igv_docker=igv_docker - } - } - - output { - Array[File] igv_screenshots_combined = RunIGVHeadlessCombined.igv_screenshot - } -} - -task RunIGVHeadlessCombined { - input { - File bam_hap1 # BAM file for Haplotype 1 - File bai_hap1 # BAI file for Haplotype 1 - File bam_hap2 # BAM file for Haplotype 2 - File bai_hap2 # BAI file for Haplotype 2 - File fasta_hap1 # FASTA file for Haplotype 1 - File fasta_hap2 # FASTA file for Haplotype 2 - File bed # BED file containing regions to visualize (3 or 4 columns allowed) - String sample_id # Sample ID for naming outputs - File ref_fasta # Reference genome used for alignment - File ref_fai # Index for the reference genome - String igv_docker # Docker image for running IGV headless - Int? records_per_shard # Optional: Parallelization parameter for large datasets - } - - command <<< - # Running IGV headless mode to take screenshots for both BAM and FASTA files for both haplotypes - igv.sh \ - -b ~{bam_hap1},~{bam_hap2} \ - -i ~{bai_hap1},~{bai_hap2} \ - -g ~{fasta_hap1},~{fasta_hap2} \ - -bed ~{bed} \ - -name hap1_bam,hap2_bam,hap1_fasta,hap2_fasta \ - -o ~{sample_id}.igv_screenshot.png - >>> - - output { - File igv_screenshot = "~{sample_id}.igv_screenshot.png" - } - - runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_docker:v952024" # Updated IGV docker image - memory: "8G" - cpu: "2" - disks: "local-disk 10 HDD" - } -} diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl new file mode 100644 index 000000000..1848be1fa --- /dev/null +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -0,0 +1,74 @@ +version 1.0 + +workflow igv_screenshot_workflow { + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments_bam # Total alignments BAM file + File ref_fasta # Reference FASTA file + File targeted_bed_file # BED file with regions of interest + String sample_name # Sample name for naming convention + Int image_height = 500 # Height of IGV track, default to 500 + Int memory_mb = 4000 # Memory for IGV, default to 4000MB + } + + call make_igv_screenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap2 = aligned_bam_hap2, + alignments_bam = alignments_bam, + ref_fasta = ref_fasta, + targeted_bed_file = targeted_bed_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb + } + + output { + Array[File] pngs = make_igv_screenshot.pngs # Collect all generated PNG files + } +} + +task make_igv_screenshot { + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File alignments_bam # Total alignments BAM file + File ref_fasta # Reference FASTA file + File targeted_bed_file # BED file with regions of interest + String sample_name # Sample name for naming convention + Int image_height # Height of IGV track + Int memory_mb # Memory for IGV + } + + command { + # 
Create output directory for snapshots + mkdir -p IGV_Snapshots + + # Start a virtual framebuffer (Xvfb) to allow IGV to render without display + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the Python script to generate IGV screenshots + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ + -r ~{targeted_bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{ref_fasta} \ + --sample_name ~{sample_name} + } + + output { + # Capture all generated PNG snapshot files + Array[File] pngs = glob("IGV_Snapshots/*.png") + } + + runtime { + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk 50 HDD" # Specify disk size if needed + } +} From 76009fecb8c66d56e02b7d81a33bdd6914c31dc2 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 14:27:15 -0400 Subject: [PATCH 18/49] modify the runtime attributes --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 103 ++++++++++++------ 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 1848be1fa..cf5ce5ab6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -7,12 +7,12 @@ workflow igv_screenshot_workflow { File alignments_bam # Total alignments BAM file File ref_fasta # Reference FASTA file File targeted_bed_file # BED file with regions of interest - String sample_name # Sample name for naming convention - Int image_height = 500 # Height of IGV track, default to 500 - Int memory_mb = 4000 # Memory for IGV, default to 4000MB + String sample_name # Name for the sample (used in output naming) + String disk_type = "SSD" # Default disk type + String gcs_output_dir # GCS directory to copy outputs } - call make_igv_screenshot { + call GenerateIgvScreenshots { input: aligned_bam_hap1 = aligned_bam_hap1, aligned_bam_hap2 = aligned_bam_hap2, @@ -20,55 +20,90 @@ workflow igv_screenshot_workflow { ref_fasta = ref_fasta, targeted_bed_file = targeted_bed_file, sample_name = sample_name, - image_height = image_height, - memory_mb = memory_mb + disk_type = disk_type + } + + call FinalizeToGCS { + input: + screenshots = GenerateIgvScreenshots.screenshots, + output_dir = gcs_output_dir } output { - Array[File] pngs = make_igv_screenshot.pngs # Collect all generated PNG files + Array[File] screenshot_files = GenerateIgvScreenshots.screenshots } } -task make_igv_screenshot { +task GenerateIgvScreenshots { input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments_bam # Total alignments BAM file - File ref_fasta # Reference FASTA file - File targeted_bed_file # BED file with regions of interest - String sample_name # Sample name for naming convention - Int image_height # Height of IGV track - Int memory_mb # Memory for IGV + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments_bam + File ref_fasta + File targeted_bed_file + String sample_name + String disk_type } - command { - # Create output directory for snapshots - mkdir -p IGV_Snapshots - - # Start a virtual framebuffer (Xvfb) to allow IGV to render without display - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 + command <<< + # Ensure the snapshots directory exists and set permissions + 
mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 - # Run the Python script to generate IGV screenshots + # Run the Python script to generate IGV snapshots python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ + --fasta_file ~{ref_fasta} \ + --sample_name ~{sample_name} \ -r ~{targeted_bed_file} \ - -ht ~{image_height} \ + -ht 500 \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{ref_fasta} \ - --sample_name ~{sample_name} + -mem 4000 + + >>> + + output { + Array[File] screenshots = glob("/output/IGV_Snapshots/*.png") + } + + # Calculate dynamic disk size based on the size of BAM files + Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 + + runtime { + cpu: 4 + memory: "8 GiB" + disks: "local-disk " + disk_size + " " + disk_type + preemptible: 2 + maxRetries: 1 + docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" } +} + +task FinalizeToGCS { + input { + Array[File] screenshots + String output_dir + } + + command <<< + # Copy the output PNG files to Google Cloud Storage + for file in ~{sep=' ' screenshots}; do + gsutil cp "$file" "~{output_dir}/" + done + >>> output { - # Capture all generated PNG snapshot files - Array[File] pngs = glob("IGV_Snapshots/*.png") + Array[File] uploaded_files = screenshots } runtime { - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - memory: "~{memory_mb} MB" - cpu: 2 - disks: "local-disk 50 HDD" # Specify disk size if needed + cpu: 1 + memory: "2 GiB" + disks: "local-disk 10 HDD" + preemptible: 2 + maxRetries: 1 + docker: "gcr.io/google-containers/toolbox:latest" } } From 11f78eed07401c66b650d905de62942e179dc1c8 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:29:28 -0400 Subject: [PATCH 19/49] fixing the issue with bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 6b3342843..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -18,8 +21,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, @@ -37,8 +43,11 @@ workflow IGVScreenshotWorkflow { task RunIGVScreenshot { input { File aligned_bam_hap1 + File aligned_bam_hap1_bai File aligned_bam_hap2 + File aligned_bam_hap2_bai File alignments + File alignments_bai File bed_file File fasta_file String sample_name @@ 
-56,6 +65,7 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ From 0b708963c226429b5cdd5911dbbb3c25152d6def Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:34:48 -0400 Subject: [PATCH 20/49] fix the issue with output directory --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index cf5ce5ab6..04a5e48f2 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,15 +1,15 @@ version 1.0 -workflow igv_screenshot_workflow { +workflow IGV_HaplotypeViz { input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments_bam # Total alignments BAM file - File ref_fasta # Reference FASTA file - File targeted_bed_file # BED file with regions of interest - String sample_name # Name for the sample (used in output naming) - String disk_type = "SSD" # Default disk type - String gcs_output_dir # GCS directory to copy outputs + File aligned_bam_hap1 + File aligned_bam_hap2 + File alignments_bam + File ref_fasta + File targeted_bed_file + String sample_name + String disk_type = "SSD" + String output_dir } call GenerateIgvScreenshots { @@ -26,11 +26,11 @@ workflow igv_screenshot_workflow { call FinalizeToGCS { input: screenshots = GenerateIgvScreenshots.screenshots, - output_dir = gcs_output_dir + output_dir = output_dir } output { - Array[File] screenshot_files = GenerateIgvScreenshots.screenshots + Array[File] final_screenshots = FinalizeToGCS.uploaded_files } } @@ -47,7 +47,7 @@ task GenerateIgvScreenshots { command <<< # Ensure the snapshots directory exists and set permissions - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + mkdir -p /cromwell_root/output/IGV_Snapshots && chmod 777 /cromwell_root/output/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 @@ -65,10 +65,9 @@ task GenerateIgvScreenshots { >>> output { - Array[File] screenshots = glob("/output/IGV_Snapshots/*.png") + Array[File] screenshots = glob("/cromwell_root/output/IGV_Snapshots/*.png") } - # Calculate dynamic disk size based on the size of BAM files Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 runtime { @@ -88,22 +87,27 @@ task FinalizeToGCS { } command <<< - # Copy the output PNG files to Google Cloud Storage + set -euxo pipefail + + # Ensure the output directory exists and is properly formatted + gcs_output_dir=$(echo ~{output_dir} | sed 's:/*$::') + + # Copy all screenshots to Google Cloud Storage for file in ~{sep=' ' screenshots}; do - gsutil cp "$file" "~{output_dir}/" + gsutil cp $file $gcs_output_dir/ done >>> output { - Array[File] uploaded_files = screenshots + Array[File] uploaded_files = glob("~{output_dir}/*.png") } runtime { cpu: 1 - memory: "2 GiB" - disks: "local-disk 10 HDD" - preemptible: 2 - maxRetries: 1 - docker: "gcr.io/google-containers/toolbox:latest" + memory: "4 GiB" + disks: "local-disk 10 SSD" 
+ preemptible: 1 + maxRetries: 2 + docker: "google/cloud-sdk:slim" } } From 7ac3c5461710e62dea5eb2aaa424e71384b666e7 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 15:51:08 -0400 Subject: [PATCH 21/49] fix bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index d9558132b..9913b4581 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,11 +4,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -21,11 +18,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, + aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location alignments = alignments, - alignments_bai = alignments_bai, + alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From 3f7005d30c30fe3fffca92118e6b07151001ccff Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:11:34 -0400 Subject: [PATCH 22/49] updates for bai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 9913b4581..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,8 +4,11 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -18,11 +21,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, - alignments_bai = alignments + ".bai", # Automatically infer BAI location + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From c36a89991491c98471fbd13baef604c6c8f005fd Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:20:54 -0400 
Subject: [PATCH 23/49] reverting back --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index d9558132b..9913b4581 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -4,11 +4,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file String sample_name # Sample name to use in filenames @@ -21,11 +18,11 @@ workflow IGVScreenshotWorkflow { call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, + aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location alignments = alignments, - alignments_bai = alignments_bai, + alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, sample_name = sample_name, From 6b9baf765c9f39db5a0dae132997ece1ce379b94 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 16:23:02 -0400 Subject: [PATCH 24/49] playing with bai definition --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 190 ++++++++---------- 1 file changed, 82 insertions(+), 108 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 04a5e48f2..d9558132b 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,113 +1,87 @@ version 1.0 -workflow IGV_HaplotypeViz { - input { - File aligned_bam_hap1 - File aligned_bam_hap2 - File alignments_bam - File ref_fasta - File targeted_bed_file - String sample_name - String disk_type = "SSD" - String output_dir - } - - call GenerateIgvScreenshots { - input: - aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap2 = aligned_bam_hap2, - alignments_bam = alignments_bam, - ref_fasta = ref_fasta, - targeted_bed_file = targeted_bed_file, - sample_name = sample_name, - disk_type = disk_type - } - - call FinalizeToGCS { - input: - screenshots = GenerateIgvScreenshots.screenshots, - output_dir = output_dir - } - - output { - Array[File] final_screenshots = FinalizeToGCS.uploaded_files - } +workflow IGVScreenshotWorkflow { + + input { + File aligned_bam_hap1 # BAM file for haplotype 1 + File aligned_bam_hap1_bai # BAM index for haplotype 1 + File aligned_bam_hap2 # BAM file for haplotype 2 + File aligned_bam_hap2_bai # BAM index for haplotype 2 + File alignments # BAM file for total alignments + File alignments_bai # BAM index for total alignments + File bed_file # BED file with regions + File fasta_file # Reference FASTA file + String sample_name # Sample name to use in filenames + Int image_height = 500 + Int memory_mb = 4000 + Int disk_gb = 100 # Disk size in GB, default to 100 GB + String docker_image =
"us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + } + + call RunIGVScreenshot { + input: + aligned_bam_hap1 = aligned_bam_hap1, + aligned_bam_hap1_bai = aligned_bam_hap1_bai, + aligned_bam_hap2 = aligned_bam_hap2, + aligned_bam_hap2_bai = aligned_bam_hap2_bai, + alignments = alignments, + alignments_bai = alignments_bai, + bed_file = bed_file, + fasta_file = fasta_file, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image + } + + output { + Array[File] snapshots = RunIGVScreenshot.snapshots + } } -task GenerateIgvScreenshots { - input { - File aligned_bam_hap1 - File aligned_bam_hap2 - File alignments_bam - File ref_fasta - File targeted_bed_file - String sample_name - String disk_type - } - - command <<< - # Ensure the snapshots directory exists and set permissions - mkdir -p /cromwell_root/output/IGV_Snapshots && chmod 777 /cromwell_root/output/IGV_Snapshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 & export DISPLAY=:1 - - # Run the Python script to generate IGV snapshots - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments_bam} \ - --fasta_file ~{ref_fasta} \ - --sample_name ~{sample_name} \ - -r ~{targeted_bed_file} \ - -ht 500 \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem 4000 - - >>> - - output { - Array[File] screenshots = glob("/cromwell_root/output/IGV_Snapshots/*.png") - } - - Int disk_size = ceil(size(aligned_bam_hap1, "GiB")) + ceil(size(aligned_bam_hap2, "GiB")) + ceil(size(alignments_bam, "GiB")) + 10 - - runtime { - cpu: 4 - memory: "8 GiB" - disks: "local-disk " + disk_size + " " + disk_type - preemptible: 2 - maxRetries: 1 - docker: "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" - } -} - -task FinalizeToGCS { - input { - Array[File] screenshots - String output_dir - } - - command <<< - set -euxo pipefail - - # Ensure the output directory exists and is properly formatted - gcs_output_dir=$(echo ~{output_dir} | sed 's:/*$::') - - # Copy all screenshots to Google Cloud Storage - for file in ~{sep=' ' screenshots}; do - gsutil cp $file $gcs_output_dir/ - done - >>> - - output { - Array[File] uploaded_files = glob("~{output_dir}/*.png") - } - - runtime { - cpu: 1 - memory: "4 GiB" - disks: "local-disk 10 SSD" - preemptible: 1 - maxRetries: 2 - docker: "google/cloud-sdk:slim" - } +task RunIGVScreenshot { + input { + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + String sample_name + Int image_height + Int memory_mb + Int disk_gb + String docker_image + } + + command { + mkdir -p IGV_Snapshots + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} + } + + runtime { + docker: docker_image + memory: "~{memory_mb} MB" + cpu: 2 + disks: "local-disk ~{disk_gb} HDD" + } + + output { + Array[File] snapshots = glob("IGV_Snapshots/*.png") + } } From 889c1fdfa7b6ee312f82bac28592ce3f51695123 Mon Sep 17 00:00:00 2001 
From: Shadi Zaheri Date: Tue, 10 Sep 2024 17:03:01 -0400 Subject: [PATCH 25/49] fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 9913b4581..a33397a79 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -8,6 +8,7 @@ workflow IGVScreenshotWorkflow { File alignments # BAM file for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file + File fasta_fai # FASTA index (.fai) file String sample_name # Sample name to use in filenames Int image_height = 500 Int memory_mb = 4000 @@ -25,6 +26,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments + ".bai", # Automatically infer BAI location bed_file = bed_file, fasta_file = fasta_file, + fasta_fai = fasta_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -47,6 +49,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file + File fasta_fai String sample_name Int image_height Int memory_mb @@ -68,6 +71,7 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ + --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } From b2c08364fc9b092ea30d9a4f5409448266a3c916 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 17:05:35 -0400 Subject: [PATCH 26/49] fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index d9558132b..7490c60f0 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -11,6 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai # BAM index for total alignments File bed_file # BED file with regions File fasta_file # Reference FASTA file + File fasta_fai # FAI index for the FASTA file String sample_name # Sample name to use in filenames Int image_height = 500 Int memory_mb = 4000 @@ -28,6 +29,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, + fasta_fai = fasta_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -50,6 +52,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file + File fasta_fai # FAI index for the FASTA file String sample_name Int image_height Int memory_mb @@ -71,6 +74,7 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ + --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } From 814c24e18bcaae98bc243295c6235e8decab6261 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:07:53 -0400 Subject: [PATCH 27/49] updated the command section --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 7490c60f0..5fc32bdb7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,18 +1,18 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 #
BAM file for haplotype 1 - File aligned_bam_hap1_bai # BAM index for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File aligned_bam_hap2_bai # BAM index for haplotype 2 - File alignments # BAM file for total alignments - File alignments_bai # BAM index for total alignments - File bed_file # BED file with regions - File fasta_file # Reference FASTA file - File fasta_fai # FAI index for the FASTA file - String sample_name # Sample name to use in filenames + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + File fasta_file_fai # Include the .fai file + String sample_name Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB @@ -29,7 +29,7 @@ workflow IGVScreenshotWorkflow { alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, - fasta_fai = fasta_fai, + fasta_file_fai = fasta_file_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, @@ -43,6 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -52,7 +53,7 @@ task RunIGVScreenshot { File alignments_bai File bed_file File fasta_file - File fasta_fai # FAI index for the FASTA file + File fasta_file_fai String sample_name Int image_height Int memory_mb @@ -60,23 +61,24 @@ task RunIGVScreenshot { String docker_image } - command { - mkdir -p IGV_Snapshots + command <<< + # Ensure the snapshots directory exists + mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} - } + >>> runtime { docker: docker_image @@ -86,6 +88,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 1fa09470262f518c6ab57671919621cf06838c93 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:15:30 -0400 Subject: [PATCH 28/49] modify fai --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index a33397a79..337c797ea 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -58,7 +58,10 @@ task RunIGVScreenshot { } command { - mkdir -p IGV_Snapshots + # Ensure the snapshots directory exists + mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 @@ -71,7 +74,6 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --fasta_fai ~{fasta_fai} \ --sample_name ~{sample_name} } @@ -83,6 +85,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = 
glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 965bf77e92499df8c40875ecaff046baf228a701 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:39:30 -0400 Subject: [PATCH 29/49] fix --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 5fc32bdb7..73384ecc7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -62,8 +62,8 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /cromwell_root/IGV_Snapshots && chmod 777 /cromwell_root/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -88,6 +88,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") } } From 4712c35f27898ede8ae36e628868c95295e30de2 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 19:47:29 -0400 Subject: [PATCH 30/49] gs input --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 337c797ea..05f1d8cb2 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -13,6 +13,7 @@ workflow IGVScreenshotWorkflow { Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB + String gs_bucket_path # Google Storage bucket path String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use } @@ -31,6 +32,7 @@ workflow IGVScreenshotWorkflow { image_height = image_height, memory_mb = memory_mb, disk_gb = disk_gb, + gs_bucket_path = gs_bucket_path, docker_image = docker_image } @@ -54,12 +56,13 @@ task RunIGVScreenshot { Int image_height Int memory_mb Int disk_gb + String gs_bucket_path String docker_image } command { - # Ensure the snapshots directory exists - mkdir -p /output/IGV_Snapshots && chmod 777 /output/IGV_Snapshots + # Ensure the snapshots directory exists under the current working directory + mkdir -p IGV_Snapshots && chmod 777 IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -75,6 +78,12 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} + + # Move the generated snapshots to the output directory + mv *.png IGV_Snapshots/ + + # Copy the results to the Google Storage bucket + gsutil -m cp IGV_Snapshots/*.png ~{gs_bucket_path} } runtime { @@ -85,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("IGV_Snapshots/*.png") } } From ce84cab7141d9dccf6a920343b2fe4f92ce569ce Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 20:07:20 -0400 Subject: [PATCH 31/49] fix output --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 76 ++++++++++++++----- 1 file changed, 58 insertions(+), 18 
deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 73384ecc7..7f5fa2843 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -17,6 +17,7 @@ workflow IGVScreenshotWorkflow { Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + String output_gcs_path # GCS path where the screenshots will be uploaded } call RunIGVScreenshot { @@ -37,8 +38,15 @@ workflow IGVScreenshotWorkflow { docker_image = docker_image } + # Finalize the output by uploading it to the Google Cloud bucket + call FinalizeScreenshots { + input: + screenshots = RunIGVScreenshot.screenshots, + outdir = output_gcs_path + } + output { - Array[File] snapshots = RunIGVScreenshot.snapshots + Array[String] screenshots_gcs_paths = FinalizeScreenshots.gcs_paths } } @@ -62,22 +70,25 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /cromwell_root/IGV_Snapshots && chmod 777 /cromwell_root/IGV_Snapshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 - - # Run the IGV screenshot script with the provided inputs - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} + set -euo pipefail + + # Create a directory for screenshots in the working directory + mkdir screenshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} \ + --snapshot-dir "screenshots" >>> runtime { @@ -88,6 +99,35 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") + Array[File] screenshots = glob("screenshots/*.png") + } +} + +task FinalizeScreenshots { + + input { + Array[File] screenshots # Array of screenshot files to finalize + String outdir # Google Cloud Storage directory to upload files to + } + + command <<< + set -euxo pipefail + + gcs_output_dir=$(echo "~{outdir}" | sed 's:/*$::') + + for f in ~{sep=' ' screenshots}; do + gcloud storage cp "$f" "${gcs_output_dir}/$(basename $f)" + done + >>> + + output { + Array[String] gcs_paths = read_lines("gcs_output_files.txt") + } + + runtime { + cpu: 1 + memory: "1 GiB" + disks: "local-disk 10 HDD" + docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3" } } From 37fb974ab7127fb10fe47f5378c82991e63aa206 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 21:39:46 -0400 Subject: [PATCH 32/49] fixing the output-the screenshots were created --- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 77 +++++-------------- 1 file changed, 19 insertions(+), 58 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl 
b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 7f5fa2843..929aa28a8 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -17,7 +17,6 @@ workflow IGVScreenshotWorkflow { Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use - String output_gcs_path # GCS path where the screenshots will be uploaded } call RunIGVScreenshot { @@ -38,15 +37,8 @@ workflow IGVScreenshotWorkflow { docker_image = docker_image } - # Finalize the output by uploading it to the Google Cloud bucket - call FinalizeScreenshots { - input: - screenshots = RunIGVScreenshot.screenshots, - outdir = output_gcs_path - } - output { - Array[String] screenshots_gcs_paths = FinalizeScreenshots.gcs_paths + Array[File] snapshots = RunIGVScreenshot.snapshots } } @@ -70,25 +62,23 @@ task RunIGVScreenshot { } command <<< - set -euo pipefail - - # Create a directory for screenshots in the working directory - mkdir screenshots - - # Start a virtual frame buffer to allow IGV to render - Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & - export DISPLAY=:1 - - # Run the IGV screenshot script with the provided inputs - python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} \ - --snapshot-dir "screenshots" + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /output/IGV_Snapshots + + # Start a virtual frame buffer to allow IGV to render + Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & + export DISPLAY=:1 + + # Run the IGV screenshot script with the provided inputs + python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ + ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ + -r ~{bed_file} \ + -ht ~{image_height} \ + -bin /opt/IGV_Linux_2.18.2/igv.sh \ + -mem ~{memory_mb} \ + --fasta_file ~{fasta_file} \ + --sample_name ~{sample_name} \ + --snapshot-dir "/output/IGV_Snapshots" >>> runtime { @@ -99,35 +89,6 @@ task RunIGVScreenshot { } output { - Array[File] screenshots = glob("screenshots/*.png") - } -} - -task FinalizeScreenshots { - - input { - Array[File] screenshots # Array of screenshot files to finalize - String outdir # Google Cloud Storage directory to upload files to - } - - command <<< - set -euxo pipefail - - gcs_output_dir=$(echo "~{outdir}" | sed 's:/*$::') - - for f in ~{sep=' ' screenshots}; do - gcloud storage cp "$f" "${gcs_output_dir}/$(basename $f)" - done - >>> - - output { - Array[String] gcs_paths = read_lines("gcs_output_files.txt") - } - - runtime { - cpu: 1 - memory: "1 GiB" - disks: "local-disk 10 HDD" - docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3" + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From f8a18c10cf96ce083e39e55a10ee6e4d22c8655f Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 21:59:48 -0400 Subject: [PATCH 33/49] fixing the output-the screenshots were created --- .../PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 929aa28a8..a9a05ad4d 100644 --- 
a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -62,8 +62,10 @@ task RunIGVScreenshot { } command <<< - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /output/IGV_Snapshots + set -euo pipefail + + # Ensure the snapshots directory exists under the local disk path + mkdir -p /mnt/local-disk/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -78,7 +80,7 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} \ - --snapshot-dir "/output/IGV_Snapshots" + --snapshot-dir "/mnt/local-disk/IGV_Snapshots" >>> runtime { @@ -89,6 +91,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/mnt/local-disk/IGV_Snapshots/*.png") } } From cd7c7692b01d60a7c5416aee542eff298f490ef9 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 22:18:51 -0400 Subject: [PATCH 34/49] fixing the output-the screenshots were created --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index a9a05ad4d..21220cdf6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -64,8 +64,8 @@ task RunIGVScreenshot { command <<< set -euo pipefail - # Ensure the snapshots directory exists under the local disk path - mkdir -p /mnt/local-disk/IGV_Snapshots + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p /cromwell_root/IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -80,7 +80,7 @@ task RunIGVScreenshot { -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} \ - --snapshot-dir "/mnt/local-disk/IGV_Snapshots" + --snapshot-dir "/cromwell_root/IGV_Snapshots" >>> runtime { @@ -91,6 +91,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/mnt/local-disk/IGV_Snapshots/*.png") + Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") } } From 2b238381f2173cf9efecbae7597d2efa5c9b264d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 23:21:32 -0400 Subject: [PATCH 35/49] fixing ... 
--- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 21220cdf6..96ca42fe3 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -71,7 +71,7 @@ task RunIGVScreenshot { Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs + # Run the IGV screenshot script with the provided inputs, no --snapshot-dir python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -79,8 +79,10 @@ task RunIGVScreenshot { -bin /opt/IGV_Linux_2.18.2/igv.sh \ -mem ~{memory_mb} \ --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} \ - --snapshot-dir "/cromwell_root/IGV_Snapshots" + --sample_name ~{sample_name} + + # Move the screenshots to the output directory + mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { From 5d8fb8dbc5e12cb8b3fc9cd724d99f56a8518117 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 10 Sep 2024 23:37:11 -0400 Subject: [PATCH 36/49] output directory --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 05f1d8cb2..bcb13bca6 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,38 +1,39 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 # BAM file for haplotype 1 - File aligned_bam_hap2 # BAM file for haplotype 2 - File alignments # BAM file for total alignments - File bed_file # BED file with regions - File fasta_file # Reference FASTA file - File fasta_fai # FASTA index (.fai) file - String sample_name # Sample name to use in filenames + File aligned_bam_hap1 + File aligned_bam_hap1_bai + File aligned_bam_hap2 + File aligned_bam_hap2_bai + File alignments + File alignments_bai + File bed_file + File fasta_file + File fasta_file_fai + String sample_name Int image_height = 500 Int memory_mb = 4000 Int disk_gb = 100 # Disk size in GB, default to 100 GB - String gs_bucket_path # Google Storage bucket path String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use } call RunIGVScreenshot { input: aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1 + ".bai", # Automatically infer BAI location + aligned_bam_hap1_bai = aligned_bam_hap1_bai, aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2 + ".bai", # Automatically infer BAI location + aligned_bam_hap2_bai = aligned_bam_hap2_bai, alignments = alignments, - alignments_bai = alignments + ".bai", # Automatically infer BAI location + alignments_bai = alignments_bai, bed_file = bed_file, fasta_file = fasta_file, - fasta_fai = fasta_fai, + fasta_file_fai = fasta_file_fai, sample_name = sample_name, image_height = image_height, memory_mb = memory_mb, disk_gb = disk_gb, - gs_bucket_path = gs_bucket_path, docker_image = docker_image } @@ -42,6 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -51,18 +53,19 @@ task RunIGVScreenshot { File alignments_bai File bed_file 
File fasta_file - File fasta_fai + File fasta_file_fai String sample_name Int image_height Int memory_mb Int disk_gb - String gs_bucket_path String docker_image } - command { - # Ensure the snapshots directory exists under the current working directory - mkdir -p IGV_Snapshots && chmod 777 IGV_Snapshots + command <<< + set -euo pipefail + + # Ensure the snapshots directory exists + mkdir -p IGV_Snapshots # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -71,7 +74,6 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - ~{aligned_bam_hap1_bai} ~{aligned_bam_hap2_bai} ~{alignments_bai} \ -r ~{bed_file} \ -ht ~{image_height} \ -bin /opt/IGV_Linux_2.18.2/igv.sh \ @@ -79,12 +81,9 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - # Move the generated snapshots to the output directory + # Move the screenshots to the IGV_Snapshots directory mv *.png IGV_Snapshots/ - - # Copy the results to the Google Storage bucket - gsutil -m cp IGV_Snapshots/*.png ~{gs_bucket_path} - } + >>> runtime { docker: docker_image From 793fddff9dbbd089481a99cc08e86a4c9c255250 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:32:49 -0400 Subject: [PATCH 37/49] again --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 96ca42fe3..2a0fa22a7 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -65,7 +65,8 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /cromwell_root/IGV_Snapshots + #mkdir -p /cromwell_root/IGV_Snapshots + mkdir snap_out # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,7 +83,7 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} # Move the screenshots to the output directory - mv *.png /cromwell_root/IGV_Snapshots/ + #mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { @@ -93,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("/cromwell_root/IGV_Snapshots/*.png") + Array[File] snapshots = glob("snap_out/*.png") } } From d98e0fa2ce5514c85c429c2b4d5b8e2bec71e4e0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:46:56 -0400 Subject: [PATCH 38/49] fixing the output --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 2a0fa22a7..a95712bd1 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -65,8 +65,8 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists under the mounted disk path - #mkdir -p /cromwell_root/IGV_Snapshots - mkdir snap_out + mkdir -p /output/IGV_Snapshots + #mkdir snap_out # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,8 +82,6 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - #
Move the screenshots to the output directory - #mv *.png /cromwell_root/IGV_Snapshots/ >>> runtime { @@ -94,6 +92,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("snap_out/*.png") + Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From f2001921ad1b5c476d3c78edc6063abe6e74a4e0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 08:56:14 -0400 Subject: [PATCH 39/49] removed mkdir --- .../PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index a95712bd1..d5cc19d17 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,7 +1,7 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -11,7 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai File bed_file File fasta_file - File fasta_file_fai # Include the .fai file + File fasta_file_fai # Include the .fai file for localization String sample_name Int image_height = 500 Int memory_mb = 4000 @@ -43,7 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -64,15 +64,11 @@ task RunIGVScreenshot { command <<< set -euo pipefail - # Ensure the snapshots directory exists under the mounted disk path - mkdir -p /output/IGV_Snapshots - #mkdir snap_out - # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs, no --snapshot-dir + # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -82,6 +78,7 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} + # No need to create the snapshot directory, it's handled in the Python script >>> runtime { @@ -92,6 +89,7 @@ task RunIGVScreenshot { } output { + # Collect the output from the Python script's default snapshot directory Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") } } From 7e692912d443c519810765160b26573166fa7b72 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:11:43 -0400 Subject: [PATCH 40/49] updated the docker and output directory in the docker /cromwell_root/output/IGV_Snapshots --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index bcb13bca6..e3dca975c 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -65,7 +65,7 @@ task RunIGVScreenshot { set -euo pipefail # Ensure the snapshots directory exists - mkdir -p IGV_Snapshots + mkdir -p 'output/IGV_Snapshots' # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -82,14 +82,14 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv *.png IGV_Snapshots/ + mv *.png 'output/IGV_Snapshots/' >>> runtime { docker: docker_image memory: "~{memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} HDD" + disks: "local-disk ~{disk_gb} SSD" } output { 
From bfd534b0121c1040a9706d0e782bc0c5d2e16963 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:14:49 -0400 Subject: [PATCH 41/49] mv -- *.png output/IGV_Snapshots/ --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- .../Utility/IGV_HaplotypeViz_bai_try.wdl | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index e3dca975c..85232a22e 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -82,7 +82,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv *.png 'output/IGV_Snapshots/' + mv -- *.png 'output/IGV_Snapshots/' >>> runtime { diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index d5cc19d17..cc579a562 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -1,7 +1,7 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -11,7 +11,7 @@ workflow IGVScreenshotWorkflow { File alignments_bai File bed_file File fasta_file - File fasta_file_fai # Include the .fai file for localization + File fasta_file_fai # Include the .fai file String sample_name Int image_height = 500 Int memory_mb = 4000 @@ -43,7 +43,7 @@ workflow IGVScreenshotWorkflow { } task RunIGVScreenshot { - + input { File aligned_bam_hap1 File aligned_bam_hap1_bai @@ -64,11 +64,15 @@ task RunIGVScreenshot { command <<< set -euo pipefail + # Ensure the snapshots directory exists under the mounted disk path + mkdir -p 'output/IGV_Snapshots/' + #mkdir snap_out + # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & export DISPLAY=:1 - # Run the IGV screenshot script with the provided inputs + # Run the IGV screenshot script with the provided inputs, no --snapshot-dir python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ -r ~{bed_file} \ @@ -78,7 +82,8 @@ task RunIGVScreenshot { --fasta_file ~{fasta_file} \ --sample_name ~{sample_name} - # No need to create the snapshot directory, it's handled in the Python script + # Move the screenshots to the output directory + mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -89,7 +94,6 @@ task RunIGVScreenshot { } output { - # Collect the output from the Python script's default snapshot directory - Array[File] snapshots = glob("/output/IGV_Snapshots/*.png") + Array[File] snapshots = glob("snap_out/*.png") } } From d60a7169026444cb1cfbfb1b2169309ebfc076c0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 11:21:41 -0400 Subject: [PATCH 42/49] SSD --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index cc579a562..6e2db08a0 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -90,7 +90,7 @@ task RunIGVScreenshot { docker: docker_image memory: "~{memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} HDD" + disks: "local-disk ~{disk_gb} SSD" } output { From 28a6185783b9211dde843b89bd3f5f5713b73354 Mon Sep 17 
00:00:00 2001 From: Shadi Zaheri Date: Wed, 11 Sep 2024 13:31:52 -0400 Subject: [PATCH 43/49] output fix --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 4 ++-- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 85232a22e..84a877e53 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -82,7 +82,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the IGV_Snapshots directory - mv -- *.png 'output/IGV_Snapshots/' + #mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -93,6 +93,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("IGV_Snapshots/*.png") + Array[File] snapshots = glob("output/IGV_Snapshots/*.png") } } diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl index 6e2db08a0..968dec404 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz_bai_try.wdl @@ -83,7 +83,7 @@ task RunIGVScreenshot { --sample_name ~{sample_name} # Move the screenshots to the output directory - mv -- *.png 'output/IGV_Snapshots/' + # mv -- *.png 'output/IGV_Snapshots/' >>> runtime { @@ -94,6 +94,6 @@ task RunIGVScreenshot { } output { - Array[File] snapshots = glob("snap_out/*.png") + Array[File] snapshots = glob("output/IGV_Snapshots/*.png") } } From de9a7e8cebc6015e7ac2dec360af36bd2a8acb7d Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Tue, 17 Sep 2024 10:23:36 -0400 Subject: [PATCH 44/49] making the folder for the script --- scripts/igv/make_igv_screenshot.py | 251 +++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 scripts/igv/make_igv_screenshot.py diff --git a/scripts/igv/make_igv_screenshot.py b/scripts/igv/make_igv_screenshot.py new file mode 100644 index 000000000..3fa3348fc --- /dev/null +++ b/scripts/igv/make_igv_screenshot.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python + +''' +This script will load IGV in a virtual X window, load all supplied input files +as tracks, and take snapshots at the coordinates listed in the BED formatted +region file. +''' + +# ~~~~ LOAD PACKAGES ~~~~~~ # +import sys +import os +import subprocess as sp +import argparse +import datetime +from pathlib import Path + +# ~~~~ DIRECTORY AND DEFAULTS ~~~~~~ # +THIS_DIR = os.path.dirname(os.path.realpath(__file__)) +SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Snapshot output directory +default_igv_sh = os.path.join(THIS_DIR, 'igv.sh') +default_regions_bed = os.path.join(THIS_DIR, 'regions.bed') + +# ~~~~ CUSTOM FUNCTIONS ~~~~~~ # +def file_exists(myfile, kill=False): + ''' + Checks to make sure a file exists, optionally kills the script if file is missing. + ''' + if not os.path.isfile(myfile): + print(f"ERROR: File '{myfile}' does not exist!") + if kill: + print("Exiting...") + sys.exit() + +def check_for_fai(fasta_file): + ''' + Check to make sure a .fai index file exists for the FASTA file. + If not, alert the user. 
+ ''' + fai_file = fasta_file + ".fai" + if not os.path.isfile(fai_file): + print(f"ERROR: FASTA index file '{fai_file}' is missing!") + print("Please generate it with 'samtools faidx' before running the script.") + sys.exit(1) + +def check_for_bai(bam_file): + ''' + Check to make sure a .bam.bai file is present in the same directory as the .bam file. + ''' + bai_file = bam_file + ".bai" + if not os.path.isfile(bai_file): + print(f"ERROR: BAM index file '{bai_file}' is missing!") + sys.exit(1) + +def verify_input_files_list(files_list): + ''' + Check to make sure input files meet criteria. + Add more criteria as issues are found. + ''' + for file in files_list: + # Check if the file exists + if not os.path.isfile(file): + print(f"ERROR: Input file '{file}' does not exist!") + sys.exit(1) + + # For BAM files, ensure the corresponding BAI file exists + if file.endswith(".bam"): + check_for_bai(file) + +def subprocess_cmd(command): + ''' + Runs a terminal command with stdout piping enabled. + ''' + process = sp.Popen(command, stdout=sp.PIPE, shell=True) + proc_stdout = process.communicate()[0].strip() + print(proc_stdout) + +def make_chrom_region_list(region_file): + ''' + Creates a list of tuples representing the regions from the BED file [(chrom, start, stop), ...]. + ''' + region_list = [] + with open(region_file) as f: + for line in f: + chrom, start, stop = line.split()[0:3] + region_list.append((chrom, start, stop)) + return region_list + +def make_IGV_chrom_loc(region): + ''' + Return a chrom location string in IGV format. + ''' + chrom, start, stop = region[0:3] + return f'{chrom}:{start}-{stop}' + +def make_snapshot_filename(region, height, sample_name=None): + ''' + Formats a filename for the IGV snapshot. + Adds more useful context to filenames by including region information. + ''' + chrom, start, stop = region[0:3] + if not chrom.startswith("chr"): + chrom = f"chr{chrom}" + if sample_name: + return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.png") + return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.png") + +def mkdir_p(path): + ''' + Recursively create a directory using pathlib. + ''' + Path(path).mkdir(parents=True, exist_ok=True) + +def get_open_X_server(): + ''' + Search for an open Xvfb port to render into. + ''' + x_serv_command = ''' + for serv_num in $(seq 1 1000); do + if ! (xdpyinfo -display :${serv_num}) &>/dev/null; then + echo "$serv_num" && break + fi + done + ''' + process = sp.Popen(x_serv_command, stdout=sp.PIPE, shell=True) + output = process.communicate()[0].strip().decode('utf-8') + + # Handle if xdpyinfo returns unexpected output + try: + x_serv_port = int(output.split('\n')[0].strip()) # Take only the port number + except ValueError: + print(f"Unexpected xdpyinfo output: {output}") + sys.exit(1) + + return x_serv_port + +def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name): + ''' + Write out a batchscript for IGV. 
+ ''' + with open(IGV_batchscript_file, "w") as f: + # Initialize IGV + f.write("new\n") + f.write(f"genome {fasta_file}\n") + f.write(f"snapshotDirectory {IGV_snapshot_dir}\n") + f.write(f"maxPanelHeight {image_height}\n") + + # Load BAM files + for file in input_files: + f.write(f"load {file}\n") + + # Write regions and snapshots + region_list = make_chrom_region_list(region_file) + for region in region_list: + chrom_loc = make_IGV_chrom_loc(region) + svsize = int(region[2]) - int(region[1]) + + # For large regions, split snapshots into two: start and end + if svsize > 10000: + f.write(f"goto {region[0]}:{int(region[1]) - 1000}-{int(region[1]) + 500}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.png\n") + f.write(f"goto {region[0]}:{int(region[2]) - 500}-{int(region[2]) + 1000}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.png\n") + else: + f.write(f"goto {chrom_loc}\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.png\n") + + f.write("exit\n") + +def run_IGV_script(igv_script, igv_sh, memMB): + ''' + Run an IGV batch script and ensure snapshots are generated. + ''' + # Ensure the output directory exists + mkdir_p(SNAPSHOT_DIR) + + # Get an open Xvfb port + x_serv_port = get_open_X_server() + print(f"Open Xvfb port found on: {x_serv_port}") + + # Build and run IGV command using igv.sh + igv_command = f"xvfb-run --auto-servernum --server-num=1 bash {igv_sh} -b {igv_script}" + print(f"IGV command: {igv_command}") + + # Record start time + startTime = datetime.datetime.now() + print(f"Started at: {startTime}") + + # Run the IGV command + subprocess_cmd(igv_command) + + # Check if snapshots were generated + snapshot_files = os.listdir(SNAPSHOT_DIR) + if len(snapshot_files) == 0: + print("ERROR: No snapshot files were generated.") + else: + print(f"Generated {len(snapshot_files)} snapshot files.") + + elapsed_time = datetime.datetime.now() - startTime + print(f"Elapsed time: {elapsed_time}") + +def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name): + ''' + Main control function for the script. + ''' + batchscript_file = os.path.join(SNAPSHOT_DIR, "IGV_snapshots.bat") + + # Check if input files, regions, and IGV script exist + file_exists(region_file, kill=True) + file_exists(igv_sh_bin, kill=True) + verify_input_files_list(input_files) + + # Check if the reference FASTA file and its index exist + file_exists(fasta_file, kill=True) + check_for_fai(fasta_file) + + print(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") + print(f"Reference FASTA: {fasta_file}") + print(f"Track height: {image_height}") + print(f"IGV script file: {igv_sh_bin}") + print(f"Batchscript file: {batchscript_file}") + print(f"Region file: {region_file}") + + # Create output directory + mkdir_p(SNAPSHOT_DIR) + + # Write the IGV batch script + write_IGV_script(input_files=input_files, region_file=region_file, IGV_batchscript_file=batchscript_file, IGV_snapshot_dir=SNAPSHOT_DIR, fasta_file=fasta_file, image_height=image_height, sample_name=sample_name) + + # Run the IGV batch script + run_IGV_script(igv_script=batchscript_file, igv_sh=igv_sh_bin, memMB=igv_mem) + +def run(): + ''' + Parse script args to run the script. 
+ ''' + parser = argparse.ArgumentParser(description='IGV snapshot automator') + parser.add_argument("input_files", nargs='+', help="Paths to the files to create snapshots from (e.g., .bam files).") + parser.add_argument("-r", default=default_regions_bed, type=str, dest='region_file', help="BED file with regions to create snapshots over.") + parser.add_argument("-f", "--fasta_file", required=True, help="Reference FASTA file to use.") + parser.add_argument("-ht", default='500', type=str, dest='image_height', help="Height for the IGV tracks.") + parser.add_argument("-bin", default=default_igv_sh, type=str, dest='igv_sh_bin', help="Path to the IGV sh binary to run.") + parser.add_argument("-mem", default="4000", type=str, dest='igv_mem', help="Amount of memory to allocate to IGV, in Megabytes (MB).") + parser.add_argument("--sample_name", required=True, help="Sample name to include in snapshot filenames.") + + args = parser.parse_args() + + main(input_files=args.input_files, region_file=args.region_file, fasta_file=args.fasta_file, image_height=args.image_height, igv_sh_bin=args.igv_sh_bin, igv_mem=args.igv_mem, sample_name=args.sample_name) + +if __name__ == "__main__": + run() + From c5e49fb52575312906a73c8bbf226ece53fd2687 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 12:42:57 -0400 Subject: [PATCH 45/49] script for more igv inputs --- scripts/igv/make_igv_screenshot.py | 124 +++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/scripts/igv/make_igv_screenshot.py b/scripts/igv/make_igv_screenshot.py index 3fa3348fc..e154bd796 100644 --- a/scripts/igv/make_igv_screenshot.py +++ b/scripts/igv/make_igv_screenshot.py @@ -12,13 +12,18 @@ import subprocess as sp import argparse import datetime +import logging from pathlib import Path # ~~~~ DIRECTORY AND DEFAULTS ~~~~~~ # THIS_DIR = os.path.dirname(os.path.realpath(__file__)) -SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Snapshot output directory +SNAPSHOT_DIR = "/cromwell_root/output/IGV_Snapshots" # Default snapshot output directory default_igv_sh = os.path.join(THIS_DIR, 'igv.sh') default_regions_bed = os.path.join(THIS_DIR, 'regions.bed') +default_snapshot_format = 'png' + +# ~~~~ SET UP LOGGING ~~~~~~ # +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # ~~~~ CUSTOM FUNCTIONS ~~~~~~ # def file_exists(myfile, kill=False): @@ -26,10 +31,10 @@ def file_exists(myfile, kill=False): Checks to make sure a file exists, optionally kills the script if file is missing. ''' if not os.path.isfile(myfile): - print(f"ERROR: File '{myfile}' does not exist!") + logging.error(f"File '{myfile}' does not exist!") if kill: - print("Exiting...") - sys.exit() + logging.info("Exiting...") + sys.exit(1) def check_for_fai(fasta_file): ''' @@ -38,8 +43,7 @@ def check_for_fai(fasta_file): ''' fai_file = fasta_file + ".fai" if not os.path.isfile(fai_file): - print(f"ERROR: FASTA index file '{fai_file}' is missing!") - print("Please generate it with 'samtools faidx' before running the script.") + logging.error(f"FASTA index file '{fai_file}' is missing! 
Please generate it with 'samtools faidx' before running the script.") sys.exit(1) def check_for_bai(bam_file): @@ -48,7 +52,7 @@ def check_for_bai(bam_file): ''' bai_file = bam_file + ".bai" if not os.path.isfile(bai_file): - print(f"ERROR: BAM index file '{bai_file}' is missing!") + logging.error(f"BAM index file '{bai_file}' is missing!") sys.exit(1) def verify_input_files_list(files_list): @@ -59,7 +63,7 @@ def verify_input_files_list(files_list): for file in files_list: # Check if the file exists if not os.path.isfile(file): - print(f"ERROR: Input file '{file}' does not exist!") + logging.error(f"Input file '{file}' does not exist!") sys.exit(1) # For BAM files, ensure the corresponding BAI file exists @@ -72,7 +76,7 @@ def subprocess_cmd(command): ''' process = sp.Popen(command, stdout=sp.PIPE, shell=True) proc_stdout = process.communicate()[0].strip() - print(proc_stdout) + logging.info(proc_stdout) def make_chrom_region_list(region_file): ''' @@ -92,7 +96,7 @@ def make_IGV_chrom_loc(region): chrom, start, stop = region[0:3] return f'{chrom}:{start}-{stop}' -def make_snapshot_filename(region, height, sample_name=None): +def make_snapshot_filename(region, height, sample_name=None, snapshot_format='png'): ''' Formats a filename for the IGV snapshot. Adds more useful context to filenames by including region information. @@ -101,8 +105,8 @@ def make_snapshot_filename(region, height, sample_name=None): if not chrom.startswith("chr"): chrom = f"chr{chrom}" if sample_name: - return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.png") - return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.png") + return os.path.join(SNAPSHOT_DIR, f"{sample_name}_{chrom}_{start}_{stop}_region_h{height}.{snapshot_format}") + return os.path.join(SNAPSHOT_DIR, f"{chrom}_{start}_{stop}_region_h{height}.{snapshot_format}") def mkdir_p(path): ''' @@ -128,12 +132,12 @@ def get_open_X_server(): try: x_serv_port = int(output.split('\n')[0].strip()) # Take only the port number except ValueError: - print(f"Unexpected xdpyinfo output: {output}") + logging.error(f"Unexpected xdpyinfo output: {output}") sys.exit(1) return x_serv_port -def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name): +def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapshot_dir, fasta_file, image_height, sample_name, snapshot_format, optional_inputs): ''' Write out a batchscript for IGV. 
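+    snapshot_format is the image extension used for the snapshot commands
+    ('png' or 'jpg'); optional_inputs is a list of extra track paths (entries
+    that are None are skipped when the load commands are written).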
''' @@ -148,6 +152,11 @@ def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapsho for file in input_files: f.write(f"load {file}\n") + # Load optional inputs + for opt_input in optional_inputs: + if opt_input: + f.write(f"load {opt_input}\n") + # Write regions and snapshots region_list = make_chrom_region_list(region_file) for region in region_list: @@ -157,12 +166,12 @@ def write_IGV_script(input_files, region_file, IGV_batchscript_file, IGV_snapsho # For large regions, split snapshots into two: start and end if svsize > 10000: f.write(f"goto {region[0]}:{int(region[1]) - 1000}-{int(region[1]) + 500}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_start.{snapshot_format}\n") f.write(f"goto {region[0]}:{int(region[2]) - 500}-{int(region[2]) + 1000}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[2]}_end.{snapshot_format}\n") else: f.write(f"goto {chrom_loc}\n") - f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.png\n") + f.write(f"snapshot {sample_name}_{region[0]}_{region[1]}_{region[2]}_region.{snapshot_format}\n") f.write("exit\n") @@ -175,15 +184,15 @@ def run_IGV_script(igv_script, igv_sh, memMB): # Get an open Xvfb port x_serv_port = get_open_X_server() - print(f"Open Xvfb port found on: {x_serv_port}") + logging.info(f"Open Xvfb port found on: {x_serv_port}") # Build and run IGV command using igv.sh igv_command = f"xvfb-run --auto-servernum --server-num=1 bash {igv_sh} -b {igv_script}" - print(f"IGV command: {igv_command}") + logging.info(f"IGV command: {igv_command}") # Record start time startTime = datetime.datetime.now() - print(f"Started at: {startTime}") + logging.info(f"Started at: {startTime}") # Run the IGV command subprocess_cmd(igv_command) @@ -191,17 +200,20 @@ def run_IGV_script(igv_script, igv_sh, memMB): # Check if snapshots were generated snapshot_files = os.listdir(SNAPSHOT_DIR) if len(snapshot_files) == 0: - print("ERROR: No snapshot files were generated.") + logging.error("No snapshot files were generated.") else: - print(f"Generated {len(snapshot_files)} snapshot files.") + logging.info(f"Generated {len(snapshot_files)} snapshot files.") elapsed_time = datetime.datetime.now() - startTime - print(f"Elapsed time: {elapsed_time}") + logging.info(f"Elapsed time: {elapsed_time}") -def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name): +def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem, sample_name, snapshot_format, output_dir, optional_inputs): ''' Main control function for the script. 
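+    output_dir overrides the default snapshot directory, and optional_inputs is
+    the list assembled in run() from --truth_haplotype_1, --truth_haplotype_2,
+    --targeted_vcf and --second_alignment_reads; entries left unset are passed
+    through as None and skipped downstream.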
''' + global SNAPSHOT_DIR + SNAPSHOT_DIR = output_dir + batchscript_file = os.path.join(SNAPSHOT_DIR, "IGV_snapshots.bat") # Check if input files, regions, and IGV script exist @@ -213,18 +225,34 @@ def main(input_files, region_file, fasta_file, image_height, igv_sh_bin, igv_mem file_exists(fasta_file, kill=True) check_for_fai(fasta_file) - print(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") - print(f"Reference FASTA: {fasta_file}") - print(f"Track height: {image_height}") - print(f"IGV script file: {igv_sh_bin}") - print(f"Batchscript file: {batchscript_file}") - print(f"Region file: {region_file}") + # Verify optional input files if they are provided + for opt_input in optional_inputs: + if opt_input: + file_exists(opt_input) + + logging.info(f"\n~~~ IGV SNAPSHOT AUTOMATOR ~~~\n") + logging.info(f"Reference FASTA: {fasta_file}") + logging.info(f"Track height: {image_height}") + logging.info(f"IGV script file: {igv_sh_bin}") + logging.info(f"Batchscript file: {batchscript_file}") + logging.info(f"Region file: {region_file}") + logging.info(f"Snapshot format: {snapshot_format}") # Create output directory mkdir_p(SNAPSHOT_DIR) # Write the IGV batch script - write_IGV_script(input_files=input_files, region_file=region_file, IGV_batchscript_file=batchscript_file, IGV_snapshot_dir=SNAPSHOT_DIR, fasta_file=fasta_file, image_height=image_height, sample_name=sample_name) + write_IGV_script( + input_files=input_files, + region_file=region_file, + IGV_batchscript_file=batchscript_file, + IGV_snapshot_dir=SNAPSHOT_DIR, + fasta_file=fasta_file, + image_height=image_height, + sample_name=sample_name, + snapshot_format=snapshot_format, + optional_inputs=optional_inputs + ) # Run the IGV batch script run_IGV_script(igv_script=batchscript_file, igv_sh=igv_sh_bin, memMB=igv_mem) @@ -241,11 +269,39 @@ def run(): parser.add_argument("-bin", default=default_igv_sh, type=str, dest='igv_sh_bin', help="Path to the IGV sh binary to run.") parser.add_argument("-mem", default="4000", type=str, dest='igv_mem', help="Amount of memory to allocate to IGV, in Megabytes (MB).") parser.add_argument("--sample_name", required=True, help="Sample name to include in snapshot filenames.") + parser.add_argument("--snapshot_format", default=default_snapshot_format, choices=['png', 'jpg'], help="Output format for snapshots (png or jpg).") + parser.add_argument("--output_dir", default=SNAPSHOT_DIR, help="Custom output directory for snapshots.") + parser.add_argument("--truth_haplotype_1", help="Optional path to truth haplotype 1 file.") + parser.add_argument("--truth_haplotype_2", help="Optional path to truth haplotype 2 file.") + parser.add_argument("--targeted_vcf", help="Optional path to targeted VCF file.") + parser.add_argument("--second_alignment_reads", help="Optional path to second alignment reads file.") args = parser.parse_args() - main(input_files=args.input_files, region_file=args.region_file, fasta_file=args.fasta_file, image_height=args.image_height, igv_sh_bin=args.igv_sh_bin, igv_mem=args.igv_mem, sample_name=args.sample_name) + # Validate memory input + try: + memMB = int(args.igv_mem) + if memMB <= 0: + raise ValueError + except ValueError: + logging.error("Memory allocation must be a positive integer.") + sys.exit(1) -if __name__ == "__main__": - run() + # Collect optional inputs into a list + optional_inputs = [args.truth_haplotype_1, args.truth_haplotype_2, args.targeted_vcf, args.second_alignment_reads] + + main( + input_files=args.input_files, + region_file=args.region_file, + fasta_file=args.fasta_file, + 
image_height=args.image_height, + igv_sh_bin=args.igv_sh_bin, + igv_mem=memMB, + sample_name=args.sample_name, + snapshot_format=args.snapshot_format, + output_dir=args.output_dir, + optional_inputs=optional_inputs + ) +if __name__ == "__main__": + run() \ No newline at end of file From 0c7e8f2c5e421498738472263f0aafb50231e1f0 Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 12:47:07 -0400 Subject: [PATCH 46/49] first try --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 120 ++++++++++-------- 1 file changed, 68 insertions(+), 52 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 84a877e53..19bbe0492 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -1,71 +1,83 @@ version 1.0 workflow IGVScreenshotWorkflow { - + input { - File aligned_bam_hap1 - File aligned_bam_hap1_bai - File aligned_bam_hap2 - File aligned_bam_hap2_bai - File alignments - File alignments_bai - File bed_file - File fasta_file - File fasta_file_fai + File bam_file + File bam_file_bai + File regions_bed + File reference_fasta + File reference_fasta_fai String sample_name - Int image_height = 500 + Int image_height = 1000 Int memory_mb = 4000 - Int disk_gb = 100 # Disk size in GB, default to 100 GB - String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v982024" # The Docker image to use + Int disk_gb = 100 + String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v9172024" + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? targeted_vcf + File? targeted_vcf_tbi + File? second_alignment_reads + File? second_alignment_reads_bai } call RunIGVScreenshot { input: - aligned_bam_hap1 = aligned_bam_hap1, - aligned_bam_hap1_bai = aligned_bam_hap1_bai, - aligned_bam_hap2 = aligned_bam_hap2, - aligned_bam_hap2_bai = aligned_bam_hap2_bai, - alignments = alignments, - alignments_bai = alignments_bai, - bed_file = bed_file, - fasta_file = fasta_file, - fasta_file_fai = fasta_file_fai, - sample_name = sample_name, - image_height = image_height, - memory_mb = memory_mb, - disk_gb = disk_gb, - docker_image = docker_image + bam_file=bam_file, + bam_file_bai=bam_file_bai, + regions_bed=regions_bed, + reference_fasta=reference_fasta, + reference_fasta_fai=reference_fasta_fai, + sample_name=sample_name, + image_height=image_height, + memory_mb=memory_mb, + disk_gb=disk_gb, + docker_image=docker_image, + truth_haplotype_1=truth_haplotype_1, + truth_haplotype_1_bai=truth_haplotype_1_bai, + truth_haplotype_2=truth_haplotype_2, + truth_haplotype_2_bai=truth_haplotype_2_bai, + targeted_vcf=targeted_vcf, + targeted_vcf_tbi=targeted_vcf_tbi, + second_alignment_reads=second_alignment_reads, + second_alignment_reads_bai=second_alignment_reads_bai } output { - Array[File] snapshots = RunIGVScreenshot.snapshots + File igv_output_zip = RunIGVScreenshot.igv_output_zip } } task RunIGVScreenshot { input { - File aligned_bam_hap1 - File aligned_bam_hap1_bai - File aligned_bam_hap2 - File aligned_bam_hap2_bai - File alignments - File alignments_bai - File bed_file - File fasta_file - File fasta_file_fai + File bam_file + File bam_file_bai + File regions_bed + File reference_fasta + File reference_fasta_fai String sample_name Int image_height Int memory_mb Int disk_gb String docker_image + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? 
truth_haplotype_2_bai + File? targeted_vcf + File? targeted_vcf_tbi + File? second_alignment_reads + File? second_alignment_reads_bai } command <<< set -euo pipefail - # Ensure the snapshots directory exists - mkdir -p 'output/IGV_Snapshots' + # Ensure the output directory exists + mkdir -p igv_output # Start a virtual frame buffer to allow IGV to render Xvfb :1 -screen 0 1024x768x16 &> xvfb.log & @@ -73,26 +85,30 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ~{aligned_bam_hap1} ~{aligned_bam_hap2} ~{alignments} \ - -r ~{bed_file} \ - -ht ~{image_height} \ - -bin /opt/IGV_Linux_2.18.2/igv.sh \ - -mem ~{memory_mb} \ - --fasta_file ~{fasta_file} \ - --sample_name ~{sample_name} - - # Move the screenshots to the IGV_Snapshots directory - #mv -- *.png 'output/IGV_Snapshots/' + ${bam_file} \ + -r ${regions_bed} \ + -f ${reference_fasta} \ + --sample_name ${sample_name} \ + --snapshot_format png \ + --output_dir igv_output \ + -ht ${image_height} \ + ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ + ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ + ~{if defined(targeted_vcf) then "--targeted_vcf " + targeted_vcf else ""} \ + ~{if defined(second_alignment_reads) then "--second_alignment_reads " + second_alignment_reads else ""} + + # Zip the output directory + zip -r igv_output.zip igv_output/ >>> runtime { docker: docker_image - memory: "~{memory_mb} MB" + memory: "${memory_mb} MB" cpu: 2 - disks: "local-disk ~{disk_gb} SSD" + disks: "local-disk ${disk_gb} HDD" } output { - Array[File] snapshots = glob("output/IGV_Snapshots/*.png") + File igv_output_zip = "igv_output.zip" } } From 89fe93f2d2dce93f5d3934fdb91f6a16cef18e4b Mon Sep 17 00:00:00 2001 From: Shadi Zaheri Date: Wed, 18 Sep 2024 14:46:01 -0400 Subject: [PATCH 47/49] optional inputs --- .../PacBio/Utility/IGV_HaplotypeViz.wdl | 111 ++++++++++-------- 1 file changed, 61 insertions(+), 50 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 19bbe0492..5c1eb4e00 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -3,74 +3,86 @@ version 1.0 workflow IGVScreenshotWorkflow { input { - File bam_file - File bam_file_bai + File aligned_bam1 + File aligned_bam1_bai + File aligned_bam2 + File aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? haplotype_8x_hap1 + File? haplotype_8x_hap1_bai + File? haplotype_8x_hap2 + File? haplotype_8x_hap2_bai + File? TRGT_VCF + File? TRGT_VCF_tbi String sample_name Int image_height = 1000 Int memory_mb = 4000 Int disk_gb = 100 String docker_image = "us.gcr.io/broad-dsp-lrma/igv_screenshot_docker:v9172024" - File? truth_haplotype_1 - File? truth_haplotype_1_bai - File? truth_haplotype_2 - File? truth_haplotype_2_bai - File? targeted_vcf - File? targeted_vcf_tbi - File? second_alignment_reads - File? 
second_alignment_reads_bai } call RunIGVScreenshot { input: - bam_file=bam_file, - bam_file_bai=bam_file_bai, - regions_bed=regions_bed, - reference_fasta=reference_fasta, - reference_fasta_fai=reference_fasta_fai, - sample_name=sample_name, - image_height=image_height, - memory_mb=memory_mb, - disk_gb=disk_gb, - docker_image=docker_image, - truth_haplotype_1=truth_haplotype_1, - truth_haplotype_1_bai=truth_haplotype_1_bai, - truth_haplotype_2=truth_haplotype_2, - truth_haplotype_2_bai=truth_haplotype_2_bai, - targeted_vcf=targeted_vcf, - targeted_vcf_tbi=targeted_vcf_tbi, - second_alignment_reads=second_alignment_reads, - second_alignment_reads_bai=second_alignment_reads_bai + aligned_bam1 = aligned_bam1, + aligned_bam1_bai = aligned_bam1_bai, + aligned_bam2 = aligned_bam2, + aligned_bam2_bai = aligned_bam2_bai, + regions_bed = regions_bed, + reference_fasta = reference_fasta, + reference_fasta_fai = reference_fasta_fai, + truth_haplotype_1 = truth_haplotype_1, + truth_haplotype_1_bai = truth_haplotype_1_bai, + truth_haplotype_2 = truth_haplotype_2, + truth_haplotype_2_bai = truth_haplotype_2_bai, + haplotype_8x_hap1 = haplotype_8x_hap1, + haplotype_8x_hap1_bai = haplotype_8x_hap1_bai, + haplotype_8x_hap2 = haplotype_8x_hap2, + haplotype_8x_hap2_bai = haplotype_8x_hap2_bai, + TRGT_VCF = TRGT_VCF, + TRGT_VCF_tbi = TRGT_VCF_tbi, + sample_name = sample_name, + image_height = image_height, + memory_mb = memory_mb, + disk_gb = disk_gb, + docker_image = docker_image } output { - File igv_output_zip = RunIGVScreenshot.igv_output_zip + Array[File] screenshots = RunIGVScreenshot.screenshots } } task RunIGVScreenshot { - + input { - File bam_file - File bam_file_bai + File aligned_bam1 + File aligned_bam1_bai + File aligned_bam2 + File aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai + File? truth_haplotype_1 + File? truth_haplotype_1_bai + File? truth_haplotype_2 + File? truth_haplotype_2_bai + File? haplotype_8x_hap1 + File? haplotype_8x_hap1_bai + File? haplotype_8x_hap2 + File? haplotype_8x_hap2_bai + File? TRGT_VCF + File? TRGT_VCF_tbi String sample_name Int image_height Int memory_mb Int disk_gb String docker_image - File? truth_haplotype_1 - File? truth_haplotype_1_bai - File? truth_haplotype_2 - File? truth_haplotype_2_bai - File? targeted_vcf - File? targeted_vcf_tbi - File? second_alignment_reads - File? 
second_alignment_reads_bai } command <<< @@ -85,20 +97,19 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ - ${bam_file} \ + ${aligned_bam1} \ + --second_alignment_reads ${aligned_bam2} \ + ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ + ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ + ~{if defined(TRGT_VCF) then "--targeted_vcf " + TRGT_VCF else ""} \ + ~{if defined(haplotype_8x_hap1) then "--second_alignment_reads " + haplotype_8x_hap1 else ""} \ + ~{if defined(haplotype_8x_hap2) then "--second_alignment_reads " + haplotype_8x_hap2 else ""} \ -r ${regions_bed} \ -f ${reference_fasta} \ --sample_name ${sample_name} \ --snapshot_format png \ --output_dir igv_output \ - -ht ${image_height} \ - ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ - ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ - ~{if defined(targeted_vcf) then "--targeted_vcf " + targeted_vcf else ""} \ - ~{if defined(second_alignment_reads) then "--second_alignment_reads " + second_alignment_reads else ""} - - # Zip the output directory - zip -r igv_output.zip igv_output/ + -ht ${image_height} >>> runtime { @@ -109,6 +120,6 @@ task RunIGVScreenshot { } output { - File igv_output_zip = "igv_output.zip" + Array[File] screenshots = glob("igv_output/*.png") } } From 15545f96168c9187cfac740c442c0108ec06d18c Mon Sep 17 00:00:00 2001 From: Shadi Zaheri <74751641+shadizaheri@users.noreply.github.com> Date: Wed, 18 Sep 2024 20:36:30 -0400 Subject: [PATCH 48/49] make one of the bam inputs optional --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 5c1eb4e00..727eeb81d 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -5,8 +5,8 @@ workflow IGVScreenshotWorkflow { input { File aligned_bam1 File aligned_bam1_bai - File aligned_bam2 - File aligned_bam2_bai + File? aligned_bam2 + File? aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai @@ -63,8 +63,8 @@ task RunIGVScreenshot { input { File aligned_bam1 File aligned_bam1_bai - File aligned_bam2 - File aligned_bam2_bai + File? aligned_bam2 + File? 
aligned_bam2_bai File regions_bed File reference_fasta File reference_fasta_fai @@ -98,7 +98,7 @@ task RunIGVScreenshot { # Run the IGV screenshot script with the provided inputs python3 /opt/IGV_Linux_2.18.2/make_igv_screenshot.py \ ${aligned_bam1} \ - --second_alignment_reads ${aligned_bam2} \ + ~{if defined(aligned_bam2) then "--second_alignment_reads " + aligned_bam2 else ""} \ ~{if defined(truth_haplotype_1) then "--truth_haplotype_1 " + truth_haplotype_1 else ""} \ ~{if defined(truth_haplotype_2) then "--truth_haplotype_2 " + truth_haplotype_2 else ""} \ ~{if defined(TRGT_VCF) then "--targeted_vcf " + TRGT_VCF else ""} \ From 5e59d9d941ac761c33ed702f3f36e961726a1b2e Mon Sep 17 00:00:00 2001 From: Shadi Zaheri <74751641+shadizaheri@users.noreply.github.com> Date: Wed, 18 Sep 2024 20:53:02 -0400 Subject: [PATCH 49/49] SSD --- wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl index 727eeb81d..1f23fbbdb 100644 --- a/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl +++ b/wdl/pipelines/PacBio/Utility/IGV_HaplotypeViz.wdl @@ -116,7 +116,7 @@ task RunIGVScreenshot { docker: docker_image memory: "${memory_mb} MB" cpu: 2 - disks: "local-disk ${disk_gb} HDD" + disks: "local-disk ${disk_gb} SSD" } output {
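A minimal Cromwell/Terra-style inputs JSON for the final IGVScreenshotWorkflow
above might look like the following sketch (the gs:// paths and the HG002
sample name are placeholders); the optional tracks (aligned_bam2,
truth_haplotype_1/2, haplotype_8x_hap1/2, TRGT_VCF and their indexes) take the
same "IGVScreenshotWorkflow.<input_name>" key pattern:

    {
      "IGVScreenshotWorkflow.aligned_bam1": "gs://example-bucket/HG002.hap1.bam",
      "IGVScreenshotWorkflow.aligned_bam1_bai": "gs://example-bucket/HG002.hap1.bam.bai",
      "IGVScreenshotWorkflow.regions_bed": "gs://example-bucket/regions.bed",
      "IGVScreenshotWorkflow.reference_fasta": "gs://example-bucket/GRCh38.fa",
      "IGVScreenshotWorkflow.reference_fasta_fai": "gs://example-bucket/GRCh38.fa.fai",
      "IGVScreenshotWorkflow.sample_name": "HG002"
    }

One syntax note on the task's command block: the WDL 1.0 spec only defines
~{...} placeholders inside <<< ... >>> heredoc commands (the patch above already
rewrites the aligned_bam2 reference that way), so the remaining ${aligned_bam1},
${regions_bed}, ${reference_fasta}, ${sample_name} and ${image_height}
references may not be interpolated by every engine and are likely safer written
with ~{...} as well.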