griffithlab · susannasiebert · Nov 3, 2025 · Oct 15, 2025 · Oct 17, 2025 · Oct 20, 2025
diff --git a/pvactools/lib/aggregate_all_epitopes.py b/pvactools/lib/aggregate_all_epitopes.py
@@ -598,17 +598,31 @@ def sort_transcripts(self, annotations, included_df):
         transcript_table = pd.DataFrame.from_records(records)
 
         transcript_table['Biotype Sort'] = transcript_table.Biotype.map(lambda x: 1 if x == 'protein_coding' else 2)
-        transcript_table['TSL Sort'] = transcript_table.apply(lambda x: 1 if is_preferred_transcript(x, self.transcript_prioritization_strategy, self.maximum_transcript_support_level) else 2, axis=1)
-
+        transcript_table['mane_select_sort'] = transcript_table["MANE Select"].apply(lambda x: 1 if x else 2)
+        transcript_table['canonical_sort'] = transcript_table["Canonical"].apply(lambda x: 1 if x else 2)
+        transcript_table['tsl_sort'] = transcript_table["Transcript Support Level"].apply(lambda x: 6 if x in ['NA', 'Not Supported'] or pd.isna(x) else int(x))
+        sort_columns = [
+            "Biotype Sort",
+            "mane_select_sort",
+            "canonical_sort",
+            "tsl_sort",
+            "Length",
+            "Expr"
+        ]
+        sort_orders = [
+            True,
+            True,
+            True,
+            True,
+            False,
+            False
+        ]
         if self.allow_incomplete_transcripts:
             transcript_table['Transcript CDS Flags Sort'] = transcript_table['Transcript CDS Flags'].apply(lambda x: 1 if x == "None" else (2 if any(flag in str(x) for flag in ["cds_start_nf", "cds_end_nf"]) else 1))
-            sort_columns = ["Biotype Sort", "Transcript CDS Flags Sort", "TSL Sort", "Length"]
-            sort_order = [True, True, True, False]
-        else:
-            sort_columns = ["Biotype Sort", "TSL Sort", "Length"]
-            sort_order = [True, True, False]
+            sort_columns.insert(0, 'Transcript CDS Flags Sort')
+            sort_orders.insert(0, True)
 
-        transcript_table.sort_values(by=sort_columns, ascending=sort_order, inplace=True)
+        transcript_table.sort_values(by=sort_columns, ascending=sort_orders, inplace=True)
         return transcript_table
 
     def calculate_unique_peptide_count(self, included_df):

diff --git a/pvactools/lib/get_best_candidate.py b/pvactools/lib/get_best_candidate.py
@@ -1,3 +1,5 @@
+import pandas as pd
+
 from pvactools.lib.anchor_residue_pass import AnchorResiduePass
 from pvactools.lib.run_utils import is_preferred_transcript
 
@@ -19,11 +21,19 @@ def __init__(
         self.allow_incomplete_transcripts = allow_incomplete_transcripts
 
     def get(self, df):
-        #get all entries with Biotype 'protein_coding'
-        biotype_df = df[df['Biotype'] == 'protein_coding']
+        #get all entries that don't have CDS Flags
+        if self.allow_incomplete_transcripts:
+            cds_df = df[df['Transcript CDS Flags'] == 'None']
+            if cds_df.shape[0] == 0:
+                cds_df = df
+        else:
+            cds_df = df
+
+        #subset cds dataframe to only get entries with Biotype 'protein_coding'
+        biotype_df = cds_df[cds_df['Biotype'] == 'protein_coding']
         #if there are none, reset to previous dataframe
         if biotype_df.shape[0] == 0:
-            biotype_df = df
+            biotype_df = cds_df
 
         #subset protein_coding dataframe to only preferred transcripts
         biotype_df['transcript_pass'] = biotype_df.apply(lambda x: is_preferred_transcript(x, self.transcript_prioritization_strategy, self.maximum_transcript_support_level), axis=1)
@@ -32,7 +42,7 @@ def get(self, df):
         if transcript_df.shape[0] == 0:
             transcript_df = biotype_df
 
-        #subset tsl dataframe to only include entries with no problematic positions
+        #subset transcript dataframe to only include entries with no problematic positions
         if 'Problematic Positions' in transcript_df:
             prob_pos_df = transcript_df[transcript_df['Problematic Positions'] == "None"]
             #if this results in an empty dataframe, reset to previous dataframe
@@ -47,28 +57,31 @@ def get(self, df):
         if anchor_residue_pass_df.shape[0] == 0:
             anchor_residue_pass_df = prob_pos_df
 
-        # if allow_incomplete_transcripts is True, deprioritize certain flags
-        if self.allow_incomplete_transcripts:
-            anchor_residue_pass_df['Transcript CDS Flags Sort'] = anchor_residue_pass_df['Transcript CDS Flags'].apply(
-                lambda x: 1 if x == "None" else (2 if any(flag in str(x) for flag in ["cds_start_nf", "cds_end_nf"]) else 1)
-            )
-            sort_columns = [
-                'Transcript CDS Flags Sort',
-                f"{self.mt_top_score_metric} MT {self.top_score_mode}",
-                'Transcript Length',
-            ]
-            sort_order = [True, True, False]
-        else:
-            sort_columns = [
-                f"{self.mt_top_score_metric} MT {self.top_score_mode}",
-                'Transcript Length',
-            ]
-            sort_order = [True, False]
-
-        #determine the entry with the lowest IC50 Score, transcript prioritization status, and longest Transcript
+        #set up sorting criteria
+        anchor_residue_pass_df['mane_select_sort'] = anchor_residue_pass_df["MANE Select"].apply(lambda x: 1 if x else 2)
+        anchor_residue_pass_df['canonical_sort'] = anchor_residue_pass_df["Canonical"].apply(lambda x: 1 if x else 2)
+        anchor_residue_pass_df['tsl_sort'] = anchor_residue_pass_df["Transcript Support Level"].apply(lambda x: 6 if x in ['NA', 'Not Supported'] or pd.isna(x) else int(x))
+        sort_columns = [
+            f"{self.mt_top_score_metric} MT {self.top_score_mode}",
+            "mane_select_sort",
+            "canonical_sort",
+            "tsl_sort",
+            "Transcript Length",
+            "Transcript Expression"
+        ]
+        sort_orders = [
+            True,
+            True,
+            True,
+            True,
+            False,
+            False
+        ]
+
+        #Sort the dataframe according to the criteria and pick the first (best) one
         anchor_residue_pass_df.sort_values(
             by=sort_columns,
-            ascending=sort_order,
+            ascending=sort_orders,
             inplace=True
         )
         return anchor_residue_pass_df.iloc[0]
@@ -127,11 +140,19 @@ def __init__(
         self.allow_incomplete_transcripts=allow_incomplete_transcripts
 
     def get(self, df):
-        #get all entries with Biotype 'protein_coding'
-        biotype_df = df[df['Biotype'] == 'protein_coding']
+        #get all entries that don't have CDS Flags
+        if self.allow_incomplete_transcripts:
+            cds_df = df[df['Transcript CDS Flags'] == 'None']
+            if cds_df.shape[0] == 0:
+                cds_df = df
+        else:
+            cds_df = df
+
+        #subset cds dataframe to only get entries with Biotype 'protein_coding'
+        biotype_df = cds_df[cds_df['Biotype'] == 'protein_coding']
         #if there are none, reset to previous dataframe
         if biotype_df.shape[0] == 0:
-            biotype_df = df
+            biotype_df = cds_df
 
         #subset protein_coding dataframe to only preferred transcripts
         biotype_df['transcript_pass'] = biotype_df.apply(lambda x: is_preferred_transcript(x, self.transcript_prioritization_strategy, self.maximum_transcript_support_level), axis=1)
@@ -149,28 +170,31 @@ def get(self, df):
         else:
             prob_pos_df = transcript_df
 
-        # if allow_incomplete_transcripts is True, deprioritize certain flags
-        if self.allow_incomplete_transcripts:
-            prob_pos_df['Transcript CDS Flags Sort'] = prob_pos_df['Transcript CDS Flags'].apply(
-                lambda x: 1 if x == "None" else (2 if any(flag in str(x) for flag in ["cds_start_nf", "cds_end_nf"]) else 1)
-            )
-            sort_columns = [
-                'Transcript CDS Flags Sort',
-                f"{self.mt_top_score_metric} {self.top_score_mode}",
-                "WT Protein Length",
-            ]
-            sort_order = [True, True, False]
-        else:
-            sort_columns = [
-                f"{self.mt_top_score_metric} {self.top_score_mode}",
-                "WT Protein Length",
-            ]
-            sort_order = [True, False]
-
-        #determine the entry with the lowest IC50 Score, transcript prioritization status, and longest Transcript
+        #set up sorting criteria
+        prob_pos_df['mane_select_sort'] = prob_pos_df["MANE Select"].apply(lambda x: 1 if x else 2)
+        prob_pos_df['canonical_sort'] = prob_pos_df["Canonical"].apply(lambda x: 1 if x else 2)
+        prob_pos_df['tsl_sort'] = prob_pos_df["Transcript Support Level"].apply(lambda x: 6 if x in ['NA', 'Not Supported'] or pd.isna(x) else int(x))
+        sort_columns = [
+            f"{self.mt_top_score_metric} {self.top_score_mode}",
+            "mane_select_sort",
+            "canonical_sort",
+            "tsl_sort",
+            "WT Protein Length",
+            "Transcript Expression"
+        ]
+        sort_orders = [
+            True,
+            True,
+            True,
+            True,
+            False,
+            False
+        ]
+
+        #Sort the dataframe according to the criteria and pick the first (best) one
         prob_pos_df.sort_values(
             by=sort_columns,
-            ascending=sort_order,
+            ascending=sort_orders,
             inplace=True
         )
         return prob_pos_df.iloc[0]
diff --git a/pvactools/lib/input_file_converter.py b/pvactools/lib/input_file_converter.py
@@ -647,7 +647,7 @@ def execute(self):
                         else:
                             output_row['mane_select'] = 'True'
                     else:
-                        output_row['mane_select'] = 'Not Supported'
+                        output_row['mane_select'] = 'Not Run'
 
                     for (tag, key, comparison_fields) in zip(['TX', 'GX'], ['transcript_expression', 'gene_expression'], [[transcript_name], [ensembl_gene_id, gene_name]]):
                         if tag in self.vcf_reader.header.format_ids():

diff --git a/pvactools/lib/post_processor.py b/pvactools/lib/post_processor.py
@@ -6,6 +6,7 @@
 from pvactools.lib.aggregate_all_epitopes import PvacseqAggregateAllEpitopes, PvacfuseAggregateAllEpitopes, PvacbindAggregateAllEpitopes, PvacspliceAggregateAllEpitopes
 from pvactools.lib.binding_filter import BindingFilter
 from pvactools.lib.filter import Filter, FilterCriterion
+from pvactools.lib.transcript_filter import TranscriptFilter
 from pvactools.lib.top_score_filter import PvacseqTopScoreFilter, PvacfuseTopScoreFilter, PvacbindTopScoreFilter, PvacspliceTopScoreFilter
 from pvactools.lib.calculate_manufacturability import CalculateManufacturability
 from pvactools.lib.calculate_reference_proteome_similarity import CalculateReferenceProteomeSimilarity
@@ -225,12 +226,11 @@ def execute_coverage_filter(self):
     def execute_transcript_support_level_filter(self):
         if self.run_transcript_support_level_filter:
             print("Running Transcript Support Level Filter")
-            filter_criteria = [FilterCriterion('Transcript Support Level', '<=', self.maximum_transcript_support_level, exclude_nas=True, skip_value='Not Supported')]
-            Filter(
+            TranscriptFilter(
                 self.coverage_filter_fh.name,
                 self.transcript_support_level_filter_fh.name,
-                filter_criteria,
-                ['Transcript Support Level'],
+                self.transcript_prioritization_strategy,
+                self.maximum_transcript_support_level
             ).execute()
             print("Complete")
         else:

diff --git a/pvactools/lib/update_tiers.py b/pvactools/lib/update_tiers.py
@@ -6,6 +6,7 @@
 import tempfile
 import shutil
 import argparse
+import json
 
 from pvactools.lib.prediction_class import PredictionClass
 from pvactools.lib.run_utils import is_preferred_transcript, float_range, transcript_prioritization_strategy
@@ -33,6 +34,7 @@ def execute(self):
         output_df.to_csv(self.output_file, sep='\t', na_rep='NA', index=False, float_format='%.3f')
         shutil.copy(self.output_file.name, self.input_file)
         self.output_file.close()
+        self.update_metrics_file()
 
     @abstractmethod
     def get_tier(self, mutation):
@@ -42,6 +44,9 @@ def get_tier(self, mutation):
     def sort_table(self, output_lines):
         raise Exception("Must implement method in child class")
 
+    def update_metrics_file(self):
+        pass
+
     @classmethod
     def parser(cls, tool):
         parser = argparse.ArgumentParser(
@@ -53,6 +58,11 @@ def parser(cls, tool):
             'input_file',
             help="Input aggregated file with tiers to update. This file will be overwritten with the output."
         )
+        if tool in ['pvacseq']:
+            parser.add_argument(
+                'metrics_file',
+                help="metrics.json file corresponding to the input aggregated file. This file will be overwritten to update tiering parameters used by this command."
+            )
         if tool in ['pvacseq', 'pvacsplice']:
             parser.add_argument(
                 'vaf_clonal', type=float_range(0.0, 1.0),
@@ -99,7 +109,13 @@ def parser(cls, tool):
                 help="Tumor RNA Coverage Cutoff to consider when evaluating the expression criteria. Only sites above this read depth cutoff will be considered.",
                 default=10
             )
-        if tool in ['pvacseq', 'pvacfuse', 'pvacsplice']:
+        if tool in ['pvacseq', 'pvacsplice']:
+            parser.add_argument(
+                '--expn-val', type=float,
+                help="Gene and Transcript Expression cutoff. Sites above this cutoff will be considered.",
+                default=1.0
+            )
+        if tool in ['pvacfuse']:
             parser.add_argument(
                 '--expn-val', type=float,
                 help="Expression Cutoff to consider when evaluating the expression criteria. Expression is meassured as FFPM (fusion fragments per million total reads). Sites above this cutoff will be considered.",
@@ -167,6 +183,7 @@ def __init__(
             allele_specific_anchors=False,
             anchor_contribution_threshold=0.8,
             top_score_metric2='ic50',
+            metrics_file=None,
         ):
         self.input_file = input_file
         self.output_file = tempfile.NamedTemporaryFile()
@@ -180,6 +197,7 @@ def __init__(
         self.trna_vaf = trna_vaf
         self.transcript_prioritization_strategy = transcript_prioritization_strategy
         self.maximum_transcript_support_level = maximum_transcript_support_level
+        self.metrics_file=metrics_file
         if top_score_metric2 == "percentile":
             self.top_score_mode = "%ile MT"
         else:
@@ -341,6 +359,28 @@ def sort_table(self, output_lines):
 
         return df
 
+    def update_metrics_file(self):
+        if self.metrics_file is not None:
+            output_metrics_file = tempfile.NamedTemporaryFile()
+            with open(self.metrics_file, 'r') as input_fh, open(output_metrics_file.name, 'w') as output_fh:
+                metrics = json.loads(input_fh.read())
+                metrics['vaf_clonal'] = round(self.vaf_clonal, 3)
+                metrics['vaf_subclonal'] = round(self.vaf_clonal/2, 3)
+                metrics['binding_threshold'] = self.binding_threshold
+                metrics['trna_vaf'] = self.trna_vaf
+                metrics['trna_cov'] = self.trna_cov
+                metrics['allele_expr_threshold'] = self.allele_expr_threshold
+                metrics['transcript_prioritization_strategy'] = sorted(self.transcript_prioritization_strategy)
+                metrics['maximum_transcript_support_level'] = self.maximum_transcript_support_level
+                metrics['percentile_threshold'] = self.percentile_threshold
+                metrics['percentile_threshold_strategy'] = self.percentile_threshold_strategy
+                metrics['use_allele_specific_binding_thresholds'] = self.use_allele_specific_binding_thresholds
+                metrics['top_score_metric2'] = 'ic50' if self.top_score_mode == "IC50 MT" else 'percentile'
+                metrics['allele_specific_anchors'] = self.anchor_calculator.use_allele_specific_anchors
+                metrics['anchor_contribution_threshold'] = self.anchor_calculator.anchor_contribution_threshold
+                json.dump(metrics, output_fh, indent=2, separators=(',', ': '))
+            shutil.copy(output_metrics_file.name, self.metrics_file)
+
 class PvacfuseUpdateTiers(UpdateTiers, metaclass=ABCMeta):
     def __init__(
         self,

diff --git a/pvactools/tools/compare.py b/pvactools/tools/compare.py
@@ -123,6 +123,8 @@ def define_parser():
     default_reference_match_columns = ["Peptide", "Match Window"]
 
     parser = argparse.ArgumentParser(
+        "pvactools compare",
+        description="Run a comparison between two output results folders",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument(

diff --git a/pvactools/tools/pvacseq/update_tiers.py b/pvactools/tools/pvacseq/update_tiers.py
@@ -26,6 +26,7 @@ def main(args_input = sys.argv[1:]):
         allele_specific_anchors=args.allele_specific_anchors,
         anchor_contribution_threshold=args.anchor_contribution_threshold,
         top_score_metric2=args.top_score_metric2,
+        metrics_file=args.metrics_file,
     ).execute()
 
 if __name__ == "__main__":

diff --git a/pvactools/tools/pvacsplice/generate_aggregated_report.py b/pvactools/tools/pvacsplice/generate_aggregated_report.py
@@ -85,6 +85,15 @@ def define_parser():
         default=1.0,
         help="Gene and Expression cutoff. Used to calculate the allele expression cutoff for tiering.",
     )
+    parser.add_argument(
+        "--transcript-prioritization-strategy", type=transcript_prioritization_strategy(),
+        help="Specify the criteria to consider when prioritizing or filtering transcripts of the neoantigen candidates during aggregate report creation or TSL filtering. "
+             + "'canonical' will prioritize/select candidates resulting from variants on a Ensembl canonical transcript. "
+             + "'mane_select' will prioritize/select candidates resulting from variants on a MANE select transcript. "
+             + "'tsl' will prioritize/select candidates where the transcript support level (TSL) matches the --maximum-transcript-support-level. "
+             + "When selecting more than one criteria, a transcript meeting EITHER of the selected criteria will be prioritized/selected.",
+        default=['canonical', 'mane_select', 'tsl']
+    )
     parser.add_argument(
         "--maximum-transcript-support-level", type=int,
         help="The threshold to use for filtering epitopes on the Ensembl transcript support level (TSL). "
@@ -114,6 +123,7 @@ def main(args_input = sys.argv[1:]):
         trna_vaf=args.trna_vaf,
         trna_cov=args.trna_cov,
         expn_val=args.expn_val,
+        transcript_prioritization_strategy=args.transcript_prioritization_strategy,
         maximum_transcript_support_level=args.maximum_transcript_support_level,
         top_score_metric=args.top_score_metric,
         top_score_metric2=args.top_score_metric2,